From dd17c8f72993f9461e9c19250e3f155d6d99df22 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 29 Oct 2009 22:34:15 +0900 Subject: percpu: remove per_cpu__ prefix. Now that the return from alloc_percpu is compatible with the address of per-cpu vars, it makes sense to hand around the address of per-cpu variables. To make this sane, we remove the per_cpu__ prefix we used created to stop people accidentally using these vars directly. Now we have sparse, we can use that (next patch). tj: * Updated to convert stuff which were missed by or added after the original patch. * Kill per_cpu_var() macro. Signed-off-by: Rusty Russell Signed-off-by: Tejun Heo Reviewed-by: Christoph Lameter --- include/linux/percpu-defs.h | 18 ++++++------------ include/linux/percpu.h | 5 ++--- include/linux/vmstat.h | 8 ++++---- 3 files changed, 12 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 5a5d6ce4bd5..ee99f6c2cdc 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -1,12 +1,6 @@ #ifndef _LINUX_PERCPU_DEFS_H #define _LINUX_PERCPU_DEFS_H -/* - * Determine the real variable name from the name visible in the - * kernel sources. - */ -#define per_cpu_var(var) per_cpu__##var - /* * Base implementations of per-CPU variable declarations and definitions, where * the section in which the variable is to be placed is provided by the @@ -56,24 +50,24 @@ */ #define DECLARE_PER_CPU_SECTION(type, name, sec) \ extern __PCPU_DUMMY_ATTRS char __pcpu_scope_##name; \ - extern __PCPU_ATTRS(sec) __typeof__(type) per_cpu__##name + extern __PCPU_ATTRS(sec) __typeof__(type) name #define DEFINE_PER_CPU_SECTION(type, name, sec) \ __PCPU_DUMMY_ATTRS char __pcpu_scope_##name; \ extern __PCPU_DUMMY_ATTRS char __pcpu_unique_##name; \ __PCPU_DUMMY_ATTRS char __pcpu_unique_##name; \ __PCPU_ATTRS(sec) PER_CPU_DEF_ATTRIBUTES __weak \ - __typeof__(type) per_cpu__##name + __typeof__(type) name #else /* * Normal declaration and definition macros. */ #define DECLARE_PER_CPU_SECTION(type, name, sec) \ - extern __PCPU_ATTRS(sec) __typeof__(type) per_cpu__##name + extern __PCPU_ATTRS(sec) __typeof__(type) name #define DEFINE_PER_CPU_SECTION(type, name, sec) \ __PCPU_ATTRS(sec) PER_CPU_DEF_ATTRIBUTES \ - __typeof__(type) per_cpu__##name + __typeof__(type) name #endif /* @@ -137,8 +131,8 @@ /* * Intermodule exports for per-CPU variables. */ -#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) -#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) +#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(var) +#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(var) #endif /* _LINUX_PERCPU_DEFS_H */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 522f421ec21..e12410e55e0 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -182,7 +182,7 @@ static inline void *pcpu_lpage_remapped(void *kaddr) #ifndef percpu_read # define percpu_read(var) \ ({ \ - typeof(per_cpu_var(var)) __tmp_var__; \ + typeof(var) __tmp_var__; \ __tmp_var__ = get_cpu_var(var); \ put_cpu_var(var); \ __tmp_var__; \ @@ -253,8 +253,7 @@ do { \ /* * Optimized manipulation for memory allocated through the per cpu - * allocator or for addresses of per cpu variables (can be determined - * using per_cpu_var(xx). + * allocator or for addresses of per cpu variables. * * These operation guarantee exclusivity of access for other operations * on the *same* processor. The assumption is that per cpu data is only diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index d85889710f9..3e489fda11a 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -76,22 +76,22 @@ DECLARE_PER_CPU(struct vm_event_state, vm_event_states); static inline void __count_vm_event(enum vm_event_item item) { - __this_cpu_inc(per_cpu_var(vm_event_states).event[item]); + __this_cpu_inc(vm_event_states.event[item]); } static inline void count_vm_event(enum vm_event_item item) { - this_cpu_inc(per_cpu_var(vm_event_states).event[item]); + this_cpu_inc(vm_event_states.event[item]); } static inline void __count_vm_events(enum vm_event_item item, long delta) { - __this_cpu_add(per_cpu_var(vm_event_states).event[item], delta); + __this_cpu_add(vm_event_states.event[item], delta); } static inline void count_vm_events(enum vm_event_item item, long delta) { - this_cpu_add(per_cpu_var(vm_event_states).event[item], delta); + this_cpu_add(vm_event_states.event[item], delta); } extern void all_vm_events(unsigned long *); -- cgit v1.2.3-70-g09d2 From f7b64fe806029e0a0454df132eec3c5ab576102c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 29 Oct 2009 22:34:15 +0900 Subject: percpu: make access macros universal Now that per_cpu__ prefix is gone, there's no distinction between static and dynamic percpu variables. Make get_cpu_var() take dynamic percpu variables and ensure that all macros have parentheses around the parameter evaluation and evaluate the variable parameter only once such that any expression which evaluates to percpu address can be used safely. Signed-off-by: Tejun Heo --- include/linux/percpu.h | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index e12410e55e0..f965f833a64 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -27,10 +27,13 @@ * we force a syntax error here if it isn't. */ #define get_cpu_var(var) (*({ \ - extern int simple_identifier_##var(void); \ preempt_disable(); \ &__get_cpu_var(var); })) -#define put_cpu_var(var) preempt_enable() + +#define put_cpu_var(var) do { \ + (void)(var); \ + preempt_enable(); \ +} while (0) #ifdef CONFIG_SMP @@ -182,17 +185,19 @@ static inline void *pcpu_lpage_remapped(void *kaddr) #ifndef percpu_read # define percpu_read(var) \ ({ \ - typeof(var) __tmp_var__; \ - __tmp_var__ = get_cpu_var(var); \ - put_cpu_var(var); \ - __tmp_var__; \ + typeof(var) *pr_ptr__ = &(var); \ + typeof(var) pr_ret__; \ + pr_ret__ = get_cpu_var(*pr_ptr__); \ + put_cpu_var(*pr_ptr__); \ + pr_ret__; \ }) #endif #define __percpu_generic_to_op(var, val, op) \ do { \ - get_cpu_var(var) op val; \ - put_cpu_var(var); \ + typeof(var) *pgto_ptr__ = &(var); \ + get_cpu_var(*pgto_ptr__) op val; \ + put_cpu_var(*pgto_ptr__); \ } while (0) #ifndef percpu_write @@ -304,7 +309,7 @@ do { \ #define _this_cpu_generic_to_op(pcp, val, op) \ do { \ preempt_disable(); \ - *__this_cpu_ptr(&pcp) op val; \ + *__this_cpu_ptr(&(pcp)) op val; \ preempt_enable(); \ } while (0) -- cgit v1.2.3-70-g09d2 From e0fdb0e050eae331046385643618f12452aa7e73 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 29 Oct 2009 22:34:15 +0900 Subject: percpu: add __percpu for sparse. We have to make __kernel "__attribute__((address_space(0)))" so we can cast to it. tj: * put_cpu_var() update. * Annotations added to dynamic allocator interface. Signed-off-by: Rusty Russell Cc: Al Viro Signed-off-by: Tejun Heo --- include/asm-generic/percpu.h | 4 +++- include/linux/compiler.h | 4 +++- include/linux/percpu-defs.h | 2 +- include/linux/percpu.h | 18 +++++++++++------- 4 files changed, 18 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index ca6f0491412..fded453fd25 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -41,7 +41,9 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; * Only S390 provides its own means of moving the pointer. */ #ifndef SHIFT_PERCPU_PTR -#define SHIFT_PERCPU_PTR(__p, __offset) RELOC_HIDE((__p), (__offset)) +/* Weird cast keeps both GCC and sparse happy. */ +#define SHIFT_PERCPU_PTR(__p, __offset) \ + RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset)) #endif /* diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 04fb5135b4e..abba8045c6e 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -5,7 +5,7 @@ #ifdef __CHECKER__ # define __user __attribute__((noderef, address_space(1))) -# define __kernel /* default address space */ +# define __kernel __attribute__((address_space(0))) # define __safe __attribute__((safe)) # define __force __attribute__((force)) # define __nocast __attribute__((nocast)) @@ -15,6 +15,7 @@ # define __acquire(x) __context__(x,1) # define __release(x) __context__(x,-1) # define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) +# define __percpu __attribute__((noderef, address_space(3))) extern void __chk_user_ptr(const volatile void __user *); extern void __chk_io_ptr(const volatile void __iomem *); #else @@ -32,6 +33,7 @@ extern void __chk_io_ptr(const volatile void __iomem *); # define __acquire(x) (void)0 # define __release(x) (void)0 # define __cond_lock(x,c) (c) +# define __percpu #endif #ifdef __KERNEL__ diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index ee99f6c2cdc..0fa0cb52425 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -12,7 +12,7 @@ * that section. */ #define __PCPU_ATTRS(sec) \ - __attribute__((section(PER_CPU_BASE_SECTION sec))) \ + __percpu __attribute__((section(PER_CPU_BASE_SECTION sec))) \ PER_CPU_ATTRIBUTES #define __PCPU_DUMMY_ATTRS \ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index f965f833a64..2c0d31a3f6b 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -30,8 +30,12 @@ preempt_disable(); \ &__get_cpu_var(var); })) +/* + * The weird & is necessary because sparse considers (void)(var) to be + * a direct dereference of percpu variable (var). + */ #define put_cpu_var(var) do { \ - (void)(var); \ + (void)&(var); \ preempt_enable(); \ } while (0) @@ -130,9 +134,9 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size, */ #define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))) -extern void *__alloc_reserved_percpu(size_t size, size_t align); -extern void *__alloc_percpu(size_t size, size_t align); -extern void free_percpu(void *__pdata); +extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align); +extern void __percpu *__alloc_percpu(size_t size, size_t align); +extern void free_percpu(void __percpu *__pdata); #ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA extern void __init setup_per_cpu_areas(void); @@ -142,7 +146,7 @@ extern void __init setup_per_cpu_areas(void); #define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) -static inline void *__alloc_percpu(size_t size, size_t align) +static inline void __percpu *__alloc_percpu(size_t size, size_t align) { /* * Can't easily make larger alignment work with kmalloc. WARN @@ -153,7 +157,7 @@ static inline void *__alloc_percpu(size_t size, size_t align) return kzalloc(size, GFP_KERNEL); } -static inline void free_percpu(void *p) +static inline void free_percpu(void __percpu *p) { kfree(p); } @@ -168,7 +172,7 @@ static inline void *pcpu_lpage_remapped(void *kaddr) #endif /* CONFIG_SMP */ #define alloc_percpu(type) \ - (typeof(type) *)__alloc_percpu(sizeof(type), __alignof__(type)) + (typeof(type) __percpu *)__alloc_percpu(sizeof(type), __alignof__(type)) /* * Optional methods for optimized non-lvalue per-cpu variable access. -- cgit v1.2.3-70-g09d2 From 545695fb41da117928ab946067a42d9e15fd009d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 29 Oct 2009 22:34:15 +0900 Subject: percpu: make accessors check for percpu pointer in sparse The previous patch made sparse warn about percpu variables being used directly without going through percpu accessors. This patch implements the other half - checking whether non percpu variable is passed into percpu accessors. Signed-off-by: Tejun Heo Cc: Rusty Russell Cc: Al Viro --- include/asm-generic/percpu.h | 6 ++++-- include/linux/percpu-defs.h | 20 ++++++++++++++++++-- include/linux/percpu.h | 2 ++ 3 files changed, 24 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index fded453fd25..04f91c2d3f7 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -42,8 +42,10 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; */ #ifndef SHIFT_PERCPU_PTR /* Weird cast keeps both GCC and sparse happy. */ -#define SHIFT_PERCPU_PTR(__p, __offset) \ - RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset)) +#define SHIFT_PERCPU_PTR(__p, __offset) ({ \ + __verify_pcpu_ptr((__p)); \ + RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset)); \ +}) #endif /* diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 0fa0cb52425..1fa36eb54b6 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -18,6 +18,16 @@ #define __PCPU_DUMMY_ATTRS \ __attribute__((section(".discard"), unused)) +/* + * Macro which verifies @ptr is a percpu pointer without evaluating + * @ptr. This is to be used in percpu accessors to verify that the + * input parameter is a percpu pointer. + */ +#define __verify_pcpu_ptr(ptr) do { \ + void __percpu *__vpp_verify = (typeof(ptr))NULL; \ + (void)__vpp_verify; \ +} while (0) + /* * s390 and alpha modules require percpu variables to be defined as * weak to force the compiler to generate GOT based external @@ -129,10 +139,16 @@ __aligned(PAGE_SIZE) /* - * Intermodule exports for per-CPU variables. + * Intermodule exports for per-CPU variables. sparse forgets about + * address space across EXPORT_SYMBOL(), change EXPORT_SYMBOL() to + * noop if __CHECKER__. */ +#ifndef __CHECKER__ #define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(var) #define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(var) - +#else +#define EXPORT_PER_CPU_SYMBOL(var) +#define EXPORT_PER_CPU_SYMBOL_GPL(var) +#endif #endif /* _LINUX_PERCPU_DEFS_H */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 2c0d31a3f6b..42878f0cd0e 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -237,6 +237,7 @@ extern void __bad_size_call_parameter(void); #define __pcpu_size_call_return(stem, variable) \ ({ typeof(variable) pscr_ret__; \ + __verify_pcpu_ptr(&(variable)); \ switch(sizeof(variable)) { \ case 1: pscr_ret__ = stem##1(variable);break; \ case 2: pscr_ret__ = stem##2(variable);break; \ @@ -250,6 +251,7 @@ extern void __bad_size_call_parameter(void); #define __pcpu_size_call(stem, variable, ...) \ do { \ + __verify_pcpu_ptr(&(variable)); \ switch(sizeof(variable)) { \ case 1: stem##1(variable, __VA_ARGS__);break; \ case 2: stem##2(variable, __VA_ARGS__);break; \ -- cgit v1.2.3-70-g09d2 From e169cfbef46d62e042614ffafa8880eed1d894bb Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Mon, 23 Nov 2009 14:53:09 -0700 Subject: of/flattree: merge find_flat_dt_string and initial_boot_params Merge common code between Microblaze and PowerPC. Signed-off-by: Grant Likely Reviewed-by: Wolfram Sang Tested-by: Michal Simek --- arch/microblaze/Kconfig | 1 + arch/microblaze/kernel/prom.c | 8 -------- arch/powerpc/Kconfig | 1 + arch/powerpc/kernel/prom.c | 12 ------------ drivers/of/Kconfig | 4 ++++ drivers/of/Makefile | 1 + drivers/of/fdt.c | 21 +++++++++++++++++++++ include/linux/of_fdt.h | 4 ++++ 8 files changed, 32 insertions(+), 20 deletions(-) create mode 100644 drivers/of/fdt.c (limited to 'include/linux') diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index bbd8327f189..f39c9275a29 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -111,6 +111,7 @@ config CMDLINE_FORCE config OF def_bool y + select OF_FLATTREE config PROC_DEVICETREE bool "Support for device tree in /proc" diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c index b817df172aa..06d620ab416 100644 --- a/arch/microblaze/kernel/prom.c +++ b/arch/microblaze/kernel/prom.c @@ -47,17 +47,9 @@ static int __initdata dt_root_size_cells; typedef u32 cell_t; -static struct boot_param_header *initial_boot_params; - /* export that to outside world */ struct device_node *of_chosen; -static inline char *find_flat_dt_string(u32 offset) -{ - return ((char *)initial_boot_params) + - initial_boot_params->off_dt_strings + offset; -} - /** * This function is used to scan the flattened device-tree, it is * used to extract the memory informations at boot before we can diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 2ba14e77296..2a75c6ae2a8 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -163,6 +163,7 @@ config PPC_OF config OF def_bool y + select OF_FLATTREE config PPC_UDBG_16550 bool diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 4ec30086246..fccf7e49bb2 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -73,12 +73,6 @@ unsigned long tce_alloc_start, tce_alloc_end; typedef u32 cell_t; -#if 0 -static struct boot_param_header *initial_boot_params __initdata; -#else -struct boot_param_header *initial_boot_params; -#endif - extern struct device_node *allnodes; /* temporary while merging */ extern rwlock_t devtree_lock; /* temporary while merging */ @@ -86,12 +80,6 @@ extern rwlock_t devtree_lock; /* temporary while merging */ /* export that to outside world */ struct device_node *of_chosen; -static inline char *find_flat_dt_string(u32 offset) -{ - return ((char *)initial_boot_params) + - initial_boot_params->off_dt_strings + offset; -} - /** * This function is used to scan the flattened device-tree, it is * used to extract the memory informations at boot before we can diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig index d2fa27c5c1b..462825e0312 100644 --- a/drivers/of/Kconfig +++ b/drivers/of/Kconfig @@ -1,3 +1,7 @@ +config OF_FLATTREE + bool + depends on OF + config OF_DEVICE def_bool y depends on OF && (SPARC || PPC_OF || MICROBLAZE) diff --git a/drivers/of/Makefile b/drivers/of/Makefile index bdfb5f5d4b0..f232cc98ce0 100644 --- a/drivers/of/Makefile +++ b/drivers/of/Makefile @@ -1,4 +1,5 @@ obj-y = base.o +obj-$(CONFIG_OF_FLATTREE) += fdt.o obj-$(CONFIG_OF_DEVICE) += device.o platform.o obj-$(CONFIG_OF_GPIO) += gpio.o obj-$(CONFIG_OF_I2C) += of_i2c.o diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c new file mode 100644 index 00000000000..9faa9a5cbdf --- /dev/null +++ b/drivers/of/fdt.c @@ -0,0 +1,21 @@ +/* + * Functions for working with the Flattened Device Tree data format + * + * Copyright 2009 Benjamin Herrenschmidt, IBM Corp + * benh@kernel.crashing.org + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ + +#include +#include + +struct boot_param_header *initial_boot_params; + +char *find_flat_dt_string(u32 offset) +{ + return ((char *)initial_boot_params) + + initial_boot_params->off_dt_strings + offset; +} diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index 41d432b1355..d1a79f3da78 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -57,7 +57,11 @@ struct boot_param_header { u32 dt_struct_size; /* size of the DT structure block */ }; +/* TBD: Temporary export of fdt globals - remove when code fully merged */ +extern struct boot_param_header *initial_boot_params; + /* For scanning the flat device-tree at boot time */ +extern char *find_flat_dt_string(u32 offset); extern int __init of_scan_flat_dt(int (*it)(unsigned long node, const char *uname, int depth, void *data), -- cgit v1.2.3-70-g09d2 From 31a6a87dfc34fbf02aef9a160adf558ec56d3ccd Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Mon, 23 Nov 2009 19:49:38 -0700 Subject: of/flattree: remove __init annotations from the header file __init annotation belongs in the .c file, not the header. Signed-off-by: Grant Likely Reviewed-by: Wolfram Sang Tested-by: Michal Simek --- include/linux/of_fdt.h | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index d1a79f3da78..81231e04e8f 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -62,15 +62,13 @@ extern struct boot_param_header *initial_boot_params; /* For scanning the flat device-tree at boot time */ extern char *find_flat_dt_string(u32 offset); -extern int __init of_scan_flat_dt(int (*it)(unsigned long node, - const char *uname, int depth, - void *data), - void *data); -extern void __init *of_get_flat_dt_prop(unsigned long node, const char *name, - unsigned long *size); -extern int __init of_flat_dt_is_compatible(unsigned long node, - const char *name); -extern unsigned long __init of_get_flat_dt_root(void); +extern int of_scan_flat_dt(int (*it)(unsigned long node, const char *uname, + int depth, void *data), + void *data); +extern void *of_get_flat_dt_prop(unsigned long node, const char *name, + unsigned long *size); +extern int of_flat_dt_is_compatible(unsigned long node, const char *name); +extern unsigned long of_get_flat_dt_root(void); /* Other Prototypes */ extern void finish_device_tree(void); -- cgit v1.2.3-70-g09d2 From bbd33931a08362f78266a4016211a35947b91041 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Mon, 23 Nov 2009 20:07:00 -0700 Subject: of/flattree: Merge unflatten_dt_node Merge common code between PowerPC and MicroBlaze Signed-off-by: Grant Likely Reviewed-by: Wolfram Sang Tested-by: Michal Simek --- arch/microblaze/kernel/prom.c | 195 ---------------------------------------- arch/powerpc/kernel/prom.c | 194 ---------------------------------------- drivers/of/fdt.c | 200 ++++++++++++++++++++++++++++++++++++++++++ include/linux/of_fdt.h | 4 + 4 files changed, 204 insertions(+), 389 deletions(-) (limited to 'include/linux') diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c index eb27bd3a39b..021770abfbd 100644 --- a/arch/microblaze/kernel/prom.c +++ b/arch/microblaze/kernel/prom.c @@ -50,201 +50,6 @@ typedef u32 cell_t; /* export that to outside world */ struct device_node *of_chosen; -static void *__init unflatten_dt_alloc(unsigned long *mem, unsigned long size, - unsigned long align) -{ - void *res; - - *mem = _ALIGN(*mem, align); - res = (void *)*mem; - *mem += size; - - return res; -} - -static unsigned long __init unflatten_dt_node(unsigned long mem, - unsigned long *p, - struct device_node *dad, - struct device_node ***allnextpp, - unsigned long fpsize) -{ - struct device_node *np; - struct property *pp, **prev_pp = NULL; - char *pathp; - u32 tag; - unsigned int l, allocl; - int has_name = 0; - int new_format = 0; - - tag = *((u32 *)(*p)); - if (tag != OF_DT_BEGIN_NODE) { - printk("Weird tag at start of node: %x\n", tag); - return mem; - } - *p += 4; - pathp = (char *)*p; - l = allocl = strlen(pathp) + 1; - *p = _ALIGN(*p + l, 4); - - /* version 0x10 has a more compact unit name here instead of the full - * path. we accumulate the full path size using "fpsize", we'll rebuild - * it later. We detect this because the first character of the name is - * not '/'. - */ - if ((*pathp) != '/') { - new_format = 1; - if (fpsize == 0) { - /* root node: special case. fpsize accounts for path - * plus terminating zero. root node only has '/', so - * fpsize should be 2, but we want to avoid the first - * level nodes to have two '/' so we use fpsize 1 here - */ - fpsize = 1; - allocl = 2; - } else { - /* account for '/' and path size minus terminal 0 - * already in 'l' - */ - fpsize += l; - allocl = fpsize; - } - } - - np = unflatten_dt_alloc(&mem, sizeof(struct device_node) + allocl, - __alignof__(struct device_node)); - if (allnextpp) { - memset(np, 0, sizeof(*np)); - np->full_name = ((char *)np) + sizeof(struct device_node); - if (new_format) { - char *p2 = np->full_name; - /* rebuild full path for new format */ - if (dad && dad->parent) { - strcpy(p2, dad->full_name); -#ifdef DEBUG - if ((strlen(p2) + l + 1) != allocl) { - pr_debug("%s: p: %d, l: %d, a: %d\n", - pathp, (int)strlen(p2), - l, allocl); - } -#endif - p2 += strlen(p2); - } - *(p2++) = '/'; - memcpy(p2, pathp, l); - } else - memcpy(np->full_name, pathp, l); - prev_pp = &np->properties; - **allnextpp = np; - *allnextpp = &np->allnext; - if (dad != NULL) { - np->parent = dad; - /* we temporarily use the next field as `last_child'*/ - if (dad->next == NULL) - dad->child = np; - else - dad->next->sibling = np; - dad->next = np; - } - kref_init(&np->kref); - } - while (1) { - u32 sz, noff; - char *pname; - - tag = *((u32 *)(*p)); - if (tag == OF_DT_NOP) { - *p += 4; - continue; - } - if (tag != OF_DT_PROP) - break; - *p += 4; - sz = *((u32 *)(*p)); - noff = *((u32 *)((*p) + 4)); - *p += 8; - if (initial_boot_params->version < 0x10) - *p = _ALIGN(*p, sz >= 8 ? 8 : 4); - - pname = find_flat_dt_string(noff); - if (pname == NULL) { - printk(KERN_INFO - "Can't find property name in list !\n"); - break; - } - if (strcmp(pname, "name") == 0) - has_name = 1; - l = strlen(pname) + 1; - pp = unflatten_dt_alloc(&mem, sizeof(struct property), - __alignof__(struct property)); - if (allnextpp) { - if (strcmp(pname, "linux,phandle") == 0) { - np->node = *((u32 *)*p); - if (np->linux_phandle == 0) - np->linux_phandle = np->node; - } - if (strcmp(pname, "ibm,phandle") == 0) - np->linux_phandle = *((u32 *)*p); - pp->name = pname; - pp->length = sz; - pp->value = (void *)*p; - *prev_pp = pp; - prev_pp = &pp->next; - } - *p = _ALIGN((*p) + sz, 4); - } - /* with version 0x10 we may not have the name property, recreate - * it here from the unit name if absent - */ - if (!has_name) { - char *p1 = pathp, *ps = pathp, *pa = NULL; - int sz; - - while (*p1) { - if ((*p1) == '@') - pa = p1; - if ((*p1) == '/') - ps = p1 + 1; - p1++; - } - if (pa < ps) - pa = p1; - sz = (pa - ps) + 1; - pp = unflatten_dt_alloc(&mem, sizeof(struct property) + sz, - __alignof__(struct property)); - if (allnextpp) { - pp->name = "name"; - pp->length = sz; - pp->value = pp + 1; - *prev_pp = pp; - prev_pp = &pp->next; - memcpy(pp->value, ps, sz - 1); - ((char *)pp->value)[sz - 1] = 0; - pr_debug("fixed up name for %s -> %s\n", pathp, - (char *)pp->value); - } - } - if (allnextpp) { - *prev_pp = NULL; - np->name = of_get_property(np, "name", NULL); - np->type = of_get_property(np, "device_type", NULL); - - if (!np->name) - np->name = ""; - if (!np->type) - np->type = ""; - } - while (tag == OF_DT_BEGIN_NODE) { - mem = unflatten_dt_node(mem, p, np, allnextpp, fpsize); - tag = *((u32 *)(*p)); - } - if (tag != OF_DT_END_NODE) { - printk(KERN_INFO "Weird tag at end of node: %x\n", tag); - return mem; - } - *p += 4; - return mem; -} - /** * unflattens the device-tree passed by the firmware, creating the * tree of struct device_node. It also fills the "name" and "type" diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 413e608863d..a102a0a33ed 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -80,200 +80,6 @@ extern rwlock_t devtree_lock; /* temporary while merging */ /* export that to outside world */ struct device_node *of_chosen; -static void *__init unflatten_dt_alloc(unsigned long *mem, unsigned long size, - unsigned long align) -{ - void *res; - - *mem = _ALIGN(*mem, align); - res = (void *)*mem; - *mem += size; - - return res; -} - -static unsigned long __init unflatten_dt_node(unsigned long mem, - unsigned long *p, - struct device_node *dad, - struct device_node ***allnextpp, - unsigned long fpsize) -{ - struct device_node *np; - struct property *pp, **prev_pp = NULL; - char *pathp; - u32 tag; - unsigned int l, allocl; - int has_name = 0; - int new_format = 0; - - tag = *((u32 *)(*p)); - if (tag != OF_DT_BEGIN_NODE) { - printk("Weird tag at start of node: %x\n", tag); - return mem; - } - *p += 4; - pathp = (char *)*p; - l = allocl = strlen(pathp) + 1; - *p = _ALIGN(*p + l, 4); - - /* version 0x10 has a more compact unit name here instead of the full - * path. we accumulate the full path size using "fpsize", we'll rebuild - * it later. We detect this because the first character of the name is - * not '/'. - */ - if ((*pathp) != '/') { - new_format = 1; - if (fpsize == 0) { - /* root node: special case. fpsize accounts for path - * plus terminating zero. root node only has '/', so - * fpsize should be 2, but we want to avoid the first - * level nodes to have two '/' so we use fpsize 1 here - */ - fpsize = 1; - allocl = 2; - } else { - /* account for '/' and path size minus terminal 0 - * already in 'l' - */ - fpsize += l; - allocl = fpsize; - } - } - - - np = unflatten_dt_alloc(&mem, sizeof(struct device_node) + allocl, - __alignof__(struct device_node)); - if (allnextpp) { - memset(np, 0, sizeof(*np)); - np->full_name = ((char*)np) + sizeof(struct device_node); - if (new_format) { - char *p = np->full_name; - /* rebuild full path for new format */ - if (dad && dad->parent) { - strcpy(p, dad->full_name); -#ifdef DEBUG - if ((strlen(p) + l + 1) != allocl) { - DBG("%s: p: %d, l: %d, a: %d\n", - pathp, (int)strlen(p), l, allocl); - } -#endif - p += strlen(p); - } - *(p++) = '/'; - memcpy(p, pathp, l); - } else - memcpy(np->full_name, pathp, l); - prev_pp = &np->properties; - **allnextpp = np; - *allnextpp = &np->allnext; - if (dad != NULL) { - np->parent = dad; - /* we temporarily use the next field as `last_child'*/ - if (dad->next == 0) - dad->child = np; - else - dad->next->sibling = np; - dad->next = np; - } - kref_init(&np->kref); - } - while(1) { - u32 sz, noff; - char *pname; - - tag = *((u32 *)(*p)); - if (tag == OF_DT_NOP) { - *p += 4; - continue; - } - if (tag != OF_DT_PROP) - break; - *p += 4; - sz = *((u32 *)(*p)); - noff = *((u32 *)((*p) + 4)); - *p += 8; - if (initial_boot_params->version < 0x10) - *p = _ALIGN(*p, sz >= 8 ? 8 : 4); - - pname = find_flat_dt_string(noff); - if (pname == NULL) { - printk("Can't find property name in list !\n"); - break; - } - if (strcmp(pname, "name") == 0) - has_name = 1; - l = strlen(pname) + 1; - pp = unflatten_dt_alloc(&mem, sizeof(struct property), - __alignof__(struct property)); - if (allnextpp) { - if (strcmp(pname, "linux,phandle") == 0) { - np->node = *((u32 *)*p); - if (np->linux_phandle == 0) - np->linux_phandle = np->node; - } - if (strcmp(pname, "ibm,phandle") == 0) - np->linux_phandle = *((u32 *)*p); - pp->name = pname; - pp->length = sz; - pp->value = (void *)*p; - *prev_pp = pp; - prev_pp = &pp->next; - } - *p = _ALIGN((*p) + sz, 4); - } - /* with version 0x10 we may not have the name property, recreate - * it here from the unit name if absent - */ - if (!has_name) { - char *p = pathp, *ps = pathp, *pa = NULL; - int sz; - - while (*p) { - if ((*p) == '@') - pa = p; - if ((*p) == '/') - ps = p + 1; - p++; - } - if (pa < ps) - pa = p; - sz = (pa - ps) + 1; - pp = unflatten_dt_alloc(&mem, sizeof(struct property) + sz, - __alignof__(struct property)); - if (allnextpp) { - pp->name = "name"; - pp->length = sz; - pp->value = pp + 1; - *prev_pp = pp; - prev_pp = &pp->next; - memcpy(pp->value, ps, sz - 1); - ((char *)pp->value)[sz - 1] = 0; - DBG("fixed up name for %s -> %s\n", pathp, - (char *)pp->value); - } - } - if (allnextpp) { - *prev_pp = NULL; - np->name = of_get_property(np, "name", NULL); - np->type = of_get_property(np, "device_type", NULL); - - if (!np->name) - np->name = ""; - if (!np->type) - np->type = ""; - } - while (tag == OF_DT_BEGIN_NODE) { - mem = unflatten_dt_node(mem, p, np, allnextpp, fpsize); - tag = *((u32 *)(*p)); - } - if (tag != OF_DT_END_NODE) { - printk("Weird tag at end of node: %x\n", tag); - return mem; - } - *p += 4; - return mem; -} - static int __init early_parse_mem(char *p) { if (!p) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 5cdd958db9a..6852ecf6d1e 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -166,3 +166,203 @@ int __init of_flat_dt_is_compatible(unsigned long node, const char *compat) return 0; } +static void *__init unflatten_dt_alloc(unsigned long *mem, unsigned long size, + unsigned long align) +{ + void *res; + + *mem = _ALIGN(*mem, align); + res = (void *)*mem; + *mem += size; + + return res; +} + +/** + * unflatten_dt_node - Alloc and populate a device_node from the flat tree + * @p: pointer to node in flat tree + * @dad: Parent struct device_node + * @allnextpp: pointer to ->allnext from last allocated device_node + * @fpsize: Size of the node path up at the current depth. + */ +unsigned long __init unflatten_dt_node(unsigned long mem, + unsigned long *p, + struct device_node *dad, + struct device_node ***allnextpp, + unsigned long fpsize) +{ + struct device_node *np; + struct property *pp, **prev_pp = NULL; + char *pathp; + u32 tag; + unsigned int l, allocl; + int has_name = 0; + int new_format = 0; + + tag = *((u32 *)(*p)); + if (tag != OF_DT_BEGIN_NODE) { + pr_err("Weird tag at start of node: %x\n", tag); + return mem; + } + *p += 4; + pathp = (char *)*p; + l = allocl = strlen(pathp) + 1; + *p = _ALIGN(*p + l, 4); + + /* version 0x10 has a more compact unit name here instead of the full + * path. we accumulate the full path size using "fpsize", we'll rebuild + * it later. We detect this because the first character of the name is + * not '/'. + */ + if ((*pathp) != '/') { + new_format = 1; + if (fpsize == 0) { + /* root node: special case. fpsize accounts for path + * plus terminating zero. root node only has '/', so + * fpsize should be 2, but we want to avoid the first + * level nodes to have two '/' so we use fpsize 1 here + */ + fpsize = 1; + allocl = 2; + } else { + /* account for '/' and path size minus terminal 0 + * already in 'l' + */ + fpsize += l; + allocl = fpsize; + } + } + + np = unflatten_dt_alloc(&mem, sizeof(struct device_node) + allocl, + __alignof__(struct device_node)); + if (allnextpp) { + memset(np, 0, sizeof(*np)); + np->full_name = ((char *)np) + sizeof(struct device_node); + if (new_format) { + char *fn = np->full_name; + /* rebuild full path for new format */ + if (dad && dad->parent) { + strcpy(fn, dad->full_name); +#ifdef DEBUG + if ((strlen(fn) + l + 1) != allocl) { + pr_debug("%s: p: %d, l: %d, a: %d\n", + pathp, (int)strlen(fn), + l, allocl); + } +#endif + fn += strlen(fn); + } + *(fn++) = '/'; + memcpy(fn, pathp, l); + } else + memcpy(np->full_name, pathp, l); + prev_pp = &np->properties; + **allnextpp = np; + *allnextpp = &np->allnext; + if (dad != NULL) { + np->parent = dad; + /* we temporarily use the next field as `last_child'*/ + if (dad->next == NULL) + dad->child = np; + else + dad->next->sibling = np; + dad->next = np; + } + kref_init(&np->kref); + } + while (1) { + u32 sz, noff; + char *pname; + + tag = *((u32 *)(*p)); + if (tag == OF_DT_NOP) { + *p += 4; + continue; + } + if (tag != OF_DT_PROP) + break; + *p += 4; + sz = *((u32 *)(*p)); + noff = *((u32 *)((*p) + 4)); + *p += 8; + if (initial_boot_params->version < 0x10) + *p = _ALIGN(*p, sz >= 8 ? 8 : 4); + + pname = find_flat_dt_string(noff); + if (pname == NULL) { + pr_info("Can't find property name in list !\n"); + break; + } + if (strcmp(pname, "name") == 0) + has_name = 1; + l = strlen(pname) + 1; + pp = unflatten_dt_alloc(&mem, sizeof(struct property), + __alignof__(struct property)); + if (allnextpp) { + if (strcmp(pname, "linux,phandle") == 0) { + np->node = *((u32 *)*p); + if (np->linux_phandle == 0) + np->linux_phandle = np->node; + } + if (strcmp(pname, "ibm,phandle") == 0) + np->linux_phandle = *((u32 *)*p); + pp->name = pname; + pp->length = sz; + pp->value = (void *)*p; + *prev_pp = pp; + prev_pp = &pp->next; + } + *p = _ALIGN((*p) + sz, 4); + } + /* with version 0x10 we may not have the name property, recreate + * it here from the unit name if absent + */ + if (!has_name) { + char *p1 = pathp, *ps = pathp, *pa = NULL; + int sz; + + while (*p1) { + if ((*p1) == '@') + pa = p1; + if ((*p1) == '/') + ps = p1 + 1; + p1++; + } + if (pa < ps) + pa = p1; + sz = (pa - ps) + 1; + pp = unflatten_dt_alloc(&mem, sizeof(struct property) + sz, + __alignof__(struct property)); + if (allnextpp) { + pp->name = "name"; + pp->length = sz; + pp->value = pp + 1; + *prev_pp = pp; + prev_pp = &pp->next; + memcpy(pp->value, ps, sz - 1); + ((char *)pp->value)[sz - 1] = 0; + pr_debug("fixed up name for %s -> %s\n", pathp, + (char *)pp->value); + } + } + if (allnextpp) { + *prev_pp = NULL; + np->name = of_get_property(np, "name", NULL); + np->type = of_get_property(np, "device_type", NULL); + + if (!np->name) + np->name = ""; + if (!np->type) + np->type = ""; + } + while (tag == OF_DT_BEGIN_NODE) { + mem = unflatten_dt_node(mem, p, np, allnextpp, fpsize); + tag = *((u32 *)(*p)); + } + if (tag != OF_DT_END_NODE) { + pr_err("Weird tag at end of node: %x\n", tag); + return mem; + } + *p += 4; + return mem; +} diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index 81231e04e8f..ace9068e07e 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -69,6 +69,10 @@ extern void *of_get_flat_dt_prop(unsigned long node, const char *name, unsigned long *size); extern int of_flat_dt_is_compatible(unsigned long node, const char *name); extern unsigned long of_get_flat_dt_root(void); +extern unsigned long unflatten_dt_node(unsigned long mem, unsigned long *p, + struct device_node *dad, + struct device_node ***allnextpp, + unsigned long fpsize); /* Other Prototypes */ extern void finish_device_tree(void); -- cgit v1.2.3-70-g09d2 From 41f880091c15b039ffcc8b3d831656b81517a6d3 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Mon, 23 Nov 2009 20:07:01 -0700 Subject: of/flattree: Merge unflatten_device_tree Merge common code between PowerPC and MicroBlaze Signed-off-by: Grant Likely Reviewed-by: Wolfram Sang Tested-by: Michal Simek --- arch/microblaze/include/asm/prom.h | 1 - arch/microblaze/kernel/prom.c | 49 ----------------------------------- arch/powerpc/kernel/prom.c | 50 ------------------------------------ drivers/of/fdt.c | 52 ++++++++++++++++++++++++++++++++++++++ include/linux/of.h | 3 +++ include/linux/of_fdt.h | 4 --- 6 files changed, 55 insertions(+), 104 deletions(-) (limited to 'include/linux') diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h index ef3ec1d6ceb..07d1063f9aa 100644 --- a/arch/microblaze/include/asm/prom.h +++ b/arch/microblaze/include/asm/prom.h @@ -37,7 +37,6 @@ extern struct device_node *of_chosen; #define HAVE_ARCH_DEVTREE_FIXUPS -extern struct device_node *allnodes; /* temporary while merging */ extern rwlock_t devtree_lock; /* temporary while merging */ /* For updating the device tree at runtime */ diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c index 021770abfbd..901d538c15e 100644 --- a/arch/microblaze/kernel/prom.c +++ b/arch/microblaze/kernel/prom.c @@ -50,55 +50,6 @@ typedef u32 cell_t; /* export that to outside world */ struct device_node *of_chosen; -/** - * unflattens the device-tree passed by the firmware, creating the - * tree of struct device_node. It also fills the "name" and "type" - * pointers of the nodes so the normal device-tree walking functions - * can be used (this used to be done by finish_device_tree) - */ -void __init unflatten_device_tree(void) -{ - unsigned long start, mem, size; - struct device_node **allnextp = &allnodes; - - pr_debug(" -> unflatten_device_tree()\n"); - - /* First pass, scan for size */ - start = ((unsigned long)initial_boot_params) + - initial_boot_params->off_dt_struct; - size = unflatten_dt_node(0, &start, NULL, NULL, 0); - size = (size | 3) + 1; - - pr_debug(" size is %lx, allocating...\n", size); - - /* Allocate memory for the expanded device tree */ - mem = lmb_alloc(size + 4, __alignof__(struct device_node)); - mem = (unsigned long) __va(mem); - - ((u32 *)mem)[size / 4] = 0xdeadbeef; - - pr_debug(" unflattening %lx...\n", mem); - - /* Second pass, do actual unflattening */ - start = ((unsigned long)initial_boot_params) + - initial_boot_params->off_dt_struct; - unflatten_dt_node(mem, &start, NULL, &allnextp, 0); - if (*((u32 *)start) != OF_DT_END) - printk(KERN_WARNING "Weird tag at end of tree: %08x\n", - *((u32 *)start)); - if (((u32 *)mem)[size / 4] != 0xdeadbeef) - printk(KERN_WARNING "End of tree marker overwritten: %08x\n", - ((u32 *)mem)[size / 4]); - *allnextp = NULL; - - /* Get pointer to OF "/chosen" node for use everywhere */ - of_chosen = of_find_node_by_path("/chosen"); - if (of_chosen == NULL) - of_chosen = of_find_node_by_path("/chosen@0"); - - pr_debug(" <- unflatten_device_tree()\n"); -} - #define early_init_dt_scan_drconf_memory(node) 0 static int __init early_init_dt_scan_cpus(unsigned long node, diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index a102a0a33ed..1280f3484ad 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -73,8 +73,6 @@ unsigned long tce_alloc_start, tce_alloc_end; typedef u32 cell_t; -extern struct device_node *allnodes; /* temporary while merging */ - extern rwlock_t devtree_lock; /* temporary while merging */ /* export that to outside world */ @@ -119,54 +117,6 @@ static void __init move_device_tree(void) DBG("<- move_device_tree\n"); } -/** - * unflattens the device-tree passed by the firmware, creating the - * tree of struct device_node. It also fills the "name" and "type" - * pointers of the nodes so the normal device-tree walking functions - * can be used (this used to be done by finish_device_tree) - */ -void __init unflatten_device_tree(void) -{ - unsigned long start, mem, size; - struct device_node **allnextp = &allnodes; - - DBG(" -> unflatten_device_tree()\n"); - - /* First pass, scan for size */ - start = ((unsigned long)initial_boot_params) + - initial_boot_params->off_dt_struct; - size = unflatten_dt_node(0, &start, NULL, NULL, 0); - size = (size | 3) + 1; - - DBG(" size is %lx, allocating...\n", size); - - /* Allocate memory for the expanded device tree */ - mem = lmb_alloc(size + 4, __alignof__(struct device_node)); - mem = (unsigned long) __va(mem); - - ((u32 *)mem)[size / 4] = 0xdeadbeef; - - DBG(" unflattening %lx...\n", mem); - - /* Second pass, do actual unflattening */ - start = ((unsigned long)initial_boot_params) + - initial_boot_params->off_dt_struct; - unflatten_dt_node(mem, &start, NULL, &allnextp, 0); - if (*((u32 *)start) != OF_DT_END) - printk(KERN_WARNING "Weird tag at end of tree: %08x\n", *((u32 *)start)); - if (((u32 *)mem)[size / 4] != 0xdeadbeef) - printk(KERN_WARNING "End of tree marker overwritten: %08x\n", - ((u32 *)mem)[size / 4] ); - *allnextp = NULL; - - /* Get pointer to OF "/chosen" node for use everywhere */ - of_chosen = of_find_node_by_path("/chosen"); - if (of_chosen == NULL) - of_chosen = of_find_node_by_path("/chosen@0"); - - DBG(" <- unflatten_device_tree()\n"); -} - /* * ibm,pa-features is a per-cpu property that contains a string of * attribute descriptors, each of which has a 2 byte header plus up diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 6852ecf6d1e..43d236cbc17 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -9,6 +9,8 @@ * version 2 as published by the Free Software Foundation. */ +#include +#include #include #include @@ -366,3 +368,53 @@ unsigned long __init unflatten_dt_node(unsigned long mem, *p += 4; return mem; } + +/** + * unflatten_device_tree - create tree of device_nodes from flat blob + * + * unflattens the device-tree passed by the firmware, creating the + * tree of struct device_node. It also fills the "name" and "type" + * pointers of the nodes so the normal device-tree walking functions + * can be used. + */ +void __init unflatten_device_tree(void) +{ + unsigned long start, mem, size; + struct device_node **allnextp = &allnodes; + + pr_debug(" -> unflatten_device_tree()\n"); + + /* First pass, scan for size */ + start = ((unsigned long)initial_boot_params) + + initial_boot_params->off_dt_struct; + size = unflatten_dt_node(0, &start, NULL, NULL, 0); + size = (size | 3) + 1; + + pr_debug(" size is %lx, allocating...\n", size); + + /* Allocate memory for the expanded device tree */ + mem = lmb_alloc(size + 4, __alignof__(struct device_node)); + mem = (unsigned long) __va(mem); + + ((u32 *)mem)[size / 4] = 0xdeadbeef; + + pr_debug(" unflattening %lx...\n", mem); + + /* Second pass, do actual unflattening */ + start = ((unsigned long)initial_boot_params) + + initial_boot_params->off_dt_struct; + unflatten_dt_node(mem, &start, NULL, &allnextp, 0); + if (*((u32 *)start) != OF_DT_END) + pr_warning("Weird tag at end of tree: %08x\n", *((u32 *)start)); + if (((u32 *)mem)[size / 4] != 0xdeadbeef) + pr_warning("End of tree marker overwritten: %08x\n", + ((u32 *)mem)[size / 4]); + *allnextp = NULL; + + /* Get pointer to OF "/chosen" node for use everywhere */ + of_chosen = of_find_node_by_path("/chosen"); + if (of_chosen == NULL) + of_chosen = of_find_node_by_path("/chosen@0"); + + pr_debug(" <- unflatten_device_tree()\n"); +} diff --git a/include/linux/of.h b/include/linux/of.h index e7facd8fbce..bec215792c4 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -63,6 +63,9 @@ struct device_node { #endif }; +/* Pointer for first entry in chain of all nodes. */ +extern struct device_node *allnodes; + static inline int of_node_check_flag(struct device_node *n, unsigned long flag) { return test_bit(flag, &n->_flags); diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index ace9068e07e..81231e04e8f 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -69,10 +69,6 @@ extern void *of_get_flat_dt_prop(unsigned long node, const char *name, unsigned long *size); extern int of_flat_dt_is_compatible(unsigned long node, const char *name); extern unsigned long of_get_flat_dt_root(void); -extern unsigned long unflatten_dt_node(unsigned long mem, unsigned long *p, - struct device_node *dad, - struct device_node ***allnextpp, - unsigned long fpsize); /* Other Prototypes */ extern void finish_device_tree(void); -- cgit v1.2.3-70-g09d2 From 2be09cb993826b52c9fc1d44747c20dd43a50038 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Mon, 23 Nov 2009 20:16:46 -0700 Subject: of: remove special case definition of of_read_ulong() Special case of of_read_ulong() was defined for PPC32 to toss away all but the last 32 bits when a large number value was read, and the 'normal' version for ppc64 just #defined of_read_ulong to of_read_number which causes compiler warnings on MicroBlaze and other 32 bit architectures because it returns a u64 instead of a ulong. This patch fixes the problem by defining a common implementation of of_read_ulong() that works everywhere. Signed-off-by: Grant Likely Reviewed-by: Wolfram Sang Tested-by: Michal Simek --- include/linux/of.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index bec215792c4..d4c014a35ea 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -113,14 +113,11 @@ static inline u64 of_read_number(const u32 *cell, int size) } /* Like of_read_number, but we want an unsigned long result */ -#ifdef CONFIG_PPC32 static inline unsigned long of_read_ulong(const u32 *cell, int size) { - return cell[size-1]; + /* toss away upper bits if unsigned long is smaller than u64 */ + return of_read_number(cell, size); } -#else -#define of_read_ulong(cell, size) of_read_number(cell, size) -#endif #include -- cgit v1.2.3-70-g09d2 From d2eecb03936878ec574ade5532fa83df7d75dde7 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 7 Dec 2009 10:36:20 -0500 Subject: ext4: Use slab allocator for sub-page sized allocations Now that the SLUB seems to be fixed so that it respects the requested alignment, use kmem_cache_alloc() to allocator if the block size of the buffer heads to be allocated is less than the page size. Previously, we were using 16k page on a Power system for each buffer, even when the file system was using 1k or 4k block size. Signed-off-by: "Theodore Ts'o" --- fs/jbd2/journal.c | 132 +++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/jbd2.h | 11 +---- 2 files changed, 134 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index ac0d027595d..c03d4dce4d7 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -39,6 +39,8 @@ #include #include #include +#include +#include #define CREATE_TRACE_POINTS #include @@ -93,6 +95,7 @@ EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate); static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); static void __journal_abort_soft (journal_t *journal, int errno); +static int jbd2_journal_create_slab(size_t slab_size); /* * Helper function used to manage commit timeouts @@ -1248,6 +1251,13 @@ int jbd2_journal_load(journal_t *journal) } } + /* + * Create a slab for this blocksize + */ + err = jbd2_journal_create_slab(be32_to_cpu(sb->s_blocksize)); + if (err) + return err; + /* Let the recovery code check whether it needs to recover any * data from the journal. */ if (jbd2_journal_recover(journal)) @@ -1806,6 +1816,127 @@ size_t journal_tag_bytes(journal_t *journal) return JBD2_TAG_SIZE32; } +/* + * JBD memory management + * + * These functions are used to allocate block-sized chunks of memory + * used for making copies of buffer_head data. Very often it will be + * page-sized chunks of data, but sometimes it will be in + * sub-page-size chunks. (For example, 16k pages on Power systems + * with a 4k block file system.) For blocks smaller than a page, we + * use a SLAB allocator. There are slab caches for each block size, + * which are allocated at mount time, if necessary, and we only free + * (all of) the slab caches when/if the jbd2 module is unloaded. For + * this reason we don't need to a mutex to protect access to + * jbd2_slab[] allocating or releasing memory; only in + * jbd2_journal_create_slab(). + */ +#define JBD2_MAX_SLABS 8 +static struct kmem_cache *jbd2_slab[JBD2_MAX_SLABS]; +static DECLARE_MUTEX(jbd2_slab_create_sem); + +static const char *jbd2_slab_names[JBD2_MAX_SLABS] = { + "jbd2_1k", "jbd2_2k", "jbd2_4k", "jbd2_8k", + "jbd2_16k", "jbd2_32k", "jbd2_64k", "jbd2_128k" +}; + + +static void jbd2_journal_destroy_slabs(void) +{ + int i; + + for (i = 0; i < JBD2_MAX_SLABS; i++) { + if (jbd2_slab[i]) + kmem_cache_destroy(jbd2_slab[i]); + jbd2_slab[i] = NULL; + } +} + +static int jbd2_journal_create_slab(size_t size) +{ + int i = order_base_2(size) - 10; + size_t slab_size; + + if (size == PAGE_SIZE) + return 0; + + if (i >= JBD2_MAX_SLABS) + return -EINVAL; + + if (unlikely(i < 0)) + i = 0; + down(&jbd2_slab_create_sem); + if (jbd2_slab[i]) { + up(&jbd2_slab_create_sem); + return 0; /* Already created */ + } + + slab_size = 1 << (i+10); + jbd2_slab[i] = kmem_cache_create(jbd2_slab_names[i], slab_size, + slab_size, 0, NULL); + up(&jbd2_slab_create_sem); + if (!jbd2_slab[i]) { + printk(KERN_EMERG "JBD2: no memory for jbd2_slab cache\n"); + return -ENOMEM; + } + return 0; +} + +static struct kmem_cache *get_slab(size_t size) +{ + int i = order_base_2(size) - 10; + + BUG_ON(i >= JBD2_MAX_SLABS); + if (unlikely(i < 0)) + i = 0; + BUG_ON(jbd2_slab[i] == 0); + return jbd2_slab[i]; +} + +void *jbd2_alloc(size_t size, gfp_t flags) +{ + void *ptr; + + BUG_ON(size & (size-1)); /* Must be a power of 2 */ + + flags |= __GFP_REPEAT; + if (size == PAGE_SIZE) + ptr = (void *)__get_free_pages(flags, 0); + else if (size > PAGE_SIZE) { + int order = get_order(size); + + if (order < 3) + ptr = (void *)__get_free_pages(flags, order); + else + ptr = vmalloc(size); + } else + ptr = kmem_cache_alloc(get_slab(size), flags); + + /* Check alignment; SLUB has gotten this wrong in the past, + * and this can lead to user data corruption! */ + BUG_ON(((unsigned long) ptr) & (size-1)); + + return ptr; +} + +void jbd2_free(void *ptr, size_t size) +{ + if (size == PAGE_SIZE) { + free_pages((unsigned long)ptr, 0); + return; + } + if (size > PAGE_SIZE) { + int order = get_order(size); + + if (order < 3) + free_pages((unsigned long)ptr, order); + else + vfree(ptr); + return; + } + kmem_cache_free(get_slab(size), ptr); +}; + /* * Journal_head storage management */ @@ -2204,6 +2335,7 @@ static void jbd2_journal_destroy_caches(void) jbd2_journal_destroy_revoke_caches(); jbd2_journal_destroy_jbd2_journal_head_cache(); jbd2_journal_destroy_handle_cache(); + jbd2_journal_destroy_slabs(); } static int __init journal_init(void) diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 638ce4554c7..8ada2a129d0 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -69,15 +69,8 @@ extern u8 jbd2_journal_enable_debug; #define jbd_debug(f, a...) /**/ #endif -static inline void *jbd2_alloc(size_t size, gfp_t flags) -{ - return (void *)__get_free_pages(flags, get_order(size)); -} - -static inline void jbd2_free(void *ptr, size_t size) -{ - free_pages((unsigned long)ptr, get_order(size)); -}; +extern void *jbd2_alloc(size_t size, gfp_t flags); +extern void jbd2_free(void *ptr, size_t size); #define JBD2_MIN_JOURNAL_BLOCKS 1024 -- cgit v1.2.3-70-g09d2 From 85438592f179c126ad4cb9a280046d4f0a501e6d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 18 Nov 2009 17:53:21 +0900 Subject: percpu: remove compile warnings caused by __verify_pcpu_ptr() If percpu pointer is const, __verify_pcpu_ptr() triggers warnings like the following. drivers/net/loopback.c: In function 'loopback_get_stats': drivers/net/loopback.c:109: warning: initialization discards qualifiers from pointer target type Fix it by adding const to the verification target pointer used in __verify_pcpu_ptr(). Signed-off-by: Tejun Heo Reported-by: Stephen Rothwell --- include/linux/percpu-defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 1fa36eb54b6..68567c0b3a5 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -24,7 +24,7 @@ * input parameter is a percpu pointer. */ #define __verify_pcpu_ptr(ptr) do { \ - void __percpu *__vpp_verify = (typeof(ptr))NULL; \ + const void __percpu *__vpp_verify = (typeof(ptr))NULL; \ (void)__vpp_verify; \ } while (0) -- cgit v1.2.3-70-g09d2 From f7b3a8355ba6cad251297844a0bdd08898ea36e0 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Tue, 24 Nov 2009 03:26:58 -0700 Subject: of/flattree: Merge early_init_dt_check_for_initrd() Merge common code between PowerPC and Microblaze Signed-off-by: Grant Likely Tested-by: Wolfram Sang Acked-by: Benjamin Herrenschmidt --- arch/microblaze/kernel/prom.c | 32 -------------------------------- arch/powerpc/kernel/prom.c | 30 ------------------------------ drivers/of/fdt.c | 37 +++++++++++++++++++++++++++++++++++++ include/linux/of_fdt.h | 1 + 4 files changed, 38 insertions(+), 62 deletions(-) (limited to 'include/linux') diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c index a38e3733a09..7959495b1d0 100644 --- a/arch/microblaze/kernel/prom.c +++ b/arch/microblaze/kernel/prom.c @@ -113,38 +113,6 @@ static int __init early_init_dt_scan_cpus(unsigned long node, return 0; } -#ifdef CONFIG_BLK_DEV_INITRD -static void __init early_init_dt_check_for_initrd(unsigned long node) -{ - unsigned long l; - u32 *prop; - - pr_debug("Looking for initrd properties... "); - - prop = of_get_flat_dt_prop(node, "linux,initrd-start", &l); - if (prop) { - initrd_start = (unsigned long) - __va((u32)of_read_ulong(prop, l/4)); - - prop = of_get_flat_dt_prop(node, "linux,initrd-end", &l); - if (prop) { - initrd_end = (unsigned long) - __va((u32)of_read_ulong(prop, 1/4)); - initrd_below_start_ok = 1; - } else { - initrd_start = 0; - } - } - - pr_debug("initrd_start=0x%lx initrd_end=0x%lx\n", - initrd_start, initrd_end); -} -#else -static inline void early_init_dt_check_for_initrd(unsigned long node) -{ -} -#endif /* CONFIG_BLK_DEV_INITRD */ - static int __init early_init_dt_scan_chosen(unsigned long node, const char *uname, int depth, void *data) { diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 7f885665514..1ecd6c6ecab 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -373,36 +373,6 @@ static int __init early_init_dt_scan_cpus(unsigned long node, return 0; } -#ifdef CONFIG_BLK_DEV_INITRD -static void __init early_init_dt_check_for_initrd(unsigned long node) -{ - unsigned long l; - u32 *prop; - - DBG("Looking for initrd properties... "); - - prop = of_get_flat_dt_prop(node, "linux,initrd-start", &l); - if (prop) { - initrd_start = (unsigned long)__va(of_read_ulong(prop, l/4)); - - prop = of_get_flat_dt_prop(node, "linux,initrd-end", &l); - if (prop) { - initrd_end = (unsigned long) - __va(of_read_ulong(prop, l/4)); - initrd_below_start_ok = 1; - } else { - initrd_start = 0; - } - } - - DBG("initrd_start=0x%lx initrd_end=0x%lx\n", initrd_start, initrd_end); -} -#else -static inline void early_init_dt_check_for_initrd(unsigned long node) -{ -} -#endif /* CONFIG_BLK_DEV_INITRD */ - static int __init early_init_dt_scan_chosen(unsigned long node, const char *uname, int depth, void *data) { diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 43d236cbc17..6ad98e85dc9 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -11,6 +11,7 @@ #include #include +#include #include #include @@ -369,6 +370,42 @@ unsigned long __init unflatten_dt_node(unsigned long mem, return mem; } +#ifdef CONFIG_BLK_DEV_INITRD +/** + * early_init_dt_check_for_initrd - Decode initrd location from flat tree + * @node: reference to node containing initrd location ('chosen') + */ +void __init early_init_dt_check_for_initrd(unsigned long node) +{ + unsigned long len; + u32 *prop; + + pr_debug("Looking for initrd properties... "); + + prop = of_get_flat_dt_prop(node, "linux,initrd-start", &len); + if (prop) { + initrd_start = (unsigned long) + __va(of_read_ulong(prop, len/4)); + + prop = of_get_flat_dt_prop(node, "linux,initrd-end", &len); + if (prop) { + initrd_end = (unsigned long) + __va(of_read_ulong(prop, len/4)); + initrd_below_start_ok = 1; + } else { + initrd_start = 0; + } + } + + pr_debug("initrd_start=0x%lx initrd_end=0x%lx\n", + initrd_start, initrd_end); +} +#else +inline void early_init_dt_check_for_initrd(unsigned long node) +{ +} +#endif /* CONFIG_BLK_DEV_INITRD */ + /** * unflatten_device_tree - create tree of device_nodes from flat blob * diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index 81231e04e8f..ec2db8278c3 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -69,6 +69,7 @@ extern void *of_get_flat_dt_prop(unsigned long node, const char *name, unsigned long *size); extern int of_flat_dt_is_compatible(unsigned long node, const char *name); extern unsigned long of_get_flat_dt_root(void); +extern void early_init_dt_check_for_initrd(unsigned long node); /* Other Prototypes */ extern void finish_device_tree(void); -- cgit v1.2.3-70-g09d2 From f00abd94918c9780f9d2d961fc0e419c11457922 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Tue, 24 Nov 2009 03:27:10 -0700 Subject: of/flattree: Merge earlyinit_dt_scan_root() Merge common code between PowerPC and Microblaze Signed-off-by: Grant Likely Acked-by: Benjamin Herrenschmidt Tested-by: Wolfram Sang --- arch/microblaze/kernel/prom.c | 23 ----------------------- arch/powerpc/kernel/prom.c | 24 ------------------------ drivers/of/fdt.c | 26 ++++++++++++++++++++++++++ include/linux/of_fdt.h | 6 ++++++ 4 files changed, 32 insertions(+), 47 deletions(-) (limited to 'include/linux') diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c index 7959495b1d0..189179a9b55 100644 --- a/arch/microblaze/kernel/prom.c +++ b/arch/microblaze/kernel/prom.c @@ -42,9 +42,6 @@ #include #include -static int __initdata dt_root_addr_cells; -static int __initdata dt_root_size_cells; - typedef u32 cell_t; /* export that to outside world */ @@ -158,26 +155,6 @@ static int __init early_init_dt_scan_chosen(unsigned long node, return 1; } -static int __init early_init_dt_scan_root(unsigned long node, - const char *uname, int depth, void *data) -{ - u32 *prop; - - if (depth != 0) - return 0; - - prop = of_get_flat_dt_prop(node, "#size-cells", NULL); - dt_root_size_cells = (prop == NULL) ? 1 : *prop; - pr_debug("dt_root_size_cells = %x\n", dt_root_size_cells); - - prop = of_get_flat_dt_prop(node, "#address-cells", NULL); - dt_root_addr_cells = (prop == NULL) ? 2 : *prop; - pr_debug("dt_root_addr_cells = %x\n", dt_root_addr_cells); - - /* break now */ - return 1; -} - static u64 __init dt_mem_next_cell(int s, cell_t **cellp) { cell_t *p = *cellp; diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 1ecd6c6ecab..78f65a4d8b0 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -61,10 +61,6 @@ #define DBG(fmt...) #endif - -static int __initdata dt_root_addr_cells; -static int __initdata dt_root_size_cells; - #ifdef CONFIG_PPC64 int __initdata iommu_is_off; int __initdata iommu_force_on; @@ -436,26 +432,6 @@ static int __init early_init_dt_scan_chosen(unsigned long node, return 1; } -static int __init early_init_dt_scan_root(unsigned long node, - const char *uname, int depth, void *data) -{ - u32 *prop; - - if (depth != 0) - return 0; - - prop = of_get_flat_dt_prop(node, "#size-cells", NULL); - dt_root_size_cells = (prop == NULL) ? 1 : *prop; - DBG("dt_root_size_cells = %x\n", dt_root_size_cells); - - prop = of_get_flat_dt_prop(node, "#address-cells", NULL); - dt_root_addr_cells = (prop == NULL) ? 2 : *prop; - DBG("dt_root_addr_cells = %x\n", dt_root_addr_cells); - - /* break now */ - return 1; -} - static u64 __init dt_mem_next_cell(int s, cell_t **cellp) { cell_t *p = *cellp; diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 6ad98e85dc9..be200be4726 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -15,6 +15,9 @@ #include #include +int __initdata dt_root_addr_cells; +int __initdata dt_root_size_cells; + struct boot_param_header *initial_boot_params; char *find_flat_dt_string(u32 offset) @@ -406,6 +409,29 @@ inline void early_init_dt_check_for_initrd(unsigned long node) } #endif /* CONFIG_BLK_DEV_INITRD */ +/** + * early_init_dt_scan_root - fetch the top level address and size cells + */ +int __init early_init_dt_scan_root(unsigned long node, const char *uname, + int depth, void *data) +{ + u32 *prop; + + if (depth != 0) + return 0; + + prop = of_get_flat_dt_prop(node, "#size-cells", NULL); + dt_root_size_cells = (prop == NULL) ? 1 : *prop; + pr_debug("dt_root_size_cells = %x\n", dt_root_size_cells); + + prop = of_get_flat_dt_prop(node, "#address-cells", NULL); + dt_root_addr_cells = (prop == NULL) ? 2 : *prop; + pr_debug("dt_root_addr_cells = %x\n", dt_root_addr_cells); + + /* break now */ + return 1; +} + /** * unflatten_device_tree - create tree of device_nodes from flat blob * diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index ec2db8278c3..828c3cdaea7 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -58,6 +58,8 @@ struct boot_param_header { }; /* TBD: Temporary export of fdt globals - remove when code fully merged */ +extern int __initdata dt_root_addr_cells; +extern int __initdata dt_root_size_cells; extern struct boot_param_header *initial_boot_params; /* For scanning the flat device-tree at boot time */ @@ -71,6 +73,10 @@ extern int of_flat_dt_is_compatible(unsigned long node, const char *name); extern unsigned long of_get_flat_dt_root(void); extern void early_init_dt_check_for_initrd(unsigned long node); +/* Early flat tree scan hooks */ +extern int early_init_dt_scan_root(unsigned long node, const char *uname, + int depth, void *data); + /* Other Prototypes */ extern void finish_device_tree(void); extern void unflatten_device_tree(void); -- cgit v1.2.3-70-g09d2 From 83f7a06eb479e2aeb83536e77a2cb14cc2285e32 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Tue, 24 Nov 2009 03:37:56 -0700 Subject: of/flattree: merge dt_mem_next_cell Merge common code between PowerPC and Microblaze Signed-off-by: Grant Likely Acked-by: Benjamin Herrenschmidt Tested-by: Wolfram Sang --- arch/microblaze/kernel/prom.c | 8 -------- arch/powerpc/kernel/prom.c | 8 -------- drivers/of/fdt.c | 8 ++++++++ include/linux/of_fdt.h | 1 + 4 files changed, 9 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c index 189179a9b55..e0f4c34ed0f 100644 --- a/arch/microblaze/kernel/prom.c +++ b/arch/microblaze/kernel/prom.c @@ -155,14 +155,6 @@ static int __init early_init_dt_scan_chosen(unsigned long node, return 1; } -static u64 __init dt_mem_next_cell(int s, cell_t **cellp) -{ - cell_t *p = *cellp; - - *cellp = p + s; - return of_read_number(p, s); -} - static int __init early_init_dt_scan_memory(unsigned long node, const char *uname, int depth, void *data) { diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 78f65a4d8b0..048e3a3e987 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -432,14 +432,6 @@ static int __init early_init_dt_scan_chosen(unsigned long node, return 1; } -static u64 __init dt_mem_next_cell(int s, cell_t **cellp) -{ - cell_t *p = *cellp; - - *cellp = p + s; - return of_read_number(p, s); -} - #ifdef CONFIG_PPC_PSERIES /* * Interpret the ibm,dynamic-memory property in the diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index be200be4726..ebce509b088 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -432,6 +432,14 @@ int __init early_init_dt_scan_root(unsigned long node, const char *uname, return 1; } +u64 __init dt_mem_next_cell(int s, u32 **cellp) +{ + u32 *p = *cellp; + + *cellp = p + s; + return of_read_number(p, s); +} + /** * unflatten_device_tree - create tree of device_nodes from flat blob * diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index 828c3cdaea7..d1a37e56031 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -72,6 +72,7 @@ extern void *of_get_flat_dt_prop(unsigned long node, const char *name, extern int of_flat_dt_is_compatible(unsigned long node, const char *name); extern unsigned long of_get_flat_dt_root(void); extern void early_init_dt_check_for_initrd(unsigned long node); +extern u64 dt_mem_next_cell(int s, u32 **cellp); /* Early flat tree scan hooks */ extern int early_init_dt_scan_root(unsigned long node, const char *uname, -- cgit v1.2.3-70-g09d2 From 86e032213424958b45564d0cc96b3316641a49d3 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Thu, 10 Dec 2009 23:42:21 -0700 Subject: of/flattree: merge early_init_dt_scan_chosen() Merge common code between PowerPC and Microblaze. This patch splits the arch-specific stuff out into a new function, early_init_dt_scan_chosen_arch(). Signed-off-by: Grant Likely Tested-by: Wolfram Sang Acked-by: Benjamin Herrenschmidt --- arch/microblaze/kernel/prom.c | 44 ++--------------------------------------- arch/powerpc/kernel/prom.c | 46 ++++++++++--------------------------------- drivers/of/fdt.c | 38 +++++++++++++++++++++++++++++++++++ include/linux/of_fdt.h | 3 +++ 4 files changed, 53 insertions(+), 78 deletions(-) (limited to 'include/linux') diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c index 50d8b09d5e3..5505bcffd7d 100644 --- a/arch/microblaze/kernel/prom.c +++ b/arch/microblaze/kernel/prom.c @@ -108,49 +108,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node, return 0; } -static int __init early_init_dt_scan_chosen(unsigned long node, - const char *uname, int depth, void *data) +void __init early_init_dt_scan_chosen_arch(unsigned long node) { - unsigned long l; - char *p; - - pr_debug("search \"chosen\", depth: %d, uname: %s\n", depth, uname); - - if (depth != 1 || - (strcmp(uname, "chosen") != 0 && - strcmp(uname, "chosen@0") != 0)) - return 0; - -#ifdef CONFIG_KEXEC - lprop = (u64 *)of_get_flat_dt_prop(node, - "linux,crashkernel-base", NULL); - if (lprop) - crashk_res.start = *lprop; - - lprop = (u64 *)of_get_flat_dt_prop(node, - "linux,crashkernel-size", NULL); - if (lprop) - crashk_res.end = crashk_res.start + *lprop - 1; -#endif - - early_init_dt_check_for_initrd(node); - - /* Retreive command line */ - p = of_get_flat_dt_prop(node, "bootargs", &l); - if (p != NULL && l > 0) - strlcpy(cmd_line, p, min((int)l, COMMAND_LINE_SIZE)); - -#ifdef CONFIG_CMDLINE -#ifndef CONFIG_CMDLINE_FORCE - if (p == NULL || l == 0 || (l == 1 && (*p) == 0)) -#endif - strlcpy(cmd_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); -#endif /* CONFIG_CMDLINE */ - - pr_debug("Command line is: %s\n", cmd_line); - - /* break now */ - return 1; + /* No Microblaze specific code here */ } static int __init early_init_dt_scan_memory(unsigned long node, diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 4e3181cded4..877fad9b374 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -367,18 +367,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node, return 0; } -static int __init early_init_dt_scan_chosen(unsigned long node, - const char *uname, int depth, void *data) +void __init early_init_dt_scan_chosen_arch(unsigned long node) { unsigned long *lprop; - unsigned long l; - char *p; - - DBG("search \"chosen\", depth: %d, uname: %s\n", depth, uname); - - if (depth != 1 || - (strcmp(uname, "chosen") != 0 && strcmp(uname, "chosen@0") != 0)) - return 0; #ifdef CONFIG_PPC64 /* check if iommu is forced on or off */ @@ -389,17 +380,17 @@ static int __init early_init_dt_scan_chosen(unsigned long node, #endif /* mem=x on the command line is the preferred mechanism */ - lprop = of_get_flat_dt_prop(node, "linux,memory-limit", NULL); - if (lprop) - memory_limit = *lprop; + lprop = of_get_flat_dt_prop(node, "linux,memory-limit", NULL); + if (lprop) + memory_limit = *lprop; #ifdef CONFIG_PPC64 - lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-start", NULL); - if (lprop) - tce_alloc_start = *lprop; - lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-end", NULL); - if (lprop) - tce_alloc_end = *lprop; + lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-start", NULL); + if (lprop) + tce_alloc_start = *lprop; + lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-end", NULL); + if (lprop) + tce_alloc_end = *lprop; #endif #ifdef CONFIG_KEXEC @@ -411,23 +402,6 @@ static int __init early_init_dt_scan_chosen(unsigned long node, if (lprop) crashk_res.end = crashk_res.start + *lprop - 1; #endif - - early_init_dt_check_for_initrd(node); - - /* Retreive command line */ - p = of_get_flat_dt_prop(node, "bootargs", &l); - if (p != NULL && l > 0) - strlcpy(cmd_line, p, min((int)l, COMMAND_LINE_SIZE)); - -#ifdef CONFIG_CMDLINE - if (p == NULL || l == 0 || (l == 1 && (*p) == 0)) - strlcpy(cmd_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); -#endif /* CONFIG_CMDLINE */ - - DBG("Command line is: %s\n", cmd_line); - - /* break now */ - return 1; } #ifdef CONFIG_PPC_PSERIES diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index ebce509b088..616a4767a95 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -15,6 +15,10 @@ #include #include +#ifdef CONFIG_PPC +#include +#endif /* CONFIG_PPC */ + int __initdata dt_root_addr_cells; int __initdata dt_root_size_cells; @@ -440,6 +444,40 @@ u64 __init dt_mem_next_cell(int s, u32 **cellp) return of_read_number(p, s); } +int __init early_init_dt_scan_chosen(unsigned long node, const char *uname, + int depth, void *data) +{ + unsigned long l; + char *p; + + pr_debug("search \"chosen\", depth: %d, uname: %s\n", depth, uname); + + if (depth != 1 || + (strcmp(uname, "chosen") != 0 && strcmp(uname, "chosen@0") != 0)) + return 0; + + early_init_dt_check_for_initrd(node); + + /* Retreive command line */ + p = of_get_flat_dt_prop(node, "bootargs", &l); + if (p != NULL && l > 0) + strlcpy(cmd_line, p, min((int)l, COMMAND_LINE_SIZE)); + +#ifdef CONFIG_CMDLINE +#ifndef CONFIG_CMDLINE_FORCE + if (p == NULL || l == 0 || (l == 1 && (*p) == 0)) +#endif + strlcpy(cmd_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); +#endif /* CONFIG_CMDLINE */ + + early_init_dt_scan_chosen_arch(node); + + pr_debug("Command line is: %s\n", cmd_line); + + /* break now */ + return 1; +} + /** * unflatten_device_tree - create tree of device_nodes from flat blob * diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index d1a37e56031..8118d4559dd 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -71,6 +71,9 @@ extern void *of_get_flat_dt_prop(unsigned long node, const char *name, unsigned long *size); extern int of_flat_dt_is_compatible(unsigned long node, const char *name); extern unsigned long of_get_flat_dt_root(void); +extern void early_init_dt_scan_chosen_arch(unsigned long node); +extern int early_init_dt_scan_chosen(unsigned long node, const char *uname, + int depth, void *data); extern void early_init_dt_check_for_initrd(unsigned long node); extern u64 dt_mem_next_cell(int s, u32 **cellp); -- cgit v1.2.3-70-g09d2 From 0f78231bffb868a30e8533aace142213266bb811 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 1 Dec 2009 13:37:02 +0100 Subject: mac80211: enable spatial multiplexing powersave Enable spatial multiplexing in mac80211 by telling the driver what to do and, where necessary, sending action frames to the AP to update the requested SMPS mode. Also includes a trivial implementation for hwsim that just logs the requested mode. For now, the userspace interface is in debugfs only, and let you toggle the requested mode at any time. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/mac80211_hwsim.c | 24 ++++++-- include/linux/ieee80211.h | 25 +++++++- include/net/mac80211.h | 59 ++++++++++++++++++ net/mac80211/cfg.c | 49 +++++++++++++++ net/mac80211/debugfs_netdev.c | 111 +++++++++++++++++++++++++++++++++- net/mac80211/driver-trace.h | 2 + net/mac80211/ht.c | 47 ++++++++++++++ net/mac80211/ieee80211_i.h | 14 +++++ net/mac80211/main.c | 24 ++++++++ net/mac80211/mlme.c | 63 +++++++++++++++++-- net/mac80211/status.c | 38 ++++++++++++ net/mac80211/util.c | 74 +++++++++++++++++++++++ 12 files changed, 518 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 88e41176e7f..92c669ebb35 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -618,12 +618,26 @@ static int mac80211_hwsim_config(struct ieee80211_hw *hw, u32 changed) { struct mac80211_hwsim_data *data = hw->priv; struct ieee80211_conf *conf = &hw->conf; - - printk(KERN_DEBUG "%s:%s (freq=%d idle=%d ps=%d)\n", + static const char *chantypes[4] = { + [NL80211_CHAN_NO_HT] = "noht", + [NL80211_CHAN_HT20] = "ht20", + [NL80211_CHAN_HT40MINUS] = "ht40-", + [NL80211_CHAN_HT40PLUS] = "ht40+", + }; + static const char *smps_modes[IEEE80211_SMPS_NUM_MODES] = { + [IEEE80211_SMPS_AUTOMATIC] = "auto", + [IEEE80211_SMPS_OFF] = "off", + [IEEE80211_SMPS_STATIC] = "static", + [IEEE80211_SMPS_DYNAMIC] = "dynamic", + }; + + printk(KERN_DEBUG "%s:%s (freq=%d/%s idle=%d ps=%d smps=%s)\n", wiphy_name(hw->wiphy), __func__, conf->channel->center_freq, + chantypes[conf->channel_type], !!(conf->flags & IEEE80211_CONF_IDLE), - !!(conf->flags & IEEE80211_CONF_PS)); + !!(conf->flags & IEEE80211_CONF_PS), + smps_modes[conf->smps_mode]); data->idle = !!(conf->flags & IEEE80211_CONF_IDLE); @@ -1082,7 +1096,9 @@ static int __init init_mac80211_hwsim(void) BIT(NL80211_IFTYPE_MESH_POINT); hw->flags = IEEE80211_HW_MFP_CAPABLE | - IEEE80211_HW_SIGNAL_DBM; + IEEE80211_HW_SIGNAL_DBM | + IEEE80211_HW_SUPPORTS_STATIC_SMPS | + IEEE80211_HW_SUPPORTS_DYNAMIC_SMPS; /* ask mac80211 to reserve space for magic */ hw->vif_data_size = sizeof(struct hwsim_vif_priv); diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index d9724a28c0c..e8d43d0ff2c 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -707,6 +707,10 @@ struct ieee80211_mgmt { u8 action; u8 trans_id[WLAN_SA_QUERY_TR_ID_LEN]; } __attribute__ ((packed)) sa_query; + struct { + u8 action; + u8 smps_control; + } __attribute__ ((packed)) ht_smps; } u; } __attribute__ ((packed)) action; } u; @@ -824,6 +828,7 @@ struct ieee80211_ht_cap { #define IEEE80211_HT_CAP_LDPC_CODING 0x0001 #define IEEE80211_HT_CAP_SUP_WIDTH_20_40 0x0002 #define IEEE80211_HT_CAP_SM_PS 0x000C +#define IEEE80211_HT_CAP_SM_PS_SHIFT 2 #define IEEE80211_HT_CAP_GRN_FLD 0x0010 #define IEEE80211_HT_CAP_SGI_20 0x0020 #define IEEE80211_HT_CAP_SGI_40 0x0040 @@ -839,6 +844,7 @@ struct ieee80211_ht_cap { /* 802.11n HT capability AMPDU settings (for ampdu_params_info) */ #define IEEE80211_HT_AMPDU_PARM_FACTOR 0x03 #define IEEE80211_HT_AMPDU_PARM_DENSITY 0x1C +#define IEEE80211_HT_AMPDU_PARM_DENSITY_SHIFT 2 /* * Maximum length of AMPDU that the STA can receive. @@ -922,12 +928,17 @@ struct ieee80211_ht_info { #define IEEE80211_MAX_AMPDU_BUF 0x40 -/* Spatial Multiplexing Power Save Modes */ +/* Spatial Multiplexing Power Save Modes (for capability) */ #define WLAN_HT_CAP_SM_PS_STATIC 0 #define WLAN_HT_CAP_SM_PS_DYNAMIC 1 #define WLAN_HT_CAP_SM_PS_INVALID 2 #define WLAN_HT_CAP_SM_PS_DISABLED 3 +/* for SM power control field lower two bits */ +#define WLAN_HT_SMPS_CONTROL_DISABLED 0 +#define WLAN_HT_SMPS_CONTROL_STATIC 1 +#define WLAN_HT_SMPS_CONTROL_DYNAMIC 3 + /* Authentication algorithms */ #define WLAN_AUTH_OPEN 0 #define WLAN_AUTH_SHARED_KEY 1 @@ -1150,6 +1161,18 @@ enum ieee80211_spectrum_mgmt_actioncode { WLAN_ACTION_SPCT_CHL_SWITCH = 4, }; +/* HT action codes */ +enum ieee80211_ht_actioncode { + WLAN_HT_ACTION_NOTIFY_CHANWIDTH = 0, + WLAN_HT_ACTION_SMPS = 1, + WLAN_HT_ACTION_PSMP = 2, + WLAN_HT_ACTION_PCO_PHASE = 3, + WLAN_HT_ACTION_CSI = 4, + WLAN_HT_ACTION_NONCOMPRESSED_BF = 5, + WLAN_HT_ACTION_COMPRESSED_BF = 6, + WLAN_HT_ACTION_ASEL_IDX_FEEDBACK = 7, +}; + /* Security key length */ enum ieee80211_key_len { WLAN_KEY_LEN_WEP40 = 5, diff --git a/include/net/mac80211.h b/include/net/mac80211.h index e94cc526b0f..e6b6bf81d5b 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -597,8 +597,10 @@ enum ieee80211_conf_flags { * @IEEE80211_CONF_CHANGE_CHANNEL: the channel/channel_type changed * @IEEE80211_CONF_CHANGE_RETRY_LIMITS: retry limits changed * @IEEE80211_CONF_CHANGE_IDLE: Idle flag changed + * @IEEE80211_CONF_CHANGE_SMPS: Spatial multiplexing powersave mode changed */ enum ieee80211_conf_changed { + IEEE80211_CONF_CHANGE_SMPS = BIT(1), IEEE80211_CONF_CHANGE_LISTEN_INTERVAL = BIT(2), IEEE80211_CONF_CHANGE_MONITOR = BIT(3), IEEE80211_CONF_CHANGE_PS = BIT(4), @@ -608,6 +610,21 @@ enum ieee80211_conf_changed { IEEE80211_CONF_CHANGE_IDLE = BIT(8), }; +/** + * enum ieee80211_smps_mode - spatial multiplexing power save mode + * + * @ + */ +enum ieee80211_smps_mode { + IEEE80211_SMPS_AUTOMATIC, + IEEE80211_SMPS_OFF, + IEEE80211_SMPS_STATIC, + IEEE80211_SMPS_DYNAMIC, + + /* keep last */ + IEEE80211_SMPS_NUM_MODES, +}; + /** * struct ieee80211_conf - configuration of the device * @@ -636,6 +653,10 @@ enum ieee80211_conf_changed { * @short_frame_max_tx_count: Maximum number of transmissions for a "short" * frame, called "dot11ShortRetryLimit" in 802.11, but actually means the * number of transmissions not the number of retries + * + * @smps_mode: spatial multiplexing powersave mode; note that + * %IEEE80211_SMPS_STATIC is used when the device is not + * configured for an HT channel */ struct ieee80211_conf { u32 flags; @@ -648,6 +669,7 @@ struct ieee80211_conf { struct ieee80211_channel *channel; enum nl80211_channel_type channel_type; + enum ieee80211_smps_mode smps_mode; }; /** @@ -930,6 +952,16 @@ enum ieee80211_tkip_key_type { * @IEEE80211_HW_BEACON_FILTER: * Hardware supports dropping of irrelevant beacon frames to * avoid waking up cpu. + * + * @IEEE80211_HW_SUPPORTS_STATIC_SMPS: + * Hardware supports static spatial multiplexing powersave, + * ie. can turn off all but one chain even on HT connections + * that should be using more chains. + * + * @IEEE80211_HW_SUPPORTS_DYNAMIC_SMPS: + * Hardware supports dynamic spatial multiplexing powersave, + * ie. can turn off all but one chain and then wake the rest + * up as required after, for example, rts/cts handshake. */ enum ieee80211_hw_flags { IEEE80211_HW_HAS_RATE_CONTROL = 1<<0, @@ -947,6 +979,8 @@ enum ieee80211_hw_flags { IEEE80211_HW_SUPPORTS_DYNAMIC_PS = 1<<12, IEEE80211_HW_MFP_CAPABLE = 1<<13, IEEE80211_HW_BEACON_FILTER = 1<<14, + IEEE80211_HW_SUPPORTS_STATIC_SMPS = 1<<15, + IEEE80211_HW_SUPPORTS_DYNAMIC_SMPS = 1<<16, }; /** @@ -1214,6 +1248,31 @@ ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw, * signal strength threshold checking. */ +/** + * DOC: Spatial multiplexing power save + * + * SMPS (Spatial multiplexing power save) is a mechanism to conserve + * power in an 802.11n implementation. For details on the mechanism + * and rationale, please refer to 802.11 (as amended by 802.11n-2009) + * "11.2.3 SM power save". + * + * The mac80211 implementation is capable of sending action frames + * to update the AP about the station's SMPS mode, and will instruct + * the driver to enter the specific mode. It will also announce the + * requested SMPS mode during the association handshake. Hardware + * support for this feature is required, and can be indicated by + * hardware flags. + * + * The default mode will be "automatic", which nl80211/cfg80211 + * defines to be dynamic SMPS in (regular) powersave, and SMPS + * turned off otherwise. + * + * To support this feature, the driver must set the appropriate + * hardware support flags, and handle the SMPS flag to the config() + * operation. It will then with this mechanism be instructed to + * enter the requested SMPS mode while associated to an HT AP. + */ + /** * DOC: Frame filtering * diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index fcfa1bf776a..8c35418d1c9 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1318,6 +1318,50 @@ static int ieee80211_testmode_cmd(struct wiphy *wiphy, void *data, int len) } #endif +int __ieee80211_request_smps(struct ieee80211_sub_if_data *sdata, + enum ieee80211_smps_mode smps_mode) +{ + const u8 *ap; + enum ieee80211_smps_mode old_req; + int err; + + old_req = sdata->u.mgd.req_smps; + sdata->u.mgd.req_smps = smps_mode; + + if (old_req == smps_mode && + smps_mode != IEEE80211_SMPS_AUTOMATIC) + return 0; + + /* + * If not associated, or current association is not an HT + * association, there's no need to send an action frame. + */ + if (!sdata->u.mgd.associated || + sdata->local->oper_channel_type == NL80211_CHAN_NO_HT) { + mutex_lock(&sdata->local->iflist_mtx); + ieee80211_recalc_smps(sdata->local, sdata); + mutex_unlock(&sdata->local->iflist_mtx); + return 0; + } + + ap = sdata->u.mgd.associated->cbss.bssid; + + if (smps_mode == IEEE80211_SMPS_AUTOMATIC) { + if (sdata->u.mgd.powersave) + smps_mode = IEEE80211_SMPS_DYNAMIC; + else + smps_mode = IEEE80211_SMPS_OFF; + } + + /* send SM PS frame to AP */ + err = ieee80211_send_smps_action(sdata, smps_mode, + ap, ap); + if (err) + sdata->u.mgd.req_smps = old_req; + + return err; +} + static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, bool enabled, int timeout) { @@ -1335,6 +1379,11 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, sdata->u.mgd.powersave = enabled; conf->dynamic_ps_timeout = timeout; + /* no change, but if automatic follow powersave */ + mutex_lock(&sdata->u.mgd.mtx); + __ieee80211_request_smps(sdata, sdata->u.mgd.req_smps); + mutex_unlock(&sdata->u.mgd.mtx); + if (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS) ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 5d9c797635a..35598350388 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -41,6 +41,30 @@ static ssize_t ieee80211_if_read( return ret; } +static ssize_t ieee80211_if_write( + struct ieee80211_sub_if_data *sdata, + const char __user *userbuf, + size_t count, loff_t *ppos, + ssize_t (*write)(struct ieee80211_sub_if_data *, const char *, int)) +{ + u8 *buf; + ssize_t ret = -ENODEV; + + buf = kzalloc(count, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + if (copy_from_user(buf, userbuf, count)) + return -EFAULT; + + rtnl_lock(); + if (sdata->dev->reg_state == NETREG_REGISTERED) + ret = (*write)(sdata, buf, count); + rtnl_unlock(); + + return ret; +} + #define IEEE80211_IF_FMT(name, field, format_string) \ static ssize_t ieee80211_if_fmt_##name( \ const struct ieee80211_sub_if_data *sdata, char *buf, \ @@ -71,7 +95,7 @@ static ssize_t ieee80211_if_fmt_##name( \ return scnprintf(buf, buflen, "%pM\n", sdata->field); \ } -#define __IEEE80211_IF_FILE(name) \ +#define __IEEE80211_IF_FILE(name, _write) \ static ssize_t ieee80211_if_read_##name(struct file *file, \ char __user *userbuf, \ size_t count, loff_t *ppos) \ @@ -82,12 +106,24 @@ static ssize_t ieee80211_if_read_##name(struct file *file, \ } \ static const struct file_operations name##_ops = { \ .read = ieee80211_if_read_##name, \ + .write = (_write), \ .open = mac80211_open_file_generic, \ } +#define __IEEE80211_IF_FILE_W(name) \ +static ssize_t ieee80211_if_write_##name(struct file *file, \ + const char __user *userbuf, \ + size_t count, loff_t *ppos) \ +{ \ + return ieee80211_if_write(file->private_data, userbuf, count, \ + ppos, ieee80211_if_parse_##name); \ +} \ +__IEEE80211_IF_FILE(name, ieee80211_if_write_##name) + + #define IEEE80211_IF_FILE(name, field, format) \ IEEE80211_IF_FMT_##format(name, field) \ - __IEEE80211_IF_FILE(name) + __IEEE80211_IF_FILE(name, NULL) /* common attributes */ IEEE80211_IF_FILE(drop_unencrypted, drop_unencrypted, DEC); @@ -99,6 +135,70 @@ IEEE80211_IF_FILE(bssid, u.mgd.bssid, MAC); IEEE80211_IF_FILE(aid, u.mgd.aid, DEC); IEEE80211_IF_FILE(capab, u.mgd.capab, HEX); +static int ieee80211_set_smps(struct ieee80211_sub_if_data *sdata, + enum ieee80211_smps_mode smps_mode) +{ + struct ieee80211_local *local = sdata->local; + int err; + + if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_STATIC_SMPS) && + smps_mode == IEEE80211_SMPS_STATIC) + return -EINVAL; + + /* auto should be dynamic if in PS mode */ + if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_SMPS) && + (smps_mode == IEEE80211_SMPS_DYNAMIC || + smps_mode == IEEE80211_SMPS_AUTOMATIC)) + return -EINVAL; + + /* supported only on managed interfaces for now */ + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return -EOPNOTSUPP; + + mutex_lock(&local->iflist_mtx); + err = __ieee80211_request_smps(sdata, smps_mode); + mutex_unlock(&local->iflist_mtx); + + return err; +} + +static const char *smps_modes[IEEE80211_SMPS_NUM_MODES] = { + [IEEE80211_SMPS_AUTOMATIC] = "auto", + [IEEE80211_SMPS_OFF] = "off", + [IEEE80211_SMPS_STATIC] = "static", + [IEEE80211_SMPS_DYNAMIC] = "dynamic", +}; + +static ssize_t ieee80211_if_fmt_smps(const struct ieee80211_sub_if_data *sdata, + char *buf, int buflen) +{ + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return -EOPNOTSUPP; + + return snprintf(buf, buflen, "request: %s\nused: %s\n", + smps_modes[sdata->u.mgd.req_smps], + smps_modes[sdata->u.mgd.ap_smps]); +} + +static ssize_t ieee80211_if_parse_smps(struct ieee80211_sub_if_data *sdata, + const char *buf, int buflen) +{ + enum ieee80211_smps_mode mode; + + for (mode = 0; mode < IEEE80211_SMPS_NUM_MODES; mode++) { + if (strncmp(buf, smps_modes[mode], buflen) == 0) { + int err = ieee80211_set_smps(sdata, mode); + if (!err) + return buflen; + return err; + } + } + + return -EINVAL; +} + +__IEEE80211_IF_FILE_W(smps); + /* AP attributes */ IEEE80211_IF_FILE(num_sta_ps, u.ap.num_sta_ps, ATOMIC); IEEE80211_IF_FILE(dtim_count, u.ap.dtim_count, DEC); @@ -109,7 +209,7 @@ static ssize_t ieee80211_if_fmt_num_buffered_multicast( return scnprintf(buf, buflen, "%u\n", skb_queue_len(&sdata->u.ap.ps_bc_buf)); } -__IEEE80211_IF_FILE(num_buffered_multicast); +__IEEE80211_IF_FILE(num_buffered_multicast, NULL); /* WDS attributes */ IEEE80211_IF_FILE(peer, u.wds.remote_addr, MAC); @@ -158,6 +258,10 @@ IEEE80211_IF_FILE(dot11MeshHWMPRootMode, debugfs_create_file(#name, 0400, sdata->debugfs.dir, \ sdata, &name##_ops); +#define DEBUGFS_ADD_MODE(name, mode) \ + debugfs_create_file(#name, mode, sdata->debugfs.dir, \ + sdata, &name##_ops); + static void add_sta_files(struct ieee80211_sub_if_data *sdata) { DEBUGFS_ADD(drop_unencrypted, sta); @@ -167,6 +271,7 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD(bssid, sta); DEBUGFS_ADD(aid, sta); DEBUGFS_ADD(capab, sta); + DEBUGFS_ADD_MODE(smps, 0600); } static void add_ap_files(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index ee2d19a25ce..7a849b92016 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -140,6 +140,7 @@ TRACE_EVENT(drv_config, __field(u8, short_frame_max_tx_count) __field(int, center_freq) __field(int, channel_type) + __field(int, smps) ), TP_fast_assign( @@ -155,6 +156,7 @@ TRACE_EVENT(drv_config, __entry->short_frame_max_tx_count = local->hw.conf.short_frame_max_tx_count; __entry->center_freq = local->hw.conf.channel->center_freq; __entry->channel_type = local->hw.conf.channel_type; + __entry->smps = local->hw.conf.smps_mode; ), TP_printk( diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 45ebd062a2f..63b8f86b7f1 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -166,3 +166,50 @@ void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, spin_unlock_bh(&sta->lock); } } + +int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, + enum ieee80211_smps_mode smps, const u8 *da, + const u8 *bssid) +{ + struct ieee80211_local *local = sdata->local; + struct sk_buff *skb; + struct ieee80211_mgmt *action_frame; + + /* 27 = header + category + action + smps mode */ + skb = dev_alloc_skb(27 + local->hw.extra_tx_headroom); + if (!skb) + return -ENOMEM; + + skb_reserve(skb, local->hw.extra_tx_headroom); + action_frame = (void *)skb_put(skb, 27); + memcpy(action_frame->da, da, ETH_ALEN); + memcpy(action_frame->sa, sdata->dev->dev_addr, ETH_ALEN); + memcpy(action_frame->bssid, bssid, ETH_ALEN); + action_frame->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_ACTION); + action_frame->u.action.category = WLAN_CATEGORY_HT; + action_frame->u.action.u.ht_smps.action = WLAN_HT_ACTION_SMPS; + switch (smps) { + case IEEE80211_SMPS_AUTOMATIC: + case IEEE80211_SMPS_NUM_MODES: + WARN_ON(1); + case IEEE80211_SMPS_OFF: + action_frame->u.action.u.ht_smps.smps_control = + WLAN_HT_SMPS_CONTROL_DISABLED; + break; + case IEEE80211_SMPS_STATIC: + action_frame->u.action.u.ht_smps.smps_control = + WLAN_HT_SMPS_CONTROL_STATIC; + break; + case IEEE80211_SMPS_DYNAMIC: + action_frame->u.action.u.ht_smps.smps_control = + WLAN_HT_SMPS_CONTROL_DYNAMIC; + break; + } + + /* we'll do more on status of this frame */ + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; + ieee80211_tx_skb(sdata, skb); + + return 0; +} diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 178e329f925..e63aecbddfb 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -297,6 +297,8 @@ struct ieee80211_if_managed { unsigned long timers_running; /* used for quiesce/restart */ bool powersave; /* powersave requested for this iface */ + enum ieee80211_smps_mode req_smps, /* requested smps mode */ + ap_smps; /* smps mode AP thinks we're in */ unsigned long request; @@ -587,6 +589,9 @@ struct ieee80211_local { /* used for uploading changed mc list */ struct work_struct reconfig_filter; + /* used to reconfigure hardware SM PS */ + struct work_struct recalc_smps; + /* aggregated multicast list */ struct dev_addr_list *mc_list; int mc_count; @@ -760,6 +765,8 @@ struct ieee80211_local { int user_power_level; /* in dBm */ int power_constr_level; /* in dBm */ + enum ieee80211_smps_mode smps_mode; + struct work_struct restart_work; #ifdef CONFIG_MAC80211_DEBUGFS @@ -978,6 +985,9 @@ void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u1 void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, const u8 *da, u16 tid, u16 initiator, u16 reason_code); +int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, + enum ieee80211_smps_mode smps, const u8 *da, + const u8 *bssid); void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid, u16 initiator, u16 reason); @@ -1088,6 +1098,10 @@ void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata, u32 ieee80211_sta_get_rates(struct ieee80211_local *local, struct ieee802_11_elems *elems, enum ieee80211_band band); +int __ieee80211_request_smps(struct ieee80211_sub_if_data *sdata, + enum ieee80211_smps_mode smps_mode); +void ieee80211_recalc_smps(struct ieee80211_local *local, + struct ieee80211_sub_if_data *forsdata); #ifdef CONFIG_MAC80211_NOINLINE #define debug_noinline noinline diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 98320a94c27..e1293e8ed83 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -113,6 +113,18 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) changed |= IEEE80211_CONF_CHANGE_CHANNEL; } + if (!conf_is_ht(&local->hw.conf)) { + /* + * mac80211.h documents that this is only valid + * when the channel is set to an HT type, and + * that otherwise STATIC is used. + */ + local->hw.conf.smps_mode = IEEE80211_SMPS_STATIC; + } else if (local->hw.conf.smps_mode != local->smps_mode) { + local->hw.conf.smps_mode = local->smps_mode; + changed |= IEEE80211_CONF_CHANGE_SMPS; + } + if (scan_chan) power = chan->max_power; else @@ -297,6 +309,16 @@ void ieee80211_restart_hw(struct ieee80211_hw *hw) } EXPORT_SYMBOL(ieee80211_restart_hw); +static void ieee80211_recalc_smps_work(struct work_struct *work) +{ + struct ieee80211_local *local = + container_of(work, struct ieee80211_local, recalc_smps); + + mutex_lock(&local->iflist_mtx); + ieee80211_recalc_smps(local, NULL); + mutex_unlock(&local->iflist_mtx); +} + struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, const struct ieee80211_ops *ops) { @@ -370,6 +392,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, INIT_WORK(&local->restart_work, ieee80211_restart_work); INIT_WORK(&local->reconfig_filter, ieee80211_reconfig_filter); + INIT_WORK(&local->recalc_smps, ieee80211_recalc_smps_work); + local->smps_mode = IEEE80211_SMPS_OFF; INIT_WORK(&local->dynamic_ps_enable_work, ieee80211_dynamic_ps_enable_work); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index cd5dcc3d8c2..0a762a9ba4d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -398,6 +398,8 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, __le16 tmp; u32 flags = local->hw.conf.channel->flags; + /* determine capability flags */ + switch (ht_info->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) { case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: if (flags & IEEE80211_CHAN_NO_HT40PLUS) { @@ -413,17 +415,64 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, break; } - tmp = cpu_to_le16(cap); - pos = skb_put(skb, sizeof(struct ieee80211_ht_cap)+2); + /* set SM PS mode properly */ + cap &= ~IEEE80211_HT_CAP_SM_PS; + /* new association always uses requested smps mode */ + if (ifmgd->req_smps == IEEE80211_SMPS_AUTOMATIC) { + if (ifmgd->powersave) + ifmgd->ap_smps = IEEE80211_SMPS_DYNAMIC; + else + ifmgd->ap_smps = IEEE80211_SMPS_OFF; + } else + ifmgd->ap_smps = ifmgd->req_smps; + + switch (ifmgd->ap_smps) { + case IEEE80211_SMPS_AUTOMATIC: + case IEEE80211_SMPS_NUM_MODES: + WARN_ON(1); + case IEEE80211_SMPS_OFF: + cap |= WLAN_HT_CAP_SM_PS_DISABLED << + IEEE80211_HT_CAP_SM_PS_SHIFT; + break; + case IEEE80211_SMPS_STATIC: + cap |= WLAN_HT_CAP_SM_PS_STATIC << + IEEE80211_HT_CAP_SM_PS_SHIFT; + break; + case IEEE80211_SMPS_DYNAMIC: + cap |= WLAN_HT_CAP_SM_PS_DYNAMIC << + IEEE80211_HT_CAP_SM_PS_SHIFT; + break; + } + + /* reserve and fill IE */ + + pos = skb_put(skb, sizeof(struct ieee80211_ht_cap) + 2); *pos++ = WLAN_EID_HT_CAPABILITY; *pos++ = sizeof(struct ieee80211_ht_cap); memset(pos, 0, sizeof(struct ieee80211_ht_cap)); + + /* capability flags */ + tmp = cpu_to_le16(cap); memcpy(pos, &tmp, sizeof(u16)); pos += sizeof(u16); - /* TODO: needs a define here for << 2 */ + + /* AMPDU parameters */ *pos++ = sband->ht_cap.ampdu_factor | - (sband->ht_cap.ampdu_density << 2); + (sband->ht_cap.ampdu_density << + IEEE80211_HT_AMPDU_PARM_DENSITY_SHIFT); + + /* MCS set */ memcpy(pos, &sband->ht_cap.mcs, sizeof(sband->ht_cap.mcs)); + pos += sizeof(sband->ht_cap.mcs); + + /* extended capabilities */ + pos += sizeof(__le16); + + /* BF capabilities */ + pos += sizeof(__le32); + + /* antenna selection */ + pos += sizeof(u8); } IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; @@ -932,6 +981,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, mutex_lock(&local->iflist_mtx); ieee80211_recalc_ps(local, -1); + ieee80211_recalc_smps(local, sdata); mutex_unlock(&local->iflist_mtx); netif_start_queue(sdata->dev); @@ -2327,6 +2377,11 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) ifmgd->flags |= IEEE80211_STA_WMM_ENABLED; mutex_init(&ifmgd->mtx); + + if (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_SMPS) + ifmgd->req_smps = IEEE80211_SMPS_AUTOMATIC; + else + ifmgd->req_smps = IEEE80211_SMPS_OFF; } /* scan finished notification */ diff --git a/net/mac80211/status.c b/net/mac80211/status.c index b4608f11a40..0c0850d37dd 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -134,6 +134,40 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, dev_kfree_skb(skb); } +static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb) +{ + struct ieee80211_mgmt *mgmt = (void *) skb->data; + struct ieee80211_local *local = sta->local; + struct ieee80211_sub_if_data *sdata = sta->sdata; + + if (ieee80211_is_action(mgmt->frame_control) && + sdata->vif.type == NL80211_IFTYPE_STATION && + mgmt->u.action.category == WLAN_CATEGORY_HT && + mgmt->u.action.u.ht_smps.action == WLAN_HT_ACTION_SMPS) { + /* + * This update looks racy, but isn't -- if we come + * here we've definitely got a station that we're + * talking to, and on a managed interface that can + * only be the AP. And the only other place updating + * this variable is before we're associated. + */ + switch (mgmt->u.action.u.ht_smps.smps_control) { + case WLAN_HT_SMPS_CONTROL_DYNAMIC: + sta->sdata->u.mgd.ap_smps = IEEE80211_SMPS_DYNAMIC; + break; + case WLAN_HT_SMPS_CONTROL_STATIC: + sta->sdata->u.mgd.ap_smps = IEEE80211_SMPS_STATIC; + break; + case WLAN_HT_SMPS_CONTROL_DISABLED: + default: /* shouldn't happen since we don't send that */ + sta->sdata->u.mgd.ap_smps = IEEE80211_SMPS_OFF; + break; + } + + ieee80211_queue_work(&local->hw, &local->recalc_smps); + } +} + void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) { struct sk_buff *skb2; @@ -210,6 +244,10 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) rate_control_tx_status(local, sband, sta, skb); if (ieee80211_vif_is_mesh(&sta->sdata->vif)) ieee80211s_update_metric(local, sta, skb); + + if (!(info->flags & IEEE80211_TX_CTL_INJECTED) && + (info->flags & IEEE80211_TX_STAT_ACK)) + ieee80211_frame_acked(sta, skb); } rcu_read_unlock(); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index d54dbe8e09b..086ef6257b4 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1170,3 +1170,77 @@ int ieee80211_reconfig(struct ieee80211_local *local) return 0; } +static int check_mgd_smps(struct ieee80211_if_managed *ifmgd, + enum ieee80211_smps_mode *smps_mode) +{ + if (ifmgd->associated) { + *smps_mode = ifmgd->ap_smps; + + if (*smps_mode == IEEE80211_SMPS_AUTOMATIC) { + if (ifmgd->powersave) + *smps_mode = IEEE80211_SMPS_DYNAMIC; + else + *smps_mode = IEEE80211_SMPS_OFF; + } + + return 1; + } + + return 0; +} + +/* must hold iflist_mtx */ +void ieee80211_recalc_smps(struct ieee80211_local *local, + struct ieee80211_sub_if_data *forsdata) +{ + struct ieee80211_sub_if_data *sdata; + enum ieee80211_smps_mode smps_mode = IEEE80211_SMPS_OFF; + int count = 0; + + if (forsdata) + WARN_ON(!mutex_is_locked(&forsdata->u.mgd.mtx)); + + WARN_ON(!mutex_is_locked(&local->iflist_mtx)); + + /* + * This function could be improved to handle multiple + * interfaces better, but right now it makes any + * non-station interfaces force SM PS to be turned + * off. If there are multiple station interfaces it + * could also use the best possible mode, e.g. if + * one is in static and the other in dynamic then + * dynamic is ok. + */ + + list_for_each_entry(sdata, &local->interfaces, list) { + if (!netif_running(sdata->dev)) + continue; + if (sdata->vif.type != NL80211_IFTYPE_STATION) + goto set; + if (sdata != forsdata) { + /* + * This nested is ok -- we are holding the iflist_mtx + * so can't get here twice or so. But it's required + * since normally we acquire it first and then the + * iflist_mtx. + */ + mutex_lock_nested(&sdata->u.mgd.mtx, SINGLE_DEPTH_NESTING); + count += check_mgd_smps(&sdata->u.mgd, &smps_mode); + mutex_unlock(&sdata->u.mgd.mtx); + } else + count += check_mgd_smps(&sdata->u.mgd, &smps_mode); + + if (count > 1) { + smps_mode = IEEE80211_SMPS_OFF; + break; + } + } + + if (smps_mode == local->smps_mode) + return; + + set: + local->smps_mode = smps_mode; + /* changed flag is auto-detected for this */ + ieee80211_hw_config(local, 0); +} -- cgit v1.2.3-70-g09d2 From 9da3e068142ec7856b2f13261dcf0660fad32b61 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Mon, 7 Dec 2009 15:57:50 -0500 Subject: mac80211: only bother printing highest data rate on debugfs if its set IEEE-802.11n spec says the RX highest data rate field does not specify the highest supported RX data rate if its not set. Ignore it if not set then. Refer to section 7.3.56.4 Cc: johannes@sipsolutions.net Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 5 ++++- net/mac80211/debugfs_sta.c | 12 ++++++++++-- 2 files changed, 14 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index e8d43d0ff2c..098bedcde9b 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -775,7 +775,10 @@ struct ieee80211_bar { /** * struct ieee80211_mcs_info - MCS information * @rx_mask: RX mask - * @rx_highest: highest supported RX rate + * @rx_highest: highest supported RX rate. If set represents + * the highest supported RX data rate in units of 1 Mbps. + * If this field is 0 this value should not be used to + * consider the highest RX data rate supported. * @tx_params: TX parameters */ struct ieee80211_mcs_info { diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index c833b6ce990..0d4a759ba72 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -218,11 +218,19 @@ static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf, p += scnprintf(p, sizeof(buf)+buf-p, "ampdu factor/density: %d/%d\n", htc->ampdu_factor, htc->ampdu_density); p += scnprintf(p, sizeof(buf)+buf-p, "MCS mask:"); + for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) p += scnprintf(p, sizeof(buf)+buf-p, " %.2x", htc->mcs.rx_mask[i]); - p += scnprintf(p, sizeof(buf)+buf-p, "\nMCS rx highest: %d\n", - le16_to_cpu(htc->mcs.rx_highest)); + p += scnprintf(p, sizeof(buf)+buf-p, "\n"); + + /* If not set this is meaningless */ + if (le16_to_cpu(htc->mcs.rx_highest)) { + p += scnprintf(p, sizeof(buf)+buf-p, + "MCS rx highest: %d Mbps\n", + le16_to_cpu(htc->mcs.rx_highest)); + } + p += scnprintf(p, sizeof(buf)+buf-p, "MCS tx params: %x\n", htc->mcs.tx_params); } -- cgit v1.2.3-70-g09d2 From 31d12926e37291970dd4f6e9940df3897766a81d Mon Sep 17 00:00:00 2001 From: laurent chavey Date: Tue, 15 Dec 2009 11:15:28 +0000 Subject: net: Add rtnetlink init_rcvwnd to set the TCP initial receive window Add rtnetlink init_rcvwnd to set the TCP initial receive window size advertised by passive and active TCP connections. The current Linux TCP implementation limits the advertised TCP initial receive window to the one prescribed by slow start. For short lived TCP connections used for transaction type of traffic (i.e. http requests), bounding the advertised TCP initial receive window results in increased latency to complete the transaction. Support for setting initial congestion window is already supported using rtnetlink init_cwnd, but the feature is useless without the ability to set a larger TCP initial receive window. The rtnetlink init_rcvwnd allows increasing the TCP initial receive window, allowing TCP connection to advertise larger TCP receive window than the ones bounded by slow start. Signed-off-by: Laurent Chavey Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 2 ++ include/net/dst.h | 2 -- include/net/tcp.h | 3 ++- net/ipv4/syncookies.c | 3 ++- net/ipv4/tcp_output.c | 17 +++++++++++++---- net/ipv6/syncookies.c | 3 ++- 6 files changed, 21 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 05330fc5b43..9590364fe8b 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -362,6 +362,8 @@ enum { #define RTAX_FEATURES RTAX_FEATURES RTAX_RTO_MIN, #define RTAX_RTO_MIN RTAX_RTO_MIN + RTAX_INITRWND, +#define RTAX_INITRWND RTAX_INITRWND __RTAX_MAX }; diff --git a/include/net/dst.h b/include/net/dst.h index 39c4a5963e1..ce078cda6b7 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -83,8 +83,6 @@ struct dst_entry { * (L1_CACHE_SIZE would be too much) */ #ifdef CONFIG_64BIT - long __pad_to_align_refcnt[2]; -#else long __pad_to_align_refcnt[1]; #endif /* diff --git a/include/net/tcp.h b/include/net/tcp.h index 185e22baecb..788c99f9859 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -965,7 +965,8 @@ static inline void tcp_sack_reset(struct tcp_options_received *rx_opt) /* Determine a window scaling and initial window to offer. */ extern void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd, __u32 *window_clamp, - int wscale_ok, __u8 *rcv_wscale); + int wscale_ok, __u8 *rcv_wscale, + __u32 init_rcv_wnd); static inline int tcp_win_from_space(int space) { diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 66fd80ef247..5c24db4a3c9 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -358,7 +358,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, tcp_select_initial_window(tcp_full_space(sk), req->mss, &req->rcv_wnd, &req->window_clamp, - ireq->wscale_ok, &rcv_wscale); + ireq->wscale_ok, &rcv_wscale, + dst_metric(&rt->u.dst, RTAX_INITRWND)); ireq->rcv_wscale = rcv_wscale; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 12b2af36eab..4a1605d3f90 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -183,7 +183,8 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) */ void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd, __u32 *window_clamp, - int wscale_ok, __u8 *rcv_wscale) + int wscale_ok, __u8 *rcv_wscale, + __u32 init_rcv_wnd) { unsigned int space = (__space < 0 ? 0 : __space); @@ -232,7 +233,13 @@ void tcp_select_initial_window(int __space, __u32 mss, init_cwnd = 2; else if (mss > 1460) init_cwnd = 3; - if (*rcv_wnd > init_cwnd * mss) + /* when initializing use the value from init_rcv_wnd + * rather than the default from above + */ + if (init_rcv_wnd && + (*rcv_wnd > init_rcv_wnd * mss)) + *rcv_wnd = init_rcv_wnd * mss; + else if (*rcv_wnd > init_cwnd * mss) *rcv_wnd = init_cwnd * mss; } @@ -2417,7 +2424,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, &req->rcv_wnd, &req->window_clamp, ireq->wscale_ok, - &rcv_wscale); + &rcv_wscale, + dst_metric(dst, RTAX_INITRWND)); ireq->rcv_wscale = rcv_wscale; } @@ -2544,7 +2552,8 @@ static void tcp_connect_init(struct sock *sk) &tp->rcv_wnd, &tp->window_clamp, sysctl_tcp_window_scaling, - &rcv_wscale); + &rcv_wscale, + dst_metric(dst, RTAX_INITRWND)); tp->rx_opt.rcv_wscale = rcv_wscale; tp->rcv_ssthresh = tp->rcv_wnd; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 7208a06576c..34d1f0690d7 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -269,7 +269,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) req->window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW); tcp_select_initial_window(tcp_full_space(sk), req->mss, &req->rcv_wnd, &req->window_clamp, - ireq->wscale_ok, &rcv_wscale); + ireq->wscale_ok, &rcv_wscale, + dst_metric(dst, RTAX_INITRWND)); ireq->rcv_wscale = rcv_wscale; -- cgit v1.2.3-70-g09d2 From e5cd6fe391aa8c93560bb7ffdfe334cf4d0a02e4 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Sat, 26 Dec 2009 11:51:00 +0000 Subject: llc: add support for LLC_OPT_PKTINFO Signed-off-by: Octavian Purdila Signed-off-by: David S. Miller --- include/linux/llc.h | 7 +++++++ include/net/llc_conn.h | 1 + net/llc/af_llc.c | 29 +++++++++++++++++++++++++++++ 3 files changed, 37 insertions(+) (limited to 'include/linux') diff --git a/include/linux/llc.h b/include/linux/llc.h index 7733585603f..ad7074ba81a 100644 --- a/include/linux/llc.h +++ b/include/linux/llc.h @@ -36,6 +36,7 @@ enum llc_sockopts { LLC_OPT_BUSY_TMR_EXP, /* busy state expire time (secs). */ LLC_OPT_TX_WIN, /* tx window size. */ LLC_OPT_RX_WIN, /* rx window size. */ + LLC_OPT_PKTINFO, /* ancillary packet information. */ LLC_OPT_MAX }; @@ -70,6 +71,12 @@ enum llc_sockopts { #define LLC_SAP_RM 0xD4 /* Resource Management */ #define LLC_SAP_GLOBAL 0xFF /* Global SAP. */ +struct llc_pktinfo { + int lpi_ifindex; + unsigned char lpi_sap; + unsigned char lpi_mac[IFHWADDRLEN]; +}; + #ifdef __KERNEL__ #define LLC_SAP_DYN_START 0xC0 #define LLC_SAP_DYN_STOP 0xDE diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h index e2374e34989..fe982fd94c4 100644 --- a/include/net/llc_conn.h +++ b/include/net/llc_conn.h @@ -76,6 +76,7 @@ struct llc_sock { u32 rx_pdu_hdr; /* used for saving header of last pdu received and caused sending FRMR. Used for resending FRMR */ + u32 cmsg_flags; }; static inline struct llc_sock *llc_sk(const struct sock *sk) diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 3a66546cad0..ac691fe0807 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -47,6 +47,10 @@ static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout); #define dprintk(args...) #endif +/* Maybe we'll add some more in the future. */ +#define LLC_CMSG_PKTINFO 1 + + /** * llc_ui_next_link_no - return the next unused link number for a sap * @sap: Address of sap to get link number from. @@ -591,6 +595,20 @@ static int llc_wait_data(struct sock *sk, long timeo) return rc; } +static void llc_cmsg_rcv(struct msghdr *msg, struct sk_buff *skb) +{ + struct llc_sock *llc = llc_sk(skb->sk); + + if (llc->cmsg_flags & LLC_CMSG_PKTINFO) { + struct llc_pktinfo info; + + info.lpi_ifindex = llc_sk(skb->sk)->dev->ifindex; + llc_pdu_decode_dsap(skb, &info.lpi_sap); + llc_pdu_decode_da(skb, info.lpi_mac); + put_cmsg(msg, SOL_LLC, LLC_OPT_PKTINFO, sizeof(info), &info); + } +} + /** * llc_ui_accept - accept a new incoming connection. * @sock: Socket which connections arrive on. @@ -812,6 +830,8 @@ copy_uaddr: memcpy(uaddr, llc_ui_skb_cb(skb), sizeof(*uaddr)); msg->msg_namelen = sizeof(*uaddr); } + if (llc_sk(sk)->cmsg_flags) + llc_cmsg_rcv(msg, skb); goto out; } @@ -1030,6 +1050,12 @@ static int llc_ui_setsockopt(struct socket *sock, int level, int optname, goto out; llc->rw = opt; break; + case LLC_OPT_PKTINFO: + if (opt) + llc->cmsg_flags |= LLC_CMSG_PKTINFO; + else + llc->cmsg_flags &= ~LLC_CMSG_PKTINFO; + break; default: rc = -ENOPROTOOPT; goto out; @@ -1083,6 +1109,9 @@ static int llc_ui_getsockopt(struct socket *sock, int level, int optname, val = llc->k; break; case LLC_OPT_RX_WIN: val = llc->rw; break; + case LLC_OPT_PKTINFO: + val = (llc->cmsg_flags & LLC_CMSG_PKTINFO) != 0; + break; default: rc = -ENOPROTOOPT; goto out; -- cgit v1.2.3-70-g09d2 From 49f474331e563a6ecf3b1e87ec27ec5482b3e4f1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sun, 27 Dec 2009 11:51:52 +0100 Subject: perf events: Remove arg from perf sched hooks Since we only ever schedule the local cpu, there is no need to pass the cpu number to the perf sched hooks. This micro-optimizes things a bit. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 12 ++++++------ kernel/perf_event.c | 27 ++++++++++++++------------- kernel/sched.c | 6 +++--- 3 files changed, 23 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index c66b34f75ee..a494e750129 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -746,10 +746,10 @@ extern int perf_max_events; extern const struct pmu *hw_perf_event_init(struct perf_event *event); -extern void perf_event_task_sched_in(struct task_struct *task, int cpu); +extern void perf_event_task_sched_in(struct task_struct *task); extern void perf_event_task_sched_out(struct task_struct *task, - struct task_struct *next, int cpu); -extern void perf_event_task_tick(struct task_struct *task, int cpu); + struct task_struct *next); +extern void perf_event_task_tick(struct task_struct *task); extern int perf_event_init_task(struct task_struct *child); extern void perf_event_exit_task(struct task_struct *child); extern void perf_event_free_task(struct task_struct *task); @@ -870,12 +870,12 @@ extern void perf_event_enable(struct perf_event *event); extern void perf_event_disable(struct perf_event *event); #else static inline void -perf_event_task_sched_in(struct task_struct *task, int cpu) { } +perf_event_task_sched_in(struct task_struct *task) { } static inline void perf_event_task_sched_out(struct task_struct *task, - struct task_struct *next, int cpu) { } + struct task_struct *next) { } static inline void -perf_event_task_tick(struct task_struct *task, int cpu) { } +perf_event_task_tick(struct task_struct *task) { } static inline int perf_event_init_task(struct task_struct *child) { return 0; } static inline void perf_event_exit_task(struct task_struct *child) { } static inline void perf_event_free_task(struct task_struct *task) { } diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 03cc061398d..099bd662daa 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1170,9 +1170,9 @@ static void perf_event_sync_stat(struct perf_event_context *ctx, * not restart the event. */ void perf_event_task_sched_out(struct task_struct *task, - struct task_struct *next, int cpu) + struct task_struct *next) { - struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_event_context *ctx = task->perf_event_ctxp; struct perf_event_context *next_ctx; struct perf_event_context *parent; @@ -1252,8 +1252,9 @@ static void perf_event_cpu_sched_out(struct perf_cpu_context *cpuctx) static void __perf_event_sched_in(struct perf_event_context *ctx, - struct perf_cpu_context *cpuctx, int cpu) + struct perf_cpu_context *cpuctx) { + int cpu = smp_processor_id(); struct perf_event *event; int can_add_hw = 1; @@ -1326,24 +1327,24 @@ __perf_event_sched_in(struct perf_event_context *ctx, * accessing the event control register. If a NMI hits, then it will * keep the event running. */ -void perf_event_task_sched_in(struct task_struct *task, int cpu) +void perf_event_task_sched_in(struct task_struct *task) { - struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_event_context *ctx = task->perf_event_ctxp; if (likely(!ctx)) return; if (cpuctx->task_ctx == ctx) return; - __perf_event_sched_in(ctx, cpuctx, cpu); + __perf_event_sched_in(ctx, cpuctx); cpuctx->task_ctx = ctx; } -static void perf_event_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu) +static void perf_event_cpu_sched_in(struct perf_cpu_context *cpuctx) { struct perf_event_context *ctx = &cpuctx->ctx; - __perf_event_sched_in(ctx, cpuctx, cpu); + __perf_event_sched_in(ctx, cpuctx); } #define MAX_INTERRUPTS (~0ULL) @@ -1461,7 +1462,7 @@ static void rotate_ctx(struct perf_event_context *ctx) raw_spin_unlock(&ctx->lock); } -void perf_event_task_tick(struct task_struct *curr, int cpu) +void perf_event_task_tick(struct task_struct *curr) { struct perf_cpu_context *cpuctx; struct perf_event_context *ctx; @@ -1469,7 +1470,7 @@ void perf_event_task_tick(struct task_struct *curr, int cpu) if (!atomic_read(&nr_events)) return; - cpuctx = &per_cpu(perf_cpu_context, cpu); + cpuctx = &__get_cpu_var(perf_cpu_context); ctx = curr->perf_event_ctxp; perf_ctx_adjust_freq(&cpuctx->ctx); @@ -1484,9 +1485,9 @@ void perf_event_task_tick(struct task_struct *curr, int cpu) if (ctx) rotate_ctx(ctx); - perf_event_cpu_sched_in(cpuctx, cpu); + perf_event_cpu_sched_in(cpuctx); if (ctx) - perf_event_task_sched_in(curr, cpu); + perf_event_task_sched_in(curr); } /* @@ -1527,7 +1528,7 @@ static void perf_event_enable_on_exec(struct task_struct *task) raw_spin_unlock(&ctx->lock); - perf_event_task_sched_in(task, smp_processor_id()); + perf_event_task_sched_in(task); out: local_irq_restore(flags); } diff --git a/kernel/sched.c b/kernel/sched.c index 18cceeecce3..d6527ac0f6e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2752,7 +2752,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) */ prev_state = prev->state; finish_arch_switch(prev); - perf_event_task_sched_in(current, cpu_of(rq)); + perf_event_task_sched_in(current); finish_lock_switch(rq, prev); fire_sched_in_preempt_notifiers(current); @@ -5266,7 +5266,7 @@ void scheduler_tick(void) curr->sched_class->task_tick(rq, curr, 0); raw_spin_unlock(&rq->lock); - perf_event_task_tick(curr, cpu); + perf_event_task_tick(curr); #ifdef CONFIG_SMP rq->idle_at_tick = idle_cpu(cpu); @@ -5480,7 +5480,7 @@ need_resched_nonpreemptible: if (likely(prev != next)) { sched_info_switch(prev, next); - perf_event_task_sched_out(prev, next, cpu); + perf_event_task_sched_out(prev, next); rq->nr_switches++; rq->curr = next; -- cgit v1.2.3-70-g09d2 From 07b139c8c81b97bbe55c68daf0cbeca8b1c609ca Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 21 Dec 2009 14:27:35 +0800 Subject: perf events: Remove CONFIG_EVENT_PROFILE Quoted from Ingo: | This reminds me - i think we should eliminate CONFIG_EVENT_PROFILE - | it's an unnecessary Kconfig complication. If both PERF_EVENTS and | EVENT_TRACING is enabled we should expose generic tracepoints. | | Nor is it limited to event 'profiling', so it has become a misnomer as | well. Signed-off-by: Li Zefan Cc: Frederic Weisbecker Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <4B2F1557.2050705@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 2 +- include/linux/perf_event.h | 2 +- include/linux/syscalls.h | 4 ++-- include/trace/ftrace.h | 12 ++++++------ include/trace/syscall.h | 4 ++-- init/Kconfig | 13 ------------- kernel/perf_event.c | 4 ++-- kernel/trace/Makefile | 4 +++- kernel/trace/trace_events_filter.c | 4 ++-- kernel/trace/trace_kprobe.c | 14 +++++++------- kernel/trace/trace_syscalls.c | 5 ++--- 11 files changed, 28 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 2233c98d80d..0a09e758c7d 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -188,7 +188,7 @@ do { \ __trace_printk(ip, fmt, ##args); \ } while (0) -#ifdef CONFIG_EVENT_PROFILE +#ifdef CONFIG_PERF_EVENTS struct perf_event; extern int ftrace_profile_enable(int event_id); extern void ftrace_profile_disable(int event_id); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index a494e750129..9a1d276db75 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -658,7 +658,7 @@ struct perf_event { perf_overflow_handler_t overflow_handler; -#ifdef CONFIG_EVENT_PROFILE +#ifdef CONFIG_EVENT_TRACING struct event_filter *filter; #endif diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 65793e90d6f..b7c7fcf7790 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -99,7 +99,7 @@ struct perf_event_attr; #define __SC_TEST5(t5, a5, ...) __SC_TEST(t5); __SC_TEST4(__VA_ARGS__) #define __SC_TEST6(t6, a6, ...) __SC_TEST(t6); __SC_TEST5(__VA_ARGS__) -#ifdef CONFIG_EVENT_PROFILE +#ifdef CONFIG_PERF_EVENTS #define TRACE_SYS_ENTER_PROFILE_INIT(sname) \ .profile_enable = prof_sysenter_enable, \ @@ -113,7 +113,7 @@ struct perf_event_attr; #define TRACE_SYS_ENTER_PROFILE_INIT(sname) #define TRACE_SYS_EXIT_PROFILE(sname) #define TRACE_SYS_EXIT_PROFILE_INIT(sname) -#endif +#endif /* CONFIG_PERF_EVENTS */ #ifdef CONFIG_FTRACE_SYSCALLS #define __SC_STR_ADECL1(t, a) #a diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 73523151a73..2fdd36df41f 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -498,7 +498,7 @@ static inline int ftrace_get_offsets_##call( \ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -#ifdef CONFIG_EVENT_PROFILE +#ifdef CONFIG_PERF_EVENTS /* * Generate the functions needed for tracepoint perf_event support. @@ -541,7 +541,7 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -#endif +#endif /* CONFIG_PERF_EVENTS */ /* * Stage 4 of the trace events. @@ -626,7 +626,7 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ * */ -#ifdef CONFIG_EVENT_PROFILE +#ifdef CONFIG_PERF_EVENTS #define _TRACE_PROFILE_INIT(call) \ .profile_enable = ftrace_profile_enable_##call, \ @@ -634,7 +634,7 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ #else #define _TRACE_PROFILE_INIT(call) -#endif +#endif /* CONFIG_PERF_EVENTS */ #undef __entry #define __entry entry @@ -834,7 +834,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ * } */ -#ifdef CONFIG_EVENT_PROFILE +#ifdef CONFIG_PERF_EVENTS #undef __perf_addr #define __perf_addr(a) __addr = (a) @@ -926,7 +926,7 @@ static void ftrace_profile_##call(proto) \ DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -#endif /* CONFIG_EVENT_PROFILE */ +#endif /* CONFIG_PERF_EVENTS */ #undef _TRACE_PROFILE_INIT diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 961fda3556b..3d463dcef29 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -49,12 +49,12 @@ ftrace_format_syscall(struct ftrace_event_call *call, struct trace_seq *s); enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags); enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags); #endif -#ifdef CONFIG_EVENT_PROFILE + +#ifdef CONFIG_PERF_EVENTS int prof_sysenter_enable(struct ftrace_event_call *call); void prof_sysenter_disable(struct ftrace_event_call *call); int prof_sysexit_enable(struct ftrace_event_call *call); void prof_sysexit_disable(struct ftrace_event_call *call); - #endif #endif /* _TRACE_SYSCALL_H */ diff --git a/init/Kconfig b/init/Kconfig index a23da9f0180..06dab27c18d 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -966,19 +966,6 @@ config PERF_EVENTS Say Y if unsure. -config EVENT_PROFILE - bool "Tracepoint profiling sources" - depends on PERF_EVENTS && EVENT_TRACING - default y - help - Allow the use of tracepoints as software performance events. - - When this is enabled, you can create perf events based on - tracepoints using PERF_TYPE_TRACEPOINT and the tracepoint ID - found in debugfs://tracing/events/*/*/id. (The -e/--events - option to the perf tool can parse and interpret symbolic - tracepoints, in the subsystem:tracepoint_name format.) - config PERF_COUNTERS bool "Kernel performance counters (old config option)" depends on HAVE_PERF_EVENTS diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 099bd662daa..5b987b4a98a 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4177,7 +4177,7 @@ static const struct pmu perf_ops_task_clock = { .read = task_clock_perf_event_read, }; -#ifdef CONFIG_EVENT_PROFILE +#ifdef CONFIG_EVENT_TRACING void perf_tp_event(int event_id, u64 addr, u64 count, void *record, int entry_size) @@ -4282,7 +4282,7 @@ static void perf_event_free_filter(struct perf_event *event) { } -#endif /* CONFIG_EVENT_PROFILE */ +#endif /* CONFIG_EVENT_TRACING */ #ifdef CONFIG_HAVE_HW_BREAKPOINT static void bp_perf_event_destroy(struct perf_event *event) diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index cd9ecd89ec7..d00c6fe23f5 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -51,7 +51,9 @@ endif obj-$(CONFIG_EVENT_TRACING) += trace_events.o obj-$(CONFIG_EVENT_TRACING) += trace_export.o obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o -obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o +ifeq ($(CONFIG_PERF_EVENTS),y) +obj-$(CONFIG_EVENT_TRACING) += trace_event_profile.o +endif obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 50504cb228d..74563d7e102 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1360,7 +1360,7 @@ out_unlock: return err; } -#ifdef CONFIG_EVENT_PROFILE +#ifdef CONFIG_PERF_EVENTS void ftrace_profile_free_filter(struct perf_event *event) { @@ -1428,5 +1428,5 @@ out_unlock: return err; } -#endif /* CONFIG_EVENT_PROFILE */ +#endif /* CONFIG_PERF_EVENTS */ diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 375f81a568d..75d75dec226 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1249,7 +1249,7 @@ static int kretprobe_event_show_format(struct ftrace_event_call *call, ", REC->" FIELD_STRING_RETIP); } -#ifdef CONFIG_EVENT_PROFILE +#ifdef CONFIG_PERF_EVENTS /* Kprobe profile handler */ static __kprobes int kprobe_profile_func(struct kprobe *kp, @@ -1407,7 +1407,7 @@ static void probe_profile_disable(struct ftrace_event_call *call) disable_kprobe(&tp->rp.kp); } } -#endif /* CONFIG_EVENT_PROFILE */ +#endif /* CONFIG_PERF_EVENTS */ static __kprobes @@ -1417,10 +1417,10 @@ int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) if (tp->flags & TP_FLAG_TRACE) kprobe_trace_func(kp, regs); -#ifdef CONFIG_EVENT_PROFILE +#ifdef CONFIG_PERF_EVENTS if (tp->flags & TP_FLAG_PROFILE) kprobe_profile_func(kp, regs); -#endif /* CONFIG_EVENT_PROFILE */ +#endif return 0; /* We don't tweek kernel, so just return 0 */ } @@ -1431,10 +1431,10 @@ int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs) if (tp->flags & TP_FLAG_TRACE) kretprobe_trace_func(ri, regs); -#ifdef CONFIG_EVENT_PROFILE +#ifdef CONFIG_PERF_EVENTS if (tp->flags & TP_FLAG_PROFILE) kretprobe_profile_func(ri, regs); -#endif /* CONFIG_EVENT_PROFILE */ +#endif return 0; /* We don't tweek kernel, so just return 0 */ } @@ -1463,7 +1463,7 @@ static int register_probe_event(struct trace_probe *tp) call->regfunc = probe_event_enable; call->unregfunc = probe_event_disable; -#ifdef CONFIG_EVENT_PROFILE +#ifdef CONFIG_PERF_EVENTS call->profile_enable = probe_profile_enable; call->profile_disable = probe_profile_disable; #endif diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 75289f372dd..f694f66d75b 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -421,7 +421,7 @@ int __init init_ftrace_syscalls(void) } core_initcall(init_ftrace_syscalls); -#ifdef CONFIG_EVENT_PROFILE +#ifdef CONFIG_PERF_EVENTS static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls); static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls); @@ -626,6 +626,5 @@ void prof_sysexit_disable(struct ftrace_event_call *call) mutex_unlock(&syscall_trace_lock); } -#endif - +#endif /* CONFIG_PERF_EVENTS */ -- cgit v1.2.3-70-g09d2 From d894837f23f491aa7ed167aae767fc07cfe6e6e6 Mon Sep 17 00:00:00 2001 From: Simon Kagstrom Date: Wed, 23 Dec 2009 11:08:18 +0100 Subject: sched: might_sleep(): Make file parameter const char * Fixes a warning when building with g++: warning: deprecated conversion from string constant to 'char*' And the file parameter use is constant, so mark it as such. Signed-off-by: Simon Kagstrom Cc: peterz@infradead.org LKML-Reference: <20091223110818.442d848e@marrow.netinsight.se> Signed-off-by: Ingo Molnar --- include/linux/kernel.h | 5 +++-- kernel/sched.c | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 3fc9f5aab5f..785d7d1099d 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -124,7 +124,7 @@ extern int _cond_resched(void); #endif #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP - void __might_sleep(char *file, int line, int preempt_offset); + void __might_sleep(const char *file, int line, int preempt_offset); /** * might_sleep - annotation for functions that can sleep * @@ -138,7 +138,8 @@ extern int _cond_resched(void); # define might_sleep() \ do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) #else - static inline void __might_sleep(char *file, int line, int preempt_offset) { } + static inline void __might_sleep(const char *file, int line, + int preempt_offset) { } # define might_sleep() do { might_resched(); } while (0) #endif diff --git a/kernel/sched.c b/kernel/sched.c index c535cc4f642..64298a52eaa 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -9694,7 +9694,7 @@ static inline int preempt_count_equals(int preempt_offset) return (nested == PREEMPT_INATOMIC_BASE + preempt_offset); } -void __might_sleep(char *file, int line, int preempt_offset) +void __might_sleep(const char *file, int line, int preempt_offset) { #ifdef in_atomic static unsigned long prev_jiffy; /* ratelimiting */ -- cgit v1.2.3-70-g09d2 From 8e664fb3fd2b04e3ac5fad7f046000ba54e0e275 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 23 Dec 2009 13:15:38 +0100 Subject: mac80211: split up and insert custom IEs correctly Currently, we insert all user-specified IEs before the HT IE for association, and after the HT IE for probe requests. For association, that's correct only if the user-specified IEs are RSN only, incorrect in all other cases including WPA. Change this to split apart the user-specified IEs in two places for association: before the HT IE (e.g. RSN), after the HT IE (generally empty right now I think?) and after WMM (all other vendor-specific IEs). For probes, split the IEs in different places to be correct according to the spec. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 39 ++++++++++--- net/mac80211/ieee80211_i.h | 4 ++ net/mac80211/util.c | 134 ++++++++++++++++++++++++++++++++++++++------- net/mac80211/work.c | 43 ++++++++++++--- 4 files changed, 184 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index d62edc7df3a..aeea282bd2f 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1085,12 +1085,12 @@ enum ieee80211_eid { WLAN_EID_TIM = 5, WLAN_EID_IBSS_PARAMS = 6, WLAN_EID_CHALLENGE = 16, - /* 802.11d */ + WLAN_EID_COUNTRY = 7, WLAN_EID_HP_PARAMS = 8, WLAN_EID_HP_TABLE = 9, WLAN_EID_REQUEST = 10, - /* 802.11e */ + WLAN_EID_QBSS_LOAD = 11, WLAN_EID_EDCA_PARAM_SET = 12, WLAN_EID_TSPEC = 13, @@ -1113,7 +1113,7 @@ enum ieee80211_eid { WLAN_EID_PREP = 69, WLAN_EID_PERR = 70, WLAN_EID_RANN = 49, /* compatible with FreeBSD */ - /* 802.11h */ + WLAN_EID_PWR_CONSTRAINT = 32, WLAN_EID_PWR_CAPABILITY = 33, WLAN_EID_TPC_REQUEST = 34, @@ -1124,20 +1124,41 @@ enum ieee80211_eid { WLAN_EID_MEASURE_REPORT = 39, WLAN_EID_QUIET = 40, WLAN_EID_IBSS_DFS = 41, - /* 802.11g */ + WLAN_EID_ERP_INFO = 42, WLAN_EID_EXT_SUPP_RATES = 50, - /* 802.11n */ + WLAN_EID_HT_CAPABILITY = 45, WLAN_EID_HT_INFORMATION = 61, - /* 802.11i */ + WLAN_EID_RSN = 48, - WLAN_EID_TIMEOUT_INTERVAL = 56, - WLAN_EID_MMIE = 76 /* 802.11w */, + WLAN_EID_MMIE = 76, WLAN_EID_WPA = 221, WLAN_EID_GENERIC = 221, WLAN_EID_VENDOR_SPECIFIC = 221, - WLAN_EID_QOS_PARAMETER = 222 + WLAN_EID_QOS_PARAMETER = 222, + + WLAN_EID_AP_CHAN_REPORT = 51, + WLAN_EID_NEIGHBOR_REPORT = 52, + WLAN_EID_RCPI = 53, + WLAN_EID_BSS_AVG_ACCESS_DELAY = 63, + WLAN_EID_ANTENNA_INFO = 64, + WLAN_EID_RSNI = 65, + WLAN_EID_MEASUREMENT_PILOT_TX_INFO = 66, + WLAN_EID_BSS_AVAILABLE_CAPACITY = 67, + WLAN_EID_BSS_AC_ACCESS_DELAY = 68, + WLAN_EID_RRM_ENABLED_CAPABILITIES = 70, + WLAN_EID_MULTIPLE_BSSID = 71, + + WLAN_EID_MOBILITY_DOMAIN = 54, + WLAN_EID_FAST_BSS_TRANSITION = 55, + WLAN_EID_TIMEOUT_INTERVAL = 56, + WLAN_EID_RIC_DATA = 57, + WLAN_EID_RIC_DESCRIPTOR = 75, + + WLAN_EID_DSE_REGISTERED_LOCATION = 58, + WLAN_EID_SUPPORTED_REGULATORY_CLASSES = 59, + WLAN_EID_EXT_CHANSWITCH_ANN = 60, }; /* Action category code */ diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 97b6076b492..6ea4ffbf84d 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1148,6 +1148,10 @@ int __ieee80211_request_smps(struct ieee80211_sub_if_data *sdata, void ieee80211_recalc_smps(struct ieee80211_local *local, struct ieee80211_sub_if_data *forsdata); +size_t ieee80211_ie_split(const u8 *ies, size_t ielen, + const u8 *ids, int n_ids, size_t offset); +size_t ieee80211_ie_split_vendor(const u8 *ies, size_t ielen, size_t offset); + /* internal work items */ void ieee80211_work_init(struct ieee80211_local *local); void ieee80211_add_work(struct ieee80211_work *wk); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 5ffe9e831b6..1fdb80ff924 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -881,30 +881,66 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, enum ieee80211_band band) { struct ieee80211_supported_band *sband; - u8 *pos, *supp_rates_len, *esupp_rates_len = NULL; - int i; + u8 *pos; + size_t offset = 0, noffset; + int supp_rates_len, i; sband = local->hw.wiphy->bands[band]; pos = buffer; + supp_rates_len = min_t(int, sband->n_bitrates, 8); + *pos++ = WLAN_EID_SUPP_RATES; - supp_rates_len = pos; - *pos++ = 0; - - for (i = 0; i < sband->n_bitrates; i++) { - struct ieee80211_rate *rate = &sband->bitrates[i]; - - if (esupp_rates_len) { - *esupp_rates_len += 1; - } else if (*supp_rates_len == 8) { - *pos++ = WLAN_EID_EXT_SUPP_RATES; - esupp_rates_len = pos; - *pos++ = 1; - } else - *supp_rates_len += 1; + *pos++ = supp_rates_len; - *pos++ = rate->bitrate / 5; + for (i = 0; i < supp_rates_len; i++) { + int rate = sband->bitrates[i].bitrate; + *pos++ = (u8) (rate / 5); + } + + /* insert "request information" if in custom IEs */ + if (ie && ie_len) { + static const u8 before_extrates[] = { + WLAN_EID_SSID, + WLAN_EID_SUPP_RATES, + WLAN_EID_REQUEST, + }; + noffset = ieee80211_ie_split(ie, ie_len, + before_extrates, + ARRAY_SIZE(before_extrates), + offset); + memcpy(pos, ie + offset, noffset - offset); + pos += noffset - offset; + offset = noffset; + } + + if (sband->n_bitrates > i) { + *pos++ = WLAN_EID_EXT_SUPP_RATES; + *pos++ = sband->n_bitrates - i; + + for (; i < sband->n_bitrates; i++) { + int rate = sband->bitrates[i].bitrate; + *pos++ = (u8) (rate / 5); + } + } + + /* insert custom IEs that go before HT */ + if (ie && ie_len) { + static const u8 before_ht[] = { + WLAN_EID_SSID, + WLAN_EID_SUPP_RATES, + WLAN_EID_REQUEST, + WLAN_EID_EXT_SUPP_RATES, + WLAN_EID_DS_PARAMS, + WLAN_EID_SUPPORTED_REGULATORY_CLASSES, + }; + noffset = ieee80211_ie_split(ie, ie_len, + before_ht, ARRAY_SIZE(before_ht), + offset); + memcpy(pos, ie + offset, noffset - offset); + pos += noffset - offset; + offset = noffset; } if (sband->ht_cap.ht_supported) { @@ -936,9 +972,11 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, * that calculates local->scan_ies_len. */ - if (ie) { - memcpy(pos, ie, ie_len); - pos += ie_len; + /* add any remaining custom IEs */ + if (ie && ie_len) { + noffset = ie_len; + memcpy(pos, ie + offset, noffset - offset); + pos += noffset - offset; } return pos - buffer; @@ -1252,3 +1290,59 @@ void ieee80211_recalc_smps(struct ieee80211_local *local, /* changed flag is auto-detected for this */ ieee80211_hw_config(local, 0); } + +static bool ieee80211_id_in_list(const u8 *ids, int n_ids, u8 id) +{ + int i; + + for (i = 0; i < n_ids; i++) + if (ids[i] == id) + return true; + return false; +} + +/** + * ieee80211_ie_split - split an IE buffer according to ordering + * + * @ies: the IE buffer + * @ielen: the length of the IE buffer + * @ids: an array with element IDs that are allowed before + * the split + * @n_ids: the size of the element ID array + * @offset: offset where to start splitting in the buffer + * + * This function splits an IE buffer by updating the @offset + * variable to point to the location where the buffer should be + * split. + * + * It assumes that the given IE buffer is well-formed, this + * has to be guaranteed by the caller! + * + * It also assumes that the IEs in the buffer are ordered + * correctly, if not the result of using this function will not + * be ordered correctly either, i.e. it does no reordering. + * + * The function returns the offset where the next part of the + * buffer starts, which may be @ielen if the entire (remainder) + * of the buffer should be used. + */ +size_t ieee80211_ie_split(const u8 *ies, size_t ielen, + const u8 *ids, int n_ids, size_t offset) +{ + size_t pos = offset; + + while (pos < ielen && ieee80211_id_in_list(ids, n_ids, ies[pos])) + pos += 2 + ies[pos + 1]; + + return pos; +} + +size_t ieee80211_ie_split_vendor(const u8 *ies, size_t ielen, size_t offset) +{ + size_t pos = offset; + + while (pos < ielen && ies[pos] != WLAN_EID_VENDOR_SPECIFIC) + pos += 2 + ies[pos + 1]; + + return pos; +} diff --git a/net/mac80211/work.c b/net/mac80211/work.c index c03c22d5bca..affdd10b67a 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -204,6 +204,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt; u8 *pos; const u8 *ies; + size_t offset = 0, noffset; int i, len, count, rates_len, supp_rates_len; u16 capab; struct ieee80211_supported_band *sband; @@ -337,14 +338,26 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, } } - /* - * XXX: These IEs could contain (vendor-specified) - * IEs that belong after HT -- the buffer may - * need to be split up. - */ + /* if present, add any custom IEs that go before HT */ if (wk->ie_len && wk->ie) { - pos = skb_put(skb, wk->ie_len); - memcpy(pos, wk->ie, wk->ie_len); + static const u8 before_ht[] = { + WLAN_EID_SSID, + WLAN_EID_SUPP_RATES, + WLAN_EID_EXT_SUPP_RATES, + WLAN_EID_PWR_CAPABILITY, + WLAN_EID_SUPPORTED_CHANNELS, + WLAN_EID_RSN, + WLAN_EID_QOS_CAPA, + WLAN_EID_RRM_ENABLED_CAPABILITIES, + WLAN_EID_MOBILITY_DOMAIN, + WLAN_EID_SUPPORTED_REGULATORY_CLASSES, + }; + noffset = ieee80211_ie_split(wk->ie, wk->ie_len, + before_ht, ARRAY_SIZE(before_ht), + offset); + pos = skb_put(skb, noffset - offset); + memcpy(pos, wk->ie + offset, noffset - offset); + offset = noffset; } if (wk->assoc.use_11n && wk->assoc.wmm_used && @@ -352,6 +365,15 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, ieee80211_add_ht_ie(skb, wk->assoc.ht_information_ie, sband, wk->chan, wk->assoc.smps); + /* if present, add any custom non-vendor IEs that go after HT */ + if (wk->ie_len && wk->ie) { + noffset = ieee80211_ie_split_vendor(wk->ie, wk->ie_len, + offset); + pos = skb_put(skb, noffset - offset); + memcpy(pos, wk->ie + offset, noffset - offset); + offset = noffset; + } + if (wk->assoc.wmm_used && local->hw.queues >= 4) { pos = skb_put(skb, 9); *pos++ = WLAN_EID_VENDOR_SPECIFIC; @@ -365,6 +387,13 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, *pos++ = 0; } + /* add any remaining custom (i.e. vendor specific here) IEs */ + if (wk->ie_len && wk->ie) { + noffset = wk->ie_len; + pos = skb_put(skb, noffset - offset); + memcpy(pos, wk->ie + offset, noffset - offset); + } + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; ieee80211_tx_skb(sdata, skb); } -- cgit v1.2.3-70-g09d2 From 9588bbd5529461a3dacd435bf239c84c3508f569 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Wed, 23 Dec 2009 13:15:41 +0100 Subject: cfg80211: add remain-on-channel command Add new commands for requesting the driver to remain awake on a specified channel for the specified amount of time (and another command to cancel such an operation). This can be used to implement userspace-controlled off-channel operations, like Public Action frame exchange on another channel than the operation channel. The off-channel operation should behave similarly to scan, i.e. the local station (if associated) moves into power save mode to request the AP to buffer frames for it and then moves to the other channel to allow the off-channel operation to be completed. The duration parameter can be used to request enough time to receive a response from the target station. Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 36 ++++++++ include/net/cfg80211.h | 47 +++++++++++ net/wireless/chan.c | 41 +++++---- net/wireless/core.h | 3 + net/wireless/mlme.c | 27 ++++++ net/wireless/nl80211.c | 218 +++++++++++++++++++++++++++++++++++++++++++++++- net/wireless/nl80211.h | 11 +++ 7 files changed, 368 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index da8ea2e1927..2bfbe88837e 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -270,6 +270,31 @@ * @NL80211_CMD_SET_WIPHY_NETNS: Set a wiphy's netns. Note that all devices * associated with this wiphy must be down and will follow. * + * @NL80211_CMD_REMAIN_ON_CHANNEL: Request to remain awake on the specified + * channel for the specified amount of time. This can be used to do + * off-channel operations like transmit a Public Action frame and wait for + * a response while being associated to an AP on another channel. + * %NL80211_ATTR_WIPHY or %NL80211_ATTR_IFINDEX is used to specify which + * radio is used. %NL80211_ATTR_WIPHY_FREQ is used to specify the + * frequency for the operation and %NL80211_ATTR_WIPHY_CHANNEL_TYPE may be + * optionally used to specify additional channel parameters. + * %NL80211_ATTR_DURATION is used to specify the duration in milliseconds + * to remain on the channel. This command is also used as an event to + * notify when the requested duration starts (it may take a while for the + * driver to schedule this time due to other concurrent needs for the + * radio). + * When called, this operation returns a cookie (%NL80211_ATTR_COOKIE) + * that will be included with any events pertaining to this request; + * the cookie is also used to cancel the request. + * @NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL: This command can be used to cancel a + * pending remain-on-channel duration if the desired operation has been + * completed prior to expiration of the originally requested duration. + * %NL80211_ATTR_WIPHY or %NL80211_ATTR_IFINDEX is used to specify the + * radio. The %NL80211_ATTR_COOKIE attribute must be given as well to + * uniquely identify the request. + * This command is also used as an event to notify when a requested + * remain-on-channel duration has expired. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -353,6 +378,9 @@ enum nl80211_commands { NL80211_CMD_DEL_PMKSA, NL80211_CMD_FLUSH_PMKSA, + NL80211_CMD_REMAIN_ON_CHANNEL, + NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -606,6 +634,10 @@ enum nl80211_commands { * @NL80211_ATTR_MAX_NUM_PMKIDS: maximum number of PMKIDs a firmware can * cache, a wiphy attribute. * + * @NL80211_ATTR_DURATION: Duration of an operation in milliseconds, u32. + * + * @NL80211_ATTR_COOKIE: Generic 64-bit cookie to identify objects. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -743,6 +775,10 @@ enum nl80211_attrs { NL80211_ATTR_PMKID, NL80211_ATTR_MAX_NUM_PMKIDS, + NL80211_ATTR_DURATION, + + NL80211_ATTR_COOKIE, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 542a477a94d..b66beb05205 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -988,6 +988,15 @@ struct cfg80211_pmksa { * * @dump_survey: get site survey information. * + * @remain_on_channel: Request the driver to remain awake on the specified + * channel for the specified duration to complete an off-channel + * operation (e.g., public action frame exchange). When the driver is + * ready on the requested channel, it must indicate this with an event + * notification by calling cfg80211_ready_on_channel(). + * @cancel_remain_on_channel: Cancel an on-going remain-on-channel operation. + * This allows the operation to be terminated prior to timeout based on + * the duration value. + * * @testmode_cmd: run a test mode command * * @set_pmksa: Cache a PMKID for a BSSID. This is mostly useful for fullmac @@ -1123,6 +1132,16 @@ struct cfg80211_ops { struct cfg80211_pmksa *pmksa); int (*flush_pmksa)(struct wiphy *wiphy, struct net_device *netdev); + int (*remain_on_channel)(struct wiphy *wiphy, + struct net_device *dev, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + unsigned int duration, + u64 *cookie); + int (*cancel_remain_on_channel)(struct wiphy *wiphy, + struct net_device *dev, + u64 cookie); + /* some temporary stuff to finish wext */ int (*set_power_mgmt)(struct wiphy *wiphy, struct net_device *dev, bool enabled, int timeout); @@ -2147,5 +2166,33 @@ void cfg80211_roamed(struct net_device *dev, const u8 *bssid, void cfg80211_disconnected(struct net_device *dev, u16 reason, u8 *ie, size_t ie_len, gfp_t gfp); +/** + * cfg80211_ready_on_channel - notification of remain_on_channel start + * @dev: network device + * @cookie: the request cookie + * @chan: The current channel (from remain_on_channel request) + * @channel_type: Channel type + * @duration: Duration in milliseconds that the driver intents to remain on the + * channel + * @gfp: allocation flags + */ +void cfg80211_ready_on_channel(struct net_device *dev, u64 cookie, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + unsigned int duration, gfp_t gfp); + +/** + * cfg80211_remain_on_channel_expired - remain_on_channel duration expired + * @dev: network device + * @cookie: the request cookie + * @chan: The current channel (from remain_on_channel request) + * @channel_type: Channel type + * @gfp: allocation flags + */ +void cfg80211_remain_on_channel_expired(struct net_device *dev, + u64 cookie, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + gfp_t gfp); #endif /* __NET_CFG80211_H */ diff --git a/net/wireless/chan.c b/net/wireless/chan.c index a46ac6c9b36..bf1737fc9a7 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -41,44 +41,57 @@ rdev_fixed_channel(struct cfg80211_registered_device *rdev, return result; } -int rdev_set_freq(struct cfg80211_registered_device *rdev, - struct wireless_dev *for_wdev, +struct ieee80211_channel * +rdev_freq_to_chan(struct cfg80211_registered_device *rdev, int freq, enum nl80211_channel_type channel_type) { struct ieee80211_channel *chan; struct ieee80211_sta_ht_cap *ht_cap; - int result; - - if (rdev_fixed_channel(rdev, for_wdev)) - return -EBUSY; - - if (!rdev->ops->set_channel) - return -EOPNOTSUPP; chan = ieee80211_get_channel(&rdev->wiphy, freq); /* Primary channel not allowed */ if (!chan || chan->flags & IEEE80211_CHAN_DISABLED) - return -EINVAL; + return NULL; if (channel_type == NL80211_CHAN_HT40MINUS && chan->flags & IEEE80211_CHAN_NO_HT40MINUS) - return -EINVAL; + return NULL; else if (channel_type == NL80211_CHAN_HT40PLUS && chan->flags & IEEE80211_CHAN_NO_HT40PLUS) - return -EINVAL; + return NULL; ht_cap = &rdev->wiphy.bands[chan->band]->ht_cap; if (channel_type != NL80211_CHAN_NO_HT) { if (!ht_cap->ht_supported) - return -EINVAL; + return NULL; if (!(ht_cap->cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) || ht_cap->cap & IEEE80211_HT_CAP_40MHZ_INTOLERANT) - return -EINVAL; + return NULL; } + return chan; +} + +int rdev_set_freq(struct cfg80211_registered_device *rdev, + struct wireless_dev *for_wdev, + int freq, enum nl80211_channel_type channel_type) +{ + struct ieee80211_channel *chan; + int result; + + if (rdev_fixed_channel(rdev, for_wdev)) + return -EBUSY; + + if (!rdev->ops->set_channel) + return -EOPNOTSUPP; + + chan = rdev_freq_to_chan(rdev, freq, channel_type); + if (!chan) + return -EINVAL; + result = rdev->ops->set_channel(&rdev->wiphy, chan, channel_type); if (result) return result; diff --git a/net/wireless/core.h b/net/wireless/core.h index 35b71212714..30ec95f05b5 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -374,6 +374,9 @@ void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev); struct ieee80211_channel * rdev_fixed_channel(struct cfg80211_registered_device *rdev, struct wireless_dev *for_wdev); +struct ieee80211_channel * +rdev_freq_to_chan(struct cfg80211_registered_device *rdev, + int freq, enum nl80211_channel_type channel_type); int rdev_set_freq(struct cfg80211_registered_device *rdev, struct wireless_dev *for_wdev, int freq, enum nl80211_channel_type channel_type); diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index acaeaa784d6..11f6469b3f9 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -680,3 +680,30 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, } } } + +void cfg80211_ready_on_channel(struct net_device *dev, u64 cookie, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + unsigned int duration, gfp_t gfp) +{ + struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + nl80211_send_remain_on_channel(rdev, dev, cookie, chan, channel_type, + duration, gfp); +} +EXPORT_SYMBOL(cfg80211_ready_on_channel); + +void cfg80211_remain_on_channel_expired(struct net_device *dev, + u64 cookie, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + gfp_t gfp) +{ + struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + nl80211_send_remain_on_channel_cancel(rdev, dev, cookie, chan, + channel_type, gfp); +} +EXPORT_SYMBOL(cfg80211_remain_on_channel_expired); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 60f854377f9..ff857f10cb8 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -141,6 +141,8 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_4ADDR] = { .type = NLA_U8 }, [NL80211_ATTR_PMKID] = { .type = NLA_BINARY, .len = WLAN_PMKID_LEN }, + [NL80211_ATTR_DURATION] = { .type = NLA_U32 }, + [NL80211_ATTR_COOKIE] = { .type = NLA_U64 }, }; /* policy for the attributes */ @@ -569,6 +571,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, CMD(set_pmksa, SET_PMKSA); CMD(del_pmksa, DEL_PMKSA); CMD(flush_pmksa, FLUSH_PMKSA); + CMD(remain_on_channel, REMAIN_ON_CHANNEL); if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { i++; NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS); @@ -4283,6 +4286,143 @@ static int nl80211_flush_pmksa(struct sk_buff *skb, struct genl_info *info) } +static int nl80211_remain_on_channel(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev; + struct net_device *dev; + struct ieee80211_channel *chan; + struct sk_buff *msg; + void *hdr; + u64 cookie; + enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; + u32 freq, duration; + int err; + + if (!info->attrs[NL80211_ATTR_WIPHY_FREQ] || + !info->attrs[NL80211_ATTR_DURATION]) + return -EINVAL; + + duration = nla_get_u32(info->attrs[NL80211_ATTR_DURATION]); + + /* + * We should be on that channel for at least one jiffie, + * and more than 5 seconds seems excessive. + */ + if (!duration || !msecs_to_jiffies(duration) || duration > 5000) + return -EINVAL; + + rtnl_lock(); + + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); + if (err) + goto unlock_rtnl; + + if (!rdev->ops->remain_on_channel) { + err = -EOPNOTSUPP; + goto out; + } + + if (!netif_running(dev)) { + err = -ENETDOWN; + goto out; + } + + if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) { + channel_type = nla_get_u32( + info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]); + if (channel_type != NL80211_CHAN_NO_HT && + channel_type != NL80211_CHAN_HT20 && + channel_type != NL80211_CHAN_HT40PLUS && + channel_type != NL80211_CHAN_HT40MINUS) + err = -EINVAL; + goto out; + } + + freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]); + chan = rdev_freq_to_chan(rdev, freq, channel_type); + if (chan == NULL) { + err = -EINVAL; + goto out; + } + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) { + err = -ENOMEM; + goto out; + } + + hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + NL80211_CMD_REMAIN_ON_CHANNEL); + + if (IS_ERR(hdr)) { + err = PTR_ERR(hdr); + goto free_msg; + } + + err = rdev->ops->remain_on_channel(&rdev->wiphy, dev, chan, + channel_type, duration, &cookie); + + if (err) + goto free_msg; + + NLA_PUT_U64(msg, NL80211_ATTR_COOKIE, cookie); + + genlmsg_end(msg, hdr); + err = genlmsg_reply(msg, info); + goto out; + + nla_put_failure: + err = -ENOBUFS; + free_msg: + nlmsg_free(msg); + out: + cfg80211_unlock_rdev(rdev); + dev_put(dev); + unlock_rtnl: + rtnl_unlock(); + return err; +} + +static int nl80211_cancel_remain_on_channel(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev; + struct net_device *dev; + u64 cookie; + int err; + + if (!info->attrs[NL80211_ATTR_COOKIE]) + return -EINVAL; + + rtnl_lock(); + + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); + if (err) + goto unlock_rtnl; + + if (!rdev->ops->cancel_remain_on_channel) { + err = -EOPNOTSUPP; + goto out; + } + + if (!netif_running(dev)) { + err = -ENETDOWN; + goto out; + } + + cookie = nla_get_u64(info->attrs[NL80211_ATTR_COOKIE]); + + err = rdev->ops->cancel_remain_on_channel(&rdev->wiphy, dev, cookie); + + out: + cfg80211_unlock_rdev(rdev); + dev_put(dev); + unlock_rtnl: + rtnl_unlock(); + return err; +} + static struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_WIPHY, @@ -4545,8 +4685,20 @@ static struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, - + { + .cmd = NL80211_CMD_REMAIN_ON_CHANNEL, + .doit = nl80211_remain_on_channel, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL, + .doit = nl80211_cancel_remain_on_channel, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, }; + static struct genl_multicast_group nl80211_mlme_mcgrp = { .name = "mlme", }; @@ -5134,6 +5286,70 @@ nla_put_failure: nlmsg_free(msg); } +static void nl80211_send_remain_on_chan_event( + int cmd, struct cfg80211_registered_device *rdev, + struct net_device *netdev, u64 cookie, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + unsigned int duration, gfp_t gfp) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, cmd); + if (!hdr) { + nlmsg_free(msg); + return; + } + + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_FREQ, chan->center_freq); + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_CHANNEL_TYPE, channel_type); + NLA_PUT_U64(msg, NL80211_ATTR_COOKIE, cookie); + + if (cmd == NL80211_CMD_REMAIN_ON_CHANNEL) + NLA_PUT_U32(msg, NL80211_ATTR_DURATION, duration); + + if (genlmsg_end(msg, hdr) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); +} + +void nl80211_send_remain_on_channel(struct cfg80211_registered_device *rdev, + struct net_device *netdev, u64 cookie, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + unsigned int duration, gfp_t gfp) +{ + nl80211_send_remain_on_chan_event(NL80211_CMD_REMAIN_ON_CHANNEL, + rdev, netdev, cookie, chan, + channel_type, duration, gfp); +} + +void nl80211_send_remain_on_channel_cancel( + struct cfg80211_registered_device *rdev, struct net_device *netdev, + u64 cookie, struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, gfp_t gfp) +{ + nl80211_send_remain_on_chan_event(NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL, + rdev, netdev, cookie, chan, + channel_type, 0, gfp); +} + /* initialisation/exit functions */ int nl80211_init(void) diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 44cc2a76a1b..a5e2de419b7 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -59,4 +59,15 @@ void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *bssid, gfp_t gfp); +void nl80211_send_remain_on_channel(struct cfg80211_registered_device *rdev, + struct net_device *netdev, + u64 cookie, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + unsigned int duration, gfp_t gfp); +void nl80211_send_remain_on_channel_cancel( + struct cfg80211_registered_device *rdev, struct net_device *netdev, + u64 cookie, struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, gfp_t gfp); + #endif /* __NET_WIRELESS_NL80211_H */ -- cgit v1.2.3-70-g09d2 From 1f8fef7b3388b5a976e80839679b5bae581a1091 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Thu, 24 Dec 2009 11:59:57 +0100 Subject: firewire: add fw_csr_string() helper function The core (sysfs attributes), the firedtv driver, and possible future drivers all read strings from some configuration ROM directory. Factor out the generic code from show_text_leaf() into a new helper function, modified slightly to handle arbitrary buffer sizes. Signed-off-by: Clemens Ladisch Signed-off-by: Stefan Richter --- drivers/firewire/core-device.c | 110 ++++++++++++++++++++++---------- drivers/media/dvb/firewire/firedtv-fw.c | 39 ++--------- include/linux/firewire.h | 2 + 3 files changed, 84 insertions(+), 67 deletions(-) (limited to 'include/linux') diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c index 9d0dfcbe2c1..a39e4344cd5 100644 --- a/drivers/firewire/core-device.c +++ b/drivers/firewire/core-device.c @@ -59,6 +59,67 @@ int fw_csr_iterator_next(struct fw_csr_iterator *ci, int *key, int *value) } EXPORT_SYMBOL(fw_csr_iterator_next); +static u32 *search_leaf(u32 *directory, int search_key) +{ + struct fw_csr_iterator ci; + int last_key = 0, key, value; + + fw_csr_iterator_init(&ci, directory); + while (fw_csr_iterator_next(&ci, &key, &value)) { + if (last_key == search_key && + key == (CSR_DESCRIPTOR | CSR_LEAF)) + return ci.p - 1 + value; + last_key = key; + } + return NULL; +} + +static int textual_leaf_to_string(u32 *block, char *buf, size_t size) +{ + unsigned int quadlets, length; + + if (!size || !buf) + return -EINVAL; + + quadlets = min(block[0] >> 16, 256u); + if (quadlets < 2) + return -ENODATA; + + if (block[1] != 0 || block[2] != 0) + /* unknown language/character set */ + return -ENODATA; + + block += 3; + quadlets -= 2; + for (length = 0; length < quadlets * 4 && length + 1 < size; length++) { + char c = block[length / 4] >> (24 - 8 * (length % 4)); + if (c == '\0') + break; + buf[length] = c; + } + buf[length] = '\0'; + return length; +} + +/** + * fw_csr_string - reads a string from the configuration ROM + * @directory: device or unit directory; + * fw_device->config_rom+5 or fw_unit->directory + * @key: the key of the preceding directory entry + * @buf: where to put the string + * @size: size of @buf, in bytes + * + * Returns string length (>= 0) or error code (< 0). + */ +int fw_csr_string(u32 *directory, int key, char *buf, size_t size) +{ + u32 *leaf = search_leaf(directory, key); + if (!leaf) + return -ENOENT; + return textual_leaf_to_string(leaf, buf, size); +} +EXPORT_SYMBOL(fw_csr_string); + static bool is_fw_unit(struct device *dev); static int match_unit_directory(u32 *directory, u32 match_flags, @@ -226,10 +287,10 @@ static ssize_t show_text_leaf(struct device *dev, { struct config_rom_attribute *attr = container_of(dattr, struct config_rom_attribute, attr); - struct fw_csr_iterator ci; - u32 *dir, *block = NULL, *p, *end; - int length, key, value, last_key = 0, ret = -ENOENT; - char *b; + u32 *dir; + size_t bufsize; + char dummy_buf[2]; + int ret; down_read(&fw_device_rwsem); @@ -238,40 +299,23 @@ static ssize_t show_text_leaf(struct device *dev, else dir = fw_device(dev)->config_rom + 5; - fw_csr_iterator_init(&ci, dir); - while (fw_csr_iterator_next(&ci, &key, &value)) { - if (attr->key == last_key && - key == (CSR_DESCRIPTOR | CSR_LEAF)) - block = ci.p - 1 + value; - last_key = key; + if (buf) { + bufsize = PAGE_SIZE - 1; + } else { + buf = dummy_buf; + bufsize = 1; } - if (block == NULL) - goto out; - - length = min(block[0] >> 16, 256U); - if (length < 3) - goto out; - - if (block[1] != 0 || block[2] != 0) - /* Unknown encoding. */ - goto out; + ret = fw_csr_string(dir, attr->key, buf, bufsize); - if (buf == NULL) { - ret = length * 4; - goto out; + if (ret >= 0) { + /* Strip trailing whitespace and add newline. */ + while (ret > 0 && isspace(buf[ret - 1])) + ret--; + strcpy(buf + ret, "\n"); + ret++; } - b = buf; - end = &block[length + 1]; - for (p = &block[3]; p < end; p++, b += 4) - * (u32 *) b = (__force u32) __cpu_to_be32(*p); - - /* Strip trailing whitespace and add newline. */ - while (b--, (isspace(*b) || *b == '\0') && b > buf); - strcpy(b + 1, "\n"); - ret = b + 2 - buf; - out: up_read(&fw_device_rwsem); return ret; diff --git a/drivers/media/dvb/firewire/firedtv-fw.c b/drivers/media/dvb/firewire/firedtv-fw.c index 6223bf01efe..4253b7ab009 100644 --- a/drivers/media/dvb/firewire/firedtv-fw.c +++ b/drivers/media/dvb/firewire/firedtv-fw.c @@ -239,47 +239,18 @@ static const struct fw_address_region fcp_region = { }; /* Adjust the template string if models with longer names appear. */ -#define MAX_MODEL_NAME_LEN ((int)DIV_ROUND_UP(sizeof("FireDTV ????"), 4)) - -static size_t model_name(u32 *directory, __be32 *buffer) -{ - struct fw_csr_iterator ci; - int i, length, key, value, last_key = 0; - u32 *block = NULL; - - fw_csr_iterator_init(&ci, directory); - while (fw_csr_iterator_next(&ci, &key, &value)) { - if (last_key == CSR_MODEL && - key == (CSR_DESCRIPTOR | CSR_LEAF)) - block = ci.p - 1 + value; - last_key = key; - } - - if (block == NULL) - return 0; - - length = min((int)(block[0] >> 16) - 2, MAX_MODEL_NAME_LEN); - if (length <= 0) - return 0; - - /* fast-forward to text string */ - block += 3; - - for (i = 0; i < length; i++) - buffer[i] = cpu_to_be32(block[i]); - - return length * 4; -} +#define MAX_MODEL_NAME_LEN sizeof("FireDTV ????") static int node_probe(struct device *dev) { struct firedtv *fdtv; - __be32 name[MAX_MODEL_NAME_LEN]; + char name[MAX_MODEL_NAME_LEN]; int name_len, err; - name_len = model_name(fw_unit(dev)->directory, name); + name_len = fw_csr_string(fw_unit(dev)->directory, CSR_MODEL, + name, sizeof(name)); - fdtv = fdtv_alloc(dev, &backend, (char *)name, name_len); + fdtv = fdtv_alloc(dev, &backend, name, name_len >= 0 ? name_len : 0); if (!fdtv) return -ENOMEM; diff --git a/include/linux/firewire.h b/include/linux/firewire.h index a0e67150a72..5246869d808 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -72,6 +72,8 @@ struct fw_csr_iterator { void fw_csr_iterator_init(struct fw_csr_iterator *ci, u32 *p); int fw_csr_iterator_next(struct fw_csr_iterator *ci, int *key, int *value); +int fw_csr_string(u32 *directory, int key, char *buf, size_t size); + extern struct bus_type fw_bus_type; struct fw_card_driver; -- cgit v1.2.3-70-g09d2 From 3c2c58cb33b3b15a2c4871babeec8fe1456e1db6 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sat, 26 Dec 2009 01:43:21 +0100 Subject: firewire: core: fw_csr_string addendum Witespace and comment changes, and a different way to say i + 1 < end. Signed-off-by: Stefan Richter --- drivers/firewire/core-device.c | 26 ++++++++++++++++---------- include/linux/firewire.h | 1 - 2 files changed, 16 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c index a39e4344cd5..5d5c6a68983 100644 --- a/drivers/firewire/core-device.c +++ b/drivers/firewire/core-device.c @@ -69,19 +69,22 @@ static u32 *search_leaf(u32 *directory, int search_key) if (last_key == search_key && key == (CSR_DESCRIPTOR | CSR_LEAF)) return ci.p - 1 + value; + last_key = key; } + return NULL; } static int textual_leaf_to_string(u32 *block, char *buf, size_t size) { - unsigned int quadlets, length; + unsigned int quadlets, i; + char c; if (!size || !buf) return -EINVAL; - quadlets = min(block[0] >> 16, 256u); + quadlets = min(block[0] >> 16, 256U); if (quadlets < 2) return -ENODATA; @@ -91,31 +94,34 @@ static int textual_leaf_to_string(u32 *block, char *buf, size_t size) block += 3; quadlets -= 2; - for (length = 0; length < quadlets * 4 && length + 1 < size; length++) { - char c = block[length / 4] >> (24 - 8 * (length % 4)); + for (i = 0; i < quadlets * 4 && i < size - 1; i++) { + c = block[i / 4] >> (24 - 8 * (i % 4)); if (c == '\0') break; - buf[length] = c; + buf[i] = c; } - buf[length] = '\0'; - return length; + buf[i] = '\0'; + + return i; } /** * fw_csr_string - reads a string from the configuration ROM - * @directory: device or unit directory; - * fw_device->config_rom+5 or fw_unit->directory + * @directory: e.g. root directory or unit directory * @key: the key of the preceding directory entry * @buf: where to put the string * @size: size of @buf, in bytes * - * Returns string length (>= 0) or error code (< 0). + * The string is taken from a minimal ASCII text descriptor leaf after + * the immediate entry with @key. The string is zero-terminated. + * Returns strlen(buf) or a negative error code. */ int fw_csr_string(u32 *directory, int key, char *buf, size_t size) { u32 *leaf = search_leaf(directory, key); if (!leaf) return -ENOENT; + return textual_leaf_to_string(leaf, buf, size); } EXPORT_SYMBOL(fw_csr_string); diff --git a/include/linux/firewire.h b/include/linux/firewire.h index 5246869d808..df680216e7b 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -71,7 +71,6 @@ struct fw_csr_iterator { void fw_csr_iterator_init(struct fw_csr_iterator *ci, u32 *p); int fw_csr_iterator_next(struct fw_csr_iterator *ci, int *key, int *value); - int fw_csr_string(u32 *directory, int key, char *buf, size_t size); extern struct bus_type fw_bus_type; -- cgit v1.2.3-70-g09d2 From 13b302d0a217580c0129b0641b0ca8b592e437b0 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sat, 26 Dec 2009 01:44:10 +0100 Subject: firewire: qualify config ROM cache pointers as const pointers Several config ROM related functions only peek at the ROM cache; mark their arguments as const pointers. Ditto fw_device.config_rom and fw_unit.directory, as the memory behind them is meant to be write-once. Signed-off-by: Stefan Richter --- drivers/firewire/core-device.c | 21 +++++++++++---------- drivers/firewire/sbp2.c | 5 +++-- include/linux/firewire.h | 12 ++++++------ 3 files changed, 20 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c index 5d5c6a68983..eecd52dc8e9 100644 --- a/drivers/firewire/core-device.c +++ b/drivers/firewire/core-device.c @@ -43,7 +43,7 @@ #include "core.h" -void fw_csr_iterator_init(struct fw_csr_iterator *ci, u32 * p) +void fw_csr_iterator_init(struct fw_csr_iterator *ci, const u32 *p) { ci->p = p + 1; ci->end = ci->p + (p[0] >> 16); @@ -59,7 +59,7 @@ int fw_csr_iterator_next(struct fw_csr_iterator *ci, int *key, int *value) } EXPORT_SYMBOL(fw_csr_iterator_next); -static u32 *search_leaf(u32 *directory, int search_key) +static const u32 *search_leaf(const u32 *directory, int search_key) { struct fw_csr_iterator ci; int last_key = 0, key, value; @@ -76,7 +76,7 @@ static u32 *search_leaf(u32 *directory, int search_key) return NULL; } -static int textual_leaf_to_string(u32 *block, char *buf, size_t size) +static int textual_leaf_to_string(const u32 *block, char *buf, size_t size) { unsigned int quadlets, i; char c; @@ -116,9 +116,9 @@ static int textual_leaf_to_string(u32 *block, char *buf, size_t size) * the immediate entry with @key. The string is zero-terminated. * Returns strlen(buf) or a negative error code. */ -int fw_csr_string(u32 *directory, int key, char *buf, size_t size) +int fw_csr_string(const u32 *directory, int key, char *buf, size_t size) { - u32 *leaf = search_leaf(directory, key); + const u32 *leaf = search_leaf(directory, key); if (!leaf) return -ENOENT; @@ -128,7 +128,7 @@ EXPORT_SYMBOL(fw_csr_string); static bool is_fw_unit(struct device *dev); -static int match_unit_directory(u32 *directory, u32 match_flags, +static int match_unit_directory(const u32 *directory, u32 match_flags, const struct ieee1394_device_id *id) { struct fw_csr_iterator ci; @@ -262,7 +262,7 @@ static ssize_t show_immediate(struct device *dev, struct config_rom_attribute *attr = container_of(dattr, struct config_rom_attribute, attr); struct fw_csr_iterator ci; - u32 *dir; + const u32 *dir; int key, value, ret = -ENOENT; down_read(&fw_device_rwsem); @@ -293,7 +293,7 @@ static ssize_t show_text_leaf(struct device *dev, { struct config_rom_attribute *attr = container_of(dattr, struct config_rom_attribute, attr); - u32 *dir; + const u32 *dir; size_t bufsize; char dummy_buf[2]; int ret; @@ -421,7 +421,7 @@ static ssize_t guid_show(struct device *dev, return ret; } -static int units_sprintf(char *buf, u32 *directory) +static int units_sprintf(char *buf, const u32 *directory) { struct fw_csr_iterator ci; int key, value; @@ -503,7 +503,8 @@ static int read_rom(struct fw_device *device, */ static int read_bus_info_block(struct fw_device *device, int generation) { - u32 *rom, *stack, *old_rom, *new_rom; + const u32 *old_rom, *new_rom; + u32 *rom, *stack; u32 sp, key; int i, end, length, ret = -1; diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c index d485cdd8cba..7e33b0b1704 100644 --- a/drivers/firewire/sbp2.c +++ b/drivers/firewire/sbp2.c @@ -1014,7 +1014,8 @@ static int sbp2_add_logical_unit(struct sbp2_target *tgt, int lun_entry) return 0; } -static int sbp2_scan_logical_unit_dir(struct sbp2_target *tgt, u32 *directory) +static int sbp2_scan_logical_unit_dir(struct sbp2_target *tgt, + const u32 *directory) { struct fw_csr_iterator ci; int key, value; @@ -1027,7 +1028,7 @@ static int sbp2_scan_logical_unit_dir(struct sbp2_target *tgt, u32 *directory) return 0; } -static int sbp2_scan_unit_dir(struct sbp2_target *tgt, u32 *directory, +static int sbp2_scan_unit_dir(struct sbp2_target *tgt, const u32 *directory, u32 *model, u32 *firmware_revision) { struct fw_csr_iterator ci; diff --git a/include/linux/firewire.h b/include/linux/firewire.h index df680216e7b..4bd94bf5e73 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -65,13 +65,13 @@ #define CSR_DIRECTORY_ID 0x20 struct fw_csr_iterator { - u32 *p; - u32 *end; + const u32 *p; + const u32 *end; }; -void fw_csr_iterator_init(struct fw_csr_iterator *ci, u32 *p); +void fw_csr_iterator_init(struct fw_csr_iterator *ci, const u32 *p); int fw_csr_iterator_next(struct fw_csr_iterator *ci, int *key, int *value); -int fw_csr_string(u32 *directory, int key, char *buf, size_t size); +int fw_csr_string(const u32 *directory, int key, char *buf, size_t size); extern struct bus_type fw_bus_type; @@ -163,7 +163,7 @@ struct fw_device { struct mutex client_list_mutex; struct list_head client_list; - u32 *config_rom; + const u32 *config_rom; size_t config_rom_length; int config_rom_retries; unsigned is_local:1; @@ -205,7 +205,7 @@ int fw_device_enable_phys_dma(struct fw_device *device); */ struct fw_unit { struct device device; - u32 *directory; + const u32 *directory; struct fw_attribute_group attribute_group; }; -- cgit v1.2.3-70-g09d2 From c1c5523dd1517250cac8b15a4acbc237c24a67d4 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 23 Dec 2009 01:27:48 +0000 Subject: can/netlink: add CAN_CTRLMODE_ONE_SHOT This patch adds the flag CAN_CTRLMODE_ONE_SHOT. It is used as mask or flag in the "struct can_ctrlmode". It allows userspace via netlink to set a CAN controller into the special "one-shot" mode. In this mode, if supported by the CAN controller, it tries only once to deliver a CAN frame and aborts it if an error (e.g.: arbitration lost) happens. Signed-off-by: Marc Kleine-Budde Acked-by: Wolfgang Grandegger Signed-off-by: David S. Miller --- include/linux/can/netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/can/netlink.h b/include/linux/can/netlink.h index 9ecbb7871c0..c818335fbb1 100644 --- a/include/linux/can/netlink.h +++ b/include/linux/can/netlink.h @@ -80,6 +80,7 @@ struct can_ctrlmode { #define CAN_CTRLMODE_LOOPBACK 0x1 /* Loopback mode */ #define CAN_CTRLMODE_LISTENONLY 0x2 /* Listen-only mode */ #define CAN_CTRLMODE_3_SAMPLES 0x4 /* Triple sampling mode */ +#define CAN_CTRLMODE_ONE_SHOT 0x8 /* One-Shot mode */ /* * CAN device statistics -- cgit v1.2.3-70-g09d2 From cf2f765f1896064e34c6f0f2ef896ff058dd5c06 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Mon, 4 Jan 2010 12:20:56 +0100 Subject: HID: handle joysticks with large number of buttons Current HID code doesn't properly handle HID joysticks which have larger number of buttons than what fits into current range reserved for BTN_JOYSTICK. One such joystick reported to not work properly is Saitek X52 Pro Flight System. We can't extend the range to fit more buttons in, because of backwards compatibility reasons. Therefore this patch introduces a new BTN_TRIGGER_HAPPY range, and uses these to map the buttons which are over BTN_JOYSTICK limit. Acked-by: Dmitry Torokhov [for the input.h part] Signed-off-by: Jiri Kosina --- drivers/hid/hid-input.c | 7 ++++++- include/linux/input.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 5862b0f3b55..dad7aae9c97 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -198,7 +198,12 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel switch (field->application) { case HID_GD_MOUSE: case HID_GD_POINTER: code += 0x110; break; - case HID_GD_JOYSTICK: code += 0x120; break; + case HID_GD_JOYSTICK: + if (code <= 0xf) + code += BTN_JOYSTICK; + else + code += BTN_TRIGGER_HAPPY; + break; case HID_GD_GAMEPAD: code += 0x130; break; default: switch (field->physical) { diff --git a/include/linux/input.h b/include/linux/input.h index 7be8a6537b5..97f98ca9b04 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -597,6 +597,48 @@ struct input_absinfo { #define KEY_CAMERA_FOCUS 0x210 +#define BTN_TRIGGER_HAPPY 0x2c0 +#define BTN_TRIGGER_HAPPY1 0x2c0 +#define BTN_TRIGGER_HAPPY2 0x2c1 +#define BTN_TRIGGER_HAPPY3 0x2c2 +#define BTN_TRIGGER_HAPPY4 0x2c3 +#define BTN_TRIGGER_HAPPY5 0x2c4 +#define BTN_TRIGGER_HAPPY6 0x2c5 +#define BTN_TRIGGER_HAPPY7 0x2c6 +#define BTN_TRIGGER_HAPPY8 0x2c7 +#define BTN_TRIGGER_HAPPY9 0x2c8 +#define BTN_TRIGGER_HAPPY10 0x2c9 +#define BTN_TRIGGER_HAPPY11 0x2ca +#define BTN_TRIGGER_HAPPY12 0x2cb +#define BTN_TRIGGER_HAPPY13 0x2cc +#define BTN_TRIGGER_HAPPY14 0x2cd +#define BTN_TRIGGER_HAPPY15 0x2ce +#define BTN_TRIGGER_HAPPY16 0x2cf +#define BTN_TRIGGER_HAPPY17 0x2d0 +#define BTN_TRIGGER_HAPPY18 0x2d1 +#define BTN_TRIGGER_HAPPY19 0x2d2 +#define BTN_TRIGGER_HAPPY20 0x2d3 +#define BTN_TRIGGER_HAPPY21 0x2d4 +#define BTN_TRIGGER_HAPPY22 0x2d5 +#define BTN_TRIGGER_HAPPY23 0x2d6 +#define BTN_TRIGGER_HAPPY24 0x2d7 +#define BTN_TRIGGER_HAPPY25 0x2d8 +#define BTN_TRIGGER_HAPPY26 0x2d9 +#define BTN_TRIGGER_HAPPY27 0x2da +#define BTN_TRIGGER_HAPPY28 0x2db +#define BTN_TRIGGER_HAPPY29 0x2dc +#define BTN_TRIGGER_HAPPY30 0x2dd +#define BTN_TRIGGER_HAPPY31 0x2de +#define BTN_TRIGGER_HAPPY32 0x2df +#define BTN_TRIGGER_HAPPY33 0x2e0 +#define BTN_TRIGGER_HAPPY34 0x2e1 +#define BTN_TRIGGER_HAPPY35 0x2e2 +#define BTN_TRIGGER_HAPPY36 0x2e3 +#define BTN_TRIGGER_HAPPY37 0x2e4 +#define BTN_TRIGGER_HAPPY38 0x2e5 +#define BTN_TRIGGER_HAPPY39 0x2e6 +#define BTN_TRIGGER_HAPPY40 0x2e7 + /* We avoid low common keys in module aliases so they don't get huge. */ #define KEY_MIN_INTERESTING KEY_MUTE #define KEY_MAX 0x2ff -- cgit v1.2.3-70-g09d2 From e1783a240f491fb233f04edc042e16b18a7a79ba Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 5 Jan 2010 15:34:50 +0900 Subject: module: Use this_cpu_xx to dynamically allocate counters Use cpu ops to deal with the per cpu data instead of a local_t. Reduces memory requirements, cache footprint and decreases cycle counts. The this_cpu_xx operations are also used for !SMP mode. Otherwise we could not drop the use of __module_ref_addr() which would make per cpu data handling complicated. this_cpu_xx operations have their own fallback for !SMP. V8-V9: - Leave include asm/module.h since ringbuffer.c depends on it. Nothing else does though. Another patch will deal with that. - Remove spurious free. Signed-off-by: Christoph Lameter Acked-by: Rusty Russell Signed-off-by: Tejun Heo --- include/linux/module.h | 36 ++++++++++++++---------------------- kernel/module.c | 29 +++++++++++++++-------------- 2 files changed, 29 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 6cb1a3cab5d..2302f09ea2d 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -363,11 +364,9 @@ struct module /* Destruction function. */ void (*exit)(void); -#ifdef CONFIG_SMP - char *refptr; -#else - local_t ref; -#endif + struct module_ref { + int count; + } *refptr; #endif #ifdef CONFIG_CONSTRUCTORS @@ -454,25 +453,16 @@ void __symbol_put(const char *symbol); #define symbol_put(x) __symbol_put(MODULE_SYMBOL_PREFIX #x) void symbol_put_addr(void *addr); -static inline local_t *__module_ref_addr(struct module *mod, int cpu) -{ -#ifdef CONFIG_SMP - return (local_t *) (mod->refptr + per_cpu_offset(cpu)); -#else - return &mod->ref; -#endif -} - /* Sometimes we know we already have a refcount, and it's easier not to handle the error case (which only happens with rmmod --wait). */ static inline void __module_get(struct module *module) { if (module) { - unsigned int cpu = get_cpu(); - local_inc(__module_ref_addr(module, cpu)); + preempt_disable(); + __this_cpu_inc(module->refptr->count); trace_module_get(module, _THIS_IP_, - local_read(__module_ref_addr(module, cpu))); - put_cpu(); + __this_cpu_read(module->refptr->count)); + preempt_enable(); } } @@ -481,15 +471,17 @@ static inline int try_module_get(struct module *module) int ret = 1; if (module) { - unsigned int cpu = get_cpu(); + preempt_disable(); + if (likely(module_is_live(module))) { - local_inc(__module_ref_addr(module, cpu)); + __this_cpu_inc(module->refptr->count); trace_module_get(module, _THIS_IP_, - local_read(__module_ref_addr(module, cpu))); + __this_cpu_read(module->refptr->count)); } else ret = 0; - put_cpu(); + + preempt_enable(); } return ret; } diff --git a/kernel/module.c b/kernel/module.c index e96b8ed1cb6..9bf228052ec 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -474,9 +474,10 @@ static void module_unload_init(struct module *mod) INIT_LIST_HEAD(&mod->modules_which_use_me); for_each_possible_cpu(cpu) - local_set(__module_ref_addr(mod, cpu), 0); + per_cpu_ptr(mod->refptr, cpu)->count = 0; + /* Hold reference count during initialization. */ - local_set(__module_ref_addr(mod, raw_smp_processor_id()), 1); + __this_cpu_write(mod->refptr->count, 1); /* Backwards compatibility macros put refcount during init. */ mod->waiter = current; } @@ -619,7 +620,7 @@ unsigned int module_refcount(struct module *mod) int cpu; for_each_possible_cpu(cpu) - total += local_read(__module_ref_addr(mod, cpu)); + total += per_cpu_ptr(mod->refptr, cpu)->count; return total; } EXPORT_SYMBOL(module_refcount); @@ -796,14 +797,15 @@ static struct module_attribute refcnt = { void module_put(struct module *module) { if (module) { - unsigned int cpu = get_cpu(); - local_dec(__module_ref_addr(module, cpu)); + preempt_disable(); + __this_cpu_dec(module->refptr->count); + trace_module_put(module, _RET_IP_, - local_read(__module_ref_addr(module, cpu))); + __this_cpu_read(module->refptr->count)); /* Maybe they're waiting for us to drop reference? */ if (unlikely(!module_is_live(module))) wake_up_process(module->waiter); - put_cpu(); + preempt_enable(); } } EXPORT_SYMBOL(module_put); @@ -1394,9 +1396,9 @@ static void free_module(struct module *mod) kfree(mod->args); if (mod->percpu) percpu_modfree(mod->percpu); -#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) +#if defined(CONFIG_MODULE_UNLOAD) if (mod->refptr) - percpu_modfree(mod->refptr); + free_percpu(mod->refptr); #endif /* Free lock-classes: */ lockdep_free_key_range(mod->module_core, mod->core_size); @@ -2159,9 +2161,8 @@ static noinline struct module *load_module(void __user *umod, mod = (void *)sechdrs[modindex].sh_addr; kmemleak_load_module(mod, hdr, sechdrs, secstrings); -#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) - mod->refptr = percpu_modalloc(sizeof(local_t), __alignof__(local_t), - mod->name); +#if defined(CONFIG_MODULE_UNLOAD) + mod->refptr = alloc_percpu(struct module_ref); if (!mod->refptr) { err = -ENOMEM; goto free_init; @@ -2393,8 +2394,8 @@ static noinline struct module *load_module(void __user *umod, kobject_put(&mod->mkobj.kobj); free_unload: module_unload_free(mod); -#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) - percpu_modfree(mod->refptr); +#if defined(CONFIG_MODULE_UNLOAD) + free_percpu(mod->refptr); free_init: #endif module_free(mod, mod->module_init); -- cgit v1.2.3-70-g09d2 From 79615760f380ec86cd58204744e774c33fab9211 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 5 Jan 2010 15:34:50 +0900 Subject: local_t: Move local.h include to ringbuffer.c and ring_buffer_benchmark.c ringbuffer*.c are the last users of local.h. Remove the include from modules.h and add it to ringbuffer files. Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- include/linux/module.h | 1 - kernel/trace/ring_buffer.c | 1 + kernel/trace/ring_buffer_benchmark.c | 1 + 3 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 2302f09ea2d..7e74ae0051c 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -17,7 +17,6 @@ #include #include -#include #include #include diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 2326b04c95c..eb6c8988c31 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -20,6 +20,7 @@ #include #include +#include #include "trace.h" /* diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index b2477caf09c..df74c798225 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -8,6 +8,7 @@ #include #include #include +#include struct rb_page { u64 ts; -- cgit v1.2.3-70-g09d2 From 99dcc3e5a94ed491fbef402831d8c0bbb267f995 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 5 Jan 2010 15:34:51 +0900 Subject: this_cpu: Page allocator conversion Use the per cpu allocator functionality to avoid per cpu arrays in struct zone. This drastically reduces the size of struct zone for systems with large amounts of processors and allows placement of critical variables of struct zone in one cacheline even on very large systems. Another effect is that the pagesets of one processor are placed near one another. If multiple pagesets from different zones fit into one cacheline then additional cacheline fetches can be avoided on the hot paths when allocating memory from multiple zones. Bootstrap becomes simpler if we use the same scheme for UP, SMP, NUMA. #ifdefs are reduced and we can drop the zone_pcp macro. Hotplug handling is also simplified since cpu alloc can bring up and shut down cpu areas for a specific cpu as a whole. So there is no need to allocate or free individual pagesets. V7-V8: - Explain chicken egg dilemmna with percpu allocator. V4-V5: - Fix up cases where per_cpu_ptr is called before irq disable - Integrate the bootstrap logic that was separate before. tj: Build failure in pageset_cpuup_callback() due to missing ret variable fixed. Reviewed-by: Mel Gorman Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- include/linux/mm.h | 4 - include/linux/mmzone.h | 12 +-- mm/page_alloc.c | 202 +++++++++++++++++-------------------------------- mm/vmstat.c | 14 ++-- 4 files changed, 81 insertions(+), 151 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 2265f28eb47..554fa395aac 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1079,11 +1079,7 @@ extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); extern int after_bootmem; -#ifdef CONFIG_NUMA extern void setup_per_cpu_pageset(void); -#else -static inline void setup_per_cpu_pageset(void) {} -#endif extern void zone_pcp_update(struct zone *zone); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 30fe668c254..7874201a355 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -184,13 +184,7 @@ struct per_cpu_pageset { s8 stat_threshold; s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS]; #endif -} ____cacheline_aligned_in_smp; - -#ifdef CONFIG_NUMA -#define zone_pcp(__z, __cpu) ((__z)->pageset[(__cpu)]) -#else -#define zone_pcp(__z, __cpu) (&(__z)->pageset[(__cpu)]) -#endif +}; #endif /* !__GENERATING_BOUNDS.H */ @@ -306,10 +300,8 @@ struct zone { */ unsigned long min_unmapped_pages; unsigned long min_slab_pages; - struct per_cpu_pageset *pageset[NR_CPUS]; -#else - struct per_cpu_pageset pageset[NR_CPUS]; #endif + struct per_cpu_pageset *pageset; /* * free areas of different sizes */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4e9f5cc5fb5..6849e870de5 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1008,10 +1008,10 @@ static void drain_pages(unsigned int cpu) struct per_cpu_pageset *pset; struct per_cpu_pages *pcp; - pset = zone_pcp(zone, cpu); + local_irq_save(flags); + pset = per_cpu_ptr(zone->pageset, cpu); pcp = &pset->pcp; - local_irq_save(flags); free_pcppages_bulk(zone, pcp->count, pcp); pcp->count = 0; local_irq_restore(flags); @@ -1095,7 +1095,6 @@ static void free_hot_cold_page(struct page *page, int cold) arch_free_page(page, 0); kernel_map_pages(page, 1, 0); - pcp = &zone_pcp(zone, get_cpu())->pcp; migratetype = get_pageblock_migratetype(page); set_page_private(page, migratetype); local_irq_save(flags); @@ -1118,6 +1117,7 @@ static void free_hot_cold_page(struct page *page, int cold) migratetype = MIGRATE_MOVABLE; } + pcp = &this_cpu_ptr(zone->pageset)->pcp; if (cold) list_add_tail(&page->lru, &pcp->lists[migratetype]); else @@ -1130,7 +1130,6 @@ static void free_hot_cold_page(struct page *page, int cold) out: local_irq_restore(flags); - put_cpu(); } void free_hot_page(struct page *page) @@ -1180,17 +1179,15 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, unsigned long flags; struct page *page; int cold = !!(gfp_flags & __GFP_COLD); - int cpu; again: - cpu = get_cpu(); if (likely(order == 0)) { struct per_cpu_pages *pcp; struct list_head *list; - pcp = &zone_pcp(zone, cpu)->pcp; - list = &pcp->lists[migratetype]; local_irq_save(flags); + pcp = &this_cpu_ptr(zone->pageset)->pcp; + list = &pcp->lists[migratetype]; if (list_empty(list)) { pcp->count += rmqueue_bulk(zone, 0, pcp->batch, list, @@ -1231,7 +1228,6 @@ again: __count_zone_vm_events(PGALLOC, zone, 1 << order); zone_statistics(preferred_zone, zone); local_irq_restore(flags); - put_cpu(); VM_BUG_ON(bad_range(zone, page)); if (prep_new_page(page, order, gfp_flags)) @@ -1240,7 +1236,6 @@ again: failed: local_irq_restore(flags); - put_cpu(); return NULL; } @@ -2179,7 +2174,7 @@ void show_free_areas(void) for_each_online_cpu(cpu) { struct per_cpu_pageset *pageset; - pageset = zone_pcp(zone, cpu); + pageset = per_cpu_ptr(zone->pageset, cpu); printk("CPU %4d: hi:%5d, btch:%4d usd:%4d\n", cpu, pageset->pcp.high, @@ -2744,10 +2739,29 @@ static void build_zonelist_cache(pg_data_t *pgdat) #endif /* CONFIG_NUMA */ +/* + * Boot pageset table. One per cpu which is going to be used for all + * zones and all nodes. The parameters will be set in such a way + * that an item put on a list will immediately be handed over to + * the buddy list. This is safe since pageset manipulation is done + * with interrupts disabled. + * + * The boot_pagesets must be kept even after bootup is complete for + * unused processors and/or zones. They do play a role for bootstrapping + * hotplugged processors. + * + * zoneinfo_show() and maybe other functions do + * not check if the processor is online before following the pageset pointer. + * Other parts of the kernel may not check if the zone is available. + */ +static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch); +static DEFINE_PER_CPU(struct per_cpu_pageset, boot_pageset); + /* return values int ....just for stop_machine() */ static int __build_all_zonelists(void *dummy) { int nid; + int cpu; #ifdef CONFIG_NUMA memset(node_load, 0, sizeof(node_load)); @@ -2758,6 +2772,23 @@ static int __build_all_zonelists(void *dummy) build_zonelists(pgdat); build_zonelist_cache(pgdat); } + + /* + * Initialize the boot_pagesets that are going to be used + * for bootstrapping processors. The real pagesets for + * each zone will be allocated later when the per cpu + * allocator is available. + * + * boot_pagesets are used also for bootstrapping offline + * cpus if the system is already booted because the pagesets + * are needed to initialize allocators on a specific cpu too. + * F.e. the percpu allocator needs the page allocator which + * needs the percpu allocator in order to allocate its pagesets + * (a chicken-egg dilemma). + */ + for_each_possible_cpu(cpu) + setup_pageset(&per_cpu(boot_pageset, cpu), 0); + return 0; } @@ -3095,121 +3126,33 @@ static void setup_pagelist_highmark(struct per_cpu_pageset *p, pcp->batch = PAGE_SHIFT * 8; } - -#ifdef CONFIG_NUMA -/* - * Boot pageset table. One per cpu which is going to be used for all - * zones and all nodes. The parameters will be set in such a way - * that an item put on a list will immediately be handed over to - * the buddy list. This is safe since pageset manipulation is done - * with interrupts disabled. - * - * Some NUMA counter updates may also be caught by the boot pagesets. - * - * The boot_pagesets must be kept even after bootup is complete for - * unused processors and/or zones. They do play a role for bootstrapping - * hotplugged processors. - * - * zoneinfo_show() and maybe other functions do - * not check if the processor is online before following the pageset pointer. - * Other parts of the kernel may not check if the zone is available. - */ -static struct per_cpu_pageset boot_pageset[NR_CPUS]; - /* - * Dynamically allocate memory for the - * per cpu pageset array in struct zone. + * Allocate per cpu pagesets and initialize them. + * Before this call only boot pagesets were available. + * Boot pagesets will no longer be used by this processorr + * after setup_per_cpu_pageset(). */ -static int __cpuinit process_zones(int cpu) +void __init setup_per_cpu_pageset(void) { - struct zone *zone, *dzone; - int node = cpu_to_node(cpu); - - node_set_state(node, N_CPU); /* this node has a cpu */ + struct zone *zone; + int cpu; for_each_populated_zone(zone) { - zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset), - GFP_KERNEL, node); - if (!zone_pcp(zone, cpu)) - goto bad; - - setup_pageset(zone_pcp(zone, cpu), zone_batchsize(zone)); - - if (percpu_pagelist_fraction) - setup_pagelist_highmark(zone_pcp(zone, cpu), - (zone->present_pages / percpu_pagelist_fraction)); - } - - return 0; -bad: - for_each_zone(dzone) { - if (!populated_zone(dzone)) - continue; - if (dzone == zone) - break; - kfree(zone_pcp(dzone, cpu)); - zone_pcp(dzone, cpu) = &boot_pageset[cpu]; - } - return -ENOMEM; -} + zone->pageset = alloc_percpu(struct per_cpu_pageset); -static inline void free_zone_pagesets(int cpu) -{ - struct zone *zone; - - for_each_zone(zone) { - struct per_cpu_pageset *pset = zone_pcp(zone, cpu); + for_each_possible_cpu(cpu) { + struct per_cpu_pageset *pcp = per_cpu_ptr(zone->pageset, cpu); - /* Free per_cpu_pageset if it is slab allocated */ - if (pset != &boot_pageset[cpu]) - kfree(pset); - zone_pcp(zone, cpu) = &boot_pageset[cpu]; - } -} + setup_pageset(pcp, zone_batchsize(zone)); -static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb, - unsigned long action, - void *hcpu) -{ - int cpu = (long)hcpu; - int ret = NOTIFY_OK; - - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - if (process_zones(cpu)) - ret = NOTIFY_BAD; - break; - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - case CPU_DEAD: - case CPU_DEAD_FROZEN: - free_zone_pagesets(cpu); - break; - default: - break; + if (percpu_pagelist_fraction) + setup_pagelist_highmark(pcp, + (zone->present_pages / + percpu_pagelist_fraction)); + } } - return ret; } -static struct notifier_block __cpuinitdata pageset_notifier = - { &pageset_cpuup_callback, NULL, 0 }; - -void __init setup_per_cpu_pageset(void) -{ - int err; - - /* Initialize per_cpu_pageset for cpu 0. - * A cpuup callback will do this for every cpu - * as it comes online - */ - err = process_zones(smp_processor_id()); - BUG_ON(err); - register_cpu_notifier(&pageset_notifier); -} - -#endif - static noinline __init_refok int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages) { @@ -3263,7 +3206,7 @@ static int __zone_pcp_update(void *data) struct per_cpu_pageset *pset; struct per_cpu_pages *pcp; - pset = zone_pcp(zone, cpu); + pset = per_cpu_ptr(zone->pageset, cpu); pcp = &pset->pcp; local_irq_save(flags); @@ -3281,21 +3224,17 @@ void zone_pcp_update(struct zone *zone) static __meminit void zone_pcp_init(struct zone *zone) { - int cpu; - unsigned long batch = zone_batchsize(zone); + /* + * per cpu subsystem is not up at this point. The following code + * relies on the ability of the linker to provide the + * offset of a (static) per cpu variable into the per cpu area. + */ + zone->pageset = &boot_pageset; - for (cpu = 0; cpu < NR_CPUS; cpu++) { -#ifdef CONFIG_NUMA - /* Early boot. Slab allocator not functional yet */ - zone_pcp(zone, cpu) = &boot_pageset[cpu]; - setup_pageset(&boot_pageset[cpu],0); -#else - setup_pageset(zone_pcp(zone,cpu), batch); -#endif - } if (zone->present_pages) - printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%lu\n", - zone->name, zone->present_pages, batch); + printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%u\n", + zone->name, zone->present_pages, + zone_batchsize(zone)); } __meminit int init_currently_empty_zone(struct zone *zone, @@ -4809,10 +4748,11 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write, if (!write || (ret == -EINVAL)) return ret; for_each_populated_zone(zone) { - for_each_online_cpu(cpu) { + for_each_possible_cpu(cpu) { unsigned long high; high = zone->present_pages / percpu_pagelist_fraction; - setup_pagelist_highmark(zone_pcp(zone, cpu), high); + setup_pagelist_highmark( + per_cpu_ptr(zone->pageset, cpu), high); } } return 0; diff --git a/mm/vmstat.c b/mm/vmstat.c index 6051fbab67b..1ba0bb7ad04 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -139,7 +139,8 @@ static void refresh_zone_stat_thresholds(void) threshold = calculate_threshold(zone); for_each_online_cpu(cpu) - zone_pcp(zone, cpu)->stat_threshold = threshold; + per_cpu_ptr(zone->pageset, cpu)->stat_threshold + = threshold; } } @@ -149,7 +150,8 @@ static void refresh_zone_stat_thresholds(void) void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, int delta) { - struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); + struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset); + s8 *p = pcp->vm_stat_diff + item; long x; @@ -202,7 +204,7 @@ EXPORT_SYMBOL(mod_zone_page_state); */ void __inc_zone_state(struct zone *zone, enum zone_stat_item item) { - struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); + struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset); s8 *p = pcp->vm_stat_diff + item; (*p)++; @@ -223,7 +225,7 @@ EXPORT_SYMBOL(__inc_zone_page_state); void __dec_zone_state(struct zone *zone, enum zone_stat_item item) { - struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); + struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset); s8 *p = pcp->vm_stat_diff + item; (*p)--; @@ -300,7 +302,7 @@ void refresh_cpu_vm_stats(int cpu) for_each_populated_zone(zone) { struct per_cpu_pageset *p; - p = zone_pcp(zone, cpu); + p = per_cpu_ptr(zone->pageset, cpu); for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) if (p->vm_stat_diff[i]) { @@ -741,7 +743,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, for_each_online_cpu(i) { struct per_cpu_pageset *pageset; - pageset = zone_pcp(zone, i); + pageset = per_cpu_ptr(zone->pageset, i); seq_printf(m, "\n cpu: %i" "\n count: %i" -- cgit v1.2.3-70-g09d2 From ddf1ffbd40c92ff1e58c45fa96d309788f7beb60 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 5 Jan 2010 17:56:04 -0800 Subject: Input: serio - let device core tell us if device was registered No need to keep track of it by ourselves. Signed-off-by: Dmitry Torokhov --- drivers/input/serio/serio.c | 8 ++------ include/linux/serio.h | 1 - 2 files changed, 2 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/serio/serio.c b/drivers/input/serio/serio.c index 0a278f9f1d3..d89880450f7 100644 --- a/drivers/input/serio/serio.c +++ b/drivers/input/serio/serio.c @@ -577,8 +577,6 @@ static void serio_add_port(struct serio *serio) printk(KERN_ERR "serio: device_add() failed for %s (%s), error: %d\n", serio->phys, serio->name, error); - else - serio->registered = true; } /* @@ -605,10 +603,8 @@ static void serio_destroy_port(struct serio *serio) serio->parent = NULL; } - if (serio->registered) { + if (device_is_registered(&serio->dev)) device_del(&serio->dev); - serio->registered = false; - } list_del_init(&serio->node); serio_remove_pending_events(serio); @@ -995,7 +991,7 @@ irqreturn_t serio_interrupt(struct serio *serio, if (likely(serio->drv)) { ret = serio->drv->interrupt(serio, data, dfl); - } else if (!dfl && serio->registered) { + } else if (!dfl && device_is_registered(&serio->dev)) { serio_rescan(serio); ret = IRQ_HANDLED; } diff --git a/include/linux/serio.h b/include/linux/serio.h index e2f3044d4a4..d0fb702059c 100644 --- a/include/linux/serio.h +++ b/include/linux/serio.h @@ -30,7 +30,6 @@ struct serio { char phys[32]; bool manual_bind; - bool registered; /* port has been fully registered with driver core */ struct serio_device_id id; -- cgit v1.2.3-70-g09d2 From 361b7b5b032338361ea88412f1fc45479fdd5859 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 5 Jan 2010 17:56:03 -0800 Subject: Input: gameport - let device core tell us if device was registered No need to keep track of it by ourselves. Signed-off-by: Dmitry Torokhov --- drivers/input/gameport/gameport.c | 6 +----- include/linux/gameport.h | 1 - 2 files changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c index ac11be08585..f9e5f8e1690 100644 --- a/drivers/input/gameport/gameport.c +++ b/drivers/input/gameport/gameport.c @@ -561,8 +561,6 @@ static void gameport_add_port(struct gameport *gameport) printk(KERN_ERR "gameport: device_add() failed for %s (%s), error: %d\n", gameport->phys, gameport->name, error); - else - gameport->registered = 1; } /* @@ -584,10 +582,8 @@ static void gameport_destroy_port(struct gameport *gameport) gameport->parent = NULL; } - if (gameport->registered) { + if (device_is_registered(&gameport->dev)) device_del(&gameport->dev); - gameport->registered = 0; - } list_del_init(&gameport->node); diff --git a/include/linux/gameport.h b/include/linux/gameport.h index 1bc08541c2b..48e68da097f 100644 --- a/include/linux/gameport.h +++ b/include/linux/gameport.h @@ -46,7 +46,6 @@ struct gameport { struct mutex drv_mutex; /* protects serio->drv so attributes can pin driver */ struct device dev; - unsigned int registered; /* port has been fully registered with driver core */ struct list_head node; }; -- cgit v1.2.3-70-g09d2 From 16295bec6398a3eedc9377e1af6ff4c71b98c300 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 6 Jan 2010 19:47:10 +1100 Subject: padata: Generic parallelization/serialization interface This patch introduces an interface to process data objects in parallel. The parallelized objects return after serialization in the same order as they were before the parallelization. Signed-off-by: Steffen Klassert Signed-off-by: Herbert Xu --- include/linux/padata.h | 88 +++++++ init/Kconfig | 4 + kernel/Makefile | 1 + kernel/padata.c | 690 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 783 insertions(+) create mode 100644 include/linux/padata.h create mode 100644 kernel/padata.c (limited to 'include/linux') diff --git a/include/linux/padata.h b/include/linux/padata.h new file mode 100644 index 00000000000..51611da9c49 --- /dev/null +++ b/include/linux/padata.h @@ -0,0 +1,88 @@ +/* + * padata.h - header for the padata parallelization interface + * + * Copyright (C) 2008, 2009 secunet Security Networks AG + * Copyright (C) 2008, 2009 Steffen Klassert + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef PADATA_H +#define PADATA_H + +#include +#include +#include + +struct padata_priv { + struct list_head list; + struct parallel_data *pd; + int cb_cpu; + int seq_nr; + int info; + void (*parallel)(struct padata_priv *padata); + void (*serial)(struct padata_priv *padata); +}; + +struct padata_list { + struct list_head list; + spinlock_t lock; +}; + +struct padata_queue { + struct padata_list parallel; + struct padata_list reorder; + struct padata_list serial; + struct work_struct pwork; + struct work_struct swork; + struct parallel_data *pd; + atomic_t num_obj; + int cpu_index; +}; + +struct parallel_data { + struct padata_instance *pinst; + struct padata_queue *queue; + atomic_t seq_nr; + atomic_t reorder_objects; + atomic_t refcnt; + unsigned int max_seq_nr; + cpumask_var_t cpumask; + spinlock_t lock; +}; + +struct padata_instance { + struct notifier_block cpu_notifier; + struct workqueue_struct *wq; + struct parallel_data *pd; + cpumask_var_t cpumask; + struct mutex lock; + u8 flags; +#define PADATA_INIT 1 +#define PADATA_RESET 2 +}; + +extern struct padata_instance *padata_alloc(const struct cpumask *cpumask, + struct workqueue_struct *wq); +extern void padata_free(struct padata_instance *pinst); +extern int padata_do_parallel(struct padata_instance *pinst, + struct padata_priv *padata, int cb_cpu); +extern void padata_do_serial(struct padata_priv *padata); +extern int padata_set_cpumask(struct padata_instance *pinst, + cpumask_var_t cpumask); +extern int padata_add_cpu(struct padata_instance *pinst, int cpu); +extern int padata_remove_cpu(struct padata_instance *pinst, int cpu); +extern void padata_start(struct padata_instance *pinst); +extern void padata_stop(struct padata_instance *pinst); +#endif diff --git a/init/Kconfig b/init/Kconfig index a23da9f0180..9fd23bcc170 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1252,4 +1252,8 @@ source "block/Kconfig" config PREEMPT_NOTIFIERS bool +config PADATA + depends on SMP + bool + source "kernel/Kconfig.locks" diff --git a/kernel/Makefile b/kernel/Makefile index 864ff75d65f..6aebdeb2aa3 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -100,6 +100,7 @@ obj-$(CONFIG_SLOW_WORK_DEBUG) += slow-work-debugfs.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o +obj-$(CONFIG_PADATA) += padata.o ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) # According to Alan Modra , the -fno-omit-frame-pointer is diff --git a/kernel/padata.c b/kernel/padata.c new file mode 100644 index 00000000000..6f9bcb8313d --- /dev/null +++ b/kernel/padata.c @@ -0,0 +1,690 @@ +/* + * padata.c - generic interface to process data streams in parallel + * + * Copyright (C) 2008, 2009 secunet Security Networks AG + * Copyright (C) 2008, 2009 Steffen Klassert + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define MAX_SEQ_NR INT_MAX - NR_CPUS +#define MAX_OBJ_NUM 10000 * NR_CPUS + +static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index) +{ + int cpu, target_cpu; + + target_cpu = cpumask_first(pd->cpumask); + for (cpu = 0; cpu < cpu_index; cpu++) + target_cpu = cpumask_next(target_cpu, pd->cpumask); + + return target_cpu; +} + +static int padata_cpu_hash(struct padata_priv *padata) +{ + int cpu_index; + struct parallel_data *pd; + + pd = padata->pd; + + /* + * Hash the sequence numbers to the cpus by taking + * seq_nr mod. number of cpus in use. + */ + cpu_index = padata->seq_nr % cpumask_weight(pd->cpumask); + + return padata_index_to_cpu(pd, cpu_index); +} + +static void padata_parallel_worker(struct work_struct *work) +{ + struct padata_queue *queue; + struct parallel_data *pd; + struct padata_instance *pinst; + LIST_HEAD(local_list); + + local_bh_disable(); + queue = container_of(work, struct padata_queue, pwork); + pd = queue->pd; + pinst = pd->pinst; + + spin_lock(&queue->parallel.lock); + list_replace_init(&queue->parallel.list, &local_list); + spin_unlock(&queue->parallel.lock); + + while (!list_empty(&local_list)) { + struct padata_priv *padata; + + padata = list_entry(local_list.next, + struct padata_priv, list); + + list_del_init(&padata->list); + + padata->parallel(padata); + } + + local_bh_enable(); +} + +/* + * padata_do_parallel - padata parallelization function + * + * @pinst: padata instance + * @padata: object to be parallelized + * @cb_cpu: cpu the serialization callback function will run on, + * must be in the cpumask of padata. + * + * The parallelization callback function will run with BHs off. + * Note: Every object which is parallelized by padata_do_parallel + * must be seen by padata_do_serial. + */ +int padata_do_parallel(struct padata_instance *pinst, + struct padata_priv *padata, int cb_cpu) +{ + int target_cpu, err; + struct padata_queue *queue; + struct parallel_data *pd; + + rcu_read_lock_bh(); + + pd = rcu_dereference(pinst->pd); + + err = 0; + if (!(pinst->flags & PADATA_INIT)) + goto out; + + err = -EBUSY; + if ((pinst->flags & PADATA_RESET)) + goto out; + + if (atomic_read(&pd->refcnt) >= MAX_OBJ_NUM) + goto out; + + err = -EINVAL; + if (!cpumask_test_cpu(cb_cpu, pd->cpumask)) + goto out; + + err = -EINPROGRESS; + atomic_inc(&pd->refcnt); + padata->pd = pd; + padata->cb_cpu = cb_cpu; + + if (unlikely(atomic_read(&pd->seq_nr) == pd->max_seq_nr)) + atomic_set(&pd->seq_nr, -1); + + padata->seq_nr = atomic_inc_return(&pd->seq_nr); + + target_cpu = padata_cpu_hash(padata); + queue = per_cpu_ptr(pd->queue, target_cpu); + + spin_lock(&queue->parallel.lock); + list_add_tail(&padata->list, &queue->parallel.list); + spin_unlock(&queue->parallel.lock); + + queue_work_on(target_cpu, pinst->wq, &queue->pwork); + +out: + rcu_read_unlock_bh(); + + return err; +} +EXPORT_SYMBOL(padata_do_parallel); + +static struct padata_priv *padata_get_next(struct parallel_data *pd) +{ + int cpu, num_cpus, empty, calc_seq_nr; + int seq_nr, next_nr, overrun, next_overrun; + struct padata_queue *queue, *next_queue; + struct padata_priv *padata; + struct padata_list *reorder; + + empty = 0; + next_nr = -1; + next_overrun = 0; + next_queue = NULL; + + num_cpus = cpumask_weight(pd->cpumask); + + for_each_cpu(cpu, pd->cpumask) { + queue = per_cpu_ptr(pd->queue, cpu); + reorder = &queue->reorder; + + /* + * Calculate the seq_nr of the object that should be + * next in this queue. + */ + overrun = 0; + calc_seq_nr = (atomic_read(&queue->num_obj) * num_cpus) + + queue->cpu_index; + + if (unlikely(calc_seq_nr > pd->max_seq_nr)) { + calc_seq_nr = calc_seq_nr - pd->max_seq_nr - 1; + overrun = 1; + } + + if (!list_empty(&reorder->list)) { + padata = list_entry(reorder->list.next, + struct padata_priv, list); + + seq_nr = padata->seq_nr; + BUG_ON(calc_seq_nr != seq_nr); + } else { + seq_nr = calc_seq_nr; + empty++; + } + + if (next_nr < 0 || seq_nr < next_nr + || (next_overrun && !overrun)) { + next_nr = seq_nr; + next_overrun = overrun; + next_queue = queue; + } + } + + padata = NULL; + + if (empty == num_cpus) + goto out; + + reorder = &next_queue->reorder; + + if (!list_empty(&reorder->list)) { + padata = list_entry(reorder->list.next, + struct padata_priv, list); + + if (unlikely(next_overrun)) { + for_each_cpu(cpu, pd->cpumask) { + queue = per_cpu_ptr(pd->queue, cpu); + atomic_set(&queue->num_obj, 0); + } + } + + spin_lock(&reorder->lock); + list_del_init(&padata->list); + atomic_dec(&pd->reorder_objects); + spin_unlock(&reorder->lock); + + atomic_inc(&next_queue->num_obj); + + goto out; + } + + if (next_nr % num_cpus == next_queue->cpu_index) { + padata = ERR_PTR(-ENODATA); + goto out; + } + + padata = ERR_PTR(-EINPROGRESS); +out: + return padata; +} + +static void padata_reorder(struct parallel_data *pd) +{ + struct padata_priv *padata; + struct padata_queue *queue; + struct padata_instance *pinst = pd->pinst; + +try_again: + if (!spin_trylock_bh(&pd->lock)) + goto out; + + while (1) { + padata = padata_get_next(pd); + + if (!padata || PTR_ERR(padata) == -EINPROGRESS) + break; + + if (PTR_ERR(padata) == -ENODATA) { + spin_unlock_bh(&pd->lock); + goto out; + } + + queue = per_cpu_ptr(pd->queue, padata->cb_cpu); + + spin_lock(&queue->serial.lock); + list_add_tail(&padata->list, &queue->serial.list); + spin_unlock(&queue->serial.lock); + + queue_work_on(padata->cb_cpu, pinst->wq, &queue->swork); + } + + spin_unlock_bh(&pd->lock); + + if (atomic_read(&pd->reorder_objects)) + goto try_again; + +out: + return; +} + +static void padata_serial_worker(struct work_struct *work) +{ + struct padata_queue *queue; + struct parallel_data *pd; + LIST_HEAD(local_list); + + local_bh_disable(); + queue = container_of(work, struct padata_queue, swork); + pd = queue->pd; + + spin_lock(&queue->serial.lock); + list_replace_init(&queue->serial.list, &local_list); + spin_unlock(&queue->serial.lock); + + while (!list_empty(&local_list)) { + struct padata_priv *padata; + + padata = list_entry(local_list.next, + struct padata_priv, list); + + list_del_init(&padata->list); + + padata->serial(padata); + atomic_dec(&pd->refcnt); + } + local_bh_enable(); +} + +/* + * padata_do_serial - padata serialization function + * + * @padata: object to be serialized. + * + * padata_do_serial must be called for every parallelized object. + * The serialization callback function will run with BHs off. + */ +void padata_do_serial(struct padata_priv *padata) +{ + int cpu; + struct padata_queue *queue; + struct parallel_data *pd; + + pd = padata->pd; + + cpu = get_cpu(); + queue = per_cpu_ptr(pd->queue, cpu); + + spin_lock(&queue->reorder.lock); + atomic_inc(&pd->reorder_objects); + list_add_tail(&padata->list, &queue->reorder.list); + spin_unlock(&queue->reorder.lock); + + put_cpu(); + + padata_reorder(pd); +} +EXPORT_SYMBOL(padata_do_serial); + +static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst, + const struct cpumask *cpumask) +{ + int cpu, cpu_index, num_cpus; + struct padata_queue *queue; + struct parallel_data *pd; + + cpu_index = 0; + + pd = kzalloc(sizeof(struct parallel_data), GFP_KERNEL); + if (!pd) + goto err; + + pd->queue = alloc_percpu(struct padata_queue); + if (!pd->queue) + goto err_free_pd; + + if (!alloc_cpumask_var(&pd->cpumask, GFP_KERNEL)) + goto err_free_queue; + + for_each_possible_cpu(cpu) { + queue = per_cpu_ptr(pd->queue, cpu); + + queue->pd = pd; + + if (cpumask_test_cpu(cpu, cpumask) + && cpumask_test_cpu(cpu, cpu_active_mask)) { + queue->cpu_index = cpu_index; + cpu_index++; + } else + queue->cpu_index = -1; + + INIT_LIST_HEAD(&queue->reorder.list); + INIT_LIST_HEAD(&queue->parallel.list); + INIT_LIST_HEAD(&queue->serial.list); + spin_lock_init(&queue->reorder.lock); + spin_lock_init(&queue->parallel.lock); + spin_lock_init(&queue->serial.lock); + + INIT_WORK(&queue->pwork, padata_parallel_worker); + INIT_WORK(&queue->swork, padata_serial_worker); + atomic_set(&queue->num_obj, 0); + } + + cpumask_and(pd->cpumask, cpumask, cpu_active_mask); + + num_cpus = cpumask_weight(pd->cpumask); + pd->max_seq_nr = (MAX_SEQ_NR / num_cpus) * num_cpus - 1; + + atomic_set(&pd->seq_nr, -1); + atomic_set(&pd->reorder_objects, 0); + atomic_set(&pd->refcnt, 0); + pd->pinst = pinst; + spin_lock_init(&pd->lock); + + return pd; + +err_free_queue: + free_percpu(pd->queue); +err_free_pd: + kfree(pd); +err: + return NULL; +} + +static void padata_free_pd(struct parallel_data *pd) +{ + free_cpumask_var(pd->cpumask); + free_percpu(pd->queue); + kfree(pd); +} + +static void padata_replace(struct padata_instance *pinst, + struct parallel_data *pd_new) +{ + struct parallel_data *pd_old = pinst->pd; + + pinst->flags |= PADATA_RESET; + + rcu_assign_pointer(pinst->pd, pd_new); + + synchronize_rcu(); + + while (atomic_read(&pd_old->refcnt) != 0) + yield(); + + flush_workqueue(pinst->wq); + + padata_free_pd(pd_old); + + pinst->flags &= ~PADATA_RESET; +} + +/* + * padata_set_cpumask - set the cpumask that padata should use + * + * @pinst: padata instance + * @cpumask: the cpumask to use + */ +int padata_set_cpumask(struct padata_instance *pinst, + cpumask_var_t cpumask) +{ + struct parallel_data *pd; + int err = 0; + + might_sleep(); + + mutex_lock(&pinst->lock); + + pd = padata_alloc_pd(pinst, cpumask); + if (!pd) { + err = -ENOMEM; + goto out; + } + + cpumask_copy(pinst->cpumask, cpumask); + + padata_replace(pinst, pd); + +out: + mutex_unlock(&pinst->lock); + + return err; +} +EXPORT_SYMBOL(padata_set_cpumask); + +static int __padata_add_cpu(struct padata_instance *pinst, int cpu) +{ + struct parallel_data *pd; + + if (cpumask_test_cpu(cpu, cpu_active_mask)) { + pd = padata_alloc_pd(pinst, pinst->cpumask); + if (!pd) + return -ENOMEM; + + padata_replace(pinst, pd); + } + + return 0; +} + +/* + * padata_add_cpu - add a cpu to the padata cpumask + * + * @pinst: padata instance + * @cpu: cpu to add + */ +int padata_add_cpu(struct padata_instance *pinst, int cpu) +{ + int err; + + might_sleep(); + + mutex_lock(&pinst->lock); + + cpumask_set_cpu(cpu, pinst->cpumask); + err = __padata_add_cpu(pinst, cpu); + + mutex_unlock(&pinst->lock); + + return err; +} +EXPORT_SYMBOL(padata_add_cpu); + +static int __padata_remove_cpu(struct padata_instance *pinst, int cpu) +{ + struct parallel_data *pd; + + if (cpumask_test_cpu(cpu, cpu_online_mask)) { + pd = padata_alloc_pd(pinst, pinst->cpumask); + if (!pd) + return -ENOMEM; + + padata_replace(pinst, pd); + } + + return 0; +} + +/* + * padata_remove_cpu - remove a cpu from the padata cpumask + * + * @pinst: padata instance + * @cpu: cpu to remove + */ +int padata_remove_cpu(struct padata_instance *pinst, int cpu) +{ + int err; + + might_sleep(); + + mutex_lock(&pinst->lock); + + cpumask_clear_cpu(cpu, pinst->cpumask); + err = __padata_remove_cpu(pinst, cpu); + + mutex_unlock(&pinst->lock); + + return err; +} +EXPORT_SYMBOL(padata_remove_cpu); + +/* + * padata_start - start the parallel processing + * + * @pinst: padata instance to start + */ +void padata_start(struct padata_instance *pinst) +{ + might_sleep(); + + mutex_lock(&pinst->lock); + pinst->flags |= PADATA_INIT; + mutex_unlock(&pinst->lock); +} +EXPORT_SYMBOL(padata_start); + +/* + * padata_stop - stop the parallel processing + * + * @pinst: padata instance to stop + */ +void padata_stop(struct padata_instance *pinst) +{ + might_sleep(); + + mutex_lock(&pinst->lock); + pinst->flags &= ~PADATA_INIT; + mutex_unlock(&pinst->lock); +} +EXPORT_SYMBOL(padata_stop); + +static int __cpuinit padata_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + int err; + struct padata_instance *pinst; + int cpu = (unsigned long)hcpu; + + pinst = container_of(nfb, struct padata_instance, cpu_notifier); + + switch (action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + if (!cpumask_test_cpu(cpu, pinst->cpumask)) + break; + mutex_lock(&pinst->lock); + err = __padata_add_cpu(pinst, cpu); + mutex_unlock(&pinst->lock); + if (err) + return NOTIFY_BAD; + break; + + case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: + if (!cpumask_test_cpu(cpu, pinst->cpumask)) + break; + mutex_lock(&pinst->lock); + err = __padata_remove_cpu(pinst, cpu); + mutex_unlock(&pinst->lock); + if (err) + return NOTIFY_BAD; + break; + + case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: + if (!cpumask_test_cpu(cpu, pinst->cpumask)) + break; + mutex_lock(&pinst->lock); + __padata_remove_cpu(pinst, cpu); + mutex_unlock(&pinst->lock); + + case CPU_DOWN_FAILED: + case CPU_DOWN_FAILED_FROZEN: + if (!cpumask_test_cpu(cpu, pinst->cpumask)) + break; + mutex_lock(&pinst->lock); + __padata_add_cpu(pinst, cpu); + mutex_unlock(&pinst->lock); + } + + return NOTIFY_OK; +} + +/* + * padata_alloc - allocate and initialize a padata instance + * + * @cpumask: cpumask that padata uses for parallelization + * @wq: workqueue to use for the allocated padata instance + */ +struct padata_instance *padata_alloc(const struct cpumask *cpumask, + struct workqueue_struct *wq) +{ + int err; + struct padata_instance *pinst; + struct parallel_data *pd; + + pinst = kzalloc(sizeof(struct padata_instance), GFP_KERNEL); + if (!pinst) + goto err; + + pd = padata_alloc_pd(pinst, cpumask); + if (!pd) + goto err_free_inst; + + rcu_assign_pointer(pinst->pd, pd); + + pinst->wq = wq; + + cpumask_copy(pinst->cpumask, cpumask); + + pinst->flags = 0; + + pinst->cpu_notifier.notifier_call = padata_cpu_callback; + pinst->cpu_notifier.priority = 0; + err = register_hotcpu_notifier(&pinst->cpu_notifier); + if (err) + goto err_free_pd; + + mutex_init(&pinst->lock); + + return pinst; + +err_free_pd: + padata_free_pd(pd); +err_free_inst: + kfree(pinst); +err: + return NULL; +} +EXPORT_SYMBOL(padata_alloc); + +/* + * padata_free - free a padata instance + * + * @ padata_inst: padata instance to free + */ +void padata_free(struct padata_instance *pinst) +{ + padata_stop(pinst); + + synchronize_rcu(); + + while (atomic_read(&pinst->pd->refcnt) != 0) + yield(); + + unregister_hotcpu_notifier(&pinst->cpu_notifier); + padata_free_pd(pinst->pd); + kfree(pinst); +} +EXPORT_SYMBOL(padata_free); -- cgit v1.2.3-70-g09d2 From 509e760cd91c831983097ae174cb6c0b8c6c8e6b Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 15 Dec 2009 15:39:42 +0800 Subject: tracing: Add print_fmt field This is part of a patch set that removes the show_format method in the ftrace event macros. The print_fmt field is added to hold the string that shows the print_fmt in the event format files. This patch only adds the field but it is currently not used. Later patches will use this field to enable us to remove the show_format field and function. Signed-off-by: Lai Jiangshan LKML-Reference: <4B273D3E.2000704@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 1 + include/trace/ftrace.h | 28 +++++++++++++++++++++++++++- kernel/trace/trace_export.c | 7 +++++++ 3 files changed, 35 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 2233c98d80d..bd23d8e52f0 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -121,6 +121,7 @@ struct ftrace_event_call { int (*regfunc)(struct ftrace_event_call *); void (*unregfunc)(struct ftrace_event_call *); int id; + const char *print_fmt; int (*raw_init)(struct ftrace_event_call *); int (*show_format)(struct ftrace_event_call *, struct trace_seq *); diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index c6fe03e902c..3351d85c83a 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -722,8 +722,20 @@ static struct trace_event ftrace_event_type_##call = { \ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) +#undef __entry +#define __entry REC + +#undef __print_flags +#undef __print_symbolic +#undef __get_dynamic_array +#undef __get_str + +#undef TP_printk +#define TP_printk(fmt, args...) "\"" fmt "\", " __stringify(args) + #undef DECLARE_EVENT_CLASS -#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ +static const char print_fmt_##call[] = print; #undef DEFINE_EVENT #define DEFINE_EVENT(template, call, proto, args) \ @@ -737,6 +749,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .raw_init = trace_event_raw_init, \ .regfunc = ftrace_raw_reg_event_##call, \ .unregfunc = ftrace_raw_unreg_event_##call, \ + .print_fmt = print_fmt_##template, \ .show_format = ftrace_format_##template, \ .define_fields = ftrace_define_fields_##template, \ _TRACE_PROFILE_INIT(call) \ @@ -745,6 +758,8 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #undef DEFINE_EVENT_PRINT #define DEFINE_EVENT_PRINT(template, call, proto, args, print) \ \ +static const char print_fmt_##call[] = print; \ + \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) event_##call = { \ @@ -754,6 +769,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .raw_init = trace_event_raw_init, \ .regfunc = ftrace_raw_reg_event_##call, \ .unregfunc = ftrace_raw_unreg_event_##call, \ + .print_fmt = print_fmt_##call, \ .show_format = ftrace_format_##call, \ .define_fields = ftrace_define_fields_##template, \ _TRACE_PROFILE_INIT(call) \ @@ -837,6 +853,16 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #ifdef CONFIG_EVENT_PROFILE +#undef __entry +#define __entry entry + +#undef __get_dynamic_array +#define __get_dynamic_array(field) \ + ((void *)__entry + (__entry->__data_loc_##field & 0xffff)) + +#undef __get_str +#define __get_str(field) (char *)__get_dynamic_array(field) + #undef __perf_addr #define __perf_addr(a) __addr = (a) diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 9978a4f4009..95d14b640a6 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -203,6 +203,9 @@ static int ftrace_raw_init_event(struct ftrace_event_call *call) return 0; } +#undef __entry +#define __entry REC + #undef __field #define __field(type, item) @@ -218,6 +221,9 @@ static int ftrace_raw_init_event(struct ftrace_event_call *call) #undef __dynamic_array #define __dynamic_array(type, item) +#undef F_printk +#define F_printk(fmt, args...) #fmt ", " __stringify(args) + #undef FTRACE_ENTRY #define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \ \ @@ -228,6 +234,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .id = type, \ .system = __stringify(TRACE_SYSTEM), \ .raw_init = ftrace_raw_init_event, \ + .print_fmt = print, \ .show_format = ftrace_format_##call, \ .define_fields = ftrace_define_fields_##call, \ }; \ -- cgit v1.2.3-70-g09d2 From c7ef3a9004201bca90626db246a19dadd2c29c9b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 28 Dec 2009 21:13:59 -0500 Subject: tracing: Have syscall tracing call its own init function In the clean up of having all events call one specific function, the syscall event init was changed to call this helper function. With the new print_fmt updates, the syscalls need to do special initializations. This patch converts the syscall events to call its own init function again. Cc: Lai Jiangshan Cc: Li Zefan Signed-off-by: Steven Rostedt --- include/linux/syscalls.h | 4 ++-- kernel/trace/trace_syscalls.c | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 207466a49f3..ed353d274a7 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -143,7 +143,7 @@ struct perf_event_attr; .name = "sys_enter"#sname, \ .system = "syscalls", \ .event = &enter_syscall_print_##sname, \ - .raw_init = trace_event_raw_init, \ + .raw_init = init_syscall_trace, \ .show_format = syscall_enter_format, \ .define_fields = syscall_enter_define_fields, \ .regfunc = reg_event_syscall_enter, \ @@ -165,7 +165,7 @@ struct perf_event_attr; .name = "sys_exit"#sname, \ .system = "syscalls", \ .event = &exit_syscall_print_##sname, \ - .raw_init = trace_event_raw_init, \ + .raw_init = init_syscall_trace, \ .show_format = syscall_exit_format, \ .define_fields = syscall_exit_define_fields, \ .regfunc = reg_event_syscall_exit, \ diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 1352b0a36fa..a78e86349ec 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -450,14 +450,14 @@ int init_syscall_trace(struct ftrace_event_call *call) if (set_syscall_print_fmt(call) < 0) return -ENOMEM; - id = register_ftrace_event(call->event); - if (!id) { + id = trace_event_raw_init(call); + + if (id < 0) { free_syscall_print_fmt(call); - return -ENODEV; + return id; } - call->id = id; - INIT_LIST_HEAD(&call->fields); - return 0; + + return id; } int __init init_ftrace_syscalls(void) -- cgit v1.2.3-70-g09d2 From 0fa0edaf32b9a78b9854f1da98d4511a501089b0 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 15 Dec 2009 15:39:57 +0800 Subject: tracing: Remove show_format and related macros from TRACE_EVENT The previous patches added the use of print_fmt string and changes the trace_define_field() function to also create the fields and format output for the event format files. text data bss dec hex filename 5857201 1355780 9336808 16549789 fc879d vmlinux 5884589 1351684 9337896 16574169 fce6d9 vmlinux-orig The above shows the size of the vmlinux after this patch set compared to the vmlinux-orig which is before the patch set. This saves us 27k on text, 1k on bss and adds just 4k of data. The total savings of 24k in size. Signed-off-by: Lai Jiangshan LKML-Reference: <4B273D4D.40604@cn.fujitsu.com> Acked-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 2 - include/linux/syscalls.h | 2 - include/trace/ftrace.h | 133 ++---------------------------------------- include/trace/syscall.h | 4 -- kernel/trace/trace_events.c | 12 ---- kernel/trace/trace_export.c | 73 ----------------------- kernel/trace/trace_kprobe.c | 78 ------------------------- kernel/trace/trace_syscalls.c | 66 --------------------- 8 files changed, 6 insertions(+), 364 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index bd23d8e52f0..84a5629adfd 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -123,8 +123,6 @@ struct ftrace_event_call { int id; const char *print_fmt; int (*raw_init)(struct ftrace_event_call *); - int (*show_format)(struct ftrace_event_call *, - struct trace_seq *); int (*define_fields)(struct ftrace_event_call *); struct list_head fields; int filter_active; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index ed353d274a7..7b219696ad2 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -144,7 +144,6 @@ struct perf_event_attr; .system = "syscalls", \ .event = &enter_syscall_print_##sname, \ .raw_init = init_syscall_trace, \ - .show_format = syscall_enter_format, \ .define_fields = syscall_enter_define_fields, \ .regfunc = reg_event_syscall_enter, \ .unregfunc = unreg_event_syscall_enter, \ @@ -166,7 +165,6 @@ struct perf_event_attr; .system = "syscalls", \ .event = &exit_syscall_print_##sname, \ .raw_init = init_syscall_trace, \ - .show_format = syscall_exit_format, \ .define_fields = syscall_exit_define_fields, \ .regfunc = reg_event_syscall_exit, \ .unregfunc = unreg_event_syscall_exit, \ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 3351d85c83a..df65b99880b 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -130,130 +130,6 @@ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -/* - * Setup the showing format of trace point. - * - * int - * ftrace_format_##call(struct trace_seq *s) - * { - * struct ftrace_raw_##call field; - * int ret; - * - * ret = trace_seq_printf(s, #type " " #item ";" - * " offset:%u; size:%u;\n", - * offsetof(struct ftrace_raw_##call, item), - * sizeof(field.type)); - * - * } - */ - -#undef TP_STRUCT__entry -#define TP_STRUCT__entry(args...) args - -#undef __field -#define __field(type, item) \ - ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%u;\tsize:%u;\tsigned:%u;\n", \ - (unsigned int)offsetof(typeof(field), item), \ - (unsigned int)sizeof(field.item), \ - (unsigned int)is_signed_type(type)); \ - if (!ret) \ - return 0; - -#undef __field_ext -#define __field_ext(type, item, filter_type) __field(type, item) - -#undef __array -#define __array(type, item, len) \ - ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ - "offset:%u;\tsize:%u;\tsigned:%u;\n", \ - (unsigned int)offsetof(typeof(field), item), \ - (unsigned int)sizeof(field.item), \ - (unsigned int)is_signed_type(type)); \ - if (!ret) \ - return 0; - -#undef __dynamic_array -#define __dynamic_array(type, item, len) \ - ret = trace_seq_printf(s, "\tfield:__data_loc " #type "[] " #item ";\t"\ - "offset:%u;\tsize:%u;\tsigned:%u;\n", \ - (unsigned int)offsetof(typeof(field), \ - __data_loc_##item), \ - (unsigned int)sizeof(field.__data_loc_##item), \ - (unsigned int)is_signed_type(type)); \ - if (!ret) \ - return 0; - -#undef __string -#define __string(item, src) __dynamic_array(char, item, -1) - -#undef __entry -#define __entry REC - -#undef __print_symbolic -#undef __get_dynamic_array -#undef __get_str - -#undef TP_printk -#define TP_printk(fmt, args...) "\"%s\", %s\n", fmt, __stringify(args) - -#undef TP_fast_assign -#define TP_fast_assign(args...) args - -#undef TP_perf_assign -#define TP_perf_assign(args...) - -#undef DECLARE_EVENT_CLASS -#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \ -static int \ -ftrace_format_setup_##call(struct ftrace_event_call *unused, \ - struct trace_seq *s) \ -{ \ - struct ftrace_raw_##call field __attribute__((unused)); \ - int ret = 0; \ - \ - tstruct; \ - \ - return ret; \ -} \ - \ -static int \ -ftrace_format_##call(struct ftrace_event_call *unused, \ - struct trace_seq *s) \ -{ \ - int ret = 0; \ - \ - ret = ftrace_format_setup_##call(unused, s); \ - if (!ret) \ - return ret; \ - \ - ret = trace_seq_printf(s, "\nprint fmt: " print); \ - \ - return ret; \ -} - -#undef DEFINE_EVENT -#define DEFINE_EVENT(template, name, proto, args) - -#undef DEFINE_EVENT_PRINT -#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ -static int \ -ftrace_format_##name(struct ftrace_event_call *unused, \ - struct trace_seq *s) \ -{ \ - int ret = 0; \ - \ - ret = ftrace_format_setup_##template(unused, s); \ - if (!ret) \ - return ret; \ - \ - trace_seq_printf(s, "\nprint fmt: " print); \ - \ - return ret; \ -} - -#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) - /* * Stage 3 of the trace events. * @@ -622,7 +498,6 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ * .raw_init = trace_event_raw_init, * .regfunc = ftrace_reg_event_, * .unregfunc = ftrace_unreg_event_, - * .show_format = ftrace_format_, * } * */ @@ -657,6 +532,12 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ #define __assign_str(dst, src) \ strcpy(__get_str(dst), src); +#undef TP_fast_assign +#define TP_fast_assign(args...) args + +#undef TP_perf_assign +#define TP_perf_assign(args...) + #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ \ @@ -750,7 +631,6 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .regfunc = ftrace_raw_reg_event_##call, \ .unregfunc = ftrace_raw_unreg_event_##call, \ .print_fmt = print_fmt_##template, \ - .show_format = ftrace_format_##template, \ .define_fields = ftrace_define_fields_##template, \ _TRACE_PROFILE_INIT(call) \ } @@ -770,7 +650,6 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .regfunc = ftrace_raw_reg_event_##call, \ .unregfunc = ftrace_raw_unreg_event_##call, \ .print_fmt = print_fmt_##call, \ - .show_format = ftrace_format_##call, \ .define_fields = ftrace_define_fields_##template, \ _TRACE_PROFILE_INIT(call) \ } diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 961fda3556b..8cd41025445 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -34,10 +34,6 @@ struct syscall_metadata { extern unsigned long arch_syscall_addr(int nr); extern int init_syscall_trace(struct ftrace_event_call *call); -extern int syscall_enter_format(struct ftrace_event_call *call, - struct trace_seq *s); -extern int syscall_exit_format(struct ftrace_event_call *call, - struct trace_seq *s); extern int syscall_enter_define_fields(struct ftrace_event_call *call); extern int syscall_exit_define_fields(struct ftrace_event_call *call); extern int reg_event_syscall_enter(struct ftrace_event_call *call); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 250ec865d5f..c2a3077b735 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -520,14 +520,6 @@ out: return ret; } -extern char *__bad_type_size(void); - -#undef FIELD -#define FIELD(type, name) \ - sizeof(type) != sizeof(field.name) ? __bad_type_size() : \ - #type, "common_" #name, offsetof(typeof(field), name), \ - sizeof(field.name), is_signed_type(type) - static ssize_t event_format_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) @@ -965,10 +957,6 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events, filter); } - /* A trace may not want to export its format */ - if (!call->show_format) - return 0; - trace_create_file("format", 0444, call->dir, call, format); diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 95d14b640a6..e091f64ba6c 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -62,78 +62,6 @@ static void __always_unused ____ftrace_check_##name(void) \ #include "trace_entries.h" - -#undef __field -#define __field(type, item) \ - ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \ - offsetof(typeof(field), item), \ - sizeof(field.item), is_signed_type(type)); \ - if (!ret) \ - return 0; - -#undef __field_desc -#define __field_desc(type, container, item) \ - ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \ - offsetof(typeof(field), container.item), \ - sizeof(field.container.item), \ - is_signed_type(type)); \ - if (!ret) \ - return 0; - -#undef __array -#define __array(type, item, len) \ - ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ - "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \ - offsetof(typeof(field), item), \ - sizeof(field.item), is_signed_type(type)); \ - if (!ret) \ - return 0; - -#undef __array_desc -#define __array_desc(type, container, item, len) \ - ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ - "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \ - offsetof(typeof(field), container.item), \ - sizeof(field.container.item), \ - is_signed_type(type)); \ - if (!ret) \ - return 0; - -#undef __dynamic_array -#define __dynamic_array(type, item) \ - ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%zu;\tsize:0;\tsigned:%u;\n", \ - offsetof(typeof(field), item), \ - is_signed_type(type)); \ - if (!ret) \ - return 0; - -#undef F_printk -#define F_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args) - -#undef __entry -#define __entry REC - -#undef FTRACE_ENTRY -#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ -static int \ -ftrace_format_##name(struct ftrace_event_call *unused, \ - struct trace_seq *s) \ -{ \ - struct struct_name field __attribute__((unused)); \ - int ret = 0; \ - \ - tstruct; \ - \ - trace_seq_printf(s, "\nprint fmt: " print); \ - \ - return ret; \ -} - -#include "trace_entries.h" - #undef __field #define __field(type, item) \ ret = trace_define_field(event_call, #type, #item, \ @@ -235,7 +163,6 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .system = __stringify(TRACE_SYSTEM), \ .raw_init = ftrace_raw_init_event, \ .print_fmt = print, \ - .show_format = ftrace_format_##call, \ .define_fields = ftrace_define_fields_##call, \ }; \ diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 147491dccea..c99029916c7 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1174,82 +1174,6 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call) return 0; } -static int __probe_event_show_format(struct trace_seq *s, - struct trace_probe *tp, const char *fmt, - const char *arg) -{ - int i; - - /* Show format */ - if (!trace_seq_printf(s, "\nprint fmt: \"%s", fmt)) - return 0; - - for (i = 0; i < tp->nr_args; i++) - if (!trace_seq_printf(s, " %s=%%lx", tp->args[i].name)) - return 0; - - if (!trace_seq_printf(s, "\", %s", arg)) - return 0; - - for (i = 0; i < tp->nr_args; i++) - if (!trace_seq_printf(s, ", REC->%s", tp->args[i].name)) - return 0; - - return trace_seq_puts(s, "\n"); -} - -#undef SHOW_FIELD -#define SHOW_FIELD(type, item, name) \ - do { \ - ret = trace_seq_printf(s, "\tfield:" #type " %s;\t" \ - "offset:%u;\tsize:%u;\tsigned:%d;\n", name,\ - (unsigned int)offsetof(typeof(field), item),\ - (unsigned int)sizeof(type), \ - is_signed_type(type)); \ - if (!ret) \ - return 0; \ - } while (0) - -static int kprobe_event_show_format(struct ftrace_event_call *call, - struct trace_seq *s) -{ - struct kprobe_trace_entry field __attribute__((unused)); - int ret, i; - struct trace_probe *tp = (struct trace_probe *)call->data; - - SHOW_FIELD(unsigned long, ip, FIELD_STRING_IP); - SHOW_FIELD(int, nargs, FIELD_STRING_NARGS); - - /* Show fields */ - for (i = 0; i < tp->nr_args; i++) - SHOW_FIELD(unsigned long, args[i], tp->args[i].name); - trace_seq_puts(s, "\n"); - - return __probe_event_show_format(s, tp, "(%lx)", - "REC->" FIELD_STRING_IP); -} - -static int kretprobe_event_show_format(struct ftrace_event_call *call, - struct trace_seq *s) -{ - struct kretprobe_trace_entry field __attribute__((unused)); - int ret, i; - struct trace_probe *tp = (struct trace_probe *)call->data; - - SHOW_FIELD(unsigned long, func, FIELD_STRING_FUNC); - SHOW_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP); - SHOW_FIELD(int, nargs, FIELD_STRING_NARGS); - - /* Show fields */ - for (i = 0; i < tp->nr_args; i++) - SHOW_FIELD(unsigned long, args[i], tp->args[i].name); - trace_seq_puts(s, "\n"); - - return __probe_event_show_format(s, tp, "(%lx <- %lx)", - "REC->" FIELD_STRING_FUNC - ", REC->" FIELD_STRING_RETIP); -} - static int __set_print_fmt(struct trace_probe *tp, char *buf, int len) { int i; @@ -1504,12 +1428,10 @@ static int register_probe_event(struct trace_probe *tp) if (probe_is_return(tp)) { tp->event.trace = print_kretprobe_event; call->raw_init = probe_event_raw_init; - call->show_format = kretprobe_event_show_format; call->define_fields = kretprobe_event_define_fields; } else { tp->event.trace = print_kprobe_event; call->raw_init = probe_event_raw_init; - call->show_format = kprobe_event_show_format; call->define_fields = kprobe_event_define_fields; } if (set_print_fmt(tp) < 0) diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index a78e86349ec..49cea70fbf6 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -143,54 +143,6 @@ extern char *__bad_type_size(void); #type, #name, offsetof(typeof(trace), name), \ sizeof(trace.name), is_signed_type(type) -int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) -{ - int i; - int ret; - struct syscall_metadata *entry = call->data; - struct syscall_trace_enter trace; - int offset = offsetof(struct syscall_trace_enter, args); - - ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" - "\tsigned:%u;\n", - SYSCALL_FIELD(int, nr)); - if (!ret) - return 0; - - for (i = 0; i < entry->nb_args; i++) { - ret = trace_seq_printf(s, "\tfield:%s %s;", entry->types[i], - entry->args[i]); - if (!ret) - return 0; - ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;" - "\tsigned:%u;\n", offset, - sizeof(unsigned long), - is_signed_type(unsigned long)); - if (!ret) - return 0; - offset += sizeof(unsigned long); - } - - trace_seq_puts(s, "\nprint fmt: \""); - for (i = 0; i < entry->nb_args; i++) { - ret = trace_seq_printf(s, "%s: 0x%%0%zulx%s", entry->args[i], - sizeof(unsigned long), - i == entry->nb_args - 1 ? "" : ", "); - if (!ret) - return 0; - } - trace_seq_putc(s, '"'); - - for (i = 0; i < entry->nb_args; i++) { - ret = trace_seq_printf(s, ", ((unsigned long)(REC->%s))", - entry->args[i]); - if (!ret) - return 0; - } - - return trace_seq_putc(s, '\n'); -} - static int __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len) { @@ -252,24 +204,6 @@ static void free_syscall_print_fmt(struct ftrace_event_call *call) kfree(call->print_fmt); } -int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s) -{ - int ret; - struct syscall_trace_exit trace; - - ret = trace_seq_printf(s, - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" - "\tsigned:%u;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" - "\tsigned:%u;\n", - SYSCALL_FIELD(int, nr), - SYSCALL_FIELD(long, ret)); - if (!ret) - return 0; - - return trace_seq_printf(s, "\nprint fmt: \"0x%%lx\", REC->ret\n"); -} - int syscall_enter_define_fields(struct ftrace_event_call *call) { struct syscall_trace_enter trace; -- cgit v1.2.3-70-g09d2 From 65324144b50bc7022cc9b6ca8f4a536a957019e3 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 5 Jan 2010 05:50:47 +0000 Subject: net: RFC3069, private VLAN proxy arp support This is to be used together with switch technologies, like RFC3069, that where the individual ports are not allowed to communicate with each other, but they are allowed to talk to the upstream router. As described in RFC 3069, it is possible to allow these hosts to communicate through the upstream router by proxy_arp'ing. This patch basically allow proxy arp replies back to the same interface (from which the ARP request/solicitation was received). Tunable per device via proc "proxy_arp_pvlan": /proc/sys/net/ipv4/conf/*/proxy_arp_pvlan This switch technology is known by different vendor names: - In RFC 3069 it is called VLAN Aggregation. - Cisco and Allied Telesyn call it Private VLAN. - Hewlett-Packard call it Source-Port filtering or port-isolation. - Ericsson call it MAC-Forced Forwarding (RFC Draft). Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 19 +++++++++++++ include/linux/inetdevice.h | 1 + include/linux/sysctl.h | 1 + net/ipv4/arp.c | 52 +++++++++++++++++++++++++++++++--- net/ipv4/devinet.c | 1 + net/ipv4/route.c | 7 ++++- 6 files changed, 76 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 006b39dec87..c532884f4fe 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -692,6 +692,25 @@ proxy_arp - BOOLEAN conf/{all,interface}/proxy_arp is set to TRUE, it will be disabled otherwise +proxy_arp_pvlan - BOOLEAN + Private VLAN proxy arp. + Basically allow proxy arp replies back to the same interface + (from which the ARP request/solicitation was received). + + This is done to support (ethernet) switch features, like RFC + 3069, where the individual ports are NOT allowed to + communicate with each other, but they are allowed to talk to + the upstream router. As described in RFC 3069, it is possible + to allow these hosts to communicate through the upstream + router by proxy_arp'ing. Don't need to be used together with + proxy_arp. + + This technology is known by different names: + In RFC 3069 it is called VLAN Aggregation. + Cisco and Allied Telesyn call it Private VLAN. + Hewlett-Packard call it Source-Port filtering or port-isolation. + Ericsson call it MAC-Forced Forwarding (RFC Draft). + shared_media - BOOLEAN Send(router) or accept(host) RFC1620 shared media redirects. Overrides ip_secure_redirects. diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 699e85c01a4..9a8c57467d3 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -88,6 +88,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) #define IN_DEV_LOG_MARTIANS(in_dev) IN_DEV_ORCONF((in_dev), LOG_MARTIANS) #define IN_DEV_PROXY_ARP(in_dev) IN_DEV_ORCONF((in_dev), PROXY_ARP) +#define IN_DEV_PROXY_ARP_PVLAN(in_dev) IN_DEV_CONF_GET(in_dev, PROXY_ARP_PVLAN) #define IN_DEV_SHARED_MEDIA(in_dev) IN_DEV_ORCONF((in_dev), SHARED_MEDIA) #define IN_DEV_TX_REDIRECTS(in_dev) IN_DEV_ORCONF((in_dev), SEND_REDIRECTS) #define IN_DEV_SEC_REDIRECTS(in_dev) IN_DEV_ORCONF((in_dev), \ diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 877ba039e6a..24ff7e3a0d5 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -482,6 +482,7 @@ enum NET_IPV4_CONF_ARP_ACCEPT=21, NET_IPV4_CONF_ARP_NOTIFY=22, NET_IPV4_CONF_ACCEPT_LOCAL=23, + NET_IPV4_CONF_PROXY_ARP_PVLAN=24, __NET_IPV4_CONF_MAX }; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index c95cd93acf2..078709233bc 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -70,6 +70,7 @@ * bonding can change the skb before * sending (e.g. insert 8021q tag). * Harald Welte : convert to make use of jenkins hash + * Jesper D. Brouer: Proxy ARP PVLAN RFC 3069 support. */ #include @@ -524,12 +525,15 @@ int arp_bind_neighbour(struct dst_entry *dst) /* * Check if we can use proxy ARP for this path */ - -static inline int arp_fwd_proxy(struct in_device *in_dev, struct rtable *rt) +static inline int arp_fwd_proxy(struct in_device *in_dev, + struct net_device *dev, struct rtable *rt) { struct in_device *out_dev; int imi, omi = -1; + if (rt->u.dst.dev == dev) + return 0; + if (!IN_DEV_PROXY_ARP(in_dev)) return 0; @@ -547,6 +551,43 @@ static inline int arp_fwd_proxy(struct in_device *in_dev, struct rtable *rt) return (omi != imi && omi != -1); } +/* + * Check for RFC3069 proxy arp private VLAN (allow to send back to same dev) + * + * RFC3069 supports proxy arp replies back to the same interface. This + * is done to support (ethernet) switch features, like RFC 3069, where + * the individual ports are not allowed to communicate with each + * other, BUT they are allowed to talk to the upstream router. As + * described in RFC 3069, it is possible to allow these hosts to + * communicate through the upstream router, by proxy_arp'ing. + * + * RFC 3069: "VLAN Aggregation for Efficient IP Address Allocation" + * + * This technology is known by different names: + * In RFC 3069 it is called VLAN Aggregation. + * Cisco and Allied Telesyn call it Private VLAN. + * Hewlett-Packard call it Source-Port filtering or port-isolation. + * Ericsson call it MAC-Forced Forwarding (RFC Draft). + * + */ +static inline int arp_fwd_pvlan(struct in_device *in_dev, + struct net_device *dev, struct rtable *rt, + __be32 sip, __be32 tip) +{ + /* Private VLAN is only concerned about the same ethernet segment */ + if (rt->u.dst.dev != dev) + return 0; + + /* Don't reply on self probes (often done by windowz boxes)*/ + if (sip == tip) + return 0; + + if (IN_DEV_PROXY_ARP_PVLAN(in_dev)) + return 1; + else + return 0; +} + /* * Interface to link layer: send routine and receive handler. */ @@ -833,8 +874,11 @@ static int arp_process(struct sk_buff *skb) } goto out; } else if (IN_DEV_FORWARD(in_dev)) { - if (addr_type == RTN_UNICAST && rt->u.dst.dev != dev && - (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) { + if (addr_type == RTN_UNICAST && + (arp_fwd_proxy(in_dev, dev, rt) || + arp_fwd_pvlan(in_dev, dev, rt, sip, tip) || + pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) + { n = neigh_event_ns(&arp_tbl, sha, &sip, dev); if (n) neigh_release(n); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 5cdbc102a41..0715f4cac39 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1407,6 +1407,7 @@ static struct devinet_sysctl_table { DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"), DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"), DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"), + DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"), DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"), DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"), diff --git a/net/ipv4/route.c b/net/ipv4/route.c index e446496f564..1cc339441e7 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1988,8 +1988,13 @@ static int __mkroute_input(struct sk_buff *skb, if (skb->protocol != htons(ETH_P_IP)) { /* Not IP (i.e. ARP). Do not create route, if it is * invalid for proxy arp. DNAT routes are always valid. + * + * Proxy arp feature have been extended to allow, ARP + * replies back to the same interface, to support + * Private VLAN switch technologies. See arp.c. */ - if (out_dev == in_dev) { + if (out_dev == in_dev && + IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) { err = -EINVAL; goto cleanup; } -- cgit v1.2.3-70-g09d2 From 3c9732c06879d85f2fdf7ec69198c1d78da42a98 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Wed, 6 Jan 2010 23:07:13 +0000 Subject: stmmac: add the new Header file for stmmac platform data Signed-off-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- include/linux/stmmac.h | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 include/linux/stmmac.h (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h new file mode 100644 index 00000000000..32bfd1a8a48 --- /dev/null +++ b/include/linux/stmmac.h @@ -0,0 +1,53 @@ +/******************************************************************************* + + Header file for stmmac platform data + + Copyright (C) 2009 STMicroelectronics Ltd + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Author: Giuseppe Cavallaro +*******************************************************************************/ + +#ifndef __STMMAC_PLATFORM_DATA +#define __STMMAC_PLATFORM_DATA + +/* platfrom data for platfrom device structure's platfrom_data field */ + +/* Private data for the STM on-board ethernet driver */ +struct plat_stmmacenet_data { + int bus_id; + int pbl; + int has_gmac; + void (*fix_mac_speed)(void *priv, unsigned int speed); + void (*bus_setup)(unsigned long ioaddr); +#ifdef CONFIG_STM_DRIVERS + struct stm_pad_config *pad_config; +#endif + void *bsp_priv; +}; + +struct plat_stmmacphy_data { + int bus_id; + int phy_addr; + unsigned int phy_mask; + int interface; + int (*phy_reset)(void *priv); + void *priv; +}; +#endif + -- cgit v1.2.3-70-g09d2 From 0ed731859e24cd6e3ec058cf2b49b2a0df80e86b Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 6 Jan 2010 09:23:54 +0900 Subject: LSM: Update comment on security_sock_rcv_skb It is not permitted to do sleeping operation inside security_sock_rcv_skb(). Signed-off-by: Tetsuo Handa Acked-by: Serge Hallyn -- Signed-off-by: James Morris --- include/linux/security.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 466cbadbd1e..3696ca34574 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -978,6 +978,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * Check permissions on incoming network packets. This hook is distinct * from Netfilter's IP input hooks since it is the first time that the * incoming sk_buff @skb has been associated with a particular socket, @sk. + * Must not sleep inside this hook because some callers hold spinlocks. * @sk contains the sock (not socket) associated with the incoming sk_buff. * @skb contains the incoming network data. * @socket_getpeersec_stream: -- cgit v1.2.3-70-g09d2 From e03a72e13648ac6277bf2bab6b8324d51f89c0fa Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Mon, 11 Jan 2010 03:21:51 -0500 Subject: block: Stop using byte offsets All callers of the stacking functions use 512-byte sector units rather than byte offsets. Simplify the code so the stacking functions take sectors when specifying data offsets. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- block/blk-settings.c | 26 +++++++++----------------- fs/partitions/check.c | 7 ++++--- include/linux/blkdev.h | 17 +++++------------ 3 files changed, 18 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/block/blk-settings.c b/block/blk-settings.c index 5eeb9e0d256..78549c72378 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -507,7 +507,7 @@ static unsigned int lcm(unsigned int a, unsigned int b) * blk_stack_limits - adjust queue_limits for stacked devices * @t: the stacking driver limits (top device) * @b: the underlying queue limits (bottom, component device) - * @offset: offset to beginning of data within component device + * @start: first data sector within component device * * Description: * This function is used by stacking drivers like MD and DM to ensure @@ -525,10 +525,9 @@ static unsigned int lcm(unsigned int a, unsigned int b) * the alignment_offset is undefined. */ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, - sector_t offset) + sector_t start) { - sector_t alignment; - unsigned int top, bottom, ret = 0; + unsigned int top, bottom, alignment, ret = 0; t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors); t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors); @@ -548,7 +547,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->misaligned |= b->misaligned; - alignment = queue_limit_alignment_offset(b, offset); + alignment = queue_limit_alignment_offset(b, start); /* Bottom device has different alignment. Check that it is * compatible with the current top alignment. @@ -611,11 +610,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, /* Discard alignment and granularity */ if (b->discard_granularity) { - unsigned int granularity = b->discard_granularity; - offset &= granularity - 1; - - alignment = (granularity + b->discard_alignment - offset) - & (granularity - 1); + alignment = queue_limit_discard_alignment(b, start); if (t->discard_granularity != 0 && t->discard_alignment != alignment) { @@ -657,7 +652,7 @@ int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev, start += get_start_sect(bdev); - return blk_stack_limits(t, &bq->limits, start << 9); + return blk_stack_limits(t, &bq->limits, start); } EXPORT_SYMBOL(bdev_stack_limits); @@ -668,9 +663,8 @@ EXPORT_SYMBOL(bdev_stack_limits); * @offset: offset to beginning of data within component device * * Description: - * Merges the limits for two queues. Returns 0 if alignment - * didn't change. Returns -1 if adding the bottom device caused - * misalignment. + * Merges the limits for a top level gendisk and a bottom level + * block_device. */ void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, sector_t offset) @@ -678,9 +672,7 @@ void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, struct request_queue *t = disk->queue; struct request_queue *b = bdev_get_queue(bdev); - offset += get_start_sect(bdev) << 9; - - if (blk_stack_limits(&t->limits, &b->limits, offset) < 0) { + if (bdev_stack_limits(&t->limits, bdev, offset >> 9) < 0) { char top[BDEVNAME_SIZE], bottom[BDEVNAME_SIZE]; disk_name(disk, 0, top); diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 64bc8998ac9..e8865c11777 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -412,9 +412,10 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, pdev = part_to_dev(p); p->start_sect = start; - p->alignment_offset = queue_sector_alignment_offset(disk->queue, start); - p->discard_alignment = queue_sector_discard_alignment(disk->queue, - start); + p->alignment_offset = + queue_limit_alignment_offset(&disk->queue->limits, start); + p->discard_alignment = + queue_limit_discard_alignment(&disk->queue->limits, start); p->nr_sects = len; p->partno = partno; p->policy = get_disk_ro(disk); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5c8018977ef..ffb13ad3571 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1112,18 +1112,13 @@ static inline int queue_alignment_offset(struct request_queue *q) return q->limits.alignment_offset; } -static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t offset) +static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t sector) { unsigned int granularity = max(lim->physical_block_size, lim->io_min); + unsigned int alignment = (sector << 9) & (granularity - 1); - offset &= granularity - 1; - return (granularity + lim->alignment_offset - offset) & (granularity - 1); -} - -static inline int queue_sector_alignment_offset(struct request_queue *q, - sector_t sector) -{ - return queue_limit_alignment_offset(&q->limits, sector << 9); + return (granularity + lim->alignment_offset - alignment) + & (granularity - 1); } static inline int bdev_alignment_offset(struct block_device *bdev) @@ -1147,10 +1142,8 @@ static inline int queue_discard_alignment(struct request_queue *q) return q->limits.discard_alignment; } -static inline int queue_sector_discard_alignment(struct request_queue *q, - sector_t sector) +static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector_t sector) { - struct queue_limits *lim = &q->limits; unsigned int alignment = (sector << 9) & (lim->discard_granularity - 1); return (lim->discard_granularity + lim->discard_alignment - alignment) -- cgit v1.2.3-70-g09d2 From d218d11133d888f9745802146a50255a4781d37a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 11 Jan 2010 16:28:01 -0800 Subject: tcp: Generalized TTL Security Mechanism This patch adds the kernel portions needed to implement RFC 5082 Generalized TTL Security Mechanism (GTSM). It is a lightweight security measure against forged packets causing DoS attacks (for BGP). This is already implemented the same way in BSD kernels. For the necessary Quagga patch http://www.gossamer-threads.com/lists/quagga/dev/17389 Description from Cisco http://www.cisco.com/en/US/docs/ios/12_3t/12_3t7/feature/guide/gt_btsh.html It does add one byte to each socket structure, but I did a little rearrangement to reuse a hole (on 64 bit), but it does grow the structure on 32 bit This should be documented on ip(4) man page and the Glibc in.h file also needs update. IPV6_MINHOPLIMIT should also be added (although BSD doesn't support that). Only TCP is supported, but could also be added to UDP, DCCP, SCTP if desired. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/in.h | 2 ++ include/net/inet_sock.h | 4 +++- net/ipv4/ip_sockglue.c | 14 +++++++++++++- net/ipv4/tcp_ipv4.c | 3 +++ 4 files changed, 21 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/in.h b/include/linux/in.h index b615649db12..583c76f9c30 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -84,6 +84,8 @@ struct in_addr { #define IP_ORIGDSTADDR 20 #define IP_RECVORIGDSTADDR IP_ORIGDSTADDR +#define IP_MINTTL 21 + /* IP_MTU_DISCOVER values */ #define IP_PMTUDISC_DONT 0 /* Never send DF frames */ #define IP_PMTUDISC_WANT 1 /* Use per route hints */ diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index bd4c53f75ac..83fd34437cf 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -122,10 +122,12 @@ struct inet_sock { __be32 inet_saddr; __s16 uc_ttl; __u16 cmsg_flags; - struct ip_options *opt; __be16 inet_sport; __u16 inet_id; + + struct ip_options *opt; __u8 tos; + __u8 min_ttl; __u8 mc_ttl; __u8 pmtudisc; __u8 recverr:1, diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index cafad9baff0..644dc43a55d 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -451,7 +451,8 @@ static int do_ip_setsockopt(struct sock *sk, int level, (1<transparent = !!val; break; + case IP_MINTTL: + if (optlen < 1) + goto e_inval; + if (val < 0 || val > 255) + goto e_inval; + inet->min_ttl = val; + break; + default: err = -ENOPROTOOPT; break; @@ -1198,6 +1207,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_TRANSPARENT: val = inet->transparent; break; + case IP_MINTTL: + val = inet->min_ttl; + break; default: release_sock(sk); return -ENOPROTOOPT; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 65b8ebfd078..382f667238e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1649,6 +1649,9 @@ int tcp_v4_rcv(struct sk_buff *skb) if (!sk) goto no_tcp_socket; + if (iph->ttl < inet_sk(sk)->min_ttl) + goto discard_and_relse; + process: if (sk->sk_state == TCP_TIME_WAIT) goto do_time_wait; -- cgit v1.2.3-70-g09d2 From 3ccd4c6167d3b39d52631767ebbf8b5677c5855d Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 12 Jan 2010 02:00:46 -0800 Subject: can: Unify droping of invalid tx skbs and netdev stats To prevent the CAN drivers to operate on invalid socketbuffers the skbs are now checked and silently dropped at the xmit-function consistently. Also the netdev stats are consistently using the CAN data length code (dlc) for [rx|tx]_bytes now. Signed-off-by: Oliver Hartkopp Acked-by: Wolfgang Grandegger Signed-off-by: David S. Miller --- drivers/net/can/at91_can.c | 3 +++ drivers/net/can/bfin_can.c | 3 +++ drivers/net/can/mcp251x.c | 6 +----- drivers/net/can/mscan/mscan.c | 5 +---- drivers/net/can/sja1000/sja1000.c | 3 +++ drivers/net/can/ti_hecc.c | 4 +++- drivers/net/can/usb/ems_usb.c | 3 +++ drivers/net/can/vcan.c | 12 +++++++++--- include/linux/can/dev.h | 15 +++++++++++++++ 9 files changed, 41 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c index 166cc7e579c..f7287497ba6 100644 --- a/drivers/net/can/at91_can.c +++ b/drivers/net/can/at91_can.c @@ -342,6 +342,9 @@ static netdev_tx_t at91_start_xmit(struct sk_buff *skb, struct net_device *dev) unsigned int mb, prio; u32 reg_mid, reg_mcr; + if (can_dropped_invalid_skb(dev, skb)) + return NETDEV_TX_OK; + mb = get_tx_next_mb(priv); prio = get_tx_next_prio(priv); diff --git a/drivers/net/can/bfin_can.c b/drivers/net/can/bfin_can.c index 0ec1524523c..7e1926e79e9 100644 --- a/drivers/net/can/bfin_can.c +++ b/drivers/net/can/bfin_can.c @@ -318,6 +318,9 @@ static int bfin_can_start_xmit(struct sk_buff *skb, struct net_device *dev) u16 val; int i; + if (can_dropped_invalid_skb(dev, skb)) + return NETDEV_TX_OK; + netif_stop_queue(dev); /* fill id */ diff --git a/drivers/net/can/mcp251x.c b/drivers/net/can/mcp251x.c index 1a72ca066a1..afa2fa45fed 100644 --- a/drivers/net/can/mcp251x.c +++ b/drivers/net/can/mcp251x.c @@ -494,12 +494,8 @@ static netdev_tx_t mcp251x_hard_start_xmit(struct sk_buff *skb, return NETDEV_TX_BUSY; } - if (skb->len != sizeof(struct can_frame)) { - dev_err(&spi->dev, "dropping packet - bad length\n"); - dev_kfree_skb(skb); - net->stats.tx_dropped++; + if (can_dropped_invalid_skb(net, skb)) return NETDEV_TX_OK; - } netif_stop_queue(net); priv->tx_skb = skb; diff --git a/drivers/net/can/mscan/mscan.c b/drivers/net/can/mscan/mscan.c index 500d18918bd..40827c128b6 100644 --- a/drivers/net/can/mscan/mscan.c +++ b/drivers/net/can/mscan/mscan.c @@ -204,11 +204,8 @@ static netdev_tx_t mscan_start_xmit(struct sk_buff *skb, struct net_device *dev) int i, rtr, buf_id; u32 can_id; - if (skb->len != sizeof(*frame) || frame->can_dlc > 8) { - kfree_skb(skb); - dev->stats.tx_dropped++; + if (can_dropped_invalid_skb(dev, skb)) return NETDEV_TX_OK; - } out_8(®s->cantier, 0); diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c index 542a4f7255b..345304d779b 100644 --- a/drivers/net/can/sja1000/sja1000.c +++ b/drivers/net/can/sja1000/sja1000.c @@ -249,6 +249,9 @@ static netdev_tx_t sja1000_start_xmit(struct sk_buff *skb, uint8_t dreg; int i; + if (can_dropped_invalid_skb(dev, skb)) + return NETDEV_TX_OK; + netif_stop_queue(dev); fi = dlc = cf->can_dlc; diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index 5c993c2da52..7d370e32a7a 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -477,6 +477,9 @@ static netdev_tx_t ti_hecc_xmit(struct sk_buff *skb, struct net_device *ndev) u32 mbxno, mbx_mask, data; unsigned long flags; + if (can_dropped_invalid_skb(ndev, skb)) + return NETDEV_TX_OK; + mbxno = get_tx_head_mb(priv); mbx_mask = BIT(mbxno); spin_lock_irqsave(&priv->mbx_lock, flags); @@ -491,7 +494,6 @@ static netdev_tx_t ti_hecc_xmit(struct sk_buff *skb, struct net_device *ndev) spin_unlock_irqrestore(&priv->mbx_lock, flags); /* Prepare mailbox for transmission */ - data = min_t(u8, cf->can_dlc, 8); if (cf->can_id & CAN_RTR_FLAG) /* Remote transmission request */ data |= HECC_CANMCF_RTR; data |= get_tx_head_prio(priv) << 8; diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index efbb05c71bf..ddb17e25665 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -767,6 +767,9 @@ static netdev_tx_t ems_usb_start_xmit(struct sk_buff *skb, struct net_device *ne size_t size = CPC_HEADER_SIZE + CPC_MSG_HEADER_LEN + sizeof(struct cpc_can_msg); + if (can_dropped_invalid_skb(netdev, skb)) + return NETDEV_TX_OK; + /* create a URB, and a buffer for it, and copy the data to the URB */ urb = usb_alloc_urb(0, GFP_ATOMIC); if (!urb) { diff --git a/drivers/net/can/vcan.c b/drivers/net/can/vcan.c index 80ac5631398..d124d837ae5 100644 --- a/drivers/net/can/vcan.c +++ b/drivers/net/can/vcan.c @@ -47,6 +47,7 @@ #include #include #include +#include #include static __initdata const char banner[] = @@ -70,10 +71,11 @@ MODULE_PARM_DESC(echo, "Echo sent frames (for testing). Default: 0 (Off)"); static void vcan_rx(struct sk_buff *skb, struct net_device *dev) { + struct can_frame *cf = (struct can_frame *)skb->data; struct net_device_stats *stats = &dev->stats; stats->rx_packets++; - stats->rx_bytes += skb->len; + stats->rx_bytes += cf->can_dlc; skb->protocol = htons(ETH_P_CAN); skb->pkt_type = PACKET_BROADCAST; @@ -85,11 +87,15 @@ static void vcan_rx(struct sk_buff *skb, struct net_device *dev) static netdev_tx_t vcan_tx(struct sk_buff *skb, struct net_device *dev) { + struct can_frame *cf = (struct can_frame *)skb->data; struct net_device_stats *stats = &dev->stats; int loop; + if (can_dropped_invalid_skb(dev, skb)) + return NETDEV_TX_OK; + stats->tx_packets++; - stats->tx_bytes += skb->len; + stats->tx_bytes += cf->can_dlc; /* set flag whether this packet has to be looped back */ loop = skb->pkt_type == PACKET_LOOPBACK; @@ -103,7 +109,7 @@ static netdev_tx_t vcan_tx(struct sk_buff *skb, struct net_device *dev) * CAN core already did the echo for us */ stats->rx_packets++; - stats->rx_bytes += skb->len; + stats->rx_bytes += cf->can_dlc; } kfree_skb(skb); return NETDEV_TX_OK; diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 3db7767d2a1..7e7c98a3e90 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -60,6 +60,21 @@ struct can_priv { */ #define get_can_dlc(i) (min_t(__u8, (i), 8)) +/* Drop a given socketbuffer if it does not contain a valid CAN frame. */ +static inline int can_dropped_invalid_skb(struct net_device *dev, + struct sk_buff *skb) +{ + const struct can_frame *cf = (struct can_frame *)skb->data; + + if (unlikely(skb->len != sizeof(*cf) || cf->can_dlc > 8)) { + kfree_skb(skb); + dev->stats.tx_dropped++; + return 1; + } + + return 0; +} + struct net_device *alloc_candev(int sizeof_priv, unsigned int echo_skb_max); void free_candev(struct net_device *dev); -- cgit v1.2.3-70-g09d2 From 81077e82c3f591578625805dd6464a27a9ff56ec Mon Sep 17 00:00:00 2001 From: Lukáš Turek <8an@praha12.net> Date: Mon, 21 Dec 2009 22:50:47 +0100 Subject: nl80211: Add new WIPHY attribute COVERAGE_CLASS The new attribute NL80211_ATTR_WIPHY_COVERAGE_CLASS sets IEEE 802.11 Coverage Class, which depends on maximum distance of nodes in a wireless network. It's required for long distance links (more than a few hundred meters). The attribute is now ignored by two non-mac80211 drivers, rndis and iwmc3200wifi, together with WIPHY_PARAM_RETRY_SHORT and WIPHY_PARAM_RETRY_LONG. If it turns out to be a problem, we could split set_wiphy_params callback or add new capability bits. Signed-off-by: Lukas Turek <8an@praha12.net> Signed-off-by: John W. Linville --- include/linux/nl80211.h | 4 ++++ include/net/cfg80211.h | 2 ++ net/wireless/core.c | 1 + net/wireless/nl80211.c | 15 +++++++++++++++ 4 files changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 2bfbe88837e..d4c556de717 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -430,6 +430,8 @@ enum nl80211_commands { * @NL80211_ATTR_WIPHY_RTS_THRESHOLD: RTS threshold (TX frames with length * larger than or equal to this use RTS/CTS handshake); allowed range: * 0..65536, disable with (u32)-1; dot11RTSThreshold; u32 + * @NL80211_ATTR_WIPHY_COVERAGE_CLASS: Coverage Class as defined by IEEE 802.11 + * section 7.3.2.9; dot11CoverageClass; u8 * * @NL80211_ATTR_IFINDEX: network interface index of the device to operate on * @NL80211_ATTR_IFNAME: network interface name @@ -779,6 +781,8 @@ enum nl80211_attrs { NL80211_ATTR_COOKIE, + NL80211_ATTR_WIPHY_COVERAGE_CLASS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index add79930f47..a8d5d04314b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -837,6 +837,7 @@ enum wiphy_params_flags { WIPHY_PARAM_RETRY_LONG = 1 << 1, WIPHY_PARAM_FRAG_THRESHOLD = 1 << 2, WIPHY_PARAM_RTS_THRESHOLD = 1 << 3, + WIPHY_PARAM_COVERAGE_CLASS = 1 << 4, }; /** @@ -1236,6 +1237,7 @@ struct wiphy { u8 retry_long; u32 frag_threshold; u32 rts_threshold; + u8 coverage_class; char fw_version[ETHTOOL_BUSINFO_LEN]; u32 hw_version; diff --git a/net/wireless/core.c b/net/wireless/core.c index c2a2c563d21..0a545bb6ed0 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -402,6 +402,7 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) rdev->wiphy.retry_long = 4; rdev->wiphy.frag_threshold = (u32) -1; rdev->wiphy.rts_threshold = (u32) -1; + rdev->wiphy.coverage_class = 0; return &rdev->wiphy; } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index e3bee3cecdf..c09fbcd278f 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -69,6 +69,7 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_WIPHY_RETRY_LONG] = { .type = NLA_U8 }, [NL80211_ATTR_WIPHY_FRAG_THRESHOLD] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_RTS_THRESHOLD] = { .type = NLA_U32 }, + [NL80211_ATTR_WIPHY_COVERAGE_CLASS] = { .type = NLA_U8 }, [NL80211_ATTR_IFTYPE] = { .type = NLA_U32 }, [NL80211_ATTR_IFINDEX] = { .type = NLA_U32 }, @@ -444,6 +445,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, dev->wiphy.frag_threshold); NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_RTS_THRESHOLD, dev->wiphy.rts_threshold); + NLA_PUT_U8(msg, NL80211_ATTR_WIPHY_COVERAGE_CLASS, + dev->wiphy.coverage_class); NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS, dev->wiphy.max_scan_ssids); @@ -684,6 +687,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) u32 changed; u8 retry_short = 0, retry_long = 0; u32 frag_threshold = 0, rts_threshold = 0; + u8 coverage_class = 0; rtnl_lock(); @@ -806,9 +810,16 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) changed |= WIPHY_PARAM_RTS_THRESHOLD; } + if (info->attrs[NL80211_ATTR_WIPHY_COVERAGE_CLASS]) { + coverage_class = nla_get_u8( + info->attrs[NL80211_ATTR_WIPHY_COVERAGE_CLASS]); + changed |= WIPHY_PARAM_COVERAGE_CLASS; + } + if (changed) { u8 old_retry_short, old_retry_long; u32 old_frag_threshold, old_rts_threshold; + u8 old_coverage_class; if (!rdev->ops->set_wiphy_params) { result = -EOPNOTSUPP; @@ -819,6 +830,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) old_retry_long = rdev->wiphy.retry_long; old_frag_threshold = rdev->wiphy.frag_threshold; old_rts_threshold = rdev->wiphy.rts_threshold; + old_coverage_class = rdev->wiphy.coverage_class; if (changed & WIPHY_PARAM_RETRY_SHORT) rdev->wiphy.retry_short = retry_short; @@ -828,6 +840,8 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) rdev->wiphy.frag_threshold = frag_threshold; if (changed & WIPHY_PARAM_RTS_THRESHOLD) rdev->wiphy.rts_threshold = rts_threshold; + if (changed & WIPHY_PARAM_COVERAGE_CLASS) + rdev->wiphy.coverage_class = coverage_class; result = rdev->ops->set_wiphy_params(&rdev->wiphy, changed); if (result) { @@ -835,6 +849,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) rdev->wiphy.retry_long = old_retry_long; rdev->wiphy.frag_threshold = old_frag_threshold; rdev->wiphy.rts_threshold = old_rts_threshold; + rdev->wiphy.coverage_class = old_coverage_class; } } -- cgit v1.2.3-70-g09d2 From 13ae75b103e07304a34ab40c9136e9f53e06475c Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Tue, 29 Dec 2009 12:59:45 +0200 Subject: nl80211: New command for setting TX rate mask for rate control Add a new NL80211_CMD_SET_TX_BITRATE_MASK command and related attributes to provide support for setting TX rate mask for rate control. This uses the existing cfg80211 set_bitrate_mask operation that was previously used only with WEXT compat code (SIOCSIWRATE). The nl80211 command allows more generic configuration of allowed rates as a mask instead of fixed/max rate. Signed-off-by: Jouni Malinen Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 44 +++++++++++++++++++ include/net/cfg80211.h | 4 +- net/wireless/nl80211.c | 111 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 157 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index d4c556de717..7a1c8c145b2 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -295,6 +295,10 @@ * This command is also used as an event to notify when a requested * remain-on-channel duration has expired. * + * @NL80211_CMD_SET_TX_BITRATE_MASK: Set the mask of rates to be used in TX + * rate selection. %NL80211_ATTR_IFINDEX is used to specify the interface + * and @NL80211_ATTR_TX_RATES the set of allowed rates. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -381,6 +385,8 @@ enum nl80211_commands { NL80211_CMD_REMAIN_ON_CHANNEL, NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL, + NL80211_CMD_SET_TX_BITRATE_MASK, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -640,6 +646,13 @@ enum nl80211_commands { * * @NL80211_ATTR_COOKIE: Generic 64-bit cookie to identify objects. * + * @NL80211_ATTR_TX_RATES: Nested set of attributes + * (enum nl80211_tx_rate_attributes) describing TX rates per band. The + * enum nl80211_band value is used as the index (nla_type() of the nested + * data. If a band is not included, it will be configured to allow all + * rates based on negotiated supported rates information. This attribute + * is used with %NL80211_CMD_SET_TX_BITRATE_MASK. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -783,6 +796,8 @@ enum nl80211_attrs { NL80211_ATTR_WIPHY_COVERAGE_CLASS, + NL80211_ATTR_TX_RATES, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -1482,4 +1497,33 @@ enum nl80211_key_attributes { NL80211_KEY_MAX = __NL80211_KEY_AFTER_LAST - 1 }; +/** + * enum nl80211_tx_rate_attributes - TX rate set attributes + * @__NL80211_TXRATE_INVALID: invalid + * @NL80211_TXRATE_LEGACY: Legacy (non-MCS) rates allowed for TX rate selection + * in an array of rates as defined in IEEE 802.11 7.3.2.2 (u8 values with + * 1 = 500 kbps) but without the IE length restriction (at most + * %NL80211_MAX_SUPP_RATES in a single array). + * @__NL80211_TXRATE_AFTER_LAST: internal + * @NL80211_TXRATE_MAX: highest TX rate attribute + */ +enum nl80211_tx_rate_attributes { + __NL80211_TXRATE_INVALID, + NL80211_TXRATE_LEGACY, + + /* keep last */ + __NL80211_TXRATE_AFTER_LAST, + NL80211_TXRATE_MAX = __NL80211_TXRATE_AFTER_LAST - 1 +}; + +/** + * enum nl80211_band - Frequency band + * @NL80211_BAND_2GHZ - 2.4 GHz ISM band + * @NL80211_BAND_5GHZ - around 5 GHz band (4.9 - 5.7 GHz) + */ +enum nl80211_band { + NL80211_BAND_2GHZ, + NL80211_BAND_5GHZ, +}; + #endif /* __LINUX_NL80211_H */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 22e062afb5a..0d734413b5f 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -39,8 +39,8 @@ * @IEEE80211_BAND_5GHZ: around 5GHz band (4.9-5.7) */ enum ieee80211_band { - IEEE80211_BAND_2GHZ, - IEEE80211_BAND_5GHZ, + IEEE80211_BAND_2GHZ = NL80211_BAND_2GHZ, + IEEE80211_BAND_5GHZ = NL80211_BAND_5GHZ, /* keep last */ IEEE80211_NUM_BANDS diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c09fbcd278f..b804062e017 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -144,6 +144,7 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { .len = WLAN_PMKID_LEN }, [NL80211_ATTR_DURATION] = { .type = NLA_U32 }, [NL80211_ATTR_COOKIE] = { .type = NLA_U64 }, + [NL80211_ATTR_TX_RATES] = { .type = NLA_NESTED }, }; /* policy for the attributes */ @@ -575,6 +576,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, CMD(del_pmksa, DEL_PMKSA); CMD(flush_pmksa, FLUSH_PMKSA); CMD(remain_on_channel, REMAIN_ON_CHANNEL); + CMD(set_bitrate_mask, SET_TX_BITRATE_MASK); if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { i++; NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS); @@ -4438,6 +4440,109 @@ static int nl80211_cancel_remain_on_channel(struct sk_buff *skb, return err; } +static u32 rateset_to_mask(struct ieee80211_supported_band *sband, + u8 *rates, u8 rates_len) +{ + u8 i; + u32 mask = 0; + + for (i = 0; i < rates_len; i++) { + int rate = (rates[i] & 0x7f) * 5; + int ridx; + for (ridx = 0; ridx < sband->n_bitrates; ridx++) { + struct ieee80211_rate *srate = + &sband->bitrates[ridx]; + if (rate == srate->bitrate) { + mask |= 1 << ridx; + break; + } + } + if (ridx == sband->n_bitrates) + return 0; /* rate not found */ + } + + return mask; +} + +static struct nla_policy +nl80211_txattr_policy[NL80211_TXRATE_MAX + 1] __read_mostly = { + [NL80211_TXRATE_LEGACY] = { .type = NLA_BINARY, + .len = NL80211_MAX_SUPP_RATES }, +}; + +static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, + struct genl_info *info) +{ + struct nlattr *tb[NL80211_TXRATE_MAX + 1]; + struct cfg80211_registered_device *rdev; + struct cfg80211_bitrate_mask mask; + int err, rem, i; + struct net_device *dev; + struct nlattr *tx_rates; + struct ieee80211_supported_band *sband; + + if (info->attrs[NL80211_ATTR_TX_RATES] == NULL) + return -EINVAL; + + rtnl_lock(); + + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); + if (err) + goto unlock_rtnl; + + if (!rdev->ops->set_bitrate_mask) { + err = -EOPNOTSUPP; + goto unlock; + } + + memset(&mask, 0, sizeof(mask)); + /* Default to all rates enabled */ + for (i = 0; i < IEEE80211_NUM_BANDS; i++) { + sband = rdev->wiphy.bands[i]; + mask.control[i].legacy = + sband ? (1 << sband->n_bitrates) - 1 : 0; + } + + /* + * The nested attribute uses enum nl80211_band as the index. This maps + * directly to the enum ieee80211_band values used in cfg80211. + */ + nla_for_each_nested(tx_rates, info->attrs[NL80211_ATTR_TX_RATES], rem) + { + enum ieee80211_band band = nla_type(tx_rates); + if (band < 0 || band >= IEEE80211_NUM_BANDS) { + err = -EINVAL; + goto unlock; + } + sband = rdev->wiphy.bands[band]; + if (sband == NULL) { + err = -EINVAL; + goto unlock; + } + nla_parse(tb, NL80211_TXRATE_MAX, nla_data(tx_rates), + nla_len(tx_rates), nl80211_txattr_policy); + if (tb[NL80211_TXRATE_LEGACY]) { + mask.control[band].legacy = rateset_to_mask( + sband, + nla_data(tb[NL80211_TXRATE_LEGACY]), + nla_len(tb[NL80211_TXRATE_LEGACY])); + if (mask.control[band].legacy == 0) { + err = -EINVAL; + goto unlock; + } + } + } + + err = rdev->ops->set_bitrate_mask(&rdev->wiphy, dev, NULL, &mask); + + unlock: + dev_put(dev); + cfg80211_unlock_rdev(rdev); + unlock_rtnl: + rtnl_unlock(); + return err; +} + static struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_WIPHY, @@ -4712,6 +4817,12 @@ static struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, + { + .cmd = NL80211_CMD_SET_TX_BITRATE_MASK, + .doit = nl80211_set_tx_bitrate_mask, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, }; static struct genl_multicast_group nl80211_mlme_mcgrp = { -- cgit v1.2.3-70-g09d2 From 7044cc565b45a898c140fb185174a66f2d68a163 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Tue, 5 Jan 2010 20:16:19 +0200 Subject: mac80211: add functions to create PS Poll and Nullfunc templates Some hardware, for example wl1251 and wl1271, handle the transmission of power save related frames in hardware, but the driver is responsible for creating the templates. It's better to create the templates in mac80211, that way all drivers can benefit from this. Add two new functions, ieee80211_pspoll_get() and ieee80211_nullfunc_get() which drivers need to call to get the frame. Drivers are also responsible for updating the templates after each association. Also new struct ieee80211_hdr_3addr is added to ieee80211.h to make it easy to calculate length of the Nullfunc frame. Signed-off-by: Kalle Valo Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 9 ++++++ include/net/mac80211.h | 30 ++++++++++++++++++ net/mac80211/tx.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 117 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index aeea282bd2f..602c0692c3f 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -130,6 +130,15 @@ struct ieee80211_hdr { u8 addr4[6]; } __attribute__ ((packed)); +struct ieee80211_hdr_3addr { + __le16 frame_control; + __le16 duration_id; + u8 addr1[6]; + u8 addr2[6]; + u8 addr3[6]; + __le16 seq_ctrl; +} __attribute__ ((packed)); + /** * ieee80211_has_tods - check if IEEE80211_FCTL_TODS is set * @fc: frame control bytes in little-endian byteorder diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 7e5af6d90b9..75f46e26ad6 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1874,6 +1874,36 @@ static inline struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, return ieee80211_beacon_get_tim(hw, vif, NULL, NULL); } +/** + * ieee80211_pspoll_get - retrieve a PS Poll template + * @hw: pointer obtained from ieee80211_alloc_hw(). + * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * + * Creates a PS Poll a template which can, for example, uploaded to + * hardware. The template must be updated after association so that correct + * AID, BSSID and MAC address is used. + * + * Note: Caller (or hardware) is responsible for setting the + * &IEEE80211_FCTL_PM bit. + */ +struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw, + struct ieee80211_vif *vif); + +/** + * ieee80211_nullfunc_get - retrieve a nullfunc template + * @hw: pointer obtained from ieee80211_alloc_hw(). + * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * + * Creates a Nullfunc template which can, for example, uploaded to + * hardware. The template must be updated after association so that correct + * BSSID and address is used. + * + * Note: Caller (or hardware) is responsible for setting the + * &IEEE80211_FCTL_PM bit as well as Duration and Sequence Control fields. + */ +struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, + struct ieee80211_vif *vif); + /** * ieee80211_rts_get - RTS frame generation function * @hw: pointer obtained from ieee80211_alloc_hw(). diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index d3a44812f8b..055b45b146d 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2200,6 +2200,84 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, } EXPORT_SYMBOL(ieee80211_beacon_get_tim); +struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw, + struct ieee80211_vif *vif) +{ + struct ieee80211_sub_if_data *sdata; + struct ieee80211_if_managed *ifmgd; + struct ieee80211_pspoll *pspoll; + struct ieee80211_local *local; + struct sk_buff *skb; + + if (WARN_ON(vif->type != NL80211_IFTYPE_STATION)) + return NULL; + + sdata = vif_to_sdata(vif); + ifmgd = &sdata->u.mgd; + local = sdata->local; + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*pspoll)); + if (!skb) { + printk(KERN_DEBUG "%s: failed to allocate buffer for " + "pspoll template\n", sdata->name); + return NULL; + } + skb_reserve(skb, local->hw.extra_tx_headroom); + + pspoll = (struct ieee80211_pspoll *) skb_put(skb, sizeof(*pspoll)); + memset(pspoll, 0, sizeof(*pspoll)); + pspoll->frame_control = cpu_to_le16(IEEE80211_FTYPE_CTL | + IEEE80211_STYPE_PSPOLL); + pspoll->aid = cpu_to_le16(ifmgd->aid); + + /* aid in PS-Poll has its two MSBs each set to 1 */ + pspoll->aid |= cpu_to_le16(1 << 15 | 1 << 14); + + memcpy(pspoll->bssid, ifmgd->bssid, ETH_ALEN); + memcpy(pspoll->ta, vif->addr, ETH_ALEN); + + return skb; +} +EXPORT_SYMBOL(ieee80211_pspoll_get); + +struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, + struct ieee80211_vif *vif) +{ + struct ieee80211_hdr_3addr *nullfunc; + struct ieee80211_sub_if_data *sdata; + struct ieee80211_if_managed *ifmgd; + struct ieee80211_local *local; + struct sk_buff *skb; + + if (WARN_ON(vif->type != NL80211_IFTYPE_STATION)) + return NULL; + + sdata = vif_to_sdata(vif); + ifmgd = &sdata->u.mgd; + local = sdata->local; + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*nullfunc)); + if (!skb) { + printk(KERN_DEBUG "%s: failed to allocate buffer for nullfunc " + "template\n", sdata->name); + return NULL; + } + skb_reserve(skb, local->hw.extra_tx_headroom); + + nullfunc = (struct ieee80211_hdr_3addr *) skb_put(skb, + sizeof(*nullfunc)); + memset(nullfunc, 0, sizeof(*nullfunc)); + nullfunc->frame_control = cpu_to_le16(IEEE80211_FTYPE_DATA | + IEEE80211_STYPE_NULLFUNC | + IEEE80211_FCTL_TODS); + memcpy(nullfunc->addr1, ifmgd->bssid, ETH_ALEN); + memcpy(nullfunc->addr2, vif->addr, ETH_ALEN); + memcpy(nullfunc->addr3, ifmgd->bssid, ETH_ALEN); + + return skb; +} +EXPORT_SYMBOL(ieee80211_nullfunc_get); + void ieee80211_rts_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif, const void *frame, size_t frame_len, const struct ieee80211_tx_info *frame_txctl, -- cgit v1.2.3-70-g09d2 From 34a6eddbabd704b3c7dae9362234552267573be2 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Wed, 6 Jan 2010 16:19:24 +0200 Subject: cfg80211: Store IEs from both Beacon and Probe Response frames Store information elements from Beacon and Probe Response frames in separate buffers to allow both sets to be made available through nl80211. This allows user space applications to get access to IEs from Beacon frames even if we have received Probe Response frames from the BSS. Previously, the IEs from Probe Response frames would have overridden the IEs from Beacon frames. This feature is of somewhat limited use since most protocols include the same (or extended) information in Probe Response frames. However, there are couple of exceptions where the IEs from Beacon frames could be of some use: TIM IE is only included in Beacon frames (and it would be needed to figure out the DTIM period used in the BSS) and at least some implementations of Wireless Provisioning Services seem to include the full IE only in Beacon frames). The new BSS attribute for scan results is added to allow both the IE sets to be delivered. This is done in a way that maintains the previously used behavior for applications that are not aware of the new NL80211_BSS_BEACON_IES attribute. Signed-off-by: Jouni Malinen Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 10 +++- include/net/cfg80211.h | 12 ++++- net/wireless/core.h | 3 +- net/wireless/nl80211.c | 4 ++ net/wireless/scan.c | 120 ++++++++++++++++++++++++++++++++++++------------ 5 files changed, 116 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 7a1c8c145b2..127a7301576 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1378,13 +1378,20 @@ enum nl80211_channel_type { * @NL80211_BSS_BEACON_INTERVAL: beacon interval of the (I)BSS (u16) * @NL80211_BSS_CAPABILITY: capability field (CPU order, u16) * @NL80211_BSS_INFORMATION_ELEMENTS: binary attribute containing the - * raw information elements from the probe response/beacon (bin) + * raw information elements from the probe response/beacon (bin); + * if the %NL80211_BSS_BEACON_IES attribute is present, the IEs here are + * from a Probe Response frame; otherwise they are from a Beacon frame. + * However, if the driver does not indicate the source of the IEs, these + * IEs may be from either frame subtype. * @NL80211_BSS_SIGNAL_MBM: signal strength of probe response/beacon * in mBm (100 * dBm) (s32) * @NL80211_BSS_SIGNAL_UNSPEC: signal strength of the probe response/beacon * in unspecified units, scaled to 0..100 (u8) * @NL80211_BSS_STATUS: status, if this BSS is "used" * @NL80211_BSS_SEEN_MS_AGO: age of this BSS entry in ms + * @NL80211_BSS_BEACON_IES: binary attribute containing the raw information + * elements from a Beacon frame (bin); not present if no Beacon frame has + * yet been received * @__NL80211_BSS_AFTER_LAST: internal * @NL80211_BSS_MAX: highest BSS attribute */ @@ -1400,6 +1407,7 @@ enum nl80211_bss { NL80211_BSS_SIGNAL_UNSPEC, NL80211_BSS_STATUS, NL80211_BSS_SEEN_MS_AGO, + NL80211_BSS_BEACON_IES, /* keep last */ __NL80211_BSS_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0d734413b5f..2af52704e67 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -626,8 +626,14 @@ enum cfg80211_signal_type { * @beacon_interval: the beacon interval as from the frame * @capability: the capability field in host byte order * @information_elements: the information elements (Note that there - * is no guarantee that these are well-formed!) + * is no guarantee that these are well-formed!); this is a pointer to + * either the beacon_ies or proberesp_ies depending on whether Probe + * Response frame has been received * @len_information_elements: total length of the information elements + * @beacon_ies: the information elements from the last Beacon frame + * @len_beacon_ies: total length of the beacon_ies + * @proberesp_ies: the information elements from the last Probe Response frame + * @len_proberesp_ies: total length of the proberesp_ies * @signal: signal strength value (type depends on the wiphy's signal_type) * @free_priv: function pointer to free private data * @priv: private area for driver use, has at least wiphy->bss_priv_size bytes @@ -641,6 +647,10 @@ struct cfg80211_bss { u16 capability; u8 *information_elements; size_t len_information_elements; + u8 *beacon_ies; + size_t len_beacon_ies; + u8 *proberesp_ies; + size_t len_proberesp_ies; s32 signal; diff --git a/net/wireless/core.h b/net/wireless/core.h index 30ec95f05b5..2d6a6b9c0c4 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -111,7 +111,8 @@ struct cfg80211_internal_bss { unsigned long ts; struct kref ref; atomic_t hold; - bool ies_allocated; + bool beacon_ies_allocated; + bool proberesp_ies_allocated; /* must be last because of priv member */ struct cfg80211_bss pub; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b804062e017..4af7991a9ec 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3163,6 +3163,10 @@ static int nl80211_send_bss(struct sk_buff *msg, u32 pid, u32 seq, int flags, NLA_PUT(msg, NL80211_BSS_INFORMATION_ELEMENTS, res->len_information_elements, res->information_elements); + if (res->beacon_ies && res->len_beacon_ies && + res->beacon_ies != res->information_elements) + NLA_PUT(msg, NL80211_BSS_BEACON_IES, + res->len_beacon_ies, res->beacon_ies); if (res->tsf) NLA_PUT_U64(msg, NL80211_BSS_TSF, res->tsf); if (res->beacon_interval) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 0c2cbbebca9..06b0231ee5e 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -100,8 +100,10 @@ static void bss_release(struct kref *ref) if (bss->pub.free_priv) bss->pub.free_priv(&bss->pub); - if (bss->ies_allocated) - kfree(bss->pub.information_elements); + if (bss->beacon_ies_allocated) + kfree(bss->pub.beacon_ies); + if (bss->proberesp_ies_allocated) + kfree(bss->pub.proberesp_ies); BUG_ON(atomic_read(&bss->hold)); @@ -375,8 +377,7 @@ rb_find_bss(struct cfg80211_registered_device *dev, static struct cfg80211_internal_bss * cfg80211_bss_update(struct cfg80211_registered_device *dev, - struct cfg80211_internal_bss *res, - bool overwrite) + struct cfg80211_internal_bss *res) { struct cfg80211_internal_bss *found = NULL; const u8 *meshid, *meshcfg; @@ -418,28 +419,64 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, found->pub.capability = res->pub.capability; found->ts = res->ts; - /* overwrite IEs */ - if (overwrite) { + /* Update IEs */ + if (res->pub.proberesp_ies) { size_t used = dev->wiphy.bss_priv_size + sizeof(*res); - size_t ielen = res->pub.len_information_elements; + size_t ielen = res->pub.len_proberesp_ies; + + if (found->pub.proberesp_ies && + !found->proberesp_ies_allocated && + ksize(found) >= used + ielen) { + memcpy(found->pub.proberesp_ies, + res->pub.proberesp_ies, ielen); + found->pub.len_proberesp_ies = ielen; + } else { + u8 *ies = found->pub.proberesp_ies; + + if (found->proberesp_ies_allocated) + ies = krealloc(ies, ielen, GFP_ATOMIC); + else + ies = kmalloc(ielen, GFP_ATOMIC); + + if (ies) { + memcpy(ies, res->pub.proberesp_ies, + ielen); + found->proberesp_ies_allocated = true; + found->pub.proberesp_ies = ies; + found->pub.len_proberesp_ies = ielen; + } + } - if (!found->ies_allocated && ksize(found) >= used + ielen) { - memcpy(found->pub.information_elements, - res->pub.information_elements, ielen); - found->pub.len_information_elements = ielen; + /* Override possible earlier Beacon frame IEs */ + found->pub.information_elements = + found->pub.proberesp_ies; + found->pub.len_information_elements = + found->pub.len_proberesp_ies; + } + if (res->pub.beacon_ies) { + size_t used = dev->wiphy.bss_priv_size + sizeof(*res); + size_t ielen = res->pub.len_beacon_ies; + + if (found->pub.beacon_ies && + !found->beacon_ies_allocated && + ksize(found) >= used + ielen) { + memcpy(found->pub.beacon_ies, + res->pub.beacon_ies, ielen); + found->pub.len_beacon_ies = ielen; } else { - u8 *ies = found->pub.information_elements; + u8 *ies = found->pub.beacon_ies; - if (found->ies_allocated) + if (found->beacon_ies_allocated) ies = krealloc(ies, ielen, GFP_ATOMIC); else ies = kmalloc(ielen, GFP_ATOMIC); if (ies) { - memcpy(ies, res->pub.information_elements, ielen); - found->ies_allocated = true; - found->pub.information_elements = ies; - found->pub.len_information_elements = ielen; + memcpy(ies, res->pub.beacon_ies, + ielen); + found->beacon_ies_allocated = true; + found->pub.beacon_ies = ies; + found->pub.len_beacon_ies = ielen; } } } @@ -489,14 +526,26 @@ cfg80211_inform_bss(struct wiphy *wiphy, res->pub.tsf = timestamp; res->pub.beacon_interval = beacon_interval; res->pub.capability = capability; - /* point to after the private area */ - res->pub.information_elements = (u8 *)res + sizeof(*res) + privsz; - memcpy(res->pub.information_elements, ie, ielen); - res->pub.len_information_elements = ielen; + /* + * Since we do not know here whether the IEs are from a Beacon or Probe + * Response frame, we need to pick one of the options and only use it + * with the driver that does not provide the full Beacon/Probe Response + * frame. Use Beacon frame pointer to avoid indicating that this should + * override the information_elements pointer should we have received an + * earlier indication of Probe Response data. + * + * The initial buffer for the IEs is allocated with the BSS entry and + * is located after the private area. + */ + res->pub.beacon_ies = (u8 *)res + sizeof(*res) + privsz; + memcpy(res->pub.beacon_ies, ie, ielen); + res->pub.len_beacon_ies = ielen; + res->pub.information_elements = res->pub.beacon_ies; + res->pub.len_information_elements = res->pub.len_beacon_ies; kref_init(&res->ref); - res = cfg80211_bss_update(wiphy_to_dev(wiphy), res, 0); + res = cfg80211_bss_update(wiphy_to_dev(wiphy), res); if (!res) return NULL; @@ -517,7 +566,6 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy, struct cfg80211_internal_bss *res; size_t ielen = len - offsetof(struct ieee80211_mgmt, u.probe_resp.variable); - bool overwrite; size_t privsz = wiphy->bss_priv_size; if (WARN_ON(wiphy->signal_type == NL80211_BSS_SIGNAL_UNSPEC && @@ -538,16 +586,28 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy, res->pub.tsf = le64_to_cpu(mgmt->u.probe_resp.timestamp); res->pub.beacon_interval = le16_to_cpu(mgmt->u.probe_resp.beacon_int); res->pub.capability = le16_to_cpu(mgmt->u.probe_resp.capab_info); - /* point to after the private area */ - res->pub.information_elements = (u8 *)res + sizeof(*res) + privsz; - memcpy(res->pub.information_elements, mgmt->u.probe_resp.variable, ielen); - res->pub.len_information_elements = ielen; + /* + * The initial buffer for the IEs is allocated with the BSS entry and + * is located after the private area. + */ + if (ieee80211_is_probe_resp(mgmt->frame_control)) { + res->pub.proberesp_ies = (u8 *) res + sizeof(*res) + privsz; + memcpy(res->pub.proberesp_ies, mgmt->u.probe_resp.variable, + ielen); + res->pub.len_proberesp_ies = ielen; + res->pub.information_elements = res->pub.proberesp_ies; + res->pub.len_information_elements = res->pub.len_proberesp_ies; + } else { + res->pub.beacon_ies = (u8 *) res + sizeof(*res) + privsz; + memcpy(res->pub.beacon_ies, mgmt->u.beacon.variable, ielen); + res->pub.len_beacon_ies = ielen; + res->pub.information_elements = res->pub.beacon_ies; + res->pub.len_information_elements = res->pub.len_beacon_ies; + } kref_init(&res->ref); - overwrite = ieee80211_is_probe_resp(mgmt->frame_control); - - res = cfg80211_bss_update(wiphy_to_dev(wiphy), res, overwrite); + res = cfg80211_bss_update(wiphy_to_dev(wiphy), res); if (!res) return NULL; -- cgit v1.2.3-70-g09d2 From ab13315af97919fae0e014748105fdc2e30afb2d Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Tue, 12 Jan 2010 10:42:31 +0200 Subject: mac80211: add U-APSD client support Add Unscheduled Automatic Power-Save Delivery (U-APSD) client support. The idea is that the data frames from the client trigger AP to send the buffered frames with ACs which have U-APSD enabled. This decreases latency and makes it possible to save even more power. Driver needs to use IEEE80211_HW_UAPSD to enable the feature. The current implementation assumes that firmware takes care of the wakeup and hardware needing IEEE80211_HW_PS_NULLFUNC_STACK is not yet supported. Tested with wl1251 on a Nokia N900 and Cisco Aironet 1231G AP and running various test traffic with ping. Signed-off-by: Kalle Valo Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 18 ++++++++++++++++++ include/net/mac80211.h | 7 +++++++ net/mac80211/cfg.c | 7 +++++++ net/mac80211/ieee80211_i.h | 13 ++++++++++++- net/mac80211/main.c | 4 ++++ net/mac80211/mlme.c | 31 ++++++++++++++++++++++++++++--- net/mac80211/scan.c | 18 ++++++++++++++++++ net/mac80211/util.c | 2 ++ net/mac80211/work.c | 12 ++++++++++-- 9 files changed, 106 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 602c0692c3f..a8c6069a0d9 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -120,6 +120,24 @@ #define IEEE80211_QOS_CTL_TID_MASK 0x000F #define IEEE80211_QOS_CTL_TAG1D_MASK 0x0007 +/* U-APSD queue for WMM IEs sent by AP */ +#define IEEE80211_WMM_IE_AP_QOSINFO_UAPSD (1<<7) + +/* U-APSD queues for WMM IEs sent by STA */ +#define IEEE80211_WMM_IE_STA_QOSINFO_AC_VO (1<<0) +#define IEEE80211_WMM_IE_STA_QOSINFO_AC_VI (1<<1) +#define IEEE80211_WMM_IE_STA_QOSINFO_AC_BK (1<<2) +#define IEEE80211_WMM_IE_STA_QOSINFO_AC_BE (1<<3) +#define IEEE80211_WMM_IE_STA_QOSINFO_AC_MASK 0x0f + +/* U-APSD max SP length for WMM IEs sent by STA */ +#define IEEE80211_WMM_IE_STA_QOSINFO_SP_ALL 0x00 +#define IEEE80211_WMM_IE_STA_QOSINFO_SP_2 0x01 +#define IEEE80211_WMM_IE_STA_QOSINFO_SP_4 0x02 +#define IEEE80211_WMM_IE_STA_QOSINFO_SP_6 0x03 +#define IEEE80211_WMM_IE_STA_QOSINFO_SP_MASK 0x03 +#define IEEE80211_WMM_IE_STA_QOSINFO_SP_SHIFT 5 + struct ieee80211_hdr { __le16 frame_control; __le16 duration_id; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index e1e73c6abef..f313a3cbabd 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -113,6 +113,7 @@ struct ieee80211_tx_queue_params { u16 cw_min; u16 cw_max; u8 aifs; + bool uapsd; }; /** @@ -929,6 +930,11 @@ enum ieee80211_tkip_key_type { * Hardware supports dynamic spatial multiplexing powersave, * ie. can turn off all but one chain and then wake the rest * up as required after, for example, rts/cts handshake. + * + * @IEEE80211_HW_SUPPORTS_UAPSD: + * Hardware supports Unscheduled Automatic Power Save Delivery + * (U-APSD) in managed mode. The mode is configured with + * conf_tx() operation. */ enum ieee80211_hw_flags { IEEE80211_HW_HAS_RATE_CONTROL = 1<<0, @@ -948,6 +954,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_BEACON_FILTER = 1<<14, IEEE80211_HW_SUPPORTS_STATIC_SMPS = 1<<15, IEEE80211_HW_SUPPORTS_DYNAMIC_SMPS = 1<<16, + IEEE80211_HW_SUPPORTS_UAPSD = 1<<17, }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index dc12e9466ff..8286df5822d 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1128,6 +1128,13 @@ static int ieee80211_set_txq_params(struct wiphy *wiphy, p.cw_max = params->cwmax; p.cw_min = params->cwmin; p.txop = params->txop; + + /* + * Setting tx queue params disables u-apsd because it's only + * called in master mode. + */ + p.uapsd = false; + if (drv_conf_tx(local, params->queue, &p)) { printk(KERN_DEBUG "%s: failed to set TX queue " "parameters for queue %d\n", diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 3e4ac3f3085..3468e378509 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -58,6 +58,15 @@ struct ieee80211_local; #define TU_TO_EXP_TIME(x) (jiffies + usecs_to_jiffies((x) * 1024)) +#define IEEE80211_DEFAULT_UAPSD_QUEUES \ + (IEEE80211_WMM_IE_STA_QOSINFO_AC_BK | \ + IEEE80211_WMM_IE_STA_QOSINFO_AC_BE | \ + IEEE80211_WMM_IE_STA_QOSINFO_AC_VI | \ + IEEE80211_WMM_IE_STA_QOSINFO_AC_VO) + +#define IEEE80211_DEFAULT_MAX_SP_LEN \ + IEEE80211_WMM_IE_STA_QOSINFO_SP_ALL + struct ieee80211_fragment_entry { unsigned long first_frag_time; unsigned int seq; @@ -78,6 +87,7 @@ struct ieee80211_bss { u8 dtim_period; bool wmm_used; + bool uapsd_supported; unsigned long last_probe_resp; @@ -285,7 +295,7 @@ struct ieee80211_work { u8 ssid[IEEE80211_MAX_SSID_LEN]; u8 ssid_len; u8 supp_rates_len; - bool wmm_used, use_11n; + bool wmm_used, use_11n, uapsd_used; } assoc; struct { u32 duration; @@ -306,6 +316,7 @@ enum ieee80211_sta_flags { IEEE80211_STA_DISABLE_11N = BIT(4), IEEE80211_STA_CSA_RECEIVED = BIT(5), IEEE80211_STA_MFP_ENABLED = BIT(6), + IEEE80211_STA_UAPSD_ENABLED = BIT(7), }; struct ieee80211_if_managed { diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 46882914399..0054bba08ce 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -491,6 +491,10 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC) local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_UNSPEC; + WARN((local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD) + && (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK), + "U-APSD not supported with HW_PS_NULLFUNC_STACK\n"); + /* * Calculate scan IE length -- we need this to alloc * memory and to subtract from the driver limit. It diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 86f025bc945..39c27d83a4f 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -569,7 +569,7 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, struct ieee80211_tx_queue_params params; size_t left; int count; - u8 *pos; + u8 *pos, uapsd_queues = 0; if (local->hw.queues < 4) return; @@ -579,6 +579,10 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, if (wmm_param_len < 8 || wmm_param[5] /* version */ != 1) return; + + if (ifmgd->flags & IEEE80211_STA_UAPSD_ENABLED) + uapsd_queues = IEEE80211_DEFAULT_UAPSD_QUEUES; + count = wmm_param[6] & 0x0f; if (count == ifmgd->wmm_last_param_set) return; @@ -593,6 +597,7 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, for (; left >= 4; left -= 4, pos += 4) { int aci = (pos[0] >> 5) & 0x03; int acm = (pos[0] >> 4) & 0x01; + bool uapsd = false; int queue; switch (aci) { @@ -600,22 +605,30 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, queue = 3; if (acm) local->wmm_acm |= BIT(1) | BIT(2); /* BK/- */ + if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_BK) + uapsd = true; break; case 2: /* AC_VI */ queue = 1; if (acm) local->wmm_acm |= BIT(4) | BIT(5); /* CL/VI */ + if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_VI) + uapsd = true; break; case 3: /* AC_VO */ queue = 0; if (acm) local->wmm_acm |= BIT(6) | BIT(7); /* VO/NC */ + if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_VO) + uapsd = true; break; case 0: /* AC_BE */ default: queue = 2; if (acm) local->wmm_acm |= BIT(0) | BIT(3); /* BE/EE */ + if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_BE) + uapsd = true; break; } @@ -623,11 +636,14 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, params.cw_max = ecw2cw((pos[1] & 0xf0) >> 4); params.cw_min = ecw2cw(pos[1] & 0x0f); params.txop = get_unaligned_le16(pos + 2); + params.uapsd = uapsd; + #ifdef CONFIG_MAC80211_VERBOSE_DEBUG printk(KERN_DEBUG "%s: WMM queue=%d aci=%d acm=%d aifs=%d " - "cWmin=%d cWmax=%d txop=%d\n", + "cWmin=%d cWmax=%d txop=%d uapsd=%d\n", wiphy_name(local->hw.wiphy), queue, aci, acm, - params.aifs, params.cw_min, params.cw_max, params.txop); + params.aifs, params.cw_min, params.cw_max, params.txop, + params.uapsd); #endif if (drv_conf_tx(local, queue, ¶ms) && local->ops->conf_tx) printk(KERN_DEBUG "%s: failed to set TX queue " @@ -1906,6 +1922,15 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, wk->assoc.ht_information_ie = ieee80211_bss_get_ie(req->bss, WLAN_EID_HT_INFORMATION); + if (bss->wmm_used && bss->uapsd_supported && + (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD)) { + wk->assoc.uapsd_used = true; + ifmgd->flags |= IEEE80211_STA_UAPSD_ENABLED; + } else { + wk->assoc.uapsd_used = false; + ifmgd->flags &= ~IEEE80211_STA_UAPSD_ENABLED; + } + ssid = ieee80211_bss_get_ie(req->bss, WLAN_EID_SSID); memcpy(wk->assoc.ssid, ssid + 2, ssid[1]); wk->assoc.ssid_len = ssid[1]; diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 30cb62bb45b..9afe2f9885d 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -54,6 +54,23 @@ void ieee80211_rx_bss_put(struct ieee80211_local *local, cfg80211_put_bss(container_of((void *)bss, struct cfg80211_bss, priv)); } +static bool is_uapsd_supported(struct ieee802_11_elems *elems) +{ + u8 qos_info; + + if (elems->wmm_info && elems->wmm_info_len == 7 + && elems->wmm_info[5] == 1) + qos_info = elems->wmm_info[6]; + else if (elems->wmm_param && elems->wmm_param_len == 24 + && elems->wmm_param[5] == 1) + qos_info = elems->wmm_param[6]; + else + /* no valid wmm information or parameter element found */ + return false; + + return qos_info & IEEE80211_WMM_IE_AP_QOSINFO_UAPSD; +} + struct ieee80211_bss * ieee80211_bss_info_update(struct ieee80211_local *local, struct ieee80211_rx_status *rx_status, @@ -117,6 +134,7 @@ ieee80211_bss_info_update(struct ieee80211_local *local, } bss->wmm_used = elems->wmm_param || elems->wmm_info; + bss->uapsd_supported = is_uapsd_supported(elems); if (!beacon) bss->last_probe_resp = jiffies; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index a2ba6e29bd9..e278f97c830 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -792,6 +792,8 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata) break; } + qparam.uapsd = false; + drv_conf_tx(local, queue, &qparam); } } diff --git a/net/mac80211/work.c b/net/mac80211/work.c index 7c5d95b1bc0..a74fd6ee008 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -202,7 +202,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; - u8 *pos; + u8 *pos, qos_info; const u8 *ies; size_t offset = 0, noffset; int i, len, count, rates_len, supp_rates_len; @@ -375,6 +375,14 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, } if (wk->assoc.wmm_used && local->hw.queues >= 4) { + if (wk->assoc.uapsd_used) { + qos_info = IEEE80211_DEFAULT_UAPSD_QUEUES; + qos_info |= (IEEE80211_DEFAULT_MAX_SP_LEN << + IEEE80211_WMM_IE_STA_QOSINFO_SP_SHIFT); + } else { + qos_info = 0; + } + pos = skb_put(skb, 9); *pos++ = WLAN_EID_VENDOR_SPECIFIC; *pos++ = 7; /* len */ @@ -384,7 +392,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, *pos++ = 2; /* WME */ *pos++ = 0; /* WME info */ *pos++ = 1; /* WME ver */ - *pos++ = 0; + *pos++ = qos_info; } /* add any remaining custom (i.e. vendor specific here) IEs */ -- cgit v1.2.3-70-g09d2 From 558a6669d7cb407fbb0b5aec184b5c3b9a893d30 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Tue, 12 Jan 2010 10:43:00 +0200 Subject: ieee80211: add struct ieee80211_hdr_qos The header can be used to create qos nullfunc frames, for example. Signed-off-by: Kalle Valo Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index a8c6069a0d9..842701906ae 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -157,6 +157,16 @@ struct ieee80211_hdr_3addr { __le16 seq_ctrl; } __attribute__ ((packed)); +struct ieee80211_qos_hdr { + __le16 frame_control; + __le16 duration_id; + u8 addr1[6]; + u8 addr2[6]; + u8 addr3[6]; + __le16 seq_ctrl; + __le16 qos_ctrl; +} __attribute__ ((packed)); + /** * ieee80211_has_tods - check if IEEE80211_FCTL_TODS is set * @fc: frame control bytes in little-endian byteorder -- cgit v1.2.3-70-g09d2 From bf66f18e79e34c421bbd8f6511e2c556b779df2f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Jan 2010 15:09:10 -0800 Subject: rcu: Add force_quiescent_state() testing to rcutorture Add force_quiescent_state() testing to rcutorture, with a separate thread that repeatedly invokes force_quiescent_state() in bursts. This can greatly increase the probability of encountering certain types of race conditions. Suggested-by: Josh Triplett Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1262646551116-git-send-email-> Signed-off-by: Ingo Molnar --- include/linux/rcutiny.h | 12 ++++++++ include/linux/rcutree.h | 3 ++ kernel/rcutorture.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++-- kernel/rcutree.c | 18 +++++++++++ kernel/rcutree_plugin.h | 19 ++++++++++++ 5 files changed, 130 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 96cc307ed9f..2b70d4e3738 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -62,6 +62,18 @@ static inline long rcu_batches_completed_bh(void) extern int rcu_expedited_torture_stats(char *page); +static inline void rcu_force_quiescent_state(void) +{ +} + +static inline void rcu_bh_force_quiescent_state(void) +{ +} + +static inline void rcu_sched_force_quiescent_state(void) +{ +} + #define synchronize_rcu synchronize_sched static inline void synchronize_rcu_expedited(void) diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 8044b1b9433..704a010f686 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -99,6 +99,9 @@ extern void rcu_check_callbacks(int cpu, int user); extern long rcu_batches_completed(void); extern long rcu_batches_completed_bh(void); extern long rcu_batches_completed_sched(void); +extern void rcu_force_quiescent_state(void); +extern void rcu_bh_force_quiescent_state(void); +extern void rcu_sched_force_quiescent_state(void); #ifdef CONFIG_NO_HZ void rcu_enter_nohz(void); diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 9bb52177af0..adda92bfafa 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -61,6 +61,9 @@ static int test_no_idle_hz; /* Test RCU's support for tickless idle CPUs. */ static int shuffle_interval = 3; /* Interval between shuffles (in sec)*/ static int stutter = 5; /* Start/stop testing interval (in sec) */ static int irqreader = 1; /* RCU readers from irq (timers). */ +static int fqs_duration = 0; /* Duration of bursts (us), 0 to disable. */ +static int fqs_holdoff = 0; /* Hold time within burst (us). */ +static int fqs_stutter = 3; /* Wait time between bursts (s). */ static char *torture_type = "rcu"; /* What RCU implementation to torture. */ module_param(nreaders, int, 0444); @@ -79,6 +82,12 @@ module_param(stutter, int, 0444); MODULE_PARM_DESC(stutter, "Number of seconds to run/halt test"); module_param(irqreader, int, 0444); MODULE_PARM_DESC(irqreader, "Allow RCU readers from irq handlers"); +module_param(fqs_duration, int, 0444); +MODULE_PARM_DESC(fqs_duration, "Duration of fqs bursts (us)"); +module_param(fqs_holdoff, int, 0444); +MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)"); +module_param(fqs_stutter, int, 0444); +MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)"); module_param(torture_type, charp, 0444); MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)"); @@ -99,6 +108,7 @@ static struct task_struct **reader_tasks; static struct task_struct *stats_task; static struct task_struct *shuffler_task; static struct task_struct *stutter_task; +static struct task_struct *fqs_task; #define RCU_TORTURE_PIPE_LEN 10 @@ -263,6 +273,7 @@ struct rcu_torture_ops { void (*deferred_free)(struct rcu_torture *p); void (*sync)(void); void (*cb_barrier)(void); + void (*fqs)(void); int (*stats)(char *page); int irq_capable; char *name; @@ -347,6 +358,7 @@ static struct rcu_torture_ops rcu_ops = { .deferred_free = rcu_torture_deferred_free, .sync = synchronize_rcu, .cb_barrier = rcu_barrier, + .fqs = rcu_force_quiescent_state, .stats = NULL, .irq_capable = 1, .name = "rcu" @@ -388,6 +400,7 @@ static struct rcu_torture_ops rcu_sync_ops = { .deferred_free = rcu_sync_torture_deferred_free, .sync = synchronize_rcu, .cb_barrier = NULL, + .fqs = rcu_force_quiescent_state, .stats = NULL, .irq_capable = 1, .name = "rcu_sync" @@ -403,6 +416,7 @@ static struct rcu_torture_ops rcu_expedited_ops = { .deferred_free = rcu_sync_torture_deferred_free, .sync = synchronize_rcu_expedited, .cb_barrier = NULL, + .fqs = rcu_force_quiescent_state, .stats = NULL, .irq_capable = 1, .name = "rcu_expedited" @@ -465,6 +479,7 @@ static struct rcu_torture_ops rcu_bh_ops = { .deferred_free = rcu_bh_torture_deferred_free, .sync = rcu_bh_torture_synchronize, .cb_barrier = rcu_barrier_bh, + .fqs = rcu_bh_force_quiescent_state, .stats = NULL, .irq_capable = 1, .name = "rcu_bh" @@ -480,6 +495,7 @@ static struct rcu_torture_ops rcu_bh_sync_ops = { .deferred_free = rcu_sync_torture_deferred_free, .sync = rcu_bh_torture_synchronize, .cb_barrier = NULL, + .fqs = rcu_bh_force_quiescent_state, .stats = NULL, .irq_capable = 1, .name = "rcu_bh_sync" @@ -621,6 +637,7 @@ static struct rcu_torture_ops sched_ops = { .deferred_free = rcu_sched_torture_deferred_free, .sync = sched_torture_synchronize, .cb_barrier = rcu_barrier_sched, + .fqs = rcu_sched_force_quiescent_state, .stats = NULL, .irq_capable = 1, .name = "sched" @@ -636,6 +653,7 @@ static struct rcu_torture_ops sched_sync_ops = { .deferred_free = rcu_sync_torture_deferred_free, .sync = sched_torture_synchronize, .cb_barrier = NULL, + .fqs = rcu_sched_force_quiescent_state, .stats = NULL, .name = "sched_sync" }; @@ -650,11 +668,44 @@ static struct rcu_torture_ops sched_expedited_ops = { .deferred_free = rcu_sync_torture_deferred_free, .sync = synchronize_sched_expedited, .cb_barrier = NULL, + .fqs = rcu_sched_force_quiescent_state, .stats = rcu_expedited_torture_stats, .irq_capable = 1, .name = "sched_expedited" }; +/* + * RCU torture force-quiescent-state kthread. Repeatedly induces + * bursts of calls to force_quiescent_state(), increasing the probability + * of occurrence of some important types of race conditions. + */ +static int +rcu_torture_fqs(void *arg) +{ + unsigned long fqs_resume_time; + int fqs_burst_remaining; + + VERBOSE_PRINTK_STRING("rcu_torture_fqs task started"); + do { + fqs_resume_time = jiffies + fqs_stutter * HZ; + while (jiffies - fqs_resume_time > LONG_MAX) { + schedule_timeout_interruptible(1); + } + fqs_burst_remaining = fqs_duration; + while (fqs_burst_remaining > 0) { + cur_ops->fqs(); + udelay(fqs_holdoff); + fqs_burst_remaining -= fqs_holdoff; + } + rcu_stutter_wait("rcu_torture_fqs"); + } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); + VERBOSE_PRINTK_STRING("rcu_torture_fqs task stopping"); + rcutorture_shutdown_absorb("rcu_torture_fqs"); + while (!kthread_should_stop()) + schedule_timeout_uninterruptible(1); + return 0; +} + /* * RCU torture writer kthread. Repeatedly substitutes a new structure * for that pointed to by rcu_torture_current, freeing the old structure @@ -1030,10 +1081,11 @@ rcu_torture_print_module_parms(char *tag) printk(KERN_ALERT "%s" TORTURE_FLAG "--- %s: nreaders=%d nfakewriters=%d " "stat_interval=%d verbose=%d test_no_idle_hz=%d " - "shuffle_interval=%d stutter=%d irqreader=%d\n", + "shuffle_interval=%d stutter=%d irqreader=%d " + "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d\n", torture_type, tag, nrealreaders, nfakewriters, stat_interval, verbose, test_no_idle_hz, shuffle_interval, - stutter, irqreader); + stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter); } static struct notifier_block rcutorture_nb = { @@ -1109,6 +1161,12 @@ rcu_torture_cleanup(void) } stats_task = NULL; + if (fqs_task) { + VERBOSE_PRINTK_STRING("Stopping rcu_torture_fqs task"); + kthread_stop(fqs_task); + } + fqs_task = NULL; + /* Wait for all RCU callbacks to fire. */ if (cur_ops->cb_barrier != NULL) @@ -1154,6 +1212,11 @@ rcu_torture_init(void) mutex_unlock(&fullstop_mutex); return -EINVAL; } + if (cur_ops->fqs == NULL && fqs_duration != 0) { + printk(KERN_ALERT "rcu-torture: ->fqs NULL and non-zero " + "fqs_duration, fqs disabled.\n"); + fqs_duration = 0; + } if (cur_ops->init) cur_ops->init(); /* no "goto unwind" prior to this point!!! */ @@ -1282,6 +1345,19 @@ rcu_torture_init(void) goto unwind; } } + if (fqs_duration < 0) + fqs_duration = 0; + if (fqs_duration) { + /* Create the stutter thread */ + fqs_task = kthread_run(rcu_torture_fqs, NULL, + "rcu_torture_fqs"); + if (IS_ERR(fqs_task)) { + firsterr = PTR_ERR(fqs_task); + VERBOSE_PRINTK_ERRSTRING("Failed to create fqs"); + fqs_task = NULL; + goto unwind; + } + } register_reboot_notifier(&rcutorture_nb); mutex_unlock(&fullstop_mutex); return 0; diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 55e8f6ef819..0a4c3287939 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -156,6 +156,24 @@ long rcu_batches_completed_bh(void) } EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); +/* + * Force a quiescent state for RCU BH. + */ +void rcu_bh_force_quiescent_state(void) +{ + force_quiescent_state(&rcu_bh_state, 0); +} +EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); + +/* + * Force a quiescent state for RCU-sched. + */ +void rcu_sched_force_quiescent_state(void) +{ + force_quiescent_state(&rcu_sched_state, 0); +} +EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state); + /* * Does the CPU have callbacks ready to be invoked? */ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 37fbccdf41d..f11ebd44b45 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -61,6 +61,15 @@ long rcu_batches_completed(void) } EXPORT_SYMBOL_GPL(rcu_batches_completed); +/* + * Force a quiescent state for preemptible RCU. + */ +void rcu_force_quiescent_state(void) +{ + force_quiescent_state(&rcu_preempt_state, 0); +} +EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); + /* * Record a preemptable-RCU quiescent state for the specified CPU. Note * that this just means that the task currently running on the CPU is @@ -712,6 +721,16 @@ long rcu_batches_completed(void) } EXPORT_SYMBOL_GPL(rcu_batches_completed); +/* + * Force a quiescent state for RCU, which, because there is no preemptible + * RCU, becomes the same as rcu-sched. + */ +void rcu_force_quiescent_state(void) +{ + rcu_sched_force_quiescent_state(); +} +EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); + /* * Because preemptable RCU does not exist, we never have to check for * CPUs being in quiescent states. -- cgit v1.2.3-70-g09d2 From 9ca94d7c016130f9ed77f142424ace9c19742809 Mon Sep 17 00:00:00 2001 From: John Kacur Date: Mon, 11 Jan 2010 21:21:06 +0100 Subject: plist: Fix grammar mistake, and c-style mistake Signed-off-by: John Kacur Acked-by: Peter Zijlstra LKML-Reference: <1263241267-25204-2-git-send-email-jkacur@redhat.com> Signed-off-by: Ingo Molnar --- include/linux/plist.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/plist.h b/include/linux/plist.h index 8227f717c70..6898985e7b3 100644 --- a/include/linux/plist.h +++ b/include/linux/plist.h @@ -45,7 +45,7 @@ * the insertion of new nodes. There are no nodes with duplicate * priorites on the list. * - * The nodes on the node_list is ordered by priority and can contain + * The nodes on the node_list are ordered by priority and can contain * entries which have the same priority. Those entries are ordered * FIFO * @@ -265,7 +265,7 @@ static inline int plist_node_empty(const struct plist_node *node) * * Assumes the plist is _not_ empty. */ -static inline struct plist_node* plist_first(const struct plist_head *head) +static inline struct plist_node *plist_first(const struct plist_head *head) { return list_entry(head->node_list.next, struct plist_node, plist.node_list); -- cgit v1.2.3-70-g09d2 From 599faa0e264fe2e7f563f87b4aad8c83e9dc46d1 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 5 Jan 2010 13:29:58 +0000 Subject: genirq: Fix documentation of default chip disable() The documentation says that by default disable() will be chip->mask but in fact default_disable() is a noop. Signed-off-by: Mark Brown LKML-Reference: <1262698198-30392-1-git-send-email-broonie@opensource.wolfsonmicro.com> Signed-off-by: Ingo Molnar --- include/linux/irq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 451481c082b..d13492df57a 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -90,7 +90,7 @@ struct msi_desc; * @startup: start up the interrupt (defaults to ->enable if NULL) * @shutdown: shut down the interrupt (defaults to ->disable if NULL) * @enable: enable the interrupt (defaults to chip->unmask if NULL) - * @disable: disable the interrupt (defaults to chip->mask if NULL) + * @disable: disable the interrupt * @ack: start of a new interrupt * @mask: mask an interrupt source * @mask_ack: ack and mask an interrupt source -- cgit v1.2.3-70-g09d2 From cd8c20b650f49354722b8cc1f03320b004815a0a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 13 Jan 2010 16:02:14 +0100 Subject: netfilter: nfnetlink: netns support Make nfnl socket per-petns. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/linux/netfilter/nfnetlink.h | 8 ++--- include/net/net_namespace.h | 2 ++ net/netfilter/nf_conntrack_netlink.c | 13 ++++---- net/netfilter/nfnetlink.c | 65 +++++++++++++++++++++++------------- net/netfilter/nfnetlink_log.c | 3 +- net/netfilter/nfnetlink_queue.c | 2 +- 6 files changed, 58 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 49d321f3ccd..53923868c9b 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -73,11 +73,11 @@ struct nfnetlink_subsystem { extern int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n); extern int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n); -extern int nfnetlink_has_listeners(unsigned int group); -extern int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, +extern int nfnetlink_has_listeners(struct net *net, unsigned int group); +extern int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned group, int echo, gfp_t flags); -extern void nfnetlink_set_err(u32 pid, u32 group, int error); -extern int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags); +extern void nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error); +extern int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u_int32_t pid, int flags); extern void nfnl_lock(void); extern void nfnl_unlock(void); diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index f307e133d14..82b7be4db89 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -81,6 +81,8 @@ struct net { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) struct netns_ct ct; #endif + struct sock *nfnl; + struct sock *nfnl_stash; #endif #ifdef CONFIG_XFRM struct netns_xfrm xfrm; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 59d8064eb52..d4c5d06677f 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -482,7 +482,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) } else return 0; - if (!item->report && !nfnetlink_has_listeners(group)) + if (!item->report && !nfnetlink_has_listeners(&init_net, group)) return 0; skb = nlmsg_new(ctnetlink_nlmsg_size(ct), GFP_ATOMIC); @@ -559,7 +559,8 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) rcu_read_unlock(); nlmsg_end(skb, nlh); - err = nfnetlink_send(skb, item->pid, group, item->report, GFP_ATOMIC); + err = nfnetlink_send(skb, &init_net, item->pid, group, item->report, + GFP_ATOMIC); if (err == -ENOBUFS || err == -EAGAIN) return -ENOBUFS; @@ -571,7 +572,7 @@ nla_put_failure: nlmsg_failure: kfree_skb(skb); errout: - nfnetlink_set_err(0, group, -ENOBUFS); + nfnetlink_set_err(&init_net, 0, group, -ENOBUFS); return 0; } #endif /* CONFIG_NF_CONNTRACK_EVENTS */ @@ -1539,7 +1540,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) return 0; if (!item->report && - !nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW)) + !nfnetlink_has_listeners(&init_net, NFNLGRP_CONNTRACK_EXP_NEW)) return 0; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); @@ -1562,7 +1563,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) rcu_read_unlock(); nlmsg_end(skb, nlh); - nfnetlink_send(skb, item->pid, NFNLGRP_CONNTRACK_EXP_NEW, + nfnetlink_send(skb, &init_net, item->pid, NFNLGRP_CONNTRACK_EXP_NEW, item->report, GFP_ATOMIC); return 0; @@ -1572,7 +1573,7 @@ nla_put_failure: nlmsg_failure: kfree_skb(skb); errout: - nfnetlink_set_err(0, 0, -ENOBUFS); + nfnetlink_set_err(&init_net, 0, 0, -ENOBUFS); return 0; } #endif diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index eedc0c1ac7a..8eb0cc23ada 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -40,7 +40,6 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER); static char __initdata nfversion[] = "0.30"; -static struct sock *nfnl = NULL; static const struct nfnetlink_subsystem *subsys_table[NFNL_SUBSYS_COUNT]; static DEFINE_MUTEX(nfnl_mutex); @@ -101,34 +100,35 @@ nfnetlink_find_client(u_int16_t type, const struct nfnetlink_subsystem *ss) return &ss->cb[cb_id]; } -int nfnetlink_has_listeners(unsigned int group) +int nfnetlink_has_listeners(struct net *net, unsigned int group) { - return netlink_has_listeners(nfnl, group); + return netlink_has_listeners(net->nfnl, group); } EXPORT_SYMBOL_GPL(nfnetlink_has_listeners); -int nfnetlink_send(struct sk_buff *skb, u32 pid, +int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned group, int echo, gfp_t flags) { - return nlmsg_notify(nfnl, skb, pid, group, echo, flags); + return nlmsg_notify(net->nfnl, skb, pid, group, echo, flags); } EXPORT_SYMBOL_GPL(nfnetlink_send); -void nfnetlink_set_err(u32 pid, u32 group, int error) +void nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error) { - netlink_set_err(nfnl, pid, group, error); + netlink_set_err(net->nfnl, pid, group, error); } EXPORT_SYMBOL_GPL(nfnetlink_set_err); -int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags) +int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u_int32_t pid, int flags) { - return netlink_unicast(nfnl, skb, pid, flags); + return netlink_unicast(net->nfnl, skb, pid, flags); } EXPORT_SYMBOL_GPL(nfnetlink_unicast); /* Process one complete nfnetlink message. */ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { + struct net *net = sock_net(skb->sk); const struct nfnl_callback *nc; const struct nfnetlink_subsystem *ss; int type, err; @@ -170,7 +170,7 @@ replay: if (err < 0) return err; - err = nc->call(nfnl, skb, nlh, (const struct nlattr **)cda); + err = nc->call(net->nfnl, skb, nlh, (const struct nlattr **)cda); if (err == -EAGAIN) goto replay; return err; @@ -184,26 +184,45 @@ static void nfnetlink_rcv(struct sk_buff *skb) nfnl_unlock(); } -static void __exit nfnetlink_exit(void) +static int __net_init nfnetlink_net_init(struct net *net) { - printk("Removing netfilter NETLINK layer.\n"); - netlink_kernel_release(nfnl); - return; + struct sock *nfnl; + + nfnl = netlink_kernel_create(net, NETLINK_NETFILTER, NFNLGRP_MAX, + nfnetlink_rcv, NULL, THIS_MODULE); + if (!nfnl) + return -ENOMEM; + net->nfnl_stash = nfnl; + rcu_assign_pointer(net->nfnl, nfnl); + return 0; } -static int __init nfnetlink_init(void) +static void __net_exit nfnetlink_net_exit_batch(struct list_head *net_exit_list) { - printk("Netfilter messages via NETLINK v%s.\n", nfversion); + struct net *net; - nfnl = netlink_kernel_create(&init_net, NETLINK_NETFILTER, NFNLGRP_MAX, - nfnetlink_rcv, NULL, THIS_MODULE); - if (!nfnl) { - printk(KERN_ERR "cannot initialize nfnetlink!\n"); - return -ENOMEM; - } + list_for_each_entry(net, net_exit_list, exit_list) + rcu_assign_pointer(net->nfnl, NULL); + synchronize_net(); + list_for_each_entry(net, net_exit_list, exit_list) + netlink_kernel_release(net->nfnl_stash); +} - return 0; +static struct pernet_operations nfnetlink_net_ops = { + .init = nfnetlink_net_init, + .exit_batch = nfnetlink_net_exit_batch, +}; + +static int __init nfnetlink_init(void) +{ + printk("Netfilter messages via NETLINK v%s.\n", nfversion); + return register_pernet_subsys(&nfnetlink_net_ops); } +static void __exit nfnetlink_exit(void) +{ + printk("Removing netfilter NETLINK layer.\n"); + unregister_pernet_subsys(&nfnetlink_net_ops); +} module_init(nfnetlink_init); module_exit(nfnetlink_exit); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 9de0470d557..285e9029a9f 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -323,7 +323,8 @@ __nfulnl_send(struct nfulnl_instance *inst) NLMSG_DONE, sizeof(struct nfgenmsg)); - status = nfnetlink_unicast(inst->skb, inst->peer_pid, MSG_DONTWAIT); + status = nfnetlink_unicast(inst->skb, &init_net, inst->peer_pid, + MSG_DONTWAIT); inst->qlen = 0; inst->skb = NULL; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 7e3fa410641..5c589b27d6e 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -420,7 +420,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) } /* nfnetlink_unicast will either free the nskb or add it to a socket */ - err = nfnetlink_unicast(nskb, queue->peer_pid, MSG_DONTWAIT); + err = nfnetlink_unicast(nskb, &init_net, queue->peer_pid, MSG_DONTWAIT); if (err < 0) { queue->queue_user_dropped++; goto err_out_unlock; -- cgit v1.2.3-70-g09d2 From 508e14b4a4fb1a824a14f2c5b8d7df67b313f8e4 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 12 Jan 2010 14:27:30 +0000 Subject: netpoll: allow execution of multiple rx_hooks per interface Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/netpoll.h | 11 +++- net/core/netpoll.c | 169 ++++++++++++++++++++++++++++++------------------ 2 files changed, 114 insertions(+), 66 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index 2524267210d..a765ea89854 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -21,15 +21,20 @@ struct netpoll { __be32 local_ip, remote_ip; u16 local_port, remote_port; u8 remote_mac[ETH_ALEN]; + + struct list_head rx; /* rx_np list element */ }; struct netpoll_info { atomic_t refcnt; + int rx_flags; spinlock_t rx_lock; - struct netpoll *rx_np; /* netpoll that registered an rx_hook */ + struct list_head rx_np; /* netpolls that registered an rx_hook */ + struct sk_buff_head arp_tx; /* list of arp requests to reply to */ struct sk_buff_head txq; + struct delayed_work tx_work; }; @@ -51,7 +56,7 @@ static inline int netpoll_rx(struct sk_buff *skb) unsigned long flags; int ret = 0; - if (!npinfo || (!npinfo->rx_np && !npinfo->rx_flags)) + if (!npinfo || (list_empty(&npinfo->rx_np) && !npinfo->rx_flags)) return 0; spin_lock_irqsave(&npinfo->rx_lock, flags); @@ -67,7 +72,7 @@ static inline int netpoll_rx_on(struct sk_buff *skb) { struct netpoll_info *npinfo = skb->dev->npinfo; - return npinfo && (npinfo->rx_np || npinfo->rx_flags); + return npinfo && (!list_empty(&npinfo->rx_np) || npinfo->rx_flags); } static inline int netpoll_receive_skb(struct sk_buff *skb) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 0b4d0d35ef4..7aa69725376 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -407,11 +407,24 @@ static void arp_reply(struct sk_buff *skb) __be32 sip, tip; unsigned char *sha; struct sk_buff *send_skb; - struct netpoll *np = NULL; + struct netpoll *np, *tmp; + unsigned long flags; + int hits = 0; + + if (list_empty(&npinfo->rx_np)) + return; + + /* Before checking the packet, we do some early + inspection whether this is interesting at all */ + spin_lock_irqsave(&npinfo->rx_lock, flags); + list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { + if (np->dev == skb->dev) + hits++; + } + spin_unlock_irqrestore(&npinfo->rx_lock, flags); - if (npinfo->rx_np && npinfo->rx_np->dev == skb->dev) - np = npinfo->rx_np; - if (!np) + /* No netpoll struct is using this dev */ + if (!hits) return; /* No arp on this interface */ @@ -437,77 +450,91 @@ static void arp_reply(struct sk_buff *skb) arp_ptr += skb->dev->addr_len; memcpy(&sip, arp_ptr, 4); arp_ptr += 4; - /* if we actually cared about dst hw addr, it would get copied here */ + /* If we actually cared about dst hw addr, + it would get copied here */ arp_ptr += skb->dev->addr_len; memcpy(&tip, arp_ptr, 4); /* Should we ignore arp? */ - if (tip != np->local_ip || - ipv4_is_loopback(tip) || ipv4_is_multicast(tip)) + if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip)) return; size = arp_hdr_len(skb->dev); - send_skb = find_skb(np, size + LL_ALLOCATED_SPACE(np->dev), - LL_RESERVED_SPACE(np->dev)); - if (!send_skb) - return; - - skb_reset_network_header(send_skb); - arp = (struct arphdr *) skb_put(send_skb, size); - send_skb->dev = skb->dev; - send_skb->protocol = htons(ETH_P_ARP); + spin_lock_irqsave(&npinfo->rx_lock, flags); + list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { + if (tip != np->local_ip) + continue; - /* Fill the device header for the ARP frame */ - if (dev_hard_header(send_skb, skb->dev, ptype, - sha, np->dev->dev_addr, - send_skb->len) < 0) { - kfree_skb(send_skb); - return; - } + send_skb = find_skb(np, size + LL_ALLOCATED_SPACE(np->dev), + LL_RESERVED_SPACE(np->dev)); + if (!send_skb) + continue; - /* - * Fill out the arp protocol part. - * - * we only support ethernet device type, - * which (according to RFC 1390) should always equal 1 (Ethernet). - */ + skb_reset_network_header(send_skb); + arp = (struct arphdr *) skb_put(send_skb, size); + send_skb->dev = skb->dev; + send_skb->protocol = htons(ETH_P_ARP); - arp->ar_hrd = htons(np->dev->type); - arp->ar_pro = htons(ETH_P_IP); - arp->ar_hln = np->dev->addr_len; - arp->ar_pln = 4; - arp->ar_op = htons(type); + /* Fill the device header for the ARP frame */ + if (dev_hard_header(send_skb, skb->dev, ptype, + sha, np->dev->dev_addr, + send_skb->len) < 0) { + kfree_skb(send_skb); + continue; + } - arp_ptr=(unsigned char *)(arp + 1); - memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len); - arp_ptr += np->dev->addr_len; - memcpy(arp_ptr, &tip, 4); - arp_ptr += 4; - memcpy(arp_ptr, sha, np->dev->addr_len); - arp_ptr += np->dev->addr_len; - memcpy(arp_ptr, &sip, 4); + /* + * Fill out the arp protocol part. + * + * we only support ethernet device type, + * which (according to RFC 1390) should + * always equal 1 (Ethernet). + */ - netpoll_send_skb(np, send_skb); + arp->ar_hrd = htons(np->dev->type); + arp->ar_pro = htons(ETH_P_IP); + arp->ar_hln = np->dev->addr_len; + arp->ar_pln = 4; + arp->ar_op = htons(type); + + arp_ptr = (unsigned char *)(arp + 1); + memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len); + arp_ptr += np->dev->addr_len; + memcpy(arp_ptr, &tip, 4); + arp_ptr += 4; + memcpy(arp_ptr, sha, np->dev->addr_len); + arp_ptr += np->dev->addr_len; + memcpy(arp_ptr, &sip, 4); + + netpoll_send_skb(np, send_skb); + + /* If there are several rx_hooks for the same address, + we're fine by sending a single reply */ + break; + } + spin_unlock_irqrestore(&npinfo->rx_lock, flags); } int __netpoll_rx(struct sk_buff *skb) { int proto, len, ulen; + int hits = 0; struct iphdr *iph; struct udphdr *uh; - struct netpoll_info *npi = skb->dev->npinfo; - struct netpoll *np = npi->rx_np; + struct netpoll_info *npinfo = skb->dev->npinfo; + struct netpoll *np, *tmp; - if (!np) + if (list_empty(&npinfo->rx_np)) goto out; + if (skb->dev->type != ARPHRD_ETHER) goto out; /* check if netpoll clients need ARP */ if (skb->protocol == htons(ETH_P_ARP) && atomic_read(&trapped)) { - skb_queue_tail(&npi->arp_tx, skb); + skb_queue_tail(&npinfo->arp_tx, skb); return 1; } @@ -551,16 +578,23 @@ int __netpoll_rx(struct sk_buff *skb) goto out; if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr)) goto out; - if (np->local_ip && np->local_ip != iph->daddr) - goto out; - if (np->remote_ip && np->remote_ip != iph->saddr) - goto out; - if (np->local_port && np->local_port != ntohs(uh->dest)) - goto out; - np->rx_hook(np, ntohs(uh->source), - (char *)(uh+1), - ulen - sizeof(struct udphdr)); + list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { + if (np->local_ip && np->local_ip != iph->daddr) + continue; + if (np->remote_ip && np->remote_ip != iph->saddr) + continue; + if (np->local_port && np->local_port != ntohs(uh->dest)) + continue; + + np->rx_hook(np, ntohs(uh->source), + (char *)(uh+1), + ulen - sizeof(struct udphdr)); + hits++; + } + + if (!hits) + goto out; kfree_skb(skb); return 1; @@ -684,6 +718,7 @@ int netpoll_setup(struct netpoll *np) struct net_device *ndev = NULL; struct in_device *in_dev; struct netpoll_info *npinfo; + struct netpoll *npe, *tmp; unsigned long flags; int err; @@ -704,7 +739,7 @@ int netpoll_setup(struct netpoll *np) } npinfo->rx_flags = 0; - npinfo->rx_np = NULL; + INIT_LIST_HEAD(&npinfo->rx_np); spin_lock_init(&npinfo->rx_lock); skb_queue_head_init(&npinfo->arp_tx); @@ -785,7 +820,7 @@ int netpoll_setup(struct netpoll *np) if (np->rx_hook) { spin_lock_irqsave(&npinfo->rx_lock, flags); npinfo->rx_flags |= NETPOLL_RX_ENABLED; - npinfo->rx_np = np; + list_add_tail(&np->rx, &npinfo->rx_np); spin_unlock_irqrestore(&npinfo->rx_lock, flags); } @@ -801,9 +836,16 @@ int netpoll_setup(struct netpoll *np) return 0; release: - if (!ndev->npinfo) + if (!ndev->npinfo) { + spin_lock_irqsave(&npinfo->rx_lock, flags); + list_for_each_entry_safe(npe, tmp, &npinfo->rx_np, rx) { + npe->dev = NULL; + } + spin_unlock_irqrestore(&npinfo->rx_lock, flags); + kfree(npinfo); - np->dev = NULL; + } + dev_put(ndev); return err; } @@ -823,10 +865,11 @@ void netpoll_cleanup(struct netpoll *np) if (np->dev) { npinfo = np->dev->npinfo; if (npinfo) { - if (npinfo->rx_np == np) { + if (!list_empty(&npinfo->rx_np)) { spin_lock_irqsave(&npinfo->rx_lock, flags); - npinfo->rx_np = NULL; - npinfo->rx_flags &= ~NETPOLL_RX_ENABLED; + list_del(&np->rx); + if (list_empty(&npinfo->rx_np)) + npinfo->rx_flags &= ~NETPOLL_RX_ENABLED; spin_unlock_irqrestore(&npinfo->rx_lock, flags); } -- cgit v1.2.3-70-g09d2 From 9a58a80a701bdb2d220cdab4914218df5b48d781 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 14 Jan 2010 03:10:54 -0800 Subject: proc_fops: convert drivers/isdn/ to seq_file Convert code away from ->read_proc/->write_proc interfaces. Switch to proc_create()/proc_create_data() which make addition of proc entries reliable wrt NULL ->proc_fops, NULL ->data and so on. Problem with ->read_proc et al is described here commit 786d7e1612f0b0adb6046f19b906609e4fe8b1ba "Fix rmmod/read/write races in /proc entries" [akpm@linux-foundation.org: CONFIG_PROC_FS=n build fix] Signed-off-by: Alexey Dobriyan Signed-off-by: Tilman Schmidt Signed-off-by: Karsten Keil Signed-off-by: David S. Miller --- Documentation/isdn/INTERFACE.CAPI | 9 +- drivers/isdn/capi/capi.c | 99 ++++++---------- drivers/isdn/capi/capidrv.c | 55 +++------ drivers/isdn/capi/kcapi.c | 8 +- drivers/isdn/gigaset/capi.c | 75 ++++++------ drivers/isdn/hardware/avm/avmcard.h | 6 +- drivers/isdn/hardware/avm/b1.c | 54 +++++---- drivers/isdn/hardware/avm/b1dma.c | 71 ++++++------ drivers/isdn/hardware/avm/b1isa.c | 2 +- drivers/isdn/hardware/avm/b1pci.c | 4 +- drivers/isdn/hardware/avm/b1pcmcia.c | 2 +- drivers/isdn/hardware/avm/c4.c | 53 +++++---- drivers/isdn/hardware/avm/t1isa.c | 2 +- drivers/isdn/hardware/avm/t1pci.c | 2 +- drivers/isdn/hardware/eicon/capimain.c | 40 ++++--- drivers/isdn/hardware/eicon/diva_didd.c | 45 ++++---- drivers/isdn/hardware/eicon/divasi.c | 48 ++++---- drivers/isdn/hardware/eicon/divasproc.c | 198 ++++++++++++++------------------ drivers/isdn/hysdn/hycapi.c | 56 ++++----- include/linux/isdn/capilli.h | 3 +- net/bluetooth/cmtp/capi.c | 37 +++--- 21 files changed, 411 insertions(+), 458 deletions(-) (limited to 'include/linux') diff --git a/Documentation/isdn/INTERFACE.CAPI b/Documentation/isdn/INTERFACE.CAPI index 5fe8de5cc72..f172091fb7c 100644 --- a/Documentation/isdn/INTERFACE.CAPI +++ b/Documentation/isdn/INTERFACE.CAPI @@ -149,10 +149,11 @@ char *(*procinfo)(struct capi_ctr *ctrlr) pointer to a callback function returning the entry for the device in the CAPI controller info table, /proc/capi/controller -read_proc_t *ctr_read_proc - pointer to the read_proc callback function for the device's proc file - system entry, /proc/capi/controllers/; will be called with a - pointer to the device's capi_ctr structure as the last (data) argument +const struct file_operations *proc_fops + pointers to callback functions for the device's proc file + system entry, /proc/capi/controllers/; pointer to the device's + capi_ctr structure is available from struct proc_dir_entry::data + which is available from struct inode. Note: Callback functions except send_message() are never called in interrupt context. diff --git a/drivers/isdn/capi/capi.c b/drivers/isdn/capi/capi.c index 65bf91e16a4..79f9364aded 100644 --- a/drivers/isdn/capi/capi.c +++ b/drivers/isdn/capi/capi.c @@ -33,6 +33,7 @@ #endif /* CONFIG_ISDN_CAPI_MIDDLEWARE */ #include #include +#include #include #include #include @@ -1407,114 +1408,84 @@ static void capinc_tty_exit(void) * /proc/capi/capi20: * minor applid nrecvctlpkt nrecvdatapkt nsendctlpkt nsenddatapkt */ -static int proc_capidev_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int capi20_proc_show(struct seq_file *m, void *v) { struct capidev *cdev; struct list_head *l; - int len = 0; read_lock(&capidev_list_lock); list_for_each(l, &capidev_list) { cdev = list_entry(l, struct capidev, list); - len += sprintf(page+len, "0 %d %lu %lu %lu %lu\n", + seq_printf(m, "0 %d %lu %lu %lu %lu\n", cdev->ap.applid, cdev->ap.nrecvctlpkt, cdev->ap.nrecvdatapkt, cdev->ap.nsentctlpkt, cdev->ap.nsentdatapkt); - if (len <= off) { - off -= len; - len = 0; - } else { - if (len-off > count) - goto endloop; - } } - -endloop: read_unlock(&capidev_list_lock); - if (len < count) - *eof = 1; - if (len > count) len = count; - if (len < 0) len = 0; - return len; + return 0; } +static int capi20_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, capi20_proc_show, NULL); +} + +static const struct file_operations capi20_proc_fops = { + .owner = THIS_MODULE, + .open = capi20_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + /* * /proc/capi/capi20ncci: * applid ncci */ -static int proc_capincci_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int capi20ncci_proc_show(struct seq_file *m, void *v) { struct capidev *cdev; struct capincci *np; struct list_head *l; - int len = 0; read_lock(&capidev_list_lock); list_for_each(l, &capidev_list) { cdev = list_entry(l, struct capidev, list); for (np=cdev->nccis; np; np = np->next) { - len += sprintf(page+len, "%d 0x%x\n", + seq_printf(m, "%d 0x%x\n", cdev->ap.applid, np->ncci); - if (len <= off) { - off -= len; - len = 0; - } else { - if (len-off > count) - goto endloop; - } } } -endloop: read_unlock(&capidev_list_lock); - *start = page+off; - if (len < count) - *eof = 1; - if (len>count) len = count; - if (len<0) len = 0; - return len; + return 0; } -static struct procfsentries { - char *name; - mode_t mode; - int (*read_proc)(char *page, char **start, off_t off, - int count, int *eof, void *data); - struct proc_dir_entry *procent; -} procfsentries[] = { - /* { "capi", S_IFDIR, 0 }, */ - { "capi/capi20", 0 , proc_capidev_read_proc }, - { "capi/capi20ncci", 0 , proc_capincci_read_proc }, +static int capi20ncci_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, capi20ncci_proc_show, NULL); +} + +static const struct file_operations capi20ncci_proc_fops = { + .owner = THIS_MODULE, + .open = capi20ncci_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, }; static void __init proc_init(void) { - int nelem = ARRAY_SIZE(procfsentries); - int i; - - for (i=0; i < nelem; i++) { - struct procfsentries *p = procfsentries + i; - p->procent = create_proc_entry(p->name, p->mode, NULL); - if (p->procent) p->procent->read_proc = p->read_proc; - } + proc_create("capi/capi20", 0, NULL, &capi20_proc_fops); + proc_create("capi/capi20ncci", 0, NULL, &capi20ncci_proc_fops); } static void __exit proc_exit(void) { - int nelem = ARRAY_SIZE(procfsentries); - int i; - - for (i=nelem-1; i >= 0; i--) { - struct procfsentries *p = procfsentries + i; - if (p->procent) { - remove_proc_entry(p->name, NULL); - p->procent = NULL; - } - } + remove_proc_entry("capi/capi20", NULL); + remove_proc_entry("capi/capi20ncci", NULL); } /* -------- init function and module interface ---------------------- */ diff --git a/drivers/isdn/capi/capidrv.c b/drivers/isdn/capi/capidrv.c index 66b7d7a8647..bb450152fb7 100644 --- a/drivers/isdn/capi/capidrv.c +++ b/drivers/isdn/capi/capidrv.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -2229,59 +2230,37 @@ static void lower_callback(unsigned int cmd, u32 contr, void *data) * /proc/capi/capidrv: * nrecvctlpkt nrecvdatapkt nsendctlpkt nsenddatapkt */ -static int proc_capidrv_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int capidrv_proc_show(struct seq_file *m, void *v) { - int len = 0; - - len += sprintf(page+len, "%lu %lu %lu %lu\n", + seq_printf(m, "%lu %lu %lu %lu\n", global.ap.nrecvctlpkt, global.ap.nrecvdatapkt, global.ap.nsentctlpkt, global.ap.nsentdatapkt); - if (off+count >= len) - *eof = 1; - if (len < off) - return 0; - *start = page + off; - return ((count < len-off) ? count : len-off); + return 0; +} + +static int capidrv_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, capidrv_proc_show, NULL); } -static struct procfsentries { - char *name; - mode_t mode; - int (*read_proc)(char *page, char **start, off_t off, - int count, int *eof, void *data); - struct proc_dir_entry *procent; -} procfsentries[] = { - /* { "capi", S_IFDIR, 0 }, */ - { "capi/capidrv", 0 , proc_capidrv_read_proc }, +static const struct file_operations capidrv_proc_fops = { + .owner = THIS_MODULE, + .open = capidrv_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, }; static void __init proc_init(void) { - int nelem = ARRAY_SIZE(procfsentries); - int i; - - for (i=0; i < nelem; i++) { - struct procfsentries *p = procfsentries + i; - p->procent = create_proc_entry(p->name, p->mode, NULL); - if (p->procent) p->procent->read_proc = p->read_proc; - } + proc_create("capi/capidrv", 0, NULL, &capidrv_proc_fops); } static void __exit proc_exit(void) { - int nelem = ARRAY_SIZE(procfsentries); - int i; - - for (i=nelem-1; i >= 0; i--) { - struct procfsentries *p = procfsentries + i; - if (p->procent) { - remove_proc_entry(p->name, NULL); - p->procent = NULL; - } - } + remove_proc_entry("capi/capidrv", NULL); } static int __init capidrv_init(void) diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c index dc506ab99ca..b0bacf377c1 100644 --- a/drivers/isdn/capi/kcapi.c +++ b/drivers/isdn/capi/kcapi.c @@ -490,13 +490,7 @@ attach_capi_ctr(struct capi_ctr *card) card->traceflag = showcapimsgs; sprintf(card->procfn, "capi/controllers/%d", card->cnr); - card->procent = create_proc_entry(card->procfn, 0, NULL); - if (card->procent) { - card->procent->read_proc = - (int (*)(char *,char **,off_t,int,int *,void *)) - card->ctr_read_proc; - card->procent->data = card; - } + card->procent = proc_create_data(card->procfn, 0, NULL, card->proc_fops, card); ncards++; printk(KERN_NOTICE "kcapi: Controller [%03d]: %s attached\n", diff --git a/drivers/isdn/gigaset/capi.c b/drivers/isdn/gigaset/capi.c index 3f5cd06af10..6f0ae32906b 100644 --- a/drivers/isdn/gigaset/capi.c +++ b/drivers/isdn/gigaset/capi.c @@ -13,6 +13,8 @@ #include "gigaset.h" #include +#include +#include #include #include #include @@ -2106,35 +2108,22 @@ static char *gigaset_procinfo(struct capi_ctr *ctr) return ctr->name; /* ToDo: more? */ } -/** - * gigaset_ctr_read_proc() - build controller proc file entry - * @page: buffer of PAGE_SIZE bytes for receiving the entry. - * @start: unused. - * @off: unused. - * @count: unused. - * @eof: unused. - * @ctr: controller descriptor structure. - * - * Return value: length of generated entry - */ -static int gigaset_ctr_read_proc(char *page, char **start, off_t off, - int count, int *eof, struct capi_ctr *ctr) +static int gigaset_proc_show(struct seq_file *m, void *v) { + struct capi_ctr *ctr = m->private; struct cardstate *cs = ctr->driverdata; char *s; int i; - int len = 0; - len += sprintf(page+len, "%-16s %s\n", "name", ctr->name); - len += sprintf(page+len, "%-16s %s %s\n", "dev", + + seq_printf(m, "%-16s %s\n", "name", ctr->name); + seq_printf(m, "%-16s %s %s\n", "dev", dev_driver_string(cs->dev), dev_name(cs->dev)); - len += sprintf(page+len, "%-16s %d\n", "id", cs->myid); + seq_printf(m, "%-16s %d\n", "id", cs->myid); if (cs->gotfwver) - len += sprintf(page+len, "%-16s %d.%d.%d.%d\n", "firmware", + seq_printf(m, "%-16s %d.%d.%d.%d\n", "firmware", cs->fwver[0], cs->fwver[1], cs->fwver[2], cs->fwver[3]); - len += sprintf(page+len, "%-16s %d\n", "channels", - cs->channels); - len += sprintf(page+len, "%-16s %s\n", "onechannel", - cs->onechannel ? "yes" : "no"); + seq_printf(m, "%-16s %d\n", "channels", cs->channels); + seq_printf(m, "%-16s %s\n", "onechannel", cs->onechannel ? "yes" : "no"); switch (cs->mode) { case M_UNKNOWN: @@ -2152,7 +2141,7 @@ static int gigaset_ctr_read_proc(char *page, char **start, off_t off, default: s = "??"; } - len += sprintf(page+len, "%-16s %s\n", "mode", s); + seq_printf(m, "%-16s %s\n", "mode", s); switch (cs->mstate) { case MS_UNINITIALIZED: @@ -2176,25 +2165,21 @@ static int gigaset_ctr_read_proc(char *page, char **start, off_t off, default: s = "??"; } - len += sprintf(page+len, "%-16s %s\n", "mstate", s); + seq_printf(m, "%-16s %s\n", "mstate", s); - len += sprintf(page+len, "%-16s %s\n", "running", - cs->running ? "yes" : "no"); - len += sprintf(page+len, "%-16s %s\n", "connected", - cs->connected ? "yes" : "no"); - len += sprintf(page+len, "%-16s %s\n", "isdn_up", - cs->isdn_up ? "yes" : "no"); - len += sprintf(page+len, "%-16s %s\n", "cidmode", - cs->cidmode ? "yes" : "no"); + seq_printf(m, "%-16s %s\n", "running", cs->running ? "yes" : "no"); + seq_printf(m, "%-16s %s\n", "connected", cs->connected ? "yes" : "no"); + seq_printf(m, "%-16s %s\n", "isdn_up", cs->isdn_up ? "yes" : "no"); + seq_printf(m, "%-16s %s\n", "cidmode", cs->cidmode ? "yes" : "no"); for (i = 0; i < cs->channels; i++) { - len += sprintf(page+len, "[%d]%-13s %d\n", i, "corrupted", + seq_printf(m, "[%d]%-13s %d\n", i, "corrupted", cs->bcs[i].corrupted); - len += sprintf(page+len, "[%d]%-13s %d\n", i, "trans_down", + seq_printf(m, "[%d]%-13s %d\n", i, "trans_down", cs->bcs[i].trans_down); - len += sprintf(page+len, "[%d]%-13s %d\n", i, "trans_up", + seq_printf(m, "[%d]%-13s %d\n", i, "trans_up", cs->bcs[i].trans_up); - len += sprintf(page+len, "[%d]%-13s %d\n", i, "chstate", + seq_printf(m, "[%d]%-13s %d\n", i, "chstate", cs->bcs[i].chstate); switch (cs->bcs[i].proto2) { case L2_BITSYNC: @@ -2209,11 +2194,23 @@ static int gigaset_ctr_read_proc(char *page, char **start, off_t off, default: s = "??"; } - len += sprintf(page+len, "[%d]%-13s %s\n", i, "proto2", s); + seq_printf(m, "[%d]%-13s %s\n", i, "proto2", s); } - return len; + return 0; } +static int gigaset_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, gigaset_proc_show, PDE(inode)->data); +} + +static const struct file_operations gigaset_proc_fops = { + .owner = THIS_MODULE, + .open = gigaset_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; static struct capi_driver capi_driver_gigaset = { .name = "gigaset", @@ -2256,7 +2253,7 @@ int gigaset_isdn_register(struct cardstate *cs, const char *isdnid) iif->ctr.release_appl = gigaset_release_appl; iif->ctr.send_message = gigaset_send_message; iif->ctr.procinfo = gigaset_procinfo; - iif->ctr.ctr_read_proc = gigaset_ctr_read_proc; + iif->ctr.proc_fops = &gigaset_proc_fops; INIT_LIST_HEAD(&iif->appls); skb_queue_head_init(&iif->sendqueue); atomic_set(&iif->sendqlen, 0); diff --git a/drivers/isdn/hardware/avm/avmcard.h b/drivers/isdn/hardware/avm/avmcard.h index d964f07e4a5..a70e8854461 100644 --- a/drivers/isdn/hardware/avm/avmcard.h +++ b/drivers/isdn/hardware/avm/avmcard.h @@ -556,8 +556,7 @@ u16 b1_send_message(struct capi_ctr *ctrl, struct sk_buff *skb); void b1_parse_version(avmctrl_info *card); irqreturn_t b1_interrupt(int interrupt, void *devptr); -int b1ctl_read_proc(char *page, char **start, off_t off, - int count, int *eof, struct capi_ctr *ctrl); +extern const struct file_operations b1ctl_proc_fops; avmcard_dmainfo *avmcard_dma_alloc(char *name, struct pci_dev *, long rsize, long ssize); @@ -577,7 +576,6 @@ void b1dma_register_appl(struct capi_ctr *ctrl, capi_register_params *rp); void b1dma_release_appl(struct capi_ctr *ctrl, u16 appl); u16 b1dma_send_message(struct capi_ctr *ctrl, struct sk_buff *skb); -int b1dmactl_read_proc(char *page, char **start, off_t off, - int count, int *eof, struct capi_ctr *ctrl); +extern const struct file_operations b1dmactl_proc_fops; #endif /* _AVMCARD_H_ */ diff --git a/drivers/isdn/hardware/avm/b1.c b/drivers/isdn/hardware/avm/b1.c index a7c0083e78a..c38fa0f4c72 100644 --- a/drivers/isdn/hardware/avm/b1.c +++ b/drivers/isdn/hardware/avm/b1.c @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include #include #include @@ -634,18 +636,17 @@ irqreturn_t b1_interrupt(int interrupt, void *devptr) } /* ------------------------------------------------------------- */ -int b1ctl_read_proc(char *page, char **start, off_t off, - int count, int *eof, struct capi_ctr *ctrl) +static int b1ctl_proc_show(struct seq_file *m, void *v) { + struct capi_ctr *ctrl = m->private; avmctrl_info *cinfo = (avmctrl_info *)(ctrl->driverdata); avmcard *card = cinfo->card; u8 flag; - int len = 0; char *s; - len += sprintf(page+len, "%-16s %s\n", "name", card->name); - len += sprintf(page+len, "%-16s 0x%x\n", "io", card->port); - len += sprintf(page+len, "%-16s %d\n", "irq", card->irq); + seq_printf(m, "%-16s %s\n", "name", card->name); + seq_printf(m, "%-16s 0x%x\n", "io", card->port); + seq_printf(m, "%-16s %d\n", "irq", card->irq); switch (card->cardtype) { case avm_b1isa: s = "B1 ISA"; break; case avm_b1pci: s = "B1 PCI"; break; @@ -658,20 +659,20 @@ int b1ctl_read_proc(char *page, char **start, off_t off, case avm_c2: s = "C2"; break; default: s = "???"; break; } - len += sprintf(page+len, "%-16s %s\n", "type", s); + seq_printf(m, "%-16s %s\n", "type", s); if (card->cardtype == avm_t1isa) - len += sprintf(page+len, "%-16s %d\n", "cardnr", card->cardnr); + seq_printf(m, "%-16s %d\n", "cardnr", card->cardnr); if ((s = cinfo->version[VER_DRIVER]) != NULL) - len += sprintf(page+len, "%-16s %s\n", "ver_driver", s); + seq_printf(m, "%-16s %s\n", "ver_driver", s); if ((s = cinfo->version[VER_CARDTYPE]) != NULL) - len += sprintf(page+len, "%-16s %s\n", "ver_cardtype", s); + seq_printf(m, "%-16s %s\n", "ver_cardtype", s); if ((s = cinfo->version[VER_SERIAL]) != NULL) - len += sprintf(page+len, "%-16s %s\n", "ver_serial", s); + seq_printf(m, "%-16s %s\n", "ver_serial", s); if (card->cardtype != avm_m1) { flag = ((u8 *)(ctrl->profile.manu))[3]; if (flag) - len += sprintf(page+len, "%-16s%s%s%s%s%s%s%s\n", + seq_printf(m, "%-16s%s%s%s%s%s%s%s\n", "protocol", (flag & 0x01) ? " DSS1" : "", (flag & 0x02) ? " CT1" : "", @@ -685,7 +686,7 @@ int b1ctl_read_proc(char *page, char **start, off_t off, if (card->cardtype != avm_m1) { flag = ((u8 *)(ctrl->profile.manu))[5]; if (flag) - len += sprintf(page+len, "%-16s%s%s%s%s\n", + seq_printf(m, "%-16s%s%s%s%s\n", "linetype", (flag & 0x01) ? " point to point" : "", (flag & 0x02) ? " point to multipoint" : "", @@ -693,16 +694,25 @@ int b1ctl_read_proc(char *page, char **start, off_t off, (flag & 0x04) ? " leased line with D-channel" : "" ); } - len += sprintf(page+len, "%-16s %s\n", "cardname", cinfo->cardname); - - if (off+count >= len) - *eof = 1; - if (len < off) - return 0; - *start = page + off; - return ((count < len-off) ? count : len-off); + seq_printf(m, "%-16s %s\n", "cardname", cinfo->cardname); + + return 0; +} + +static int b1ctl_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, b1ctl_proc_show, PDE(inode)->data); } +const struct file_operations b1ctl_proc_fops = { + .owner = THIS_MODULE, + .open = b1ctl_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +EXPORT_SYMBOL(b1ctl_proc_fops); + /* ------------------------------------------------------------- */ #ifdef CONFIG_PCI @@ -781,8 +791,6 @@ EXPORT_SYMBOL(b1_send_message); EXPORT_SYMBOL(b1_parse_version); EXPORT_SYMBOL(b1_interrupt); -EXPORT_SYMBOL(b1ctl_read_proc); - static int __init b1_init(void) { char *p; diff --git a/drivers/isdn/hardware/avm/b1dma.c b/drivers/isdn/hardware/avm/b1dma.c index 0e84aaae43f..124550d0dbf 100644 --- a/drivers/isdn/hardware/avm/b1dma.c +++ b/drivers/isdn/hardware/avm/b1dma.c @@ -11,6 +11,8 @@ #include #include +#include +#include #include #include #include @@ -855,21 +857,20 @@ u16 b1dma_send_message(struct capi_ctr *ctrl, struct sk_buff *skb) /* ------------------------------------------------------------- */ -int b1dmactl_read_proc(char *page, char **start, off_t off, - int count, int *eof, struct capi_ctr *ctrl) +static int b1dmactl_proc_show(struct seq_file *m, void *v) { + struct capi_ctr *ctrl = m->private; avmctrl_info *cinfo = (avmctrl_info *)(ctrl->driverdata); avmcard *card = cinfo->card; u8 flag; - int len = 0; char *s; u32 txoff, txlen, rxoff, rxlen, csr; unsigned long flags; - len += sprintf(page+len, "%-16s %s\n", "name", card->name); - len += sprintf(page+len, "%-16s 0x%x\n", "io", card->port); - len += sprintf(page+len, "%-16s %d\n", "irq", card->irq); - len += sprintf(page+len, "%-16s 0x%lx\n", "membase", card->membase); + seq_printf(m, "%-16s %s\n", "name", card->name); + seq_printf(m, "%-16s 0x%x\n", "io", card->port); + seq_printf(m, "%-16s %d\n", "irq", card->irq); + seq_printf(m, "%-16s 0x%lx\n", "membase", card->membase); switch (card->cardtype) { case avm_b1isa: s = "B1 ISA"; break; case avm_b1pci: s = "B1 PCI"; break; @@ -882,18 +883,18 @@ int b1dmactl_read_proc(char *page, char **start, off_t off, case avm_c2: s = "C2"; break; default: s = "???"; break; } - len += sprintf(page+len, "%-16s %s\n", "type", s); + seq_printf(m, "%-16s %s\n", "type", s); if ((s = cinfo->version[VER_DRIVER]) != NULL) - len += sprintf(page+len, "%-16s %s\n", "ver_driver", s); + seq_printf(m, "%-16s %s\n", "ver_driver", s); if ((s = cinfo->version[VER_CARDTYPE]) != NULL) - len += sprintf(page+len, "%-16s %s\n", "ver_cardtype", s); + seq_printf(m, "%-16s %s\n", "ver_cardtype", s); if ((s = cinfo->version[VER_SERIAL]) != NULL) - len += sprintf(page+len, "%-16s %s\n", "ver_serial", s); + seq_printf(m, "%-16s %s\n", "ver_serial", s); if (card->cardtype != avm_m1) { flag = ((u8 *)(ctrl->profile.manu))[3]; if (flag) - len += sprintf(page+len, "%-16s%s%s%s%s%s%s%s\n", + seq_printf(m, "%-16s%s%s%s%s%s%s%s\n", "protocol", (flag & 0x01) ? " DSS1" : "", (flag & 0x02) ? " CT1" : "", @@ -907,7 +908,7 @@ int b1dmactl_read_proc(char *page, char **start, off_t off, if (card->cardtype != avm_m1) { flag = ((u8 *)(ctrl->profile.manu))[5]; if (flag) - len += sprintf(page+len, "%-16s%s%s%s%s\n", + seq_printf(m, "%-16s%s%s%s%s\n", "linetype", (flag & 0x01) ? " point to point" : "", (flag & 0x02) ? " point to multipoint" : "", @@ -915,7 +916,7 @@ int b1dmactl_read_proc(char *page, char **start, off_t off, (flag & 0x04) ? " leased line with D-channel" : "" ); } - len += sprintf(page+len, "%-16s %s\n", "cardname", cinfo->cardname); + seq_printf(m, "%-16s %s\n", "cardname", cinfo->cardname); spin_lock_irqsave(&card->lock, flags); @@ -930,27 +931,30 @@ int b1dmactl_read_proc(char *page, char **start, off_t off, spin_unlock_irqrestore(&card->lock, flags); - len += sprintf(page+len, "%-16s 0x%lx\n", - "csr (cached)", (unsigned long)card->csr); - len += sprintf(page+len, "%-16s 0x%lx\n", - "csr", (unsigned long)csr); - len += sprintf(page+len, "%-16s %lu\n", - "txoff", (unsigned long)txoff); - len += sprintf(page+len, "%-16s %lu\n", - "txlen", (unsigned long)txlen); - len += sprintf(page+len, "%-16s %lu\n", - "rxoff", (unsigned long)rxoff); - len += sprintf(page+len, "%-16s %lu\n", - "rxlen", (unsigned long)rxlen); - - if (off+count >= len) - *eof = 1; - if (len < off) - return 0; - *start = page + off; - return ((count < len-off) ? count : len-off); + seq_printf(m, "%-16s 0x%lx\n", "csr (cached)", (unsigned long)card->csr); + seq_printf(m, "%-16s 0x%lx\n", "csr", (unsigned long)csr); + seq_printf(m, "%-16s %lu\n", "txoff", (unsigned long)txoff); + seq_printf(m, "%-16s %lu\n", "txlen", (unsigned long)txlen); + seq_printf(m, "%-16s %lu\n", "rxoff", (unsigned long)rxoff); + seq_printf(m, "%-16s %lu\n", "rxlen", (unsigned long)rxlen); + + return 0; +} + +static int b1dmactl_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, b1dmactl_proc_show, PDE(inode)->data); } +const struct file_operations b1dmactl_proc_fops = { + .owner = THIS_MODULE, + .open = b1dmactl_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +EXPORT_SYMBOL(b1dmactl_proc_fops); + /* ------------------------------------------------------------- */ EXPORT_SYMBOL(b1dma_reset); @@ -963,7 +967,6 @@ EXPORT_SYMBOL(b1dma_reset_ctr); EXPORT_SYMBOL(b1dma_register_appl); EXPORT_SYMBOL(b1dma_release_appl); EXPORT_SYMBOL(b1dma_send_message); -EXPORT_SYMBOL(b1dmactl_read_proc); static int __init b1dma_init(void) { diff --git a/drivers/isdn/hardware/avm/b1isa.c b/drivers/isdn/hardware/avm/b1isa.c index 6461a32bc83..ff5390546f9 100644 --- a/drivers/isdn/hardware/avm/b1isa.c +++ b/drivers/isdn/hardware/avm/b1isa.c @@ -121,7 +121,7 @@ static int b1isa_probe(struct pci_dev *pdev) cinfo->capi_ctrl.load_firmware = b1_load_firmware; cinfo->capi_ctrl.reset_ctr = b1_reset_ctr; cinfo->capi_ctrl.procinfo = b1isa_procinfo; - cinfo->capi_ctrl.ctr_read_proc = b1ctl_read_proc; + cinfo->capi_ctrl.proc_fops = &b1ctl_proc_fops; strcpy(cinfo->capi_ctrl.name, card->name); retval = attach_capi_ctr(&cinfo->capi_ctrl); diff --git a/drivers/isdn/hardware/avm/b1pci.c b/drivers/isdn/hardware/avm/b1pci.c index 5b314a2c404..c97e4315079 100644 --- a/drivers/isdn/hardware/avm/b1pci.c +++ b/drivers/isdn/hardware/avm/b1pci.c @@ -112,7 +112,7 @@ static int b1pci_probe(struct capicardparams *p, struct pci_dev *pdev) cinfo->capi_ctrl.load_firmware = b1_load_firmware; cinfo->capi_ctrl.reset_ctr = b1_reset_ctr; cinfo->capi_ctrl.procinfo = b1pci_procinfo; - cinfo->capi_ctrl.ctr_read_proc = b1ctl_read_proc; + cinfo->capi_ctrl.proc_fops = &b1ctl_proc_fops; strcpy(cinfo->capi_ctrl.name, card->name); cinfo->capi_ctrl.owner = THIS_MODULE; @@ -251,7 +251,7 @@ static int b1pciv4_probe(struct capicardparams *p, struct pci_dev *pdev) cinfo->capi_ctrl.load_firmware = b1dma_load_firmware; cinfo->capi_ctrl.reset_ctr = b1dma_reset_ctr; cinfo->capi_ctrl.procinfo = b1pciv4_procinfo; - cinfo->capi_ctrl.ctr_read_proc = b1dmactl_read_proc; + cinfo->capi_ctrl.proc_fops = &b1dmactl_proc_fops; strcpy(cinfo->capi_ctrl.name, card->name); retval = attach_capi_ctr(&cinfo->capi_ctrl); diff --git a/drivers/isdn/hardware/avm/b1pcmcia.c b/drivers/isdn/hardware/avm/b1pcmcia.c index 7740403b40e..d6391e0afee 100644 --- a/drivers/isdn/hardware/avm/b1pcmcia.c +++ b/drivers/isdn/hardware/avm/b1pcmcia.c @@ -108,7 +108,7 @@ static int b1pcmcia_add_card(unsigned int port, unsigned irq, cinfo->capi_ctrl.load_firmware = b1_load_firmware; cinfo->capi_ctrl.reset_ctr = b1_reset_ctr; cinfo->capi_ctrl.procinfo = b1pcmcia_procinfo; - cinfo->capi_ctrl.ctr_read_proc = b1ctl_read_proc; + cinfo->capi_ctrl.proc_fops = &b1ctl_proc_fops; strcpy(cinfo->capi_ctrl.name, card->name); retval = attach_capi_ctr(&cinfo->capi_ctrl); diff --git a/drivers/isdn/hardware/avm/c4.c b/drivers/isdn/hardware/avm/c4.c index 6833301a45f..de6e6b31181 100644 --- a/drivers/isdn/hardware/avm/c4.c +++ b/drivers/isdn/hardware/avm/c4.c @@ -11,6 +11,8 @@ #include #include +#include +#include #include #include #include @@ -1062,19 +1064,18 @@ static char *c4_procinfo(struct capi_ctr *ctrl) return cinfo->infobuf; } -static int c4_read_proc(char *page, char **start, off_t off, - int count, int *eof, struct capi_ctr *ctrl) +static int c4_proc_show(struct seq_file *m, void *v) { + struct capi_ctr *ctrl = m->private; avmctrl_info *cinfo = (avmctrl_info *)(ctrl->driverdata); avmcard *card = cinfo->card; u8 flag; - int len = 0; char *s; - len += sprintf(page+len, "%-16s %s\n", "name", card->name); - len += sprintf(page+len, "%-16s 0x%x\n", "io", card->port); - len += sprintf(page+len, "%-16s %d\n", "irq", card->irq); - len += sprintf(page+len, "%-16s 0x%lx\n", "membase", card->membase); + seq_printf(m, "%-16s %s\n", "name", card->name); + seq_printf(m, "%-16s 0x%x\n", "io", card->port); + seq_printf(m, "%-16s %d\n", "irq", card->irq); + seq_printf(m, "%-16s 0x%lx\n", "membase", card->membase); switch (card->cardtype) { case avm_b1isa: s = "B1 ISA"; break; case avm_b1pci: s = "B1 PCI"; break; @@ -1087,18 +1088,18 @@ static int c4_read_proc(char *page, char **start, off_t off, case avm_c2: s = "C2"; break; default: s = "???"; break; } - len += sprintf(page+len, "%-16s %s\n", "type", s); + seq_printf(m, "%-16s %s\n", "type", s); if ((s = cinfo->version[VER_DRIVER]) != NULL) - len += sprintf(page+len, "%-16s %s\n", "ver_driver", s); + seq_printf(m, "%-16s %s\n", "ver_driver", s); if ((s = cinfo->version[VER_CARDTYPE]) != NULL) - len += sprintf(page+len, "%-16s %s\n", "ver_cardtype", s); + seq_printf(m, "%-16s %s\n", "ver_cardtype", s); if ((s = cinfo->version[VER_SERIAL]) != NULL) - len += sprintf(page+len, "%-16s %s\n", "ver_serial", s); + seq_printf(m, "%-16s %s\n", "ver_serial", s); if (card->cardtype != avm_m1) { flag = ((u8 *)(ctrl->profile.manu))[3]; if (flag) - len += sprintf(page+len, "%-16s%s%s%s%s%s%s%s\n", + seq_printf(m, "%-16s%s%s%s%s%s%s%s\n", "protocol", (flag & 0x01) ? " DSS1" : "", (flag & 0x02) ? " CT1" : "", @@ -1112,7 +1113,7 @@ static int c4_read_proc(char *page, char **start, off_t off, if (card->cardtype != avm_m1) { flag = ((u8 *)(ctrl->profile.manu))[5]; if (flag) - len += sprintf(page+len, "%-16s%s%s%s%s\n", + seq_printf(m, "%-16s%s%s%s%s\n", "linetype", (flag & 0x01) ? " point to point" : "", (flag & 0x02) ? " point to multipoint" : "", @@ -1120,16 +1121,24 @@ static int c4_read_proc(char *page, char **start, off_t off, (flag & 0x04) ? " leased line with D-channel" : "" ); } - len += sprintf(page+len, "%-16s %s\n", "cardname", cinfo->cardname); - - if (off+count >= len) - *eof = 1; - if (len < off) - return 0; - *start = page + off; - return ((count < len-off) ? count : len-off); + seq_printf(m, "%-16s %s\n", "cardname", cinfo->cardname); + + return 0; } +static int c4_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, c4_proc_show, PDE(inode)->data); +} + +static const struct file_operations c4_proc_fops = { + .owner = THIS_MODULE, + .open = c4_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + /* ------------------------------------------------------------- */ static int c4_add_card(struct capicardparams *p, struct pci_dev *dev, @@ -1201,7 +1210,7 @@ static int c4_add_card(struct capicardparams *p, struct pci_dev *dev, cinfo->capi_ctrl.load_firmware = c4_load_firmware; cinfo->capi_ctrl.reset_ctr = c4_reset_ctr; cinfo->capi_ctrl.procinfo = c4_procinfo; - cinfo->capi_ctrl.ctr_read_proc = c4_read_proc; + cinfo->capi_ctrl.proc_fops = &c4_proc_fops; strcpy(cinfo->capi_ctrl.name, card->name); retval = attach_capi_ctr(&cinfo->capi_ctrl); diff --git a/drivers/isdn/hardware/avm/t1isa.c b/drivers/isdn/hardware/avm/t1isa.c index 1c53fd49adb..baeeb3c2a3e 100644 --- a/drivers/isdn/hardware/avm/t1isa.c +++ b/drivers/isdn/hardware/avm/t1isa.c @@ -429,7 +429,7 @@ static int t1isa_probe(struct pci_dev *pdev, int cardnr) cinfo->capi_ctrl.load_firmware = t1isa_load_firmware; cinfo->capi_ctrl.reset_ctr = t1isa_reset_ctr; cinfo->capi_ctrl.procinfo = t1isa_procinfo; - cinfo->capi_ctrl.ctr_read_proc = b1ctl_read_proc; + cinfo->capi_ctrl.proc_fops = &b1ctl_proc_fops; strcpy(cinfo->capi_ctrl.name, card->name); retval = attach_capi_ctr(&cinfo->capi_ctrl); diff --git a/drivers/isdn/hardware/avm/t1pci.c b/drivers/isdn/hardware/avm/t1pci.c index e6d298d7514..5a3f8309801 100644 --- a/drivers/isdn/hardware/avm/t1pci.c +++ b/drivers/isdn/hardware/avm/t1pci.c @@ -119,7 +119,7 @@ static int t1pci_add_card(struct capicardparams *p, struct pci_dev *pdev) cinfo->capi_ctrl.load_firmware = b1dma_load_firmware; cinfo->capi_ctrl.reset_ctr = b1dma_reset_ctr; cinfo->capi_ctrl.procinfo = t1pci_procinfo; - cinfo->capi_ctrl.ctr_read_proc = b1dmactl_read_proc; + cinfo->capi_ctrl.proc_fops = &b1dmactl_proc_fops; strcpy(cinfo->capi_ctrl.name, card->name); retval = attach_capi_ctr(&cinfo->capi_ctrl); diff --git a/drivers/isdn/hardware/eicon/capimain.c b/drivers/isdn/hardware/eicon/capimain.c index 98fcdfc7ca5..0f073cd7376 100644 --- a/drivers/isdn/hardware/eicon/capimain.c +++ b/drivers/isdn/hardware/eicon/capimain.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include "os_capi.h" @@ -75,25 +76,32 @@ void diva_os_free_message_buffer(diva_os_message_buffer_s * dmb) /* * proc function for controller info */ -static int diva_ctl_read_proc(char *page, char **start, off_t off, - int count, int *eof, struct capi_ctr *ctrl) +static int diva_ctl_proc_show(struct seq_file *m, void *v) { + struct capi_ctr *ctrl = m->private; diva_card *card = (diva_card *) ctrl->driverdata; - int len = 0; - - len += sprintf(page + len, "%s\n", ctrl->name); - len += sprintf(page + len, "Serial No. : %s\n", ctrl->serial); - len += sprintf(page + len, "Id : %d\n", card->Id); - len += sprintf(page + len, "Channels : %d\n", card->d.channels); - - if (off + count >= len) - *eof = 1; - if (len < off) - return 0; - *start = page + off; - return ((count < len - off) ? count : len - off); + + seq_printf(m, "%s\n", ctrl->name); + seq_printf(m, "Serial No. : %s\n", ctrl->serial); + seq_printf(m, "Id : %d\n", card->Id); + seq_printf(m, "Channels : %d\n", card->d.channels); + + return 0; +} + +static int diva_ctl_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, diva_ctl_proc_show, NULL); } +static const struct file_operations diva_ctl_proc_fops = { + .owner = THIS_MODULE, + .open = diva_ctl_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + /* * set additional os settings in capi_ctr struct */ @@ -102,7 +110,7 @@ void diva_os_set_controller_struct(struct capi_ctr *ctrl) ctrl->driver_name = DRIVERLNAME; ctrl->load_firmware = NULL; ctrl->reset_ctr = NULL; - ctrl->ctr_read_proc = diva_ctl_read_proc; + ctrl->proc_fops = &diva_ctl_proc_fops; ctrl->owner = THIS_MODULE; } diff --git a/drivers/isdn/hardware/eicon/diva_didd.c b/drivers/isdn/hardware/eicon/diva_didd.c index 993b14cf177..5d06a743782 100644 --- a/drivers/isdn/hardware/eicon/diva_didd.c +++ b/drivers/isdn/hardware/eicon/diva_didd.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "platform.h" @@ -62,39 +63,41 @@ static char *getrev(const char *revision) return rev; } -static int -proc_read(char *page, char **start, off_t off, int count, int *eof, - void *data) +static int divadidd_proc_show(struct seq_file *m, void *v) { - int len = 0; char tmprev[32]; strcpy(tmprev, main_revision); - len += sprintf(page + len, "%s\n", DRIVERNAME); - len += sprintf(page + len, "name : %s\n", DRIVERLNAME); - len += sprintf(page + len, "release : %s\n", DRIVERRELEASE_DIDD); - len += sprintf(page + len, "build : %s(%s)\n", + seq_printf(m, "%s\n", DRIVERNAME); + seq_printf(m, "name : %s\n", DRIVERLNAME); + seq_printf(m, "release : %s\n", DRIVERRELEASE_DIDD); + seq_printf(m, "build : %s(%s)\n", diva_didd_common_code_build, DIVA_BUILD); - len += sprintf(page + len, "revision : %s\n", getrev(tmprev)); - - if (off + count >= len) - *eof = 1; - if (len < off) - return 0; - *start = page + off; - return ((count < len - off) ? count : len - off); + seq_printf(m, "revision : %s\n", getrev(tmprev)); + + return 0; } +static int divadidd_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, divadidd_proc_show, NULL); +} + +static const struct file_operations divadidd_proc_fops = { + .owner = THIS_MODULE, + .open = divadidd_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static int DIVA_INIT_FUNCTION create_proc(void) { proc_net_eicon = proc_mkdir("eicon", init_net.proc_net); if (proc_net_eicon) { - if ((proc_didd = - create_proc_entry(DRIVERLNAME, S_IFREG | S_IRUGO, - proc_net_eicon))) { - proc_didd->read_proc = proc_read; - } + proc_didd = proc_create(DRIVERLNAME, S_IRUGO, proc_net_eicon, + &divadidd_proc_fops); return (1); } return (0); diff --git a/drivers/isdn/hardware/eicon/divasi.c b/drivers/isdn/hardware/eicon/divasi.c index 69e71ebe784..f577719ab3f 100644 --- a/drivers/isdn/hardware/eicon/divasi.c +++ b/drivers/isdn/hardware/eicon/divasi.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -86,39 +87,40 @@ static void diva_um_timer_function(unsigned long data); extern struct proc_dir_entry *proc_net_eicon; static struct proc_dir_entry *um_idi_proc_entry = NULL; -static int -um_idi_proc_read(char *page, char **start, off_t off, int count, int *eof, - void *data) +static int um_idi_proc_show(struct seq_file *m, void *v) { - int len = 0; char tmprev[32]; - len += sprintf(page + len, "%s\n", DRIVERNAME); - len += sprintf(page + len, "name : %s\n", DRIVERLNAME); - len += sprintf(page + len, "release : %s\n", DRIVERRELEASE_IDI); + seq_printf(m, "%s\n", DRIVERNAME); + seq_printf(m, "name : %s\n", DRIVERLNAME); + seq_printf(m, "release : %s\n", DRIVERRELEASE_IDI); strcpy(tmprev, main_revision); - len += sprintf(page + len, "revision : %s\n", getrev(tmprev)); - len += sprintf(page + len, "build : %s\n", DIVA_BUILD); - len += sprintf(page + len, "major : %d\n", major); - - if (off + count >= len) - *eof = 1; - if (len < off) - return 0; - *start = page + off; - return ((count < len - off) ? count : len - off); + seq_printf(m, "revision : %s\n", getrev(tmprev)); + seq_printf(m, "build : %s\n", DIVA_BUILD); + seq_printf(m, "major : %d\n", major); + + return 0; +} + +static int um_idi_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, um_idi_proc_show, NULL); } +static const struct file_operations um_idi_proc_fops = { + .owner = THIS_MODULE, + .open = um_idi_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static int DIVA_INIT_FUNCTION create_um_idi_proc(void) { - um_idi_proc_entry = create_proc_entry(DRIVERLNAME, - S_IFREG | S_IRUGO | S_IWUSR, - proc_net_eicon); + um_idi_proc_entry = proc_create(DRIVERLNAME, S_IRUGO, proc_net_eicon, + &um_idi_proc_fops); if (!um_idi_proc_entry) return (0); - - um_idi_proc_entry->read_proc = um_idi_proc_read; - return (1); } diff --git a/drivers/isdn/hardware/eicon/divasproc.c b/drivers/isdn/hardware/eicon/divasproc.c index 040827288ec..46d44a94262 100644 --- a/drivers/isdn/hardware/eicon/divasproc.c +++ b/drivers/isdn/hardware/eicon/divasproc.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -141,14 +142,10 @@ void remove_divas_proc(void) } } -/* -** write group_optimization -*/ -static int -write_grp_opt(struct file *file, const char __user *buffer, unsigned long count, - void *data) +static ssize_t grp_opt_proc_write(struct file *file, const char __user *buffer, + size_t count, loff_t *pos) { - diva_os_xdi_adapter_t *a = (diva_os_xdi_adapter_t *) data; + diva_os_xdi_adapter_t *a = PDE(file->f_path.dentry->d_inode)->data; PISDN_ADAPTER IoAdapter = IoAdapters[a->controller - 1]; if ((count == 1) || (count == 2)) { @@ -172,14 +169,10 @@ write_grp_opt(struct file *file, const char __user *buffer, unsigned long count, return (-EINVAL); } -/* -** write dynamic_l1_down -*/ -static int -write_d_l1_down(struct file *file, const char __user *buffer, unsigned long count, - void *data) +static ssize_t d_l1_down_proc_write(struct file *file, const char __user *buffer, + size_t count, loff_t *pos) { - diva_os_xdi_adapter_t *a = (diva_os_xdi_adapter_t *) data; + diva_os_xdi_adapter_t *a = PDE(file->f_path.dentry->d_inode)->data; PISDN_ADAPTER IoAdapter = IoAdapters[a->controller - 1]; if ((count == 1) || (count == 2)) { @@ -203,63 +196,62 @@ write_d_l1_down(struct file *file, const char __user *buffer, unsigned long coun return (-EINVAL); } - -/* -** read dynamic_l1_down -*/ -static int -read_d_l1_down(char *page, char **start, off_t off, int count, int *eof, - void *data) +static int d_l1_down_proc_show(struct seq_file *m, void *v) { - int len = 0; - diva_os_xdi_adapter_t *a = (diva_os_xdi_adapter_t *) data; + diva_os_xdi_adapter_t *a = m->private; PISDN_ADAPTER IoAdapter = IoAdapters[a->controller - 1]; - len += sprintf(page + len, "%s\n", + seq_printf(m, "%s\n", (IoAdapter->capi_cfg. cfg_1 & DIVA_XDI_CAPI_CFG_1_DYNAMIC_L1_ON) ? "1" : "0"); + return 0; +} - if (off + count >= len) - *eof = 1; - if (len < off) - return 0; - *start = page + off; - return ((count < len - off) ? count : len - off); +static int d_l1_down_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, d_l1_down_proc_show, PDE(inode)->data); } -/* -** read group_optimization -*/ -static int -read_grp_opt(char *page, char **start, off_t off, int count, int *eof, - void *data) +static const struct file_operations d_l1_down_proc_fops = { + .owner = THIS_MODULE, + .open = d_l1_down_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = d_l1_down_proc_write, +}; + +static int grp_opt_proc_show(struct seq_file *m, void *v) { - int len = 0; - diva_os_xdi_adapter_t *a = (diva_os_xdi_adapter_t *) data; + diva_os_xdi_adapter_t *a = m->private; PISDN_ADAPTER IoAdapter = IoAdapters[a->controller - 1]; - len += sprintf(page + len, "%s\n", + seq_printf(m, "%s\n", (IoAdapter->capi_cfg. cfg_1 & DIVA_XDI_CAPI_CFG_1_GROUP_POPTIMIZATION_ON) ? "1" : "0"); + return 0; +} - if (off + count >= len) - *eof = 1; - if (len < off) - return 0; - *start = page + off; - return ((count < len - off) ? count : len - off); +static int grp_opt_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, grp_opt_proc_show, PDE(inode)->data); } -/* -** info write -*/ -static int -info_write(struct file *file, const char __user *buffer, unsigned long count, - void *data) +static const struct file_operations grp_opt_proc_fops = { + .owner = THIS_MODULE, + .open = grp_opt_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = grp_opt_proc_write, +}; + +static ssize_t info_proc_write(struct file *file, const char __user *buffer, + size_t count, loff_t *pos) { - diva_os_xdi_adapter_t *a = (diva_os_xdi_adapter_t *) data; + diva_os_xdi_adapter_t *a = PDE(file->f_path.dentry->d_inode)->data; PISDN_ADAPTER IoAdapter = IoAdapters[a->controller - 1]; char c[4]; @@ -277,63 +269,46 @@ info_write(struct file *file, const char __user *buffer, unsigned long count, return (-EINVAL); } -/* -** info read -*/ -static int -info_read(char *page, char **start, off_t off, int count, int *eof, - void *data) +static int info_proc_show(struct seq_file *m, void *v) { int i = 0; - int len = 0; char *p; char tmpser[16]; - diva_os_xdi_adapter_t *a = (diva_os_xdi_adapter_t *) data; + diva_os_xdi_adapter_t *a = m->private; PISDN_ADAPTER IoAdapter = IoAdapters[a->controller - 1]; - len += - sprintf(page + len, "Name : %s\n", - IoAdapter->Properties.Name); - len += sprintf(page + len, "DSP state : %08x\n", a->dsp_mask); - len += sprintf(page + len, "Channels : %02d\n", - IoAdapter->Properties.Channels); - len += sprintf(page + len, "E. max/used : %03d/%03d\n", + seq_printf(m, "Name : %s\n", IoAdapter->Properties.Name); + seq_printf(m, "DSP state : %08x\n", a->dsp_mask); + seq_printf(m, "Channels : %02d\n", IoAdapter->Properties.Channels); + seq_printf(m, "E. max/used : %03d/%03d\n", IoAdapter->e_max, IoAdapter->e_count); diva_get_vserial_number(IoAdapter, tmpser); - len += sprintf(page + len, "Serial : %s\n", tmpser); - len += - sprintf(page + len, "IRQ : %d\n", - IoAdapter->irq_info.irq_nr); - len += sprintf(page + len, "CardIndex : %d\n", a->CardIndex); - len += sprintf(page + len, "CardOrdinal : %d\n", a->CardOrdinal); - len += sprintf(page + len, "Controller : %d\n", a->controller); - len += sprintf(page + len, "Bus-Type : %s\n", + seq_printf(m, "Serial : %s\n", tmpser); + seq_printf(m, "IRQ : %d\n", IoAdapter->irq_info.irq_nr); + seq_printf(m, "CardIndex : %d\n", a->CardIndex); + seq_printf(m, "CardOrdinal : %d\n", a->CardOrdinal); + seq_printf(m, "Controller : %d\n", a->controller); + seq_printf(m, "Bus-Type : %s\n", (a->Bus == DIVAS_XDI_ADAPTER_BUS_ISA) ? "ISA" : "PCI"); - len += sprintf(page + len, "Port-Name : %s\n", a->port_name); + seq_printf(m, "Port-Name : %s\n", a->port_name); if (a->Bus == DIVAS_XDI_ADAPTER_BUS_PCI) { - len += - sprintf(page + len, "PCI-bus : %d\n", - a->resources.pci.bus); - len += - sprintf(page + len, "PCI-func : %d\n", - a->resources.pci.func); + seq_printf(m, "PCI-bus : %d\n", a->resources.pci.bus); + seq_printf(m, "PCI-func : %d\n", a->resources.pci.func); for (i = 0; i < 8; i++) { if (a->resources.pci.bar[i]) { - len += - sprintf(page + len, + seq_printf(m, "Mem / I/O %d : 0x%x / mapped : 0x%lx", i, a->resources.pci.bar[i], (unsigned long) a->resources. pci.addr[i]); if (a->resources.pci.length[i]) { - len += - sprintf(page + len, + seq_printf(m, " / length : %d", a->resources.pci. length[i]); } - len += sprintf(page + len, "\n"); + seq_putc(m, '\n'); } } } @@ -353,16 +328,25 @@ info_read(char *page, char **start, off_t off, int count, int *eof, } else { p = "ready"; } - len += sprintf(page + len, "State : %s\n", p); + seq_printf(m, "State : %s\n", p); - if (off + count >= len) - *eof = 1; - if (len < off) - return 0; - *start = page + off; - return ((count < len - off) ? count : len - off); + return 0; +} + +static int info_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, info_proc_show, PDE(inode)->data); } +static const struct file_operations info_proc_fops = { + .owner = THIS_MODULE, + .open = info_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = info_proc_write, +}; + /* ** adapter proc init/de-init */ @@ -380,28 +364,20 @@ int create_adapter_proc(diva_os_xdi_adapter_t * a) return (0); a->proc_adapter_dir = (void *) de; - if (!(pe = - create_proc_entry(info_proc_name, S_IFREG | S_IRUGO | S_IWUSR, de))) + pe = proc_create_data(info_proc_name, S_IRUGO | S_IWUSR, de, + &info_proc_fops, a); + if (!pe) return (0); a->proc_info = (void *) pe; - pe->write_proc = info_write; - pe->read_proc = info_read; - pe->data = a; - if ((pe = create_proc_entry(grp_opt_proc_name, - S_IFREG | S_IRUGO | S_IWUSR, de))) { + pe = proc_create_data(grp_opt_proc_name, S_IRUGO | S_IWUSR, de, + &grp_opt_proc_fops, a); + if (pe) a->proc_grp_opt = (void *) pe; - pe->write_proc = write_grp_opt; - pe->read_proc = read_grp_opt; - pe->data = a; - } - if ((pe = create_proc_entry(d_l1_down_proc_name, - S_IFREG | S_IRUGO | S_IWUSR, de))) { + pe = proc_create_data(d_l1_down_proc_name, S_IRUGO | S_IWUSR, de, + &d_l1_down_proc_fops, a); + if (pe) a->proc_d_l1_down = (void *) pe; - pe->write_proc = write_d_l1_down; - pe->read_proc = read_d_l1_down; - pe->data = a; - } DBG_TRC(("proc entry %s created", tmp)); diff --git a/drivers/isdn/hysdn/hycapi.c b/drivers/isdn/hysdn/hycapi.c index 4ffaa14b9fc..fe874afa4f8 100644 --- a/drivers/isdn/hysdn/hycapi.c +++ b/drivers/isdn/hysdn/hycapi.c @@ -11,6 +11,8 @@ */ #include +#include +#include #include #include #include @@ -432,26 +434,16 @@ static u16 hycapi_send_message(struct capi_ctr *ctrl, struct sk_buff *skb) return retval; } -/********************************************************************* -hycapi_read_proc - -Informations provided in the /proc/capi-entries. - -*********************************************************************/ - -static int hycapi_read_proc(char *page, char **start, off_t off, - int count, int *eof, struct capi_ctr *ctrl) +static int hycapi_proc_show(struct seq_file *m, void *v) { + struct capi_ctr *ctrl = m->private; hycapictrl_info *cinfo = (hycapictrl_info *)(ctrl->driverdata); hysdn_card *card = cinfo->card; - int len = 0; char *s; -#ifdef HYCAPI_PRINTFNAMES - printk(KERN_NOTICE "hycapi_read_proc\n"); -#endif - len += sprintf(page+len, "%-16s %s\n", "name", cinfo->cardname); - len += sprintf(page+len, "%-16s 0x%x\n", "io", card->iobase); - len += sprintf(page+len, "%-16s %d\n", "irq", card->irq); + + seq_printf(m, "%-16s %s\n", "name", cinfo->cardname); + seq_printf(m, "%-16s 0x%x\n", "io", card->iobase); + seq_printf(m, "%-16s %d\n", "irq", card->irq); switch (card->brdtype) { case BD_PCCARD: s = "HYSDN Hycard"; break; @@ -461,24 +453,32 @@ static int hycapi_read_proc(char *page, char **start, off_t off, case BD_PLEXUS: s = "HYSDN Plexus30"; break; default: s = "???"; break; } - len += sprintf(page+len, "%-16s %s\n", "type", s); + seq_printf(m, "%-16s %s\n", "type", s); if ((s = cinfo->version[VER_DRIVER]) != NULL) - len += sprintf(page+len, "%-16s %s\n", "ver_driver", s); + seq_printf(m, "%-16s %s\n", "ver_driver", s); if ((s = cinfo->version[VER_CARDTYPE]) != NULL) - len += sprintf(page+len, "%-16s %s\n", "ver_cardtype", s); + seq_printf(m, "%-16s %s\n", "ver_cardtype", s); if ((s = cinfo->version[VER_SERIAL]) != NULL) - len += sprintf(page+len, "%-16s %s\n", "ver_serial", s); + seq_printf(m, "%-16s %s\n", "ver_serial", s); - len += sprintf(page+len, "%-16s %s\n", "cardname", cinfo->cardname); + seq_printf(m, "%-16s %s\n", "cardname", cinfo->cardname); - if (off+count >= len) - *eof = 1; - if (len < off) - return 0; - *start = page + off; - return ((count < len-off) ? count : len-off); + return 0; +} + +static int hycapi_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, hycapi_proc_show, PDE(inode)->data); } +static const struct file_operations hycapi_proc_fops = { + .owner = THIS_MODULE, + .open = hycapi_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + /************************************************************** hycapi_load_firmware @@ -774,7 +774,7 @@ hycapi_capi_create(hysdn_card *card) ctrl->load_firmware = hycapi_load_firmware; ctrl->reset_ctr = hycapi_reset_ctr; ctrl->procinfo = hycapi_procinfo; - ctrl->ctr_read_proc = hycapi_read_proc; + ctrl->proc_fops = &hycapi_proc_fops; strcpy(ctrl->name, cinfo->cardname); ctrl->owner = THIS_MODULE; diff --git a/include/linux/isdn/capilli.h b/include/linux/isdn/capilli.h index 7acb87a4487..d3e5e9da0c8 100644 --- a/include/linux/isdn/capilli.h +++ b/include/linux/isdn/capilli.h @@ -50,8 +50,7 @@ struct capi_ctr { u16 (*send_message)(struct capi_ctr *, struct sk_buff *skb); char *(*procinfo)(struct capi_ctr *); - int (*ctr_read_proc)(char *page, char **start, off_t off, - int count, int *eof, struct capi_ctr *card); + const struct file_operations *proc_fops; /* filled in before calling ready callback */ u8 manu[CAPI_MANUFACTURER_LEN]; /* CAPI_GET_MANUFACTURER */ diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c index 97f8d68d574..3487cfe74ae 100644 --- a/net/bluetooth/cmtp/capi.c +++ b/net/bluetooth/cmtp/capi.c @@ -21,7 +21,8 @@ */ #include - +#include +#include #include #include #include @@ -516,33 +517,37 @@ static char *cmtp_procinfo(struct capi_ctr *ctrl) return "CAPI Message Transport Protocol"; } -static int cmtp_ctr_read_proc(char *page, char **start, off_t off, int count, int *eof, struct capi_ctr *ctrl) +static int cmtp_proc_show(struct seq_file *m, void *v) { + struct capi_ctr *ctrl = m->private; struct cmtp_session *session = ctrl->driverdata; struct cmtp_application *app; struct list_head *p, *n; - int len = 0; - len += sprintf(page + len, "%s\n\n", cmtp_procinfo(ctrl)); - len += sprintf(page + len, "addr %s\n", session->name); - len += sprintf(page + len, "ctrl %d\n", session->num); + seq_printf(m, "%s\n\n", cmtp_procinfo(ctrl)); + seq_printf(m, "addr %s\n", session->name); + seq_printf(m, "ctrl %d\n", session->num); list_for_each_safe(p, n, &session->applications) { app = list_entry(p, struct cmtp_application, list); - len += sprintf(page + len, "appl %d -> %d\n", app->appl, app->mapping); + seq_printf(m, "appl %d -> %d\n", app->appl, app->mapping); } - if (off + count >= len) - *eof = 1; - - if (len < off) - return 0; - - *start = page + off; + return 0; +} - return ((count < len - off) ? count : len - off); +static int cmtp_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, cmtp_proc_show, PDE(inode)->data); } +static const struct file_operations cmtp_proc_fops = { + .owner = THIS_MODULE, + .open = cmtp_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; int cmtp_attach_device(struct cmtp_session *session) { @@ -582,7 +587,7 @@ int cmtp_attach_device(struct cmtp_session *session) session->ctrl.send_message = cmtp_send_message; session->ctrl.procinfo = cmtp_procinfo; - session->ctrl.ctr_read_proc = cmtp_ctr_read_proc; + session->ctrl.proc_fops = &cmtp_proc_fops; if (attach_capi_ctr(&session->ctrl) < 0) { BT_ERR("Can't attach new controller"); -- cgit v1.2.3-70-g09d2 From ad72c347e56bf3a0231b9d686e17764157d2961c Mon Sep 17 00:00:00 2001 From: Christian Pellegrin Date: Thu, 14 Jan 2010 07:08:34 +0000 Subject: can: Proper ctrlmode handling for CAN devices This patch adds error checking of ctrlmode values for CAN devices. As an example all availabe bits are implemented in the mcp251x driver. Signed-off-by: Christian Pellegrin Acked-by: Wolfgang Grandegger Signed-off-by: David S. Miller --- drivers/net/can/at91_can.c | 1 + drivers/net/can/bfin_can.c | 1 + drivers/net/can/dev.c | 2 ++ drivers/net/can/mcp251x.c | 11 ++++++++++- drivers/net/can/mscan/mscan.c | 1 + drivers/net/can/sja1000/sja1000.c | 1 + drivers/net/can/ti_hecc.c | 1 + drivers/net/can/usb/ems_usb.c | 1 + include/linux/can/dev.h | 1 + 9 files changed, 19 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c index f7287497ba6..a2f29a38798 100644 --- a/drivers/net/can/at91_can.c +++ b/drivers/net/can/at91_can.c @@ -1073,6 +1073,7 @@ static int __init at91_can_probe(struct platform_device *pdev) priv->can.bittiming_const = &at91_bittiming_const; priv->can.do_set_bittiming = at91_set_bittiming; priv->can.do_set_mode = at91_set_mode; + priv->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES; priv->reg_base = addr; priv->dev = dev; priv->clk = clk; diff --git a/drivers/net/can/bfin_can.c b/drivers/net/can/bfin_can.c index 7e1926e79e9..bf7f9ba2d90 100644 --- a/drivers/net/can/bfin_can.c +++ b/drivers/net/can/bfin_can.c @@ -603,6 +603,7 @@ struct net_device *alloc_bfin_candev(void) priv->can.bittiming_const = &bfin_can_bittiming_const; priv->can.do_set_bittiming = bfin_can_set_bittiming; priv->can.do_set_mode = bfin_can_set_mode; + priv->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES; return dev; } diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index c1bb29f0322..f08f1202ff0 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -592,6 +592,8 @@ static int can_changelink(struct net_device *dev, if (dev->flags & IFF_UP) return -EBUSY; cm = nla_data(data[IFLA_CAN_CTRLMODE]); + if (cm->flags & ~priv->ctrlmode_supported) + return -EOPNOTSUPP; priv->ctrlmode &= ~cm->mask; priv->ctrlmode |= cm->flags; } diff --git a/drivers/net/can/mcp251x.c b/drivers/net/can/mcp251x.c index afa2fa45fed..bbe186b5a0e 100644 --- a/drivers/net/can/mcp251x.c +++ b/drivers/net/can/mcp251x.c @@ -539,9 +539,14 @@ static void mcp251x_set_normal_mode(struct spi_device *spi) if (priv->can.ctrlmode & CAN_CTRLMODE_LOOPBACK) { /* Put device into loopback mode */ mcp251x_write_reg(spi, CANCTRL, CANCTRL_REQOP_LOOPBACK); + } else if (priv->can.ctrlmode & CAN_CTRLMODE_LISTENONLY) { + /* Put device into listen-only mode */ + mcp251x_write_reg(spi, CANCTRL, CANCTRL_REQOP_LISTEN_ONLY); } else { /* Put device into normal mode */ - mcp251x_write_reg(spi, CANCTRL, CANCTRL_REQOP_NORMAL); + mcp251x_write_reg(spi, CANCTRL, CANCTRL_REQOP_NORMAL | + (priv->can.ctrlmode & CAN_CTRLMODE_ONE_SHOT ? + CANCTRL_OSM : 0)); /* Wait for the device to enter normal mode */ timeout = jiffies + HZ; @@ -948,6 +953,10 @@ static int __devinit mcp251x_can_probe(struct spi_device *spi) priv->can.bittiming_const = &mcp251x_bittiming_const; priv->can.do_set_mode = mcp251x_do_set_mode; priv->can.clock.freq = pdata->oscillator_frequency / 2; + priv->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES | + CAN_CTRLMODE_LOOPBACK | CAN_CTRLMODE_LISTENONLY; + if (pdata->model == CAN_MCP251X_MCP2515) + priv->can.ctrlmode_supported |= CAN_CTRLMODE_ONE_SHOT; priv->net = net; dev_set_drvdata(&spi->dev, priv); diff --git a/drivers/net/can/mscan/mscan.c b/drivers/net/can/mscan/mscan.c index 40827c128b6..6b7dd578d41 100644 --- a/drivers/net/can/mscan/mscan.c +++ b/drivers/net/can/mscan/mscan.c @@ -686,6 +686,7 @@ struct net_device *alloc_mscandev(void) priv->can.bittiming_const = &mscan_bittiming_const; priv->can.do_set_bittiming = mscan_do_set_bittiming; priv->can.do_set_mode = mscan_do_set_mode; + priv->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES; for (i = 0; i < TX_QUEUE_SIZE; i++) { priv->tx_queue[i].id = i; diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c index 345304d779b..ace103a4483 100644 --- a/drivers/net/can/sja1000/sja1000.c +++ b/drivers/net/can/sja1000/sja1000.c @@ -567,6 +567,7 @@ struct net_device *alloc_sja1000dev(int sizeof_priv) priv->can.bittiming_const = &sja1000_bittiming_const; priv->can.do_set_bittiming = sja1000_set_bittiming; priv->can.do_set_mode = sja1000_set_mode; + priv->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES; if (sizeof_priv) priv->priv = (void *)priv + sizeof(struct sja1000_priv); diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index 7d370e32a7a..8332e242b0b 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -909,6 +909,7 @@ static int ti_hecc_probe(struct platform_device *pdev) priv->can.bittiming_const = &ti_hecc_bittiming_const; priv->can.do_set_mode = ti_hecc_do_set_mode; priv->can.do_get_state = ti_hecc_get_state; + priv->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES; ndev->irq = irq->start; ndev->flags |= IFF_ECHO; diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index ddb17e25665..bfab283ba9b 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -1022,6 +1022,7 @@ static int ems_usb_probe(struct usb_interface *intf, dev->can.bittiming_const = &ems_usb_bittiming_const; dev->can.do_set_bittiming = ems_usb_set_bittiming; dev->can.do_set_mode = ems_usb_set_mode; + dev->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES; netdev->flags |= IFF_ECHO; /* we support local echo */ diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 7e7c98a3e90..c8c660a79f9 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -38,6 +38,7 @@ struct can_priv { enum can_state state; u32 ctrlmode; + u32 ctrlmode_supported; int restart_ms; struct timer_list restart_timer; -- cgit v1.2.3-70-g09d2 From 05c2828c72c4eabf62376adfe27bd24797621f62 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 14 Jan 2010 06:17:09 +0000 Subject: tun: export underlying socket Tun device looks similar to a packet socket in that both pass complete frames from/to userspace. This patch fills in enough fields in the socket underlying tun driver to support sendmsg/recvmsg operations, and message flags MSG_TRUNC and MSG_DONTWAIT, and exports access to this socket to modules. Regular read/write behaviour is unchanged. This way, code using raw sockets to inject packets into a physical device, can support injecting packets into host network stack almost without modification. First user of this interface will be vhost virtualization accelerator. Signed-off-by: Michael S. Tsirkin Acked-by: Herbert Xu Acked-by: David S. Miller Signed-off-by: David S. Miller --- drivers/net/tun.c | 101 +++++++++++++++++++++++++++++++++++++++---------- include/linux/if_tun.h | 14 +++++++ 2 files changed, 96 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 2834a01bae2..5adb3d15055 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -144,6 +144,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file) err = 0; tfile->tun = tun; tun->tfile = tfile; + tun->socket.file = file; dev_hold(tun->dev); sock_hold(tun->socket.sk); atomic_inc(&tfile->count); @@ -158,6 +159,7 @@ static void __tun_detach(struct tun_struct *tun) /* Detach from net device */ netif_tx_lock_bh(tun->dev); tun->tfile = NULL; + tun->socket.file = NULL; netif_tx_unlock_bh(tun->dev); /* Drop read queue */ @@ -387,7 +389,8 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) /* Notify and wake up reader process */ if (tun->flags & TUN_FASYNC) kill_fasync(&tun->fasync, SIGIO, POLL_IN); - wake_up_interruptible(&tun->socket.wait); + wake_up_interruptible_poll(&tun->socket.wait, POLLIN | + POLLRDNORM | POLLRDBAND); return NETDEV_TX_OK; drop: @@ -743,7 +746,7 @@ static __inline__ ssize_t tun_put_user(struct tun_struct *tun, len = min_t(int, skb->len, len); skb_copy_datagram_const_iovec(skb, 0, iv, total, len); - total += len; + total += skb->len; tun->dev->stats.tx_packets++; tun->dev->stats.tx_bytes += len; @@ -751,34 +754,23 @@ static __inline__ ssize_t tun_put_user(struct tun_struct *tun, return total; } -static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, - unsigned long count, loff_t pos) +static ssize_t tun_do_read(struct tun_struct *tun, + struct kiocb *iocb, const struct iovec *iv, + ssize_t len, int noblock) { - struct file *file = iocb->ki_filp; - struct tun_file *tfile = file->private_data; - struct tun_struct *tun = __tun_get(tfile); DECLARE_WAITQUEUE(wait, current); struct sk_buff *skb; - ssize_t len, ret = 0; - - if (!tun) - return -EBADFD; + ssize_t ret = 0; DBG(KERN_INFO "%s: tun_chr_read\n", tun->dev->name); - len = iov_length(iv, count); - if (len < 0) { - ret = -EINVAL; - goto out; - } - add_wait_queue(&tun->socket.wait, &wait); while (len) { current->state = TASK_INTERRUPTIBLE; /* Read frames from the queue */ if (!(skb=skb_dequeue(&tun->socket.sk->sk_receive_queue))) { - if (file->f_flags & O_NONBLOCK) { + if (noblock) { ret = -EAGAIN; break; } @@ -805,6 +797,27 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, current->state = TASK_RUNNING; remove_wait_queue(&tun->socket.wait, &wait); + return ret; +} + +static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, + unsigned long count, loff_t pos) +{ + struct file *file = iocb->ki_filp; + struct tun_file *tfile = file->private_data; + struct tun_struct *tun = __tun_get(tfile); + ssize_t len, ret; + + if (!tun) + return -EBADFD; + len = iov_length(iv, count); + if (len < 0) { + ret = -EINVAL; + goto out; + } + + ret = tun_do_read(tun, iocb, iv, len, file->f_flags & O_NONBLOCK); + ret = min_t(ssize_t, ret, len); out: tun_put(tun); return ret; @@ -847,7 +860,8 @@ static void tun_sock_write_space(struct sock *sk) return; if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible_sync(sk->sk_sleep); + wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT | + POLLWRNORM | POLLWRBAND); tun = tun_sk(sk)->tun; kill_fasync(&tun->fasync, SIGIO, POLL_OUT); @@ -858,6 +872,37 @@ static void tun_sock_destruct(struct sock *sk) free_netdev(tun_sk(sk)->tun->dev); } +static int tun_sendmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *m, size_t total_len) +{ + struct tun_struct *tun = container_of(sock, struct tun_struct, socket); + return tun_get_user(tun, m->msg_iov, total_len, + m->msg_flags & MSG_DONTWAIT); +} + +static int tun_recvmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *m, size_t total_len, + int flags) +{ + struct tun_struct *tun = container_of(sock, struct tun_struct, socket); + int ret; + if (flags & ~(MSG_DONTWAIT|MSG_TRUNC)) + return -EINVAL; + ret = tun_do_read(tun, iocb, m->msg_iov, total_len, + flags & MSG_DONTWAIT); + if (ret > total_len) { + m->msg_flags |= MSG_TRUNC; + ret = flags & MSG_TRUNC ? ret : total_len; + } + return ret; +} + +/* Ops structure to mimic raw sockets with tun */ +static const struct proto_ops tun_socket_ops = { + .sendmsg = tun_sendmsg, + .recvmsg = tun_recvmsg, +}; + static struct proto tun_proto = { .name = "tun", .owner = THIS_MODULE, @@ -986,6 +1031,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) goto err_free_dev; init_waitqueue_head(&tun->socket.wait); + tun->socket.ops = &tun_socket_ops; sock_init_data(&tun->socket, sk); sk->sk_write_space = tun_sock_write_space; sk->sk_sndbuf = INT_MAX; @@ -1525,6 +1571,23 @@ static void tun_cleanup(void) rtnl_link_unregister(&tun_link_ops); } +/* Get an underlying socket object from tun file. Returns error unless file is + * attached to a device. The returned object works like a packet socket, it + * can be used for sock_sendmsg/sock_recvmsg. The caller is responsible for + * holding a reference to the file for as long as the socket is in use. */ +struct socket *tun_get_socket(struct file *file) +{ + struct tun_struct *tun; + if (file->f_op != &tun_fops) + return ERR_PTR(-EINVAL); + tun = tun_get(file); + if (!tun) + return ERR_PTR(-EBADFD); + tun_put(tun); + return &tun->socket; +} +EXPORT_SYMBOL_GPL(tun_get_socket); + module_init(tun_init); module_exit(tun_cleanup); MODULE_DESCRIPTION(DRV_DESCRIPTION); diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h index 3f5fd523b49..404abe00162 100644 --- a/include/linux/if_tun.h +++ b/include/linux/if_tun.h @@ -86,4 +86,18 @@ struct tun_filter { __u8 addr[0][ETH_ALEN]; }; +#ifdef __KERNEL__ +#if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE) +struct socket *tun_get_socket(struct file *); +#else +#include +#include +struct file; +struct socket; +static inline struct socket *tun_get_socket(struct file *f) +{ + return ERR_PTR(-EINVAL); +} +#endif /* CONFIG_TUN */ +#endif /* __KERNEL__ */ #endif /* __IF_TUN_H */ -- cgit v1.2.3-70-g09d2 From 3a4d5c94e959359ece6d6b55045c3f046677f55c Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 14 Jan 2010 06:17:27 +0000 Subject: vhost_net: a kernel-level virtio server What it is: vhost net is a character device that can be used to reduce the number of system calls involved in virtio networking. Existing virtio net code is used in the guest without modification. There's similarity with vringfd, with some differences and reduced scope - uses eventfd for signalling - structures can be moved around in memory at any time (good for migration, bug work-arounds in userspace) - write logging is supported (good for migration) - support memory table and not just an offset (needed for kvm) common virtio related code has been put in a separate file vhost.c and can be made into a separate module if/when more backends appear. I used Rusty's lguest.c as the source for developing this part : this supplied me with witty comments I wouldn't be able to write myself. What it is not: vhost net is not a bus, and not a generic new system call. No assumptions are made on how guest performs hypercalls. Userspace hypervisors are supported as well as kvm. How it works: Basically, we connect virtio frontend (configured by userspace) to a backend. The backend could be a network device, or a tap device. Backend is also configured by userspace, including vlan/mac etc. Status: This works for me, and I haven't see any crashes. Compared to userspace, people reported improved latency (as I save up to 4 system calls per packet), as well as better bandwidth and CPU utilization. Features that I plan to look at in the future: - mergeable buffers - zero copy - scalability tuning: figure out the best threading model to use Note on RCU usage (this is also documented in vhost.h, near private_pointer which is the value protected by this variant of RCU): what is happening is that the rcu_dereference() is being used in a workqueue item. The role of rcu_read_lock() is taken on by the start of execution of the workqueue item, of rcu_read_unlock() by the end of execution of the workqueue item, and of synchronize_rcu() by flush_workqueue()/flush_work(). In the future we might need to apply some gcc attribute or sparse annotation to the function passed to INIT_WORK(). Paul's ack below is for this RCU usage. (Includes fixes by Alan Cox , David L Stevens , Chris Wright ) Acked-by: Rusty Russell Acked-by: Arnd Bergmann Acked-by: "Paul E. McKenney" Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- MAINTAINERS | 9 + arch/ia64/kvm/Kconfig | 1 + arch/powerpc/kvm/Kconfig | 1 + arch/s390/kvm/Kconfig | 1 + arch/x86/kvm/Kconfig | 1 + drivers/Makefile | 1 + drivers/vhost/Kconfig | 11 + drivers/vhost/Makefile | 2 + drivers/vhost/net.c | 661 ++++++++++++++++++++++++++ drivers/vhost/vhost.c | 1098 ++++++++++++++++++++++++++++++++++++++++++++ drivers/vhost/vhost.h | 161 +++++++ include/linux/Kbuild | 1 + include/linux/miscdevice.h | 1 + include/linux/vhost.h | 130 ++++++ 14 files changed, 2079 insertions(+) create mode 100644 drivers/vhost/Kconfig create mode 100644 drivers/vhost/Makefile create mode 100644 drivers/vhost/net.c create mode 100644 drivers/vhost/vhost.c create mode 100644 drivers/vhost/vhost.h create mode 100644 include/linux/vhost.h (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index 745643b8c34..337dffbe9a4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5803,6 +5803,15 @@ S: Maintained F: Documentation/filesystems/vfat.txt F: fs/fat/ +VIRTIO HOST (VHOST) +M: "Michael S. Tsirkin" +L: kvm@vger.kernel.org +L: virtualization@lists.osdl.org +L: netdev@vger.kernel.org +S: Maintained +F: drivers/vhost/ +F: include/linux/vhost.h + VIA RHINE NETWORK DRIVER M: Roger Luethi S: Maintained diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig index ef3e7be29ca..01c75797119 100644 --- a/arch/ia64/kvm/Kconfig +++ b/arch/ia64/kvm/Kconfig @@ -47,6 +47,7 @@ config KVM_INTEL Provides support for KVM on Itanium 2 processors equipped with the VT extensions. +source drivers/vhost/Kconfig source drivers/virtio/Kconfig endif # VIRTUALIZATION diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 07703f72330..e28841fbfb8 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -75,6 +75,7 @@ config KVM_E500 If unsure, say N. +source drivers/vhost/Kconfig source drivers/virtio/Kconfig endif # VIRTUALIZATION diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index 6ee55ae84ce..a7251580891 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -35,6 +35,7 @@ config KVM # OK, it's a little counter-intuitive to do this, but it puts it neatly under # the virtualization menu. +source drivers/vhost/Kconfig source drivers/virtio/Kconfig endif # VIRTUALIZATION diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 4cd49833246..3c4d0109ad2 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -65,6 +65,7 @@ config KVM_AMD # OK, it's a little counter-intuitive to do this, but it puts it neatly under # the virtualization menu. +source drivers/vhost/Kconfig source drivers/lguest/Kconfig source drivers/virtio/Kconfig diff --git a/drivers/Makefile b/drivers/Makefile index 6ee53c7a57a..81e36596b1e 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -106,6 +106,7 @@ obj-$(CONFIG_HID) += hid/ obj-$(CONFIG_PPC_PS3) += ps3/ obj-$(CONFIG_OF) += of/ obj-$(CONFIG_SSB) += ssb/ +obj-$(CONFIG_VHOST_NET) += vhost/ obj-$(CONFIG_VIRTIO) += virtio/ obj-$(CONFIG_VLYNQ) += vlynq/ obj-$(CONFIG_STAGING) += staging/ diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig new file mode 100644 index 00000000000..9f409f447ae --- /dev/null +++ b/drivers/vhost/Kconfig @@ -0,0 +1,11 @@ +config VHOST_NET + tristate "Host kernel accelerator for virtio net (EXPERIMENTAL)" + depends on NET && EVENTFD && EXPERIMENTAL + ---help--- + This kernel module can be loaded in host kernel to accelerate + guest networking with virtio_net. Not to be confused with virtio_net + module itself which needs to be loaded in guest kernel. + + To compile this driver as a module, choose M here: the module will + be called vhost_net. + diff --git a/drivers/vhost/Makefile b/drivers/vhost/Makefile new file mode 100644 index 00000000000..72dd02050bb --- /dev/null +++ b/drivers/vhost/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_VHOST_NET) += vhost_net.o +vhost_net-y := vhost.o net.o diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c new file mode 100644 index 00000000000..4c8928319e1 --- /dev/null +++ b/drivers/vhost/net.c @@ -0,0 +1,661 @@ +/* Copyright (C) 2009 Red Hat, Inc. + * Author: Michael S. Tsirkin + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * virtio-net server in host kernel. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include "vhost.h" + +/* Max number of bytes transferred before requeueing the job. + * Using this limit prevents one virtqueue from starving others. */ +#define VHOST_NET_WEIGHT 0x80000 + +enum { + VHOST_NET_VQ_RX = 0, + VHOST_NET_VQ_TX = 1, + VHOST_NET_VQ_MAX = 2, +}; + +enum vhost_net_poll_state { + VHOST_NET_POLL_DISABLED = 0, + VHOST_NET_POLL_STARTED = 1, + VHOST_NET_POLL_STOPPED = 2, +}; + +struct vhost_net { + struct vhost_dev dev; + struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; + struct vhost_poll poll[VHOST_NET_VQ_MAX]; + /* Tells us whether we are polling a socket for TX. + * We only do this when socket buffer fills up. + * Protected by tx vq lock. */ + enum vhost_net_poll_state tx_poll_state; +}; + +/* Pop first len bytes from iovec. Return number of segments used. */ +static int move_iovec_hdr(struct iovec *from, struct iovec *to, + size_t len, int iov_count) +{ + int seg = 0; + size_t size; + while (len && seg < iov_count) { + size = min(from->iov_len, len); + to->iov_base = from->iov_base; + to->iov_len = size; + from->iov_len -= size; + from->iov_base += size; + len -= size; + ++from; + ++to; + ++seg; + } + return seg; +} + +/* Caller must have TX VQ lock */ +static void tx_poll_stop(struct vhost_net *net) +{ + if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED)) + return; + vhost_poll_stop(net->poll + VHOST_NET_VQ_TX); + net->tx_poll_state = VHOST_NET_POLL_STOPPED; +} + +/* Caller must have TX VQ lock */ +static void tx_poll_start(struct vhost_net *net, struct socket *sock) +{ + if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED)) + return; + vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file); + net->tx_poll_state = VHOST_NET_POLL_STARTED; +} + +/* Expects to be always run from workqueue - which acts as + * read-size critical section for our kind of RCU. */ +static void handle_tx(struct vhost_net *net) +{ + struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_TX]; + unsigned head, out, in, s; + struct msghdr msg = { + .msg_name = NULL, + .msg_namelen = 0, + .msg_control = NULL, + .msg_controllen = 0, + .msg_iov = vq->iov, + .msg_flags = MSG_DONTWAIT, + }; + size_t len, total_len = 0; + int err, wmem; + size_t hdr_size; + struct socket *sock = rcu_dereference(vq->private_data); + if (!sock) + return; + + wmem = atomic_read(&sock->sk->sk_wmem_alloc); + if (wmem >= sock->sk->sk_sndbuf) + return; + + use_mm(net->dev.mm); + mutex_lock(&vq->mutex); + vhost_disable_notify(vq); + + if (wmem < sock->sk->sk_sndbuf * 2) + tx_poll_stop(net); + hdr_size = vq->hdr_size; + + for (;;) { + head = vhost_get_vq_desc(&net->dev, vq, vq->iov, + ARRAY_SIZE(vq->iov), + &out, &in, + NULL, NULL); + /* Nothing new? Wait for eventfd to tell us they refilled. */ + if (head == vq->num) { + wmem = atomic_read(&sock->sk->sk_wmem_alloc); + if (wmem >= sock->sk->sk_sndbuf * 3 / 4) { + tx_poll_start(net, sock); + set_bit(SOCK_ASYNC_NOSPACE, &sock->flags); + break; + } + if (unlikely(vhost_enable_notify(vq))) { + vhost_disable_notify(vq); + continue; + } + break; + } + if (in) { + vq_err(vq, "Unexpected descriptor format for TX: " + "out %d, int %d\n", out, in); + break; + } + /* Skip header. TODO: support TSO. */ + s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, out); + msg.msg_iovlen = out; + len = iov_length(vq->iov, out); + /* Sanity check */ + if (!len) { + vq_err(vq, "Unexpected header len for TX: " + "%zd expected %zd\n", + iov_length(vq->hdr, s), hdr_size); + break; + } + /* TODO: Check specific error and bomb out unless ENOBUFS? */ + err = sock->ops->sendmsg(NULL, sock, &msg, len); + if (unlikely(err < 0)) { + vhost_discard_vq_desc(vq); + tx_poll_start(net, sock); + break; + } + if (err != len) + pr_err("Truncated TX packet: " + " len %d != %zd\n", err, len); + vhost_add_used_and_signal(&net->dev, vq, head, 0); + total_len += len; + if (unlikely(total_len >= VHOST_NET_WEIGHT)) { + vhost_poll_queue(&vq->poll); + break; + } + } + + mutex_unlock(&vq->mutex); + unuse_mm(net->dev.mm); +} + +/* Expects to be always run from workqueue - which acts as + * read-size critical section for our kind of RCU. */ +static void handle_rx(struct vhost_net *net) +{ + struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX]; + unsigned head, out, in, log, s; + struct vhost_log *vq_log; + struct msghdr msg = { + .msg_name = NULL, + .msg_namelen = 0, + .msg_control = NULL, /* FIXME: get and handle RX aux data. */ + .msg_controllen = 0, + .msg_iov = vq->iov, + .msg_flags = MSG_DONTWAIT, + }; + + struct virtio_net_hdr hdr = { + .flags = 0, + .gso_type = VIRTIO_NET_HDR_GSO_NONE + }; + + size_t len, total_len = 0; + int err; + size_t hdr_size; + struct socket *sock = rcu_dereference(vq->private_data); + if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue)) + return; + + use_mm(net->dev.mm); + mutex_lock(&vq->mutex); + vhost_disable_notify(vq); + hdr_size = vq->hdr_size; + + vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ? + vq->log : NULL; + + for (;;) { + head = vhost_get_vq_desc(&net->dev, vq, vq->iov, + ARRAY_SIZE(vq->iov), + &out, &in, + vq_log, &log); + /* OK, now we need to know about added descriptors. */ + if (head == vq->num) { + if (unlikely(vhost_enable_notify(vq))) { + /* They have slipped one in as we were + * doing that: check again. */ + vhost_disable_notify(vq); + continue; + } + /* Nothing new? Wait for eventfd to tell us + * they refilled. */ + break; + } + /* We don't need to be notified again. */ + if (out) { + vq_err(vq, "Unexpected descriptor format for RX: " + "out %d, int %d\n", + out, in); + break; + } + /* Skip header. TODO: support TSO/mergeable rx buffers. */ + s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, in); + msg.msg_iovlen = in; + len = iov_length(vq->iov, in); + /* Sanity check */ + if (!len) { + vq_err(vq, "Unexpected header len for RX: " + "%zd expected %zd\n", + iov_length(vq->hdr, s), hdr_size); + break; + } + err = sock->ops->recvmsg(NULL, sock, &msg, + len, MSG_DONTWAIT | MSG_TRUNC); + /* TODO: Check specific error and bomb out unless EAGAIN? */ + if (err < 0) { + vhost_discard_vq_desc(vq); + break; + } + /* TODO: Should check and handle checksum. */ + if (err > len) { + pr_err("Discarded truncated rx packet: " + " len %d > %zd\n", err, len); + vhost_discard_vq_desc(vq); + continue; + } + len = err; + err = memcpy_toiovec(vq->hdr, (unsigned char *)&hdr, hdr_size); + if (err) { + vq_err(vq, "Unable to write vnet_hdr at addr %p: %d\n", + vq->iov->iov_base, err); + break; + } + len += hdr_size; + vhost_add_used_and_signal(&net->dev, vq, head, len); + if (unlikely(vq_log)) + vhost_log_write(vq, vq_log, log, len); + total_len += len; + if (unlikely(total_len >= VHOST_NET_WEIGHT)) { + vhost_poll_queue(&vq->poll); + break; + } + } + + mutex_unlock(&vq->mutex); + unuse_mm(net->dev.mm); +} + +static void handle_tx_kick(struct work_struct *work) +{ + struct vhost_virtqueue *vq; + struct vhost_net *net; + vq = container_of(work, struct vhost_virtqueue, poll.work); + net = container_of(vq->dev, struct vhost_net, dev); + handle_tx(net); +} + +static void handle_rx_kick(struct work_struct *work) +{ + struct vhost_virtqueue *vq; + struct vhost_net *net; + vq = container_of(work, struct vhost_virtqueue, poll.work); + net = container_of(vq->dev, struct vhost_net, dev); + handle_rx(net); +} + +static void handle_tx_net(struct work_struct *work) +{ + struct vhost_net *net; + net = container_of(work, struct vhost_net, poll[VHOST_NET_VQ_TX].work); + handle_tx(net); +} + +static void handle_rx_net(struct work_struct *work) +{ + struct vhost_net *net; + net = container_of(work, struct vhost_net, poll[VHOST_NET_VQ_RX].work); + handle_rx(net); +} + +static int vhost_net_open(struct inode *inode, struct file *f) +{ + struct vhost_net *n = kmalloc(sizeof *n, GFP_KERNEL); + int r; + if (!n) + return -ENOMEM; + n->vqs[VHOST_NET_VQ_TX].handle_kick = handle_tx_kick; + n->vqs[VHOST_NET_VQ_RX].handle_kick = handle_rx_kick; + r = vhost_dev_init(&n->dev, n->vqs, VHOST_NET_VQ_MAX); + if (r < 0) { + kfree(n); + return r; + } + + vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT); + vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN); + n->tx_poll_state = VHOST_NET_POLL_DISABLED; + + f->private_data = n; + + return 0; +} + +static void vhost_net_disable_vq(struct vhost_net *n, + struct vhost_virtqueue *vq) +{ + if (!vq->private_data) + return; + if (vq == n->vqs + VHOST_NET_VQ_TX) { + tx_poll_stop(n); + n->tx_poll_state = VHOST_NET_POLL_DISABLED; + } else + vhost_poll_stop(n->poll + VHOST_NET_VQ_RX); +} + +static void vhost_net_enable_vq(struct vhost_net *n, + struct vhost_virtqueue *vq) +{ + struct socket *sock = vq->private_data; + if (!sock) + return; + if (vq == n->vqs + VHOST_NET_VQ_TX) { + n->tx_poll_state = VHOST_NET_POLL_STOPPED; + tx_poll_start(n, sock); + } else + vhost_poll_start(n->poll + VHOST_NET_VQ_RX, sock->file); +} + +static struct socket *vhost_net_stop_vq(struct vhost_net *n, + struct vhost_virtqueue *vq) +{ + struct socket *sock; + + mutex_lock(&vq->mutex); + sock = vq->private_data; + vhost_net_disable_vq(n, vq); + rcu_assign_pointer(vq->private_data, NULL); + mutex_unlock(&vq->mutex); + return sock; +} + +static void vhost_net_stop(struct vhost_net *n, struct socket **tx_sock, + struct socket **rx_sock) +{ + *tx_sock = vhost_net_stop_vq(n, n->vqs + VHOST_NET_VQ_TX); + *rx_sock = vhost_net_stop_vq(n, n->vqs + VHOST_NET_VQ_RX); +} + +static void vhost_net_flush_vq(struct vhost_net *n, int index) +{ + vhost_poll_flush(n->poll + index); + vhost_poll_flush(&n->dev.vqs[index].poll); +} + +static void vhost_net_flush(struct vhost_net *n) +{ + vhost_net_flush_vq(n, VHOST_NET_VQ_TX); + vhost_net_flush_vq(n, VHOST_NET_VQ_RX); +} + +static int vhost_net_release(struct inode *inode, struct file *f) +{ + struct vhost_net *n = f->private_data; + struct socket *tx_sock; + struct socket *rx_sock; + + vhost_net_stop(n, &tx_sock, &rx_sock); + vhost_net_flush(n); + vhost_dev_cleanup(&n->dev); + if (tx_sock) + fput(tx_sock->file); + if (rx_sock) + fput(rx_sock->file); + /* We do an extra flush before freeing memory, + * since jobs can re-queue themselves. */ + vhost_net_flush(n); + kfree(n); + return 0; +} + +static struct socket *get_raw_socket(int fd) +{ + struct { + struct sockaddr_ll sa; + char buf[MAX_ADDR_LEN]; + } uaddr; + int uaddr_len = sizeof uaddr, r; + struct socket *sock = sockfd_lookup(fd, &r); + if (!sock) + return ERR_PTR(-ENOTSOCK); + + /* Parameter checking */ + if (sock->sk->sk_type != SOCK_RAW) { + r = -ESOCKTNOSUPPORT; + goto err; + } + + r = sock->ops->getname(sock, (struct sockaddr *)&uaddr.sa, + &uaddr_len, 0); + if (r) + goto err; + + if (uaddr.sa.sll_family != AF_PACKET) { + r = -EPFNOSUPPORT; + goto err; + } + return sock; +err: + fput(sock->file); + return ERR_PTR(r); +} + +static struct socket *get_tun_socket(int fd) +{ + struct file *file = fget(fd); + struct socket *sock; + if (!file) + return ERR_PTR(-EBADF); + sock = tun_get_socket(file); + if (IS_ERR(sock)) + fput(file); + return sock; +} + +static struct socket *get_socket(int fd) +{ + struct socket *sock; + /* special case to disable backend */ + if (fd == -1) + return NULL; + sock = get_raw_socket(fd); + if (!IS_ERR(sock)) + return sock; + sock = get_tun_socket(fd); + if (!IS_ERR(sock)) + return sock; + return ERR_PTR(-ENOTSOCK); +} + +static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) +{ + struct socket *sock, *oldsock; + struct vhost_virtqueue *vq; + int r; + + mutex_lock(&n->dev.mutex); + r = vhost_dev_check_owner(&n->dev); + if (r) + goto err; + + if (index >= VHOST_NET_VQ_MAX) { + r = -ENOBUFS; + goto err; + } + vq = n->vqs + index; + mutex_lock(&vq->mutex); + + /* Verify that ring has been setup correctly. */ + if (!vhost_vq_access_ok(vq)) { + r = -EFAULT; + goto err; + } + sock = get_socket(fd); + if (IS_ERR(sock)) { + r = PTR_ERR(sock); + goto err; + } + + /* start polling new socket */ + oldsock = vq->private_data; + if (sock == oldsock) + goto done; + + vhost_net_disable_vq(n, vq); + rcu_assign_pointer(vq->private_data, sock); + vhost_net_enable_vq(n, vq); + mutex_unlock(&vq->mutex); +done: + if (oldsock) { + vhost_net_flush_vq(n, index); + fput(oldsock->file); + } +err: + mutex_unlock(&n->dev.mutex); + return r; +} + +static long vhost_net_reset_owner(struct vhost_net *n) +{ + struct socket *tx_sock = NULL; + struct socket *rx_sock = NULL; + long err; + mutex_lock(&n->dev.mutex); + err = vhost_dev_check_owner(&n->dev); + if (err) + goto done; + vhost_net_stop(n, &tx_sock, &rx_sock); + vhost_net_flush(n); + err = vhost_dev_reset_owner(&n->dev); +done: + mutex_unlock(&n->dev.mutex); + if (tx_sock) + fput(tx_sock->file); + if (rx_sock) + fput(rx_sock->file); + return err; +} + +static int vhost_net_set_features(struct vhost_net *n, u64 features) +{ + size_t hdr_size = features & (1 << VHOST_NET_F_VIRTIO_NET_HDR) ? + sizeof(struct virtio_net_hdr) : 0; + int i; + mutex_lock(&n->dev.mutex); + if ((features & (1 << VHOST_F_LOG_ALL)) && + !vhost_log_access_ok(&n->dev)) { + mutex_unlock(&n->dev.mutex); + return -EFAULT; + } + n->dev.acked_features = features; + smp_wmb(); + for (i = 0; i < VHOST_NET_VQ_MAX; ++i) { + mutex_lock(&n->vqs[i].mutex); + n->vqs[i].hdr_size = hdr_size; + mutex_unlock(&n->vqs[i].mutex); + } + vhost_net_flush(n); + mutex_unlock(&n->dev.mutex); + return 0; +} + +static long vhost_net_ioctl(struct file *f, unsigned int ioctl, + unsigned long arg) +{ + struct vhost_net *n = f->private_data; + void __user *argp = (void __user *)arg; + u64 __user *featurep = argp; + struct vhost_vring_file backend; + u64 features; + int r; + switch (ioctl) { + case VHOST_NET_SET_BACKEND: + r = copy_from_user(&backend, argp, sizeof backend); + if (r < 0) + return r; + return vhost_net_set_backend(n, backend.index, backend.fd); + case VHOST_GET_FEATURES: + features = VHOST_FEATURES; + return copy_to_user(featurep, &features, sizeof features); + case VHOST_SET_FEATURES: + r = copy_from_user(&features, featurep, sizeof features); + if (r < 0) + return r; + if (features & ~VHOST_FEATURES) + return -EOPNOTSUPP; + return vhost_net_set_features(n, features); + case VHOST_RESET_OWNER: + return vhost_net_reset_owner(n); + default: + mutex_lock(&n->dev.mutex); + r = vhost_dev_ioctl(&n->dev, ioctl, arg); + vhost_net_flush(n); + mutex_unlock(&n->dev.mutex); + return r; + } +} + +#ifdef CONFIG_COMPAT +static long vhost_net_compat_ioctl(struct file *f, unsigned int ioctl, + unsigned long arg) +{ + return vhost_net_ioctl(f, ioctl, (unsigned long)compat_ptr(arg)); +} +#endif + +const static struct file_operations vhost_net_fops = { + .owner = THIS_MODULE, + .release = vhost_net_release, + .unlocked_ioctl = vhost_net_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = vhost_net_compat_ioctl, +#endif + .open = vhost_net_open, +}; + +static struct miscdevice vhost_net_misc = { + VHOST_NET_MINOR, + "vhost-net", + &vhost_net_fops, +}; + +int vhost_net_init(void) +{ + int r = vhost_init(); + if (r) + goto err_init; + r = misc_register(&vhost_net_misc); + if (r) + goto err_reg; + return 0; +err_reg: + vhost_cleanup(); +err_init: + return r; + +} +module_init(vhost_net_init); + +void vhost_net_exit(void) +{ + misc_deregister(&vhost_net_misc); + vhost_cleanup(); +} +module_exit(vhost_net_exit); + +MODULE_VERSION("0.0.1"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Michael S. Tsirkin"); +MODULE_DESCRIPTION("Host kernel accelerator for virtio net"); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c new file mode 100644 index 00000000000..c8c25dbc585 --- /dev/null +++ b/drivers/vhost/vhost.c @@ -0,0 +1,1098 @@ +/* Copyright (C) 2009 Red Hat, Inc. + * Copyright (C) 2006 Rusty Russell IBM Corporation + * + * Author: Michael S. Tsirkin + * + * Inspiration, some code, and most witty comments come from + * Documentation/lguest/lguest.c, by Rusty Russell + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * Generic code for virtio server in host kernel. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include "vhost.h" + +enum { + VHOST_MEMORY_MAX_NREGIONS = 64, + VHOST_MEMORY_F_LOG = 0x1, +}; + +static struct workqueue_struct *vhost_workqueue; + +static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh, + poll_table *pt) +{ + struct vhost_poll *poll; + poll = container_of(pt, struct vhost_poll, table); + + poll->wqh = wqh; + add_wait_queue(wqh, &poll->wait); +} + +static int vhost_poll_wakeup(wait_queue_t *wait, unsigned mode, int sync, + void *key) +{ + struct vhost_poll *poll; + poll = container_of(wait, struct vhost_poll, wait); + if (!((unsigned long)key & poll->mask)) + return 0; + + queue_work(vhost_workqueue, &poll->work); + return 0; +} + +/* Init poll structure */ +void vhost_poll_init(struct vhost_poll *poll, work_func_t func, + unsigned long mask) +{ + INIT_WORK(&poll->work, func); + init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup); + init_poll_funcptr(&poll->table, vhost_poll_func); + poll->mask = mask; +} + +/* Start polling a file. We add ourselves to file's wait queue. The caller must + * keep a reference to a file until after vhost_poll_stop is called. */ +void vhost_poll_start(struct vhost_poll *poll, struct file *file) +{ + unsigned long mask; + mask = file->f_op->poll(file, &poll->table); + if (mask) + vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask); +} + +/* Stop polling a file. After this function returns, it becomes safe to drop the + * file reference. You must also flush afterwards. */ +void vhost_poll_stop(struct vhost_poll *poll) +{ + remove_wait_queue(poll->wqh, &poll->wait); +} + +/* Flush any work that has been scheduled. When calling this, don't hold any + * locks that are also used by the callback. */ +void vhost_poll_flush(struct vhost_poll *poll) +{ + flush_work(&poll->work); +} + +void vhost_poll_queue(struct vhost_poll *poll) +{ + queue_work(vhost_workqueue, &poll->work); +} + +static void vhost_vq_reset(struct vhost_dev *dev, + struct vhost_virtqueue *vq) +{ + vq->num = 1; + vq->desc = NULL; + vq->avail = NULL; + vq->used = NULL; + vq->last_avail_idx = 0; + vq->avail_idx = 0; + vq->last_used_idx = 0; + vq->used_flags = 0; + vq->used_flags = 0; + vq->log_used = false; + vq->log_addr = -1ull; + vq->hdr_size = 0; + vq->private_data = NULL; + vq->log_base = NULL; + vq->error_ctx = NULL; + vq->error = NULL; + vq->kick = NULL; + vq->call_ctx = NULL; + vq->call = NULL; +} + +long vhost_dev_init(struct vhost_dev *dev, + struct vhost_virtqueue *vqs, int nvqs) +{ + int i; + dev->vqs = vqs; + dev->nvqs = nvqs; + mutex_init(&dev->mutex); + dev->log_ctx = NULL; + dev->log_file = NULL; + dev->memory = NULL; + dev->mm = NULL; + + for (i = 0; i < dev->nvqs; ++i) { + dev->vqs[i].dev = dev; + mutex_init(&dev->vqs[i].mutex); + vhost_vq_reset(dev, dev->vqs + i); + if (dev->vqs[i].handle_kick) + vhost_poll_init(&dev->vqs[i].poll, + dev->vqs[i].handle_kick, + POLLIN); + } + return 0; +} + +/* Caller should have device mutex */ +long vhost_dev_check_owner(struct vhost_dev *dev) +{ + /* Are you the owner? If not, I don't think you mean to do that */ + return dev->mm == current->mm ? 0 : -EPERM; +} + +/* Caller should have device mutex */ +static long vhost_dev_set_owner(struct vhost_dev *dev) +{ + /* Is there an owner already? */ + if (dev->mm) + return -EBUSY; + /* No owner, become one */ + dev->mm = get_task_mm(current); + return 0; +} + +/* Caller should have device mutex */ +long vhost_dev_reset_owner(struct vhost_dev *dev) +{ + struct vhost_memory *memory; + + /* Restore memory to default empty mapping. */ + memory = kmalloc(offsetof(struct vhost_memory, regions), GFP_KERNEL); + if (!memory) + return -ENOMEM; + + vhost_dev_cleanup(dev); + + memory->nregions = 0; + dev->memory = memory; + return 0; +} + +/* Caller should have device mutex */ +void vhost_dev_cleanup(struct vhost_dev *dev) +{ + int i; + for (i = 0; i < dev->nvqs; ++i) { + if (dev->vqs[i].kick && dev->vqs[i].handle_kick) { + vhost_poll_stop(&dev->vqs[i].poll); + vhost_poll_flush(&dev->vqs[i].poll); + } + if (dev->vqs[i].error_ctx) + eventfd_ctx_put(dev->vqs[i].error_ctx); + if (dev->vqs[i].error) + fput(dev->vqs[i].error); + if (dev->vqs[i].kick) + fput(dev->vqs[i].kick); + if (dev->vqs[i].call_ctx) + eventfd_ctx_put(dev->vqs[i].call_ctx); + if (dev->vqs[i].call) + fput(dev->vqs[i].call); + vhost_vq_reset(dev, dev->vqs + i); + } + if (dev->log_ctx) + eventfd_ctx_put(dev->log_ctx); + dev->log_ctx = NULL; + if (dev->log_file) + fput(dev->log_file); + dev->log_file = NULL; + /* No one will access memory at this point */ + kfree(dev->memory); + dev->memory = NULL; + if (dev->mm) + mmput(dev->mm); + dev->mm = NULL; +} + +static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz) +{ + u64 a = addr / VHOST_PAGE_SIZE / 8; + /* Make sure 64 bit math will not overflow. */ + if (a > ULONG_MAX - (unsigned long)log_base || + a + (unsigned long)log_base > ULONG_MAX) + return -EFAULT; + + return access_ok(VERIFY_WRITE, log_base + a, + (sz + VHOST_PAGE_SIZE * 8 - 1) / VHOST_PAGE_SIZE / 8); +} + +/* Caller should have vq mutex and device mutex. */ +static int vq_memory_access_ok(void __user *log_base, struct vhost_memory *mem, + int log_all) +{ + int i; + for (i = 0; i < mem->nregions; ++i) { + struct vhost_memory_region *m = mem->regions + i; + unsigned long a = m->userspace_addr; + if (m->memory_size > ULONG_MAX) + return 0; + else if (!access_ok(VERIFY_WRITE, (void __user *)a, + m->memory_size)) + return 0; + else if (log_all && !log_access_ok(log_base, + m->guest_phys_addr, + m->memory_size)) + return 0; + } + return 1; +} + +/* Can we switch to this memory table? */ +/* Caller should have device mutex but not vq mutex */ +static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem, + int log_all) +{ + int i; + for (i = 0; i < d->nvqs; ++i) { + int ok; + mutex_lock(&d->vqs[i].mutex); + /* If ring is inactive, will check when it's enabled. */ + if (d->vqs[i].private_data) + ok = vq_memory_access_ok(d->vqs[i].log_base, mem, + log_all); + else + ok = 1; + mutex_unlock(&d->vqs[i].mutex); + if (!ok) + return 0; + } + return 1; +} + +static int vq_access_ok(unsigned int num, + struct vring_desc __user *desc, + struct vring_avail __user *avail, + struct vring_used __user *used) +{ + return access_ok(VERIFY_READ, desc, num * sizeof *desc) && + access_ok(VERIFY_READ, avail, + sizeof *avail + num * sizeof *avail->ring) && + access_ok(VERIFY_WRITE, used, + sizeof *used + num * sizeof *used->ring); +} + +/* Can we log writes? */ +/* Caller should have device mutex but not vq mutex */ +int vhost_log_access_ok(struct vhost_dev *dev) +{ + return memory_access_ok(dev, dev->memory, 1); +} + +/* Verify access for write logging. */ +/* Caller should have vq mutex and device mutex */ +static int vq_log_access_ok(struct vhost_virtqueue *vq, void __user *log_base) +{ + return vq_memory_access_ok(log_base, vq->dev->memory, + vhost_has_feature(vq->dev, VHOST_F_LOG_ALL)) && + (!vq->log_used || log_access_ok(log_base, vq->log_addr, + sizeof *vq->used + + vq->num * sizeof *vq->used->ring)); +} + +/* Can we start vq? */ +/* Caller should have vq mutex and device mutex */ +int vhost_vq_access_ok(struct vhost_virtqueue *vq) +{ + return vq_access_ok(vq->num, vq->desc, vq->avail, vq->used) && + vq_log_access_ok(vq, vq->log_base); +} + +static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) +{ + struct vhost_memory mem, *newmem, *oldmem; + unsigned long size = offsetof(struct vhost_memory, regions); + long r; + r = copy_from_user(&mem, m, size); + if (r) + return r; + if (mem.padding) + return -EOPNOTSUPP; + if (mem.nregions > VHOST_MEMORY_MAX_NREGIONS) + return -E2BIG; + newmem = kmalloc(size + mem.nregions * sizeof *m->regions, GFP_KERNEL); + if (!newmem) + return -ENOMEM; + + memcpy(newmem, &mem, size); + r = copy_from_user(newmem->regions, m->regions, + mem.nregions * sizeof *m->regions); + if (r) { + kfree(newmem); + return r; + } + + if (!memory_access_ok(d, newmem, vhost_has_feature(d, VHOST_F_LOG_ALL))) + return -EFAULT; + oldmem = d->memory; + rcu_assign_pointer(d->memory, newmem); + synchronize_rcu(); + kfree(oldmem); + return 0; +} + +static int init_used(struct vhost_virtqueue *vq, + struct vring_used __user *used) +{ + int r = put_user(vq->used_flags, &used->flags); + if (r) + return r; + return get_user(vq->last_used_idx, &used->idx); +} + +static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp) +{ + struct file *eventfp, *filep = NULL, + *pollstart = NULL, *pollstop = NULL; + struct eventfd_ctx *ctx = NULL; + u32 __user *idxp = argp; + struct vhost_virtqueue *vq; + struct vhost_vring_state s; + struct vhost_vring_file f; + struct vhost_vring_addr a; + u32 idx; + long r; + + r = get_user(idx, idxp); + if (r < 0) + return r; + if (idx > d->nvqs) + return -ENOBUFS; + + vq = d->vqs + idx; + + mutex_lock(&vq->mutex); + + switch (ioctl) { + case VHOST_SET_VRING_NUM: + /* Resizing ring with an active backend? + * You don't want to do that. */ + if (vq->private_data) { + r = -EBUSY; + break; + } + r = copy_from_user(&s, argp, sizeof s); + if (r < 0) + break; + if (!s.num || s.num > 0xffff || (s.num & (s.num - 1))) { + r = -EINVAL; + break; + } + vq->num = s.num; + break; + case VHOST_SET_VRING_BASE: + /* Moving base with an active backend? + * You don't want to do that. */ + if (vq->private_data) { + r = -EBUSY; + break; + } + r = copy_from_user(&s, argp, sizeof s); + if (r < 0) + break; + if (s.num > 0xffff) { + r = -EINVAL; + break; + } + vq->last_avail_idx = s.num; + /* Forget the cached index value. */ + vq->avail_idx = vq->last_avail_idx; + break; + case VHOST_GET_VRING_BASE: + s.index = idx; + s.num = vq->last_avail_idx; + r = copy_to_user(argp, &s, sizeof s); + break; + case VHOST_SET_VRING_ADDR: + r = copy_from_user(&a, argp, sizeof a); + if (r < 0) + break; + if (a.flags & ~(0x1 << VHOST_VRING_F_LOG)) { + r = -EOPNOTSUPP; + break; + } + /* For 32bit, verify that the top 32bits of the user + data are set to zero. */ + if ((u64)(unsigned long)a.desc_user_addr != a.desc_user_addr || + (u64)(unsigned long)a.used_user_addr != a.used_user_addr || + (u64)(unsigned long)a.avail_user_addr != a.avail_user_addr) { + r = -EFAULT; + break; + } + if ((a.avail_user_addr & (sizeof *vq->avail->ring - 1)) || + (a.used_user_addr & (sizeof *vq->used->ring - 1)) || + (a.log_guest_addr & (sizeof *vq->used->ring - 1))) { + r = -EINVAL; + break; + } + + /* We only verify access here if backend is configured. + * If it is not, we don't as size might not have been setup. + * We will verify when backend is configured. */ + if (vq->private_data) { + if (!vq_access_ok(vq->num, + (void __user *)(unsigned long)a.desc_user_addr, + (void __user *)(unsigned long)a.avail_user_addr, + (void __user *)(unsigned long)a.used_user_addr)) { + r = -EINVAL; + break; + } + + /* Also validate log access for used ring if enabled. */ + if ((a.flags & (0x1 << VHOST_VRING_F_LOG)) && + !log_access_ok(vq->log_base, a.log_guest_addr, + sizeof *vq->used + + vq->num * sizeof *vq->used->ring)) { + r = -EINVAL; + break; + } + } + + r = init_used(vq, (struct vring_used __user *)(unsigned long) + a.used_user_addr); + if (r) + break; + vq->log_used = !!(a.flags & (0x1 << VHOST_VRING_F_LOG)); + vq->desc = (void __user *)(unsigned long)a.desc_user_addr; + vq->avail = (void __user *)(unsigned long)a.avail_user_addr; + vq->log_addr = a.log_guest_addr; + vq->used = (void __user *)(unsigned long)a.used_user_addr; + break; + case VHOST_SET_VRING_KICK: + r = copy_from_user(&f, argp, sizeof f); + if (r < 0) + break; + eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd); + if (IS_ERR(eventfp)) + return PTR_ERR(eventfp); + if (eventfp != vq->kick) { + pollstop = filep = vq->kick; + pollstart = vq->kick = eventfp; + } else + filep = eventfp; + break; + case VHOST_SET_VRING_CALL: + r = copy_from_user(&f, argp, sizeof f); + if (r < 0) + break; + eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd); + if (IS_ERR(eventfp)) + return PTR_ERR(eventfp); + if (eventfp != vq->call) { + filep = vq->call; + ctx = vq->call_ctx; + vq->call = eventfp; + vq->call_ctx = eventfp ? + eventfd_ctx_fileget(eventfp) : NULL; + } else + filep = eventfp; + break; + case VHOST_SET_VRING_ERR: + r = copy_from_user(&f, argp, sizeof f); + if (r < 0) + break; + eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd); + if (IS_ERR(eventfp)) + return PTR_ERR(eventfp); + if (eventfp != vq->error) { + filep = vq->error; + vq->error = eventfp; + ctx = vq->error_ctx; + vq->error_ctx = eventfp ? + eventfd_ctx_fileget(eventfp) : NULL; + } else + filep = eventfp; + break; + default: + r = -ENOIOCTLCMD; + } + + if (pollstop && vq->handle_kick) + vhost_poll_stop(&vq->poll); + + if (ctx) + eventfd_ctx_put(ctx); + if (filep) + fput(filep); + + if (pollstart && vq->handle_kick) + vhost_poll_start(&vq->poll, vq->kick); + + mutex_unlock(&vq->mutex); + + if (pollstop && vq->handle_kick) + vhost_poll_flush(&vq->poll); + return r; +} + +/* Caller must have device mutex */ +long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, unsigned long arg) +{ + void __user *argp = (void __user *)arg; + struct file *eventfp, *filep = NULL; + struct eventfd_ctx *ctx = NULL; + u64 p; + long r; + int i, fd; + + /* If you are not the owner, you can become one */ + if (ioctl == VHOST_SET_OWNER) { + r = vhost_dev_set_owner(d); + goto done; + } + + /* You must be the owner to do anything else */ + r = vhost_dev_check_owner(d); + if (r) + goto done; + + switch (ioctl) { + case VHOST_SET_MEM_TABLE: + r = vhost_set_memory(d, argp); + break; + case VHOST_SET_LOG_BASE: + r = copy_from_user(&p, argp, sizeof p); + if (r < 0) + break; + if ((u64)(unsigned long)p != p) { + r = -EFAULT; + break; + } + for (i = 0; i < d->nvqs; ++i) { + struct vhost_virtqueue *vq; + void __user *base = (void __user *)(unsigned long)p; + vq = d->vqs + i; + mutex_lock(&vq->mutex); + /* If ring is inactive, will check when it's enabled. */ + if (vq->private_data && !vq_log_access_ok(vq, base)) + r = -EFAULT; + else + vq->log_base = base; + mutex_unlock(&vq->mutex); + } + break; + case VHOST_SET_LOG_FD: + r = get_user(fd, (int __user *)argp); + if (r < 0) + break; + eventfp = fd == -1 ? NULL : eventfd_fget(fd); + if (IS_ERR(eventfp)) { + r = PTR_ERR(eventfp); + break; + } + if (eventfp != d->log_file) { + filep = d->log_file; + ctx = d->log_ctx; + d->log_ctx = eventfp ? + eventfd_ctx_fileget(eventfp) : NULL; + } else + filep = eventfp; + for (i = 0; i < d->nvqs; ++i) { + mutex_lock(&d->vqs[i].mutex); + d->vqs[i].log_ctx = d->log_ctx; + mutex_unlock(&d->vqs[i].mutex); + } + if (ctx) + eventfd_ctx_put(ctx); + if (filep) + fput(filep); + break; + default: + r = vhost_set_vring(d, ioctl, argp); + break; + } +done: + return r; +} + +static const struct vhost_memory_region *find_region(struct vhost_memory *mem, + __u64 addr, __u32 len) +{ + struct vhost_memory_region *reg; + int i; + /* linear search is not brilliant, but we really have on the order of 6 + * regions in practice */ + for (i = 0; i < mem->nregions; ++i) { + reg = mem->regions + i; + if (reg->guest_phys_addr <= addr && + reg->guest_phys_addr + reg->memory_size - 1 >= addr) + return reg; + } + return NULL; +} + +/* TODO: This is really inefficient. We need something like get_user() + * (instruction directly accesses the data, with an exception table entry + * returning -EFAULT). See Documentation/x86/exception-tables.txt. + */ +static int set_bit_to_user(int nr, void __user *addr) +{ + unsigned long log = (unsigned long)addr; + struct page *page; + void *base; + int bit = nr + (log % PAGE_SIZE) * 8; + int r; + r = get_user_pages_fast(log, 1, 1, &page); + if (r) + return r; + base = kmap_atomic(page, KM_USER0); + set_bit(bit, base); + kunmap_atomic(base, KM_USER0); + set_page_dirty_lock(page); + put_page(page); + return 0; +} + +static int log_write(void __user *log_base, + u64 write_address, u64 write_length) +{ + int r; + if (!write_length) + return 0; + write_address /= VHOST_PAGE_SIZE; + for (;;) { + u64 base = (u64)(unsigned long)log_base; + u64 log = base + write_address / 8; + int bit = write_address % 8; + if ((u64)(unsigned long)log != log) + return -EFAULT; + r = set_bit_to_user(bit, (void __user *)(unsigned long)log); + if (r < 0) + return r; + if (write_length <= VHOST_PAGE_SIZE) + break; + write_length -= VHOST_PAGE_SIZE; + write_address += VHOST_PAGE_SIZE; + } + return r; +} + +int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, + unsigned int log_num, u64 len) +{ + int i, r; + + /* Make sure data written is seen before log. */ + wmb(); + for (i = 0; i < log_num; ++i) { + u64 l = min(log[i].len, len); + r = log_write(vq->log_base, log[i].addr, l); + if (r < 0) + return r; + len -= l; + if (!len) + return 0; + } + if (vq->log_ctx) + eventfd_signal(vq->log_ctx, 1); + /* Length written exceeds what we have stored. This is a bug. */ + BUG(); + return 0; +} + +int translate_desc(struct vhost_dev *dev, u64 addr, u32 len, + struct iovec iov[], int iov_size) +{ + const struct vhost_memory_region *reg; + struct vhost_memory *mem; + struct iovec *_iov; + u64 s = 0; + int ret = 0; + + rcu_read_lock(); + + mem = rcu_dereference(dev->memory); + while ((u64)len > s) { + u64 size; + if (ret >= iov_size) { + ret = -ENOBUFS; + break; + } + reg = find_region(mem, addr, len); + if (!reg) { + ret = -EFAULT; + break; + } + _iov = iov + ret; + size = reg->memory_size - addr + reg->guest_phys_addr; + _iov->iov_len = min((u64)len, size); + _iov->iov_base = (void *)(unsigned long) + (reg->userspace_addr + addr - reg->guest_phys_addr); + s += size; + addr += size; + ++ret; + } + + rcu_read_unlock(); + return ret; +} + +/* Each buffer in the virtqueues is actually a chain of descriptors. This + * function returns the next descriptor in the chain, + * or -1U if we're at the end. */ +static unsigned next_desc(struct vring_desc *desc) +{ + unsigned int next; + + /* If this descriptor says it doesn't chain, we're done. */ + if (!(desc->flags & VRING_DESC_F_NEXT)) + return -1U; + + /* Check they're not leading us off end of descriptors. */ + next = desc->next; + /* Make sure compiler knows to grab that: we don't want it changing! */ + /* We will use the result as an index in an array, so most + * architectures only need a compiler barrier here. */ + read_barrier_depends(); + + return next; +} + +static unsigned get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq, + struct iovec iov[], unsigned int iov_size, + unsigned int *out_num, unsigned int *in_num, + struct vhost_log *log, unsigned int *log_num, + struct vring_desc *indirect) +{ + struct vring_desc desc; + unsigned int i = 0, count, found = 0; + int ret; + + /* Sanity check */ + if (indirect->len % sizeof desc) { + vq_err(vq, "Invalid length in indirect descriptor: " + "len 0x%llx not multiple of 0x%zx\n", + (unsigned long long)indirect->len, + sizeof desc); + return -EINVAL; + } + + ret = translate_desc(dev, indirect->addr, indirect->len, vq->indirect, + ARRAY_SIZE(vq->indirect)); + if (ret < 0) { + vq_err(vq, "Translation failure %d in indirect.\n", ret); + return ret; + } + + /* We will use the result as an address to read from, so most + * architectures only need a compiler barrier here. */ + read_barrier_depends(); + + count = indirect->len / sizeof desc; + /* Buffers are chained via a 16 bit next field, so + * we can have at most 2^16 of these. */ + if (count > USHORT_MAX + 1) { + vq_err(vq, "Indirect buffer length too big: %d\n", + indirect->len); + return -E2BIG; + } + + do { + unsigned iov_count = *in_num + *out_num; + if (++found > count) { + vq_err(vq, "Loop detected: last one at %u " + "indirect size %u\n", + i, count); + return -EINVAL; + } + if (memcpy_fromiovec((unsigned char *)&desc, vq->indirect, + sizeof desc)) { + vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n", + i, (size_t)indirect->addr + i * sizeof desc); + return -EINVAL; + } + if (desc.flags & VRING_DESC_F_INDIRECT) { + vq_err(vq, "Nested indirect descriptor: idx %d, %zx\n", + i, (size_t)indirect->addr + i * sizeof desc); + return -EINVAL; + } + + ret = translate_desc(dev, desc.addr, desc.len, iov + iov_count, + iov_size - iov_count); + if (ret < 0) { + vq_err(vq, "Translation failure %d indirect idx %d\n", + ret, i); + return ret; + } + /* If this is an input descriptor, increment that count. */ + if (desc.flags & VRING_DESC_F_WRITE) { + *in_num += ret; + if (unlikely(log)) { + log[*log_num].addr = desc.addr; + log[*log_num].len = desc.len; + ++*log_num; + } + } else { + /* If it's an output descriptor, they're all supposed + * to come before any input descriptors. */ + if (*in_num) { + vq_err(vq, "Indirect descriptor " + "has out after in: idx %d\n", i); + return -EINVAL; + } + *out_num += ret; + } + } while ((i = next_desc(&desc)) != -1); + return 0; +} + +/* This looks in the virtqueue and for the first available buffer, and converts + * it to an iovec for convenient access. Since descriptors consist of some + * number of output then some number of input descriptors, it's actually two + * iovecs, but we pack them into one and note how many of each there were. + * + * This function returns the descriptor number found, or vq->num (which + * is never a valid descriptor number) if none was found. */ +unsigned vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq, + struct iovec iov[], unsigned int iov_size, + unsigned int *out_num, unsigned int *in_num, + struct vhost_log *log, unsigned int *log_num) +{ + struct vring_desc desc; + unsigned int i, head, found = 0; + u16 last_avail_idx; + int ret; + + /* Check it isn't doing very strange things with descriptor numbers. */ + last_avail_idx = vq->last_avail_idx; + if (get_user(vq->avail_idx, &vq->avail->idx)) { + vq_err(vq, "Failed to access avail idx at %p\n", + &vq->avail->idx); + return vq->num; + } + + if ((u16)(vq->avail_idx - last_avail_idx) > vq->num) { + vq_err(vq, "Guest moved used index from %u to %u", + last_avail_idx, vq->avail_idx); + return vq->num; + } + + /* If there's nothing new since last we looked, return invalid. */ + if (vq->avail_idx == last_avail_idx) + return vq->num; + + /* Only get avail ring entries after they have been exposed by guest. */ + rmb(); + + /* Grab the next descriptor number they're advertising, and increment + * the index we've seen. */ + if (get_user(head, &vq->avail->ring[last_avail_idx % vq->num])) { + vq_err(vq, "Failed to read head: idx %d address %p\n", + last_avail_idx, + &vq->avail->ring[last_avail_idx % vq->num]); + return vq->num; + } + + /* If their number is silly, that's an error. */ + if (head >= vq->num) { + vq_err(vq, "Guest says index %u > %u is available", + head, vq->num); + return vq->num; + } + + /* When we start there are none of either input nor output. */ + *out_num = *in_num = 0; + if (unlikely(log)) + *log_num = 0; + + i = head; + do { + unsigned iov_count = *in_num + *out_num; + if (i >= vq->num) { + vq_err(vq, "Desc index is %u > %u, head = %u", + i, vq->num, head); + return vq->num; + } + if (++found > vq->num) { + vq_err(vq, "Loop detected: last one at %u " + "vq size %u head %u\n", + i, vq->num, head); + return vq->num; + } + ret = copy_from_user(&desc, vq->desc + i, sizeof desc); + if (ret) { + vq_err(vq, "Failed to get descriptor: idx %d addr %p\n", + i, vq->desc + i); + return vq->num; + } + if (desc.flags & VRING_DESC_F_INDIRECT) { + ret = get_indirect(dev, vq, iov, iov_size, + out_num, in_num, + log, log_num, &desc); + if (ret < 0) { + vq_err(vq, "Failure detected " + "in indirect descriptor at idx %d\n", i); + return vq->num; + } + continue; + } + + ret = translate_desc(dev, desc.addr, desc.len, iov + iov_count, + iov_size - iov_count); + if (ret < 0) { + vq_err(vq, "Translation failure %d descriptor idx %d\n", + ret, i); + return vq->num; + } + if (desc.flags & VRING_DESC_F_WRITE) { + /* If this is an input descriptor, + * increment that count. */ + *in_num += ret; + if (unlikely(log)) { + log[*log_num].addr = desc.addr; + log[*log_num].len = desc.len; + ++*log_num; + } + } else { + /* If it's an output descriptor, they're all supposed + * to come before any input descriptors. */ + if (*in_num) { + vq_err(vq, "Descriptor has out after in: " + "idx %d\n", i); + return vq->num; + } + *out_num += ret; + } + } while ((i = next_desc(&desc)) != -1); + + /* On success, increment avail index. */ + vq->last_avail_idx++; + return head; +} + +/* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */ +void vhost_discard_vq_desc(struct vhost_virtqueue *vq) +{ + vq->last_avail_idx--; +} + +/* After we've used one of their buffers, we tell them about it. We'll then + * want to notify the guest, using eventfd. */ +int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len) +{ + struct vring_used_elem *used; + + /* The virtqueue contains a ring of used buffers. Get a pointer to the + * next entry in that used ring. */ + used = &vq->used->ring[vq->last_used_idx % vq->num]; + if (put_user(head, &used->id)) { + vq_err(vq, "Failed to write used id"); + return -EFAULT; + } + if (put_user(len, &used->len)) { + vq_err(vq, "Failed to write used len"); + return -EFAULT; + } + /* Make sure buffer is written before we update index. */ + wmb(); + if (put_user(vq->last_used_idx + 1, &vq->used->idx)) { + vq_err(vq, "Failed to increment used idx"); + return -EFAULT; + } + if (unlikely(vq->log_used)) { + /* Make sure data is seen before log. */ + wmb(); + log_write(vq->log_base, vq->log_addr + sizeof *vq->used->ring * + (vq->last_used_idx % vq->num), + sizeof *vq->used->ring); + log_write(vq->log_base, vq->log_addr, sizeof *vq->used->ring); + if (vq->log_ctx) + eventfd_signal(vq->log_ctx, 1); + } + vq->last_used_idx++; + return 0; +} + +/* This actually signals the guest, using eventfd. */ +void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) +{ + __u16 flags = 0; + if (get_user(flags, &vq->avail->flags)) { + vq_err(vq, "Failed to get flags"); + return; + } + + /* If they don't want an interrupt, don't signal, unless empty. */ + if ((flags & VRING_AVAIL_F_NO_INTERRUPT) && + (vq->avail_idx != vq->last_avail_idx || + !vhost_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY))) + return; + + /* Signal the Guest tell them we used something up. */ + if (vq->call_ctx) + eventfd_signal(vq->call_ctx, 1); +} + +/* And here's the combo meal deal. Supersize me! */ +void vhost_add_used_and_signal(struct vhost_dev *dev, + struct vhost_virtqueue *vq, + unsigned int head, int len) +{ + vhost_add_used(vq, head, len); + vhost_signal(dev, vq); +} + +/* OK, now we need to know about added descriptors. */ +bool vhost_enable_notify(struct vhost_virtqueue *vq) +{ + u16 avail_idx; + int r; + if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY)) + return false; + vq->used_flags &= ~VRING_USED_F_NO_NOTIFY; + r = put_user(vq->used_flags, &vq->used->flags); + if (r) { + vq_err(vq, "Failed to enable notification at %p: %d\n", + &vq->used->flags, r); + return false; + } + /* They could have slipped one in as we were doing that: make + * sure it's written, then check again. */ + mb(); + r = get_user(avail_idx, &vq->avail->idx); + if (r) { + vq_err(vq, "Failed to check avail idx at %p: %d\n", + &vq->avail->idx, r); + return false; + } + + return avail_idx != vq->last_avail_idx; +} + +/* We don't need to be notified again. */ +void vhost_disable_notify(struct vhost_virtqueue *vq) +{ + int r; + if (vq->used_flags & VRING_USED_F_NO_NOTIFY) + return; + vq->used_flags |= VRING_USED_F_NO_NOTIFY; + r = put_user(vq->used_flags, &vq->used->flags); + if (r) + vq_err(vq, "Failed to enable notification at %p: %d\n", + &vq->used->flags, r); +} + +int vhost_init(void) +{ + vhost_workqueue = create_singlethread_workqueue("vhost"); + if (!vhost_workqueue) + return -ENOMEM; + return 0; +} + +void vhost_cleanup(void) +{ + destroy_workqueue(vhost_workqueue); +} diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h new file mode 100644 index 00000000000..44591ba9b07 --- /dev/null +++ b/drivers/vhost/vhost.h @@ -0,0 +1,161 @@ +#ifndef _VHOST_H +#define _VHOST_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct vhost_device; + +enum { + /* Enough place for all fragments, head, and virtio net header. */ + VHOST_NET_MAX_SG = MAX_SKB_FRAGS + 2, +}; + +/* Poll a file (eventfd or socket) */ +/* Note: there's nothing vhost specific about this structure. */ +struct vhost_poll { + poll_table table; + wait_queue_head_t *wqh; + wait_queue_t wait; + /* struct which will handle all actual work. */ + struct work_struct work; + unsigned long mask; +}; + +void vhost_poll_init(struct vhost_poll *poll, work_func_t func, + unsigned long mask); +void vhost_poll_start(struct vhost_poll *poll, struct file *file); +void vhost_poll_stop(struct vhost_poll *poll); +void vhost_poll_flush(struct vhost_poll *poll); +void vhost_poll_queue(struct vhost_poll *poll); + +struct vhost_log { + u64 addr; + u64 len; +}; + +/* The virtqueue structure describes a queue attached to a device. */ +struct vhost_virtqueue { + struct vhost_dev *dev; + + /* The actual ring of buffers. */ + struct mutex mutex; + unsigned int num; + struct vring_desc __user *desc; + struct vring_avail __user *avail; + struct vring_used __user *used; + struct file *kick; + struct file *call; + struct file *error; + struct eventfd_ctx *call_ctx; + struct eventfd_ctx *error_ctx; + struct eventfd_ctx *log_ctx; + + struct vhost_poll poll; + + /* The routine to call when the Guest pings us, or timeout. */ + work_func_t handle_kick; + + /* Last available index we saw. */ + u16 last_avail_idx; + + /* Caches available index value from user. */ + u16 avail_idx; + + /* Last index we used. */ + u16 last_used_idx; + + /* Used flags */ + u16 used_flags; + + /* Log writes to used structure. */ + bool log_used; + u64 log_addr; + + struct iovec indirect[VHOST_NET_MAX_SG]; + struct iovec iov[VHOST_NET_MAX_SG]; + struct iovec hdr[VHOST_NET_MAX_SG]; + size_t hdr_size; + /* We use a kind of RCU to access private pointer. + * All readers access it from workqueue, which makes it possible to + * flush the workqueue instead of synchronize_rcu. Therefore readers do + * not need to call rcu_read_lock/rcu_read_unlock: the beginning of + * work item execution acts instead of rcu_read_lock() and the end of + * work item execution acts instead of rcu_read_lock(). + * Writers use virtqueue mutex. */ + void *private_data; + /* Log write descriptors */ + void __user *log_base; + struct vhost_log log[VHOST_NET_MAX_SG]; +}; + +struct vhost_dev { + /* Readers use RCU to access memory table pointer + * log base pointer and features. + * Writers use mutex below.*/ + struct vhost_memory *memory; + struct mm_struct *mm; + struct mutex mutex; + unsigned acked_features; + struct vhost_virtqueue *vqs; + int nvqs; + struct file *log_file; + struct eventfd_ctx *log_ctx; +}; + +long vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue *vqs, int nvqs); +long vhost_dev_check_owner(struct vhost_dev *); +long vhost_dev_reset_owner(struct vhost_dev *); +void vhost_dev_cleanup(struct vhost_dev *); +long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, unsigned long arg); +int vhost_vq_access_ok(struct vhost_virtqueue *vq); +int vhost_log_access_ok(struct vhost_dev *); + +unsigned vhost_get_vq_desc(struct vhost_dev *, struct vhost_virtqueue *, + struct iovec iov[], unsigned int iov_count, + unsigned int *out_num, unsigned int *in_num, + struct vhost_log *log, unsigned int *log_num); +void vhost_discard_vq_desc(struct vhost_virtqueue *); + +int vhost_add_used(struct vhost_virtqueue *, unsigned int head, int len); +void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *); +void vhost_add_used_and_signal(struct vhost_dev *, struct vhost_virtqueue *, + unsigned int head, int len); +void vhost_disable_notify(struct vhost_virtqueue *); +bool vhost_enable_notify(struct vhost_virtqueue *); + +int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, + unsigned int log_num, u64 len); + +int vhost_init(void); +void vhost_cleanup(void); + +#define vq_err(vq, fmt, ...) do { \ + pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \ + if ((vq)->error_ctx) \ + eventfd_signal((vq)->error_ctx, 1);\ + } while (0) + +enum { + VHOST_FEATURES = (1 << VIRTIO_F_NOTIFY_ON_EMPTY) | + (1 << VIRTIO_RING_F_INDIRECT_DESC) | + (1 << VHOST_F_LOG_ALL) | + (1 << VHOST_NET_F_VIRTIO_NET_HDR), +}; + +static inline int vhost_has_feature(struct vhost_dev *dev, int bit) +{ + unsigned acked_features = rcu_dereference(dev->acked_features); + return acked_features & (1 << bit); +} + +#endif diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 756f831cbdd..d93080748a9 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -362,6 +362,7 @@ unifdef-y += uio.h unifdef-y += unistd.h unifdef-y += usbdevice_fs.h unifdef-y += utsname.h +unifdef-y += vhost.h unifdef-y += videodev2.h unifdef-y += videodev.h unifdef-y += virtio_config.h diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index adaf3c15e44..8b5f7cc0fba 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -30,6 +30,7 @@ #define HPET_MINOR 228 #define FUSE_MINOR 229 #define KVM_MINOR 232 +#define VHOST_NET_MINOR 233 #define MISC_DYNAMIC_MINOR 255 struct device; diff --git a/include/linux/vhost.h b/include/linux/vhost.h new file mode 100644 index 00000000000..e847f1e3075 --- /dev/null +++ b/include/linux/vhost.h @@ -0,0 +1,130 @@ +#ifndef _LINUX_VHOST_H +#define _LINUX_VHOST_H +/* Userspace interface for in-kernel virtio accelerators. */ + +/* vhost is used to reduce the number of system calls involved in virtio. + * + * Existing virtio net code is used in the guest without modification. + * + * This header includes interface used by userspace hypervisor for + * device configuration. + */ + +#include +#include +#include +#include +#include + +struct vhost_vring_state { + unsigned int index; + unsigned int num; +}; + +struct vhost_vring_file { + unsigned int index; + int fd; /* Pass -1 to unbind from file. */ + +}; + +struct vhost_vring_addr { + unsigned int index; + /* Option flags. */ + unsigned int flags; + /* Flag values: */ + /* Whether log address is valid. If set enables logging. */ +#define VHOST_VRING_F_LOG 0 + + /* Start of array of descriptors (virtually contiguous) */ + __u64 desc_user_addr; + /* Used structure address. Must be 32 bit aligned */ + __u64 used_user_addr; + /* Available structure address. Must be 16 bit aligned */ + __u64 avail_user_addr; + /* Logging support. */ + /* Log writes to used structure, at offset calculated from specified + * address. Address must be 32 bit aligned. */ + __u64 log_guest_addr; +}; + +struct vhost_memory_region { + __u64 guest_phys_addr; + __u64 memory_size; /* bytes */ + __u64 userspace_addr; + __u64 flags_padding; /* No flags are currently specified. */ +}; + +/* All region addresses and sizes must be 4K aligned. */ +#define VHOST_PAGE_SIZE 0x1000 + +struct vhost_memory { + __u32 nregions; + __u32 padding; + struct vhost_memory_region regions[0]; +}; + +/* ioctls */ + +#define VHOST_VIRTIO 0xAF + +/* Features bitmask for forward compatibility. Transport bits are used for + * vhost specific features. */ +#define VHOST_GET_FEATURES _IOR(VHOST_VIRTIO, 0x00, __u64) +#define VHOST_SET_FEATURES _IOW(VHOST_VIRTIO, 0x00, __u64) + +/* Set current process as the (exclusive) owner of this file descriptor. This + * must be called before any other vhost command. Further calls to + * VHOST_OWNER_SET fail until VHOST_OWNER_RESET is called. */ +#define VHOST_SET_OWNER _IO(VHOST_VIRTIO, 0x01) +/* Give up ownership, and reset the device to default values. + * Allows subsequent call to VHOST_OWNER_SET to succeed. */ +#define VHOST_RESET_OWNER _IO(VHOST_VIRTIO, 0x02) + +/* Set up/modify memory layout */ +#define VHOST_SET_MEM_TABLE _IOW(VHOST_VIRTIO, 0x03, struct vhost_memory) + +/* Write logging setup. */ +/* Memory writes can optionally be logged by setting bit at an offset + * (calculated from the physical address) from specified log base. + * The bit is set using an atomic 32 bit operation. */ +/* Set base address for logging. */ +#define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64) +/* Specify an eventfd file descriptor to signal on log write. */ +#define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int) + +/* Ring setup. */ +/* Set number of descriptors in ring. This parameter can not + * be modified while ring is running (bound to a device). */ +#define VHOST_SET_VRING_NUM _IOW(VHOST_VIRTIO, 0x10, struct vhost_vring_state) +/* Set addresses for the ring. */ +#define VHOST_SET_VRING_ADDR _IOW(VHOST_VIRTIO, 0x11, struct vhost_vring_addr) +/* Base value where queue looks for available descriptors */ +#define VHOST_SET_VRING_BASE _IOW(VHOST_VIRTIO, 0x12, struct vhost_vring_state) +/* Get accessor: reads index, writes value in num */ +#define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state) + +/* The following ioctls use eventfd file descriptors to signal and poll + * for events. */ + +/* Set eventfd to poll for added buffers */ +#define VHOST_SET_VRING_KICK _IOW(VHOST_VIRTIO, 0x20, struct vhost_vring_file) +/* Set eventfd to signal when buffers have beed used */ +#define VHOST_SET_VRING_CALL _IOW(VHOST_VIRTIO, 0x21, struct vhost_vring_file) +/* Set eventfd to signal an error */ +#define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file) + +/* VHOST_NET specific defines */ + +/* Attach virtio net ring to a raw socket, or tap device. + * The socket must be already bound to an ethernet device, this device will be + * used for transmit. Pass fd -1 to unbind from the socket and the transmit + * device. This can be used to stop the ring (e.g. for migration). */ +#define VHOST_NET_SET_BACKEND _IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file) + +/* Feature bits */ +/* Log all write descriptors. Can be changed while device is active. */ +#define VHOST_F_LOG_ALL 26 +/* vhost-net should add virtio_net_hdr for RX, and strip for TX packets. */ +#define VHOST_NET_F_VIRTIO_NET_HDR 27 + +#endif -- cgit v1.2.3-70-g09d2 From 889ff0150661512d79484219612b7e2e024b6c07 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 9 Jan 2010 20:04:47 +0100 Subject: perf/core: Split context's event group list into pinned and non-pinned lists Split-up struct perf_event_context::group_list into pinned_groups and flexible_groups (non-pinned). This first appears to be useless as it duplicates various loops around the group list handlings. But it scales better in the fast-path in perf_sched_in(). We don't anymore iterate twice through the entire list to separate pinned and non-pinned scheduling. Instead we interate through two distinct lists. The another desired effect is that it makes easier to define distinct scheduling rules on both. Changes in v2: - Respectively rename pinned_grp_list and volatile_grp_list into pinned_groups and flexible_groups as per Ingo suggestion. - Various cleanups Signed-off-by: Frederic Weisbecker Acked-by: Peter Zijlstra Cc: Paul Mackerras Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 3 +- kernel/perf_event.c | 227 ++++++++++++++++++++++++++++++--------------- 2 files changed, 153 insertions(+), 77 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 9a1d276db75..cdbc2aa64a0 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -683,7 +683,8 @@ struct perf_event_context { */ struct mutex mutex; - struct list_head group_list; + struct list_head pinned_groups; + struct list_head flexible_groups; struct list_head event_list; int nr_events; int nr_active; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 27f69a04541..c9f8a757649 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -289,6 +289,15 @@ static void update_event_times(struct perf_event *event) event->total_time_running = run_end - event->tstamp_running; } +static struct list_head * +ctx_group_list(struct perf_event *event, struct perf_event_context *ctx) +{ + if (event->attr.pinned) + return &ctx->pinned_groups; + else + return &ctx->flexible_groups; +} + /* * Add a event from the lists for its context. * Must be called with ctx->mutex and ctx->lock held. @@ -303,9 +312,12 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) * add it straight to the context's event list, or to the group * leader's sibling list: */ - if (group_leader == event) - list_add_tail(&event->group_entry, &ctx->group_list); - else { + if (group_leader == event) { + struct list_head *list; + + list = ctx_group_list(event, ctx); + list_add_tail(&event->group_entry, list); + } else { list_add_tail(&event->group_entry, &group_leader->sibling_list); group_leader->nr_siblings++; } @@ -355,8 +367,10 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) * to the context list directly: */ list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) { + struct list_head *list; - list_move_tail(&sibling->group_entry, &ctx->group_list); + list = ctx_group_list(event, ctx); + list_move_tail(&sibling->group_entry, list); sibling->group_leader = sibling; } } @@ -1056,7 +1070,10 @@ void __perf_event_sched_out(struct perf_event_context *ctx, perf_disable(); if (ctx->nr_active) { - list_for_each_entry(event, &ctx->group_list, group_entry) + list_for_each_entry(event, &ctx->pinned_groups, group_entry) + group_sched_out(event, cpuctx, ctx); + + list_for_each_entry(event, &ctx->flexible_groups, group_entry) group_sched_out(event, cpuctx, ctx); } perf_enable(); @@ -1271,9 +1288,8 @@ __perf_event_sched_in(struct perf_event_context *ctx, * First go through the list and put on any pinned groups * in order to give them the best chance of going on. */ - list_for_each_entry(event, &ctx->group_list, group_entry) { - if (event->state <= PERF_EVENT_STATE_OFF || - !event->attr.pinned) + list_for_each_entry(event, &ctx->pinned_groups, group_entry) { + if (event->state <= PERF_EVENT_STATE_OFF) continue; if (event->cpu != -1 && event->cpu != cpu) continue; @@ -1291,15 +1307,10 @@ __perf_event_sched_in(struct perf_event_context *ctx, } } - list_for_each_entry(event, &ctx->group_list, group_entry) { - /* - * Ignore events in OFF or ERROR state, and - * ignore pinned events since we did them already. - */ - if (event->state <= PERF_EVENT_STATE_OFF || - event->attr.pinned) + list_for_each_entry(event, &ctx->flexible_groups, group_entry) { + /* Ignore events in OFF or ERROR state */ + if (event->state <= PERF_EVENT_STATE_OFF) continue; - /* * Listen to the 'cpu' scheduling filter constraint * of events: @@ -1453,8 +1464,13 @@ static void rotate_ctx(struct perf_event_context *ctx) * Rotate the first entry last (works just fine for group events too): */ perf_disable(); - list_for_each_entry(event, &ctx->group_list, group_entry) { - list_move_tail(&event->group_entry, &ctx->group_list); + list_for_each_entry(event, &ctx->pinned_groups, group_entry) { + list_move_tail(&event->group_entry, &ctx->pinned_groups); + break; + } + + list_for_each_entry(event, &ctx->flexible_groups, group_entry) { + list_move_tail(&event->group_entry, &ctx->flexible_groups); break; } perf_enable(); @@ -1490,6 +1506,21 @@ void perf_event_task_tick(struct task_struct *curr) perf_event_task_sched_in(curr); } +static int event_enable_on_exec(struct perf_event *event, + struct perf_event_context *ctx) +{ + if (!event->attr.enable_on_exec) + return 0; + + event->attr.enable_on_exec = 0; + if (event->state >= PERF_EVENT_STATE_INACTIVE) + return 0; + + __perf_event_mark_enabled(event, ctx); + + return 1; +} + /* * Enable all of a task's events that have been marked enable-on-exec. * This expects task == current. @@ -1500,6 +1531,7 @@ static void perf_event_enable_on_exec(struct task_struct *task) struct perf_event *event; unsigned long flags; int enabled = 0; + int ret; local_irq_save(flags); ctx = task->perf_event_ctxp; @@ -1510,14 +1542,16 @@ static void perf_event_enable_on_exec(struct task_struct *task) raw_spin_lock(&ctx->lock); - list_for_each_entry(event, &ctx->group_list, group_entry) { - if (!event->attr.enable_on_exec) - continue; - event->attr.enable_on_exec = 0; - if (event->state >= PERF_EVENT_STATE_INACTIVE) - continue; - __perf_event_mark_enabled(event, ctx); - enabled = 1; + list_for_each_entry(event, &ctx->pinned_groups, group_entry) { + ret = event_enable_on_exec(event, ctx); + if (ret) + enabled = 1; + } + + list_for_each_entry(event, &ctx->flexible_groups, group_entry) { + ret = event_enable_on_exec(event, ctx); + if (ret) + enabled = 1; } /* @@ -1591,7 +1625,8 @@ __perf_event_init_context(struct perf_event_context *ctx, { raw_spin_lock_init(&ctx->lock); mutex_init(&ctx->mutex); - INIT_LIST_HEAD(&ctx->group_list); + INIT_LIST_HEAD(&ctx->pinned_groups); + INIT_LIST_HEAD(&ctx->flexible_groups); INIT_LIST_HEAD(&ctx->event_list); atomic_set(&ctx->refcount, 1); ctx->task = task; @@ -5032,7 +5067,11 @@ void perf_event_exit_task(struct task_struct *child) mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING); again: - list_for_each_entry_safe(child_event, tmp, &child_ctx->group_list, + list_for_each_entry_safe(child_event, tmp, &child_ctx->pinned_groups, + group_entry) + __perf_event_exit_task(child_event, child_ctx, child); + + list_for_each_entry_safe(child_event, tmp, &child_ctx->flexible_groups, group_entry) __perf_event_exit_task(child_event, child_ctx, child); @@ -5041,7 +5080,8 @@ again: * its siblings to the list, but we obtained 'tmp' before that which * will still point to the list head terminating the iteration. */ - if (!list_empty(&child_ctx->group_list)) + if (!list_empty(&child_ctx->pinned_groups) || + !list_empty(&child_ctx->flexible_groups)) goto again; mutex_unlock(&child_ctx->mutex); @@ -5049,6 +5089,24 @@ again: put_ctx(child_ctx); } +static void perf_free_event(struct perf_event *event, + struct perf_event_context *ctx) +{ + struct perf_event *parent = event->parent; + + if (WARN_ON_ONCE(!parent)) + return; + + mutex_lock(&parent->child_mutex); + list_del_init(&event->child_list); + mutex_unlock(&parent->child_mutex); + + fput(parent->filp); + + list_del_event(event, ctx); + free_event(event); +} + /* * free an unexposed, unused context as created by inheritance by * init_task below, used by fork() in case of fail. @@ -5063,36 +5121,70 @@ void perf_event_free_task(struct task_struct *task) mutex_lock(&ctx->mutex); again: - list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry) { - struct perf_event *parent = event->parent; + list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) + perf_free_event(event, ctx); - if (WARN_ON_ONCE(!parent)) - continue; + list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, + group_entry) + perf_free_event(event, ctx); - mutex_lock(&parent->child_mutex); - list_del_init(&event->child_list); - mutex_unlock(&parent->child_mutex); + if (!list_empty(&ctx->pinned_groups) || + !list_empty(&ctx->flexible_groups)) + goto again; - fput(parent->filp); + mutex_unlock(&ctx->mutex); - list_del_event(event, ctx); - free_event(event); + put_ctx(ctx); +} + +static int +inherit_task_group(struct perf_event *event, struct task_struct *parent, + struct perf_event_context *parent_ctx, + struct task_struct *child, + int *inherited_all) +{ + int ret; + struct perf_event_context *child_ctx = child->perf_event_ctxp; + + if (!event->attr.inherit) { + *inherited_all = 0; + return 0; } - if (!list_empty(&ctx->group_list)) - goto again; + if (!child_ctx) { + /* + * This is executed from the parent task context, so + * inherit events that have been marked for cloning. + * First allocate and initialize a context for the + * child. + */ - mutex_unlock(&ctx->mutex); + child_ctx = kzalloc(sizeof(struct perf_event_context), + GFP_KERNEL); + if (!child_ctx) + return -ENOMEM; - put_ctx(ctx); + __perf_event_init_context(child_ctx, child); + child->perf_event_ctxp = child_ctx; + get_task_struct(child); + } + + ret = inherit_group(event, parent, parent_ctx, + child, child_ctx); + + if (ret) + *inherited_all = 0; + + return ret; } + /* * Initialize the perf_event context in task_struct */ int perf_event_init_task(struct task_struct *child) { - struct perf_event_context *child_ctx = NULL, *parent_ctx; + struct perf_event_context *child_ctx, *parent_ctx; struct perf_event_context *cloned_ctx; struct perf_event *event; struct task_struct *parent = current; @@ -5130,41 +5222,22 @@ int perf_event_init_task(struct task_struct *child) * We dont have to disable NMIs - we are only looking at * the list, not manipulating it: */ - list_for_each_entry(event, &parent_ctx->group_list, group_entry) { - - if (!event->attr.inherit) { - inherited_all = 0; - continue; - } - - if (!child->perf_event_ctxp) { - /* - * This is executed from the parent task context, so - * inherit events that have been marked for cloning. - * First allocate and initialize a context for the - * child. - */ - - child_ctx = kzalloc(sizeof(struct perf_event_context), - GFP_KERNEL); - if (!child_ctx) { - ret = -ENOMEM; - break; - } - - __perf_event_init_context(child_ctx, child); - child->perf_event_ctxp = child_ctx; - get_task_struct(child); - } + list_for_each_entry(event, &parent_ctx->pinned_groups, group_entry) { + ret = inherit_task_group(event, parent, parent_ctx, child, + &inherited_all); + if (ret) + break; + } - ret = inherit_group(event, parent, parent_ctx, - child, child_ctx); - if (ret) { - inherited_all = 0; + list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) { + ret = inherit_task_group(event, parent, parent_ctx, child, + &inherited_all); + if (ret) break; - } } + child_ctx = child->perf_event_ctxp; + if (child_ctx && inherited_all) { /* * Mark the child context as a clone of the parent @@ -5213,7 +5286,9 @@ static void __perf_event_exit_cpu(void *info) struct perf_event_context *ctx = &cpuctx->ctx; struct perf_event *event, *tmp; - list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry) + list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) + __perf_event_remove_from_context(event); + list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry) __perf_event_remove_from_context(event); } static void perf_event_exit_cpu(int cpu) -- cgit v1.2.3-70-g09d2 From 5908cdc85eb30f8d07f2cb11d4a62334d7229048 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 9 Jan 2010 20:53:14 +0100 Subject: list: Introduce list_rotate_left() Bring a new list_rotate_left() helper that rotates a list to the left. This is useful for codes that need to round roubin elements which queue priority increases from tail to head. Signed-off-by: Frederic Weisbecker Acked-by: Peter Zijlstra Cc: Paul Mackerras Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo --- include/linux/list.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index 969f6e92d08..5d9c6558e8a 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -205,6 +205,20 @@ static inline int list_empty_careful(const struct list_head *head) return (next == head) && (next == head->prev); } +/** + * list_rotate_left - rotate the list to the left + * @head: the head of the list + */ +static inline void list_rotate_left(struct list_head *head) +{ + struct list_head *first; + + if (!list_empty(head)) { + first = head->next; + list_move_tail(first, head); + } +} + /** * list_is_singular - tests whether a list has just one entry. * @head: the list to test. -- cgit v1.2.3-70-g09d2 From d6f962b57bfaab62891c7abbf1469212a56d6103 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 10 Jan 2010 01:25:51 +0100 Subject: perf: Export software-only event group characteristic as a flag Before scheduling an event group, we first check if a group can go on. We first check if the group is made of software only events first, in which case it is enough to know if the group can be scheduled in. For that purpose, we iterate through the whole group, which is wasteful as we could do this check when we add/delete an event to a group. So we create a group_flags field in perf event that can host characteristics from a group of events, starting with a first PERF_GROUP_SOFTWARE flag that reduces the check on the fast path. Signed-off-by: Frederic Weisbecker Acked-by: Peter Zijlstra Cc: Paul Mackerras Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 5 +++++ kernel/perf_event.c | 30 +++++++++++------------------- 2 files changed, 16 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index cdbc2aa64a0..c6f812e4d05 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -565,6 +565,10 @@ typedef void (*perf_overflow_handler_t)(struct perf_event *, int, struct perf_sample_data *, struct pt_regs *regs); +enum perf_group_flag { + PERF_GROUP_SOFTWARE = 0x1, +}; + /** * struct perf_event - performance event kernel representation: */ @@ -574,6 +578,7 @@ struct perf_event { struct list_head event_entry; struct list_head sibling_list; int nr_siblings; + int group_flags; struct perf_event *group_leader; struct perf_event *output; const struct pmu *pmu; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index bbebe283263..eae6ff69360 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -315,9 +315,16 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) if (group_leader == event) { struct list_head *list; + if (is_software_event(event)) + event->group_flags |= PERF_GROUP_SOFTWARE; + list = ctx_group_list(event, ctx); list_add_tail(&event->group_entry, list); } else { + if (group_leader->group_flags & PERF_GROUP_SOFTWARE && + !is_software_event(event)) + group_leader->group_flags &= ~PERF_GROUP_SOFTWARE; + list_add_tail(&event->group_entry, &group_leader->sibling_list); group_leader->nr_siblings++; } @@ -372,6 +379,9 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) list = ctx_group_list(event, ctx); list_move_tail(&sibling->group_entry, list); sibling->group_leader = sibling; + + /* Inherit group flags from the previous leader */ + sibling->group_flags = event->group_flags; } } @@ -699,24 +709,6 @@ group_error: return -EAGAIN; } -/* - * Return 1 for a group consisting entirely of software events, - * 0 if the group contains any hardware events. - */ -static int is_software_only_group(struct perf_event *leader) -{ - struct perf_event *event; - - if (!is_software_event(leader)) - return 0; - - list_for_each_entry(event, &leader->sibling_list, group_entry) - if (!is_software_event(event)) - return 0; - - return 1; -} - /* * Work out whether we can put this event group on the CPU now. */ @@ -727,7 +719,7 @@ static int group_can_go_on(struct perf_event *event, /* * Groups consisting entirely of software events can always go on. */ - if (is_software_only_group(event)) + if (event->group_flags & PERF_GROUP_SOFTWARE) return 1; /* * If an exclusive group is already on, no other hardware -- cgit v1.2.3-70-g09d2 From 73c89c15b959adf06366722c4be8d2eddec0a529 Mon Sep 17 00:00:00 2001 From: Tobias Brunner Date: Sun, 17 Jan 2010 21:52:11 +1100 Subject: crypto: gcm - Add RFC4543 wrapper for GCM This patch adds the RFC4543 (GMAC) wrapper for GCM similar to the existing RFC4106 wrapper. The main differences between GCM and GMAC are the contents of the AAD and that the plaintext is empty for the latter. Signed-off-by: Tobias Brunner Signed-off-by: Herbert Xu --- crypto/gcm.c | 287 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pfkeyv2.h | 1 + net/xfrm/xfrm_algo.c | 16 +++ 3 files changed, 304 insertions(+) (limited to 'include/linux') diff --git a/crypto/gcm.c b/crypto/gcm.c index c6547130624..2f5fbba6576 100644 --- a/crypto/gcm.c +++ b/crypto/gcm.c @@ -37,6 +37,19 @@ struct crypto_rfc4106_ctx { u8 nonce[4]; }; +struct crypto_rfc4543_ctx { + struct crypto_aead *child; + u8 nonce[4]; +}; + +struct crypto_rfc4543_req_ctx { + u8 auth_tag[16]; + struct scatterlist cipher[1]; + struct scatterlist payload[2]; + struct scatterlist assoc[2]; + struct aead_request subreq; +}; + struct crypto_gcm_ghash_ctx { unsigned int cryptlen; struct scatterlist *src; @@ -1047,6 +1060,272 @@ static struct crypto_template crypto_rfc4106_tmpl = { .module = THIS_MODULE, }; +static inline struct crypto_rfc4543_req_ctx *crypto_rfc4543_reqctx( + struct aead_request *req) +{ + unsigned long align = crypto_aead_alignmask(crypto_aead_reqtfm(req)); + + return (void *)PTR_ALIGN((u8 *)aead_request_ctx(req), align + 1); +} + +static int crypto_rfc4543_setkey(struct crypto_aead *parent, const u8 *key, + unsigned int keylen) +{ + struct crypto_rfc4543_ctx *ctx = crypto_aead_ctx(parent); + struct crypto_aead *child = ctx->child; + int err; + + if (keylen < 4) + return -EINVAL; + + keylen -= 4; + memcpy(ctx->nonce, key + keylen, 4); + + crypto_aead_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_aead_set_flags(child, crypto_aead_get_flags(parent) & + CRYPTO_TFM_REQ_MASK); + err = crypto_aead_setkey(child, key, keylen); + crypto_aead_set_flags(parent, crypto_aead_get_flags(child) & + CRYPTO_TFM_RES_MASK); + + return err; +} + +static int crypto_rfc4543_setauthsize(struct crypto_aead *parent, + unsigned int authsize) +{ + struct crypto_rfc4543_ctx *ctx = crypto_aead_ctx(parent); + + if (authsize != 16) + return -EINVAL; + + return crypto_aead_setauthsize(ctx->child, authsize); +} + +/* this is the same as crypto_authenc_chain */ +static void crypto_rfc4543_chain(struct scatterlist *head, + struct scatterlist *sg, int chain) +{ + if (chain) { + head->length += sg->length; + sg = scatterwalk_sg_next(sg); + } + + if (sg) + scatterwalk_sg_chain(head, 2, sg); + else + sg_mark_end(head); +} + +static struct aead_request *crypto_rfc4543_crypt(struct aead_request *req, + int enc) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct crypto_rfc4543_ctx *ctx = crypto_aead_ctx(aead); + struct crypto_rfc4543_req_ctx *rctx = crypto_rfc4543_reqctx(req); + struct aead_request *subreq = &rctx->subreq; + struct scatterlist *dst = req->dst; + struct scatterlist *cipher = rctx->cipher; + struct scatterlist *payload = rctx->payload; + struct scatterlist *assoc = rctx->assoc; + unsigned int authsize = crypto_aead_authsize(aead); + unsigned int assoclen = req->assoclen; + struct page *dstp; + u8 *vdst; + u8 *iv = PTR_ALIGN((u8 *)(rctx + 1) + crypto_aead_reqsize(ctx->child), + crypto_aead_alignmask(ctx->child) + 1); + + memcpy(iv, ctx->nonce, 4); + memcpy(iv + 4, req->iv, 8); + + /* construct cipher/plaintext */ + if (enc) + memset(rctx->auth_tag, 0, authsize); + else + scatterwalk_map_and_copy(rctx->auth_tag, dst, + req->cryptlen - authsize, + authsize, 0); + + sg_init_one(cipher, rctx->auth_tag, authsize); + + /* construct the aad */ + dstp = sg_page(dst); + vdst = PageHighMem(dstp) ? NULL : page_address(dstp) + dst->offset; + + sg_init_table(payload, 2); + sg_set_buf(payload, req->iv, 8); + crypto_rfc4543_chain(payload, dst, vdst == req->iv + 8); + assoclen += 8 + req->cryptlen - (enc ? 0 : authsize); + + sg_init_table(assoc, 2); + sg_set_page(assoc, sg_page(req->assoc), req->assoc->length, + req->assoc->offset); + crypto_rfc4543_chain(assoc, payload, 0); + + aead_request_set_tfm(subreq, ctx->child); + aead_request_set_callback(subreq, req->base.flags, req->base.complete, + req->base.data); + aead_request_set_crypt(subreq, cipher, cipher, enc ? 0 : authsize, iv); + aead_request_set_assoc(subreq, assoc, assoclen); + + return subreq; +} + +static int crypto_rfc4543_encrypt(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct crypto_rfc4543_req_ctx *rctx = crypto_rfc4543_reqctx(req); + struct aead_request *subreq; + int err; + + subreq = crypto_rfc4543_crypt(req, 1); + err = crypto_aead_encrypt(subreq); + if (err) + return err; + + scatterwalk_map_and_copy(rctx->auth_tag, req->dst, req->cryptlen, + crypto_aead_authsize(aead), 1); + + return 0; +} + +static int crypto_rfc4543_decrypt(struct aead_request *req) +{ + req = crypto_rfc4543_crypt(req, 0); + + return crypto_aead_decrypt(req); +} + +static int crypto_rfc4543_init_tfm(struct crypto_tfm *tfm) +{ + struct crypto_instance *inst = (void *)tfm->__crt_alg; + struct crypto_aead_spawn *spawn = crypto_instance_ctx(inst); + struct crypto_rfc4543_ctx *ctx = crypto_tfm_ctx(tfm); + struct crypto_aead *aead; + unsigned long align; + + aead = crypto_spawn_aead(spawn); + if (IS_ERR(aead)) + return PTR_ERR(aead); + + ctx->child = aead; + + align = crypto_aead_alignmask(aead); + align &= ~(crypto_tfm_ctx_alignment() - 1); + tfm->crt_aead.reqsize = sizeof(struct crypto_rfc4543_req_ctx) + + ALIGN(crypto_aead_reqsize(aead), + crypto_tfm_ctx_alignment()) + + align + 16; + + return 0; +} + +static void crypto_rfc4543_exit_tfm(struct crypto_tfm *tfm) +{ + struct crypto_rfc4543_ctx *ctx = crypto_tfm_ctx(tfm); + + crypto_free_aead(ctx->child); +} + +static struct crypto_instance *crypto_rfc4543_alloc(struct rtattr **tb) +{ + struct crypto_attr_type *algt; + struct crypto_instance *inst; + struct crypto_aead_spawn *spawn; + struct crypto_alg *alg; + const char *ccm_name; + int err; + + algt = crypto_get_attr_type(tb); + err = PTR_ERR(algt); + if (IS_ERR(algt)) + return ERR_PTR(err); + + if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask) + return ERR_PTR(-EINVAL); + + ccm_name = crypto_attr_alg_name(tb[1]); + err = PTR_ERR(ccm_name); + if (IS_ERR(ccm_name)) + return ERR_PTR(err); + + inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); + if (!inst) + return ERR_PTR(-ENOMEM); + + spawn = crypto_instance_ctx(inst); + crypto_set_aead_spawn(spawn, inst); + err = crypto_grab_aead(spawn, ccm_name, 0, + crypto_requires_sync(algt->type, algt->mask)); + if (err) + goto out_free_inst; + + alg = crypto_aead_spawn_alg(spawn); + + err = -EINVAL; + + /* We only support 16-byte blocks. */ + if (alg->cra_aead.ivsize != 16) + goto out_drop_alg; + + /* Not a stream cipher? */ + if (alg->cra_blocksize != 1) + goto out_drop_alg; + + err = -ENAMETOOLONG; + if (snprintf(inst->alg.cra_name, CRYPTO_MAX_ALG_NAME, + "rfc4543(%s)", alg->cra_name) >= CRYPTO_MAX_ALG_NAME || + snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME, + "rfc4543(%s)", alg->cra_driver_name) >= + CRYPTO_MAX_ALG_NAME) + goto out_drop_alg; + + inst->alg.cra_flags = CRYPTO_ALG_TYPE_AEAD; + inst->alg.cra_flags |= alg->cra_flags & CRYPTO_ALG_ASYNC; + inst->alg.cra_priority = alg->cra_priority; + inst->alg.cra_blocksize = 1; + inst->alg.cra_alignmask = alg->cra_alignmask; + inst->alg.cra_type = &crypto_nivaead_type; + + inst->alg.cra_aead.ivsize = 8; + inst->alg.cra_aead.maxauthsize = 16; + + inst->alg.cra_ctxsize = sizeof(struct crypto_rfc4543_ctx); + + inst->alg.cra_init = crypto_rfc4543_init_tfm; + inst->alg.cra_exit = crypto_rfc4543_exit_tfm; + + inst->alg.cra_aead.setkey = crypto_rfc4543_setkey; + inst->alg.cra_aead.setauthsize = crypto_rfc4543_setauthsize; + inst->alg.cra_aead.encrypt = crypto_rfc4543_encrypt; + inst->alg.cra_aead.decrypt = crypto_rfc4543_decrypt; + + inst->alg.cra_aead.geniv = "seqiv"; + +out: + return inst; + +out_drop_alg: + crypto_drop_aead(spawn); +out_free_inst: + kfree(inst); + inst = ERR_PTR(err); + goto out; +} + +static void crypto_rfc4543_free(struct crypto_instance *inst) +{ + crypto_drop_spawn(crypto_instance_ctx(inst)); + kfree(inst); +} + +static struct crypto_template crypto_rfc4543_tmpl = { + .name = "rfc4543", + .alloc = crypto_rfc4543_alloc, + .free = crypto_rfc4543_free, + .module = THIS_MODULE, +}; + static int __init crypto_gcm_module_init(void) { int err; @@ -1067,8 +1346,14 @@ static int __init crypto_gcm_module_init(void) if (err) goto out_undo_gcm; + err = crypto_register_template(&crypto_rfc4543_tmpl); + if (err) + goto out_undo_rfc4106; + return 0; +out_undo_rfc4106: + crypto_unregister_template(&crypto_rfc4106_tmpl); out_undo_gcm: crypto_unregister_template(&crypto_gcm_tmpl); out_undo_base: @@ -1081,6 +1366,7 @@ out: static void __exit crypto_gcm_module_exit(void) { kfree(gcm_zeroes); + crypto_unregister_template(&crypto_rfc4543_tmpl); crypto_unregister_template(&crypto_rfc4106_tmpl); crypto_unregister_template(&crypto_gcm_tmpl); crypto_unregister_template(&crypto_gcm_base_tmpl); @@ -1094,3 +1380,4 @@ MODULE_DESCRIPTION("Galois/Counter Mode"); MODULE_AUTHOR("Mikko Herranen "); MODULE_ALIAS("gcm_base"); MODULE_ALIAS("rfc4106"); +MODULE_ALIAS("rfc4543"); diff --git a/include/linux/pfkeyv2.h b/include/linux/pfkeyv2.h index 228b0b6306b..0b80c806631 100644 --- a/include/linux/pfkeyv2.h +++ b/include/linux/pfkeyv2.h @@ -315,6 +315,7 @@ struct sadb_x_kmaddress { #define SADB_X_EALG_AES_GCM_ICV12 19 #define SADB_X_EALG_AES_GCM_ICV16 20 #define SADB_X_EALG_CAMELLIACBC 22 +#define SADB_X_EALG_NULL_AES_GMAC 23 #define SADB_EALG_MAX 253 /* last EALG */ /* private allocations should use 249-255 (RFC2407) */ #define SADB_X_EALG_SERPENTCBC 252 /* draft-ietf-ipsec-ciph-aes-cbc-00 */ diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 743c0134a6a..8b4d6e3246e 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -125,6 +125,22 @@ static struct xfrm_algo_desc aead_list[] = { .sadb_alg_maxbits = 256 } }, +{ + .name = "rfc4543(gcm(aes))", + + .uinfo = { + .aead = { + .icv_truncbits = 128, + } + }, + + .desc = { + .sadb_alg_id = SADB_X_EALG_NULL_AES_GMAC, + .sadb_alg_ivlen = 8, + .sadb_alg_minbits = 128, + .sadb_alg_maxbits = 256 + } +}, }; static struct xfrm_algo_desc aalg_list[] = { -- cgit v1.2.3-70-g09d2 From 8b0e58a70a7a41443c779de074288035b014cb94 Mon Sep 17 00:00:00 2001 From: Stephane Chatty Date: Wed, 13 Jan 2010 21:52:34 +0100 Subject: HID: let hid-input accept digitizers Extended IS_INPUT_APPLICATION to accept digitzers that are actual input devices (touchscreens, light pens, touch pads, white boards) Signed-off-by: Stephane Chatty Signed-off-by: Jiri Kosina --- include/linux/hid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index 87093652dda..b978c1e2e74 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -663,7 +663,7 @@ struct hid_ll_driver { /* Applications from HID Usage Tables 4/8/99 Version 1.1 */ /* We ignore a few input applications that are not widely used */ -#define IS_INPUT_APPLICATION(a) (((a >= 0x00010000) && (a <= 0x00010008)) || (a == 0x00010080) || (a == 0x000c0001) || (a == 0x000d0002)) +#define IS_INPUT_APPLICATION(a) (((a >= 0x00010000) && (a <= 0x00010008)) || (a == 0x00010080) || (a == 0x000c0001) || ((a >= 0x000d0002) && (a <= 0x000d0006))) /* HID core API */ -- cgit v1.2.3-70-g09d2 From a83d8e8d099fc373a5ca7112ad08c553bb2c180f Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 18 Jan 2010 08:21:13 +0100 Subject: netfilter: xtables: add struct xt_mtchk_param::net Some complex match modules (like xt_hashlimit/xt_recent) want netns information at constructor and destructor time. We propably can play games at match destruction time, because netns can be passed in object, but I think it's cleaner to explicitly pass netns. Add ->net, make sure it's set from ebtables/iptables/ip6tables code. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 1 + net/bridge/netfilter/ebtables.c | 14 +++++++++----- net/ipv4/netfilter/ip_tables.c | 24 ++++++++++++++---------- net/ipv6/netfilter/ip6_tables.c | 14 ++++++++------ 4 files changed, 32 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 378f27ae777..88261b9829a 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -205,6 +205,7 @@ struct xt_match_param { * @hook_mask: via which hooks the new rule is reachable */ struct xt_mtchk_param { + struct net *net; const char *table; const void *entryinfo; const struct xt_match *match; diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index bd1c65425d4..c77bab98669 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -619,7 +619,9 @@ ebt_cleanup_entry(struct ebt_entry *e, unsigned int *cnt) } static inline int -ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo, +ebt_check_entry(struct ebt_entry *e, + struct net *net, + struct ebt_table_info *newinfo, const char *name, unsigned int *cnt, struct ebt_cl_stack *cl_s, unsigned int udc_cnt) { @@ -671,6 +673,7 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo, } i = 0; + mtpar.net = net; mtpar.table = tgpar.table = name; mtpar.entryinfo = tgpar.entryinfo = e; mtpar.hook_mask = tgpar.hook_mask = hookmask; @@ -808,7 +811,8 @@ letscontinue: } /* do the parsing of the table/chains/entries/matches/watchers/targets, heh */ -static int translate_table(char *name, struct ebt_table_info *newinfo) +static int translate_table(struct net *net, char *name, + struct ebt_table_info *newinfo) { unsigned int i, j, k, udc_cnt; int ret; @@ -917,7 +921,7 @@ static int translate_table(char *name, struct ebt_table_info *newinfo) /* used to know what we need to clean up if something goes wrong */ i = 0; ret = EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, - ebt_check_entry, newinfo, name, &i, cl_s, udc_cnt); + ebt_check_entry, net, newinfo, name, &i, cl_s, udc_cnt); if (ret != 0) { EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, ebt_cleanup_entry, &i); @@ -1017,7 +1021,7 @@ static int do_replace(struct net *net, void __user *user, unsigned int len) if (ret != 0) goto free_counterstmp; - ret = translate_table(tmp.name, newinfo); + ret = translate_table(net, tmp.name, newinfo); if (ret != 0) goto free_counterstmp; @@ -1154,7 +1158,7 @@ ebt_register_table(struct net *net, const struct ebt_table *input_table) newinfo->hook_entry[i] = p + ((char *)repl->hook_entry[i] - repl->entries); } - ret = translate_table(repl->name, newinfo); + ret = translate_table(net, repl->name, newinfo); if (ret != 0) { BUGPRINT("Translate_table failed\n"); goto free_chainstack; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 572330a552e..a069d72d948 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -661,8 +661,8 @@ static int check_target(struct ipt_entry *e, const char *name) } static int -find_check_entry(struct ipt_entry *e, const char *name, unsigned int size, - unsigned int *i) +find_check_entry(struct ipt_entry *e, struct net *net, const char *name, + unsigned int size, unsigned int *i) { struct ipt_entry_target *t; struct xt_target *target; @@ -675,6 +675,7 @@ find_check_entry(struct ipt_entry *e, const char *name, unsigned int size, return ret; j = 0; + mtpar.net = net; mtpar.table = name; mtpar.entryinfo = &e->ip; mtpar.hook_mask = e->comefrom; @@ -798,7 +799,8 @@ cleanup_entry(struct ipt_entry *e, unsigned int *i) /* Checks and translates the user-supplied table segment (held in newinfo) */ static int -translate_table(const char *name, +translate_table(struct net *net, + const char *name, unsigned int valid_hooks, struct xt_table_info *newinfo, void *entry0, @@ -860,7 +862,7 @@ translate_table(const char *name, /* Finally, each sanity check must pass */ i = 0; ret = IPT_ENTRY_ITERATE(entry0, newinfo->size, - find_check_entry, name, size, &i); + find_check_entry, net, name, size, &i); if (ret != 0) { IPT_ENTRY_ITERATE(entry0, newinfo->size, @@ -1303,7 +1305,7 @@ do_replace(struct net *net, void __user *user, unsigned int len) goto free_newinfo; } - ret = translate_table(tmp.name, tmp.valid_hooks, + ret = translate_table(net, tmp.name, tmp.valid_hooks, newinfo, loc_cpu_entry, tmp.size, tmp.num_entries, tmp.hook_entry, tmp.underflow); if (ret != 0) @@ -1655,7 +1657,7 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, } static int -compat_check_entry(struct ipt_entry *e, const char *name, +compat_check_entry(struct ipt_entry *e, struct net *net, const char *name, unsigned int *i) { struct xt_mtchk_param mtpar; @@ -1663,6 +1665,7 @@ compat_check_entry(struct ipt_entry *e, const char *name, int ret; j = 0; + mtpar.net = net; mtpar.table = name; mtpar.entryinfo = &e->ip; mtpar.hook_mask = e->comefrom; @@ -1684,7 +1687,8 @@ compat_check_entry(struct ipt_entry *e, const char *name, } static int -translate_compat_table(const char *name, +translate_compat_table(struct net *net, + const char *name, unsigned int valid_hooks, struct xt_table_info **pinfo, void **pentry0, @@ -1773,7 +1777,7 @@ translate_compat_table(const char *name, i = 0; ret = IPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry, - name, &i); + net, name, &i); if (ret) { j -= i; COMPAT_IPT_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i, @@ -1833,7 +1837,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) goto free_newinfo; } - ret = translate_compat_table(tmp.name, tmp.valid_hooks, + ret = translate_compat_table(net, tmp.name, tmp.valid_hooks, &newinfo, &loc_cpu_entry, tmp.size, tmp.num_entries, tmp.hook_entry, tmp.underflow); @@ -2086,7 +2090,7 @@ struct xt_table *ipt_register_table(struct net *net, loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; memcpy(loc_cpu_entry, repl->entries, repl->size); - ret = translate_table(table->name, table->valid_hooks, + ret = translate_table(net, table->name, table->valid_hooks, newinfo, loc_cpu_entry, repl->size, repl->num_entries, repl->hook_entry, diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 480d7f8c980..a825940a92e 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -693,8 +693,8 @@ static int check_target(struct ip6t_entry *e, const char *name) } static int -find_check_entry(struct ip6t_entry *e, const char *name, unsigned int size, - unsigned int *i) +find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, + unsigned int size, unsigned int *i) { struct ip6t_entry_target *t; struct xt_target *target; @@ -707,6 +707,7 @@ find_check_entry(struct ip6t_entry *e, const char *name, unsigned int size, return ret; j = 0; + mtpar.net = net; mtpar.table = name; mtpar.entryinfo = &e->ipv6; mtpar.hook_mask = e->comefrom; @@ -830,7 +831,8 @@ cleanup_entry(struct ip6t_entry *e, unsigned int *i) /* Checks and translates the user-supplied table segment (held in newinfo) */ static int -translate_table(const char *name, +translate_table(struct net *net, + const char *name, unsigned int valid_hooks, struct xt_table_info *newinfo, void *entry0, @@ -892,7 +894,7 @@ translate_table(const char *name, /* Finally, each sanity check must pass */ i = 0; ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size, - find_check_entry, name, size, &i); + find_check_entry, net, name, size, &i); if (ret != 0) { IP6T_ENTRY_ITERATE(entry0, newinfo->size, @@ -1336,7 +1338,7 @@ do_replace(struct net *net, void __user *user, unsigned int len) goto free_newinfo; } - ret = translate_table(tmp.name, tmp.valid_hooks, + ret = translate_table(net, tmp.name, tmp.valid_hooks, newinfo, loc_cpu_entry, tmp.size, tmp.num_entries, tmp.hook_entry, tmp.underflow); if (ret != 0) @@ -2121,7 +2123,7 @@ struct xt_table *ip6t_register_table(struct net *net, loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; memcpy(loc_cpu_entry, repl->entries, repl->size); - ret = translate_table(table->name, table->valid_hooks, + ret = translate_table(net, table->name, table->valid_hooks, newinfo, loc_cpu_entry, repl->size, repl->num_entries, repl->hook_entry, -- cgit v1.2.3-70-g09d2 From f54e9367f8499a9bf6b2afbc0dce63e1d53c525a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 18 Jan 2010 08:25:47 +0100 Subject: netfilter: xtables: add struct xt_mtdtor_param::net Add ->net to match destructor list like ->net in constructor list. Make sure it's set in ebtables/iptables/ip6tables, this requires to propagate netns up to *_unregister_table(). Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 1 + include/linux/netfilter_bridge/ebtables.h | 2 +- include/linux/netfilter_ipv4/ip_tables.h | 2 +- include/linux/netfilter_ipv6/ip6_tables.h | 2 +- net/bridge/netfilter/ebtable_broute.c | 2 +- net/bridge/netfilter/ebtable_filter.c | 2 +- net/bridge/netfilter/ebtable_nat.c | 2 +- net/bridge/netfilter/ebtables.c | 19 ++++++++-------- net/ipv4/netfilter/ip_tables.c | 25 +++++++++++---------- net/ipv4/netfilter/iptable_filter.c | 2 +- net/ipv4/netfilter/iptable_mangle.c | 2 +- net/ipv4/netfilter/iptable_raw.c | 2 +- net/ipv4/netfilter/iptable_security.c | 2 +- net/ipv4/netfilter/nf_nat_rule.c | 2 +- net/ipv6/netfilter/ip6_tables.c | 37 +++++++++++++++++-------------- net/ipv6/netfilter/ip6table_filter.c | 2 +- net/ipv6/netfilter/ip6table_mangle.c | 2 +- net/ipv6/netfilter/ip6table_raw.c | 2 +- net/ipv6/netfilter/ip6table_security.c | 2 +- 19 files changed, 59 insertions(+), 53 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 88261b9829a..3caf5e15110 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -216,6 +216,7 @@ struct xt_mtchk_param { /* Match destructor parameters */ struct xt_mtdtor_param { + struct net *net; const struct xt_match *match; void *matchinfo; u_int8_t family; diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 3cc40c131cc..1c6f0c5f530 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -289,7 +289,7 @@ struct ebt_table { ~(__alignof__(struct ebt_replace)-1)) extern struct ebt_table *ebt_register_table(struct net *net, const struct ebt_table *table); -extern void ebt_unregister_table(struct ebt_table *table); +extern void ebt_unregister_table(struct net *net, struct ebt_table *table); extern unsigned int ebt_do_table(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, struct ebt_table *table); diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index 27b3f580730..8d1f273d350 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -242,7 +242,7 @@ extern void ipt_init(void) __init; extern struct xt_table *ipt_register_table(struct net *net, const struct xt_table *table, const struct ipt_replace *repl); -extern void ipt_unregister_table(struct xt_table *table); +extern void ipt_unregister_table(struct net *net, struct xt_table *table); /* Standard entry. */ struct ipt_standard { diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index b31050d20ae..d2952d2fa65 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -300,7 +300,7 @@ extern void ip6t_init(void) __init; extern struct xt_table *ip6t_register_table(struct net *net, const struct xt_table *table, const struct ip6t_replace *repl); -extern void ip6t_unregister_table(struct xt_table *table); +extern void ip6t_unregister_table(struct net *net, struct xt_table *table); extern unsigned int ip6t_do_table(struct sk_buff *skb, unsigned int hook, const struct net_device *in, diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index d32ab13e728..ae3f106c390 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -71,7 +71,7 @@ static int __net_init broute_net_init(struct net *net) static void __net_exit broute_net_exit(struct net *net) { - ebt_unregister_table(net->xt.broute_table); + ebt_unregister_table(net, net->xt.broute_table); } static struct pernet_operations broute_net_ops = { diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 60b1a6ca718..42e6bd09457 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -107,7 +107,7 @@ static int __net_init frame_filter_net_init(struct net *net) static void __net_exit frame_filter_net_exit(struct net *net) { - ebt_unregister_table(net->xt.frame_filter); + ebt_unregister_table(net, net->xt.frame_filter); } static struct pernet_operations frame_filter_net_ops = { diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 4a98804203b..6dc2f878ae0 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -107,7 +107,7 @@ static int __net_init frame_nat_net_init(struct net *net) static void __net_exit frame_nat_net_exit(struct net *net) { - ebt_unregister_table(net->xt.frame_nat); + ebt_unregister_table(net, net->xt.frame_nat); } static struct pernet_operations frame_nat_net_ops = { diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index c77bab98669..1aa0e4c1f52 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -561,13 +561,14 @@ ebt_get_udc_positions(struct ebt_entry *e, struct ebt_table_info *newinfo, } static inline int -ebt_cleanup_match(struct ebt_entry_match *m, unsigned int *i) +ebt_cleanup_match(struct ebt_entry_match *m, struct net *net, unsigned int *i) { struct xt_mtdtor_param par; if (i && (*i)-- == 0) return 1; + par.net = net; par.match = m->u.match; par.matchinfo = m->data; par.family = NFPROTO_BRIDGE; @@ -595,7 +596,7 @@ ebt_cleanup_watcher(struct ebt_entry_watcher *w, unsigned int *i) } static inline int -ebt_cleanup_entry(struct ebt_entry *e, unsigned int *cnt) +ebt_cleanup_entry(struct ebt_entry *e, struct net *net, unsigned int *cnt) { struct xt_tgdtor_param par; struct ebt_entry_target *t; @@ -606,7 +607,7 @@ ebt_cleanup_entry(struct ebt_entry *e, unsigned int *cnt) if (cnt && (*cnt)-- == 0) return 1; EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, NULL); - EBT_MATCH_ITERATE(e, ebt_cleanup_match, NULL); + EBT_MATCH_ITERATE(e, ebt_cleanup_match, net, NULL); t = (struct ebt_entry_target *)(((char *)e) + e->target_offset); par.target = t->u.target; @@ -731,7 +732,7 @@ ebt_check_entry(struct ebt_entry *e, cleanup_watchers: EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, &j); cleanup_matches: - EBT_MATCH_ITERATE(e, ebt_cleanup_match, &i); + EBT_MATCH_ITERATE(e, ebt_cleanup_match, net, &i); return ret; } @@ -924,7 +925,7 @@ static int translate_table(struct net *net, char *name, ebt_check_entry, net, newinfo, name, &i, cl_s, udc_cnt); if (ret != 0) { EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, - ebt_cleanup_entry, &i); + ebt_cleanup_entry, net, &i); } vfree(cl_s); return ret; @@ -1074,7 +1075,7 @@ static int do_replace(struct net *net, void __user *user, unsigned int len) /* decrease module count and free resources */ EBT_ENTRY_ITERATE(table->entries, table->entries_size, - ebt_cleanup_entry, NULL); + ebt_cleanup_entry, net, NULL); vfree(table->entries); if (table->chainstack) { @@ -1091,7 +1092,7 @@ free_unlock: mutex_unlock(&ebt_mutex); free_iterate: EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, - ebt_cleanup_entry, NULL); + ebt_cleanup_entry, net, NULL); free_counterstmp: vfree(counterstmp); /* can be initialized in translate_table() */ @@ -1208,7 +1209,7 @@ out: return ERR_PTR(ret); } -void ebt_unregister_table(struct ebt_table *table) +void ebt_unregister_table(struct net *net, struct ebt_table *table) { int i; @@ -1220,7 +1221,7 @@ void ebt_unregister_table(struct ebt_table *table) list_del(&table->list); mutex_unlock(&ebt_mutex); EBT_ENTRY_ITERATE(table->private->entries, table->private->entries_size, - ebt_cleanup_entry, NULL); + ebt_cleanup_entry, net, NULL); if (table->private->nentries) module_put(table->me); vfree(table->private->entries); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index a069d72d948..cfaba0e2e6f 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -553,13 +553,14 @@ mark_source_chains(struct xt_table_info *newinfo, } static int -cleanup_match(struct ipt_entry_match *m, unsigned int *i) +cleanup_match(struct ipt_entry_match *m, struct net *net, unsigned int *i) { struct xt_mtdtor_param par; if (i && (*i)-- == 0) return 1; + par.net = net; par.match = m->u.kernel.match; par.matchinfo = m->data; par.family = NFPROTO_IPV4; @@ -705,7 +706,7 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name, err: module_put(t->u.kernel.target->me); cleanup_matches: - IPT_MATCH_ITERATE(e, cleanup_match, &j); + IPT_MATCH_ITERATE(e, cleanup_match, net, &j); return ret; } @@ -775,7 +776,7 @@ check_entry_size_and_hooks(struct ipt_entry *e, } static int -cleanup_entry(struct ipt_entry *e, unsigned int *i) +cleanup_entry(struct ipt_entry *e, struct net *net, unsigned int *i) { struct xt_tgdtor_param par; struct ipt_entry_target *t; @@ -784,7 +785,7 @@ cleanup_entry(struct ipt_entry *e, unsigned int *i) return 1; /* Cleanup all matches */ - IPT_MATCH_ITERATE(e, cleanup_match, NULL); + IPT_MATCH_ITERATE(e, cleanup_match, net, NULL); t = ipt_get_target(e); par.target = t->u.kernel.target; @@ -866,7 +867,7 @@ translate_table(struct net *net, if (ret != 0) { IPT_ENTRY_ITERATE(entry0, newinfo->size, - cleanup_entry, &i); + cleanup_entry, net, &i); return ret; } @@ -1260,7 +1261,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, - NULL); + net, NULL); xt_free_table_info(oldinfo); if (copy_to_user(counters_ptr, counters, sizeof(struct xt_counters) * num_counters) != 0) @@ -1320,7 +1321,7 @@ do_replace(struct net *net, void __user *user, unsigned int len) return 0; free_newinfo_untrans: - IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); + IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, net, NULL); free_newinfo: xt_free_table_info(newinfo); return ret; @@ -1682,7 +1683,7 @@ compat_check_entry(struct ipt_entry *e, struct net *net, const char *name, return 0; cleanup_matches: - IPT_MATCH_ITERATE(e, cleanup_match, &j); + IPT_MATCH_ITERATE(e, cleanup_match, net, &j); return ret; } @@ -1782,7 +1783,7 @@ translate_compat_table(struct net *net, j -= i; COMPAT_IPT_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i, compat_release_entry, &j); - IPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i); + IPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, net, &i); xt_free_table_info(newinfo); return ret; } @@ -1853,7 +1854,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) return 0; free_newinfo_untrans: - IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); + IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, net, NULL); free_newinfo: xt_free_table_info(newinfo); return ret; @@ -2112,7 +2113,7 @@ out: return ERR_PTR(ret); } -void ipt_unregister_table(struct xt_table *table) +void ipt_unregister_table(struct net *net, struct xt_table *table) { struct xt_table_info *private; void *loc_cpu_entry; @@ -2122,7 +2123,7 @@ void ipt_unregister_table(struct xt_table *table) /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries[raw_smp_processor_id()]; - IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL); + IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, net, NULL); if (private->number > private->initial_entries) module_put(table_owner); xt_free_table_info(private); diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index df566cbd68e..dee90eb8aa4 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -138,7 +138,7 @@ static int __net_init iptable_filter_net_init(struct net *net) static void __net_exit iptable_filter_net_exit(struct net *net) { - ipt_unregister_table(net->ipv4.iptable_filter); + ipt_unregister_table(net, net->ipv4.iptable_filter); } static struct pernet_operations iptable_filter_net_ops = { diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index fae78c3076c..e07bf242343 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -208,7 +208,7 @@ static int __net_init iptable_mangle_net_init(struct net *net) static void __net_exit iptable_mangle_net_exit(struct net *net) { - ipt_unregister_table(net->ipv4.iptable_mangle); + ipt_unregister_table(net, net->ipv4.iptable_mangle); } static struct pernet_operations iptable_mangle_net_ops = { diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 993edc23be0..40f2b9f611a 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -100,7 +100,7 @@ static int __net_init iptable_raw_net_init(struct net *net) static void __net_exit iptable_raw_net_exit(struct net *net) { - ipt_unregister_table(net->ipv4.iptable_raw); + ipt_unregister_table(net, net->ipv4.iptable_raw); } static struct pernet_operations iptable_raw_net_ops = { diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index 3bd3d6388da..7ce2366e430 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -138,7 +138,7 @@ static int __net_init iptable_security_net_init(struct net *net) static void __net_exit iptable_security_net_exit(struct net *net) { - ipt_unregister_table(net->ipv4.iptable_security); + ipt_unregister_table(net, net->ipv4.iptable_security); } static struct pernet_operations iptable_security_net_ops = { diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 9e81e0dfb4e..85da34fdc75 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -195,7 +195,7 @@ static int __net_init nf_nat_rule_net_init(struct net *net) static void __net_exit nf_nat_rule_net_exit(struct net *net) { - ipt_unregister_table(net->ipv4.nat_table); + ipt_unregister_table(net, net->ipv4.nat_table); } static struct pernet_operations nf_nat_rule_net_ops = { diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index a825940a92e..9f1d45f2ba8 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -585,13 +585,14 @@ mark_source_chains(struct xt_table_info *newinfo, } static int -cleanup_match(struct ip6t_entry_match *m, unsigned int *i) +cleanup_match(struct ip6t_entry_match *m, struct net *net, unsigned int *i) { struct xt_mtdtor_param par; if (i && (*i)-- == 0) return 1; + par.net = net; par.match = m->u.kernel.match; par.matchinfo = m->data; par.family = NFPROTO_IPV6; @@ -737,7 +738,7 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, err: module_put(t->u.kernel.target->me); cleanup_matches: - IP6T_MATCH_ITERATE(e, cleanup_match, &j); + IP6T_MATCH_ITERATE(e, cleanup_match, net, &j); return ret; } @@ -807,7 +808,7 @@ check_entry_size_and_hooks(struct ip6t_entry *e, } static int -cleanup_entry(struct ip6t_entry *e, unsigned int *i) +cleanup_entry(struct ip6t_entry *e, struct net *net, unsigned int *i) { struct xt_tgdtor_param par; struct ip6t_entry_target *t; @@ -816,7 +817,7 @@ cleanup_entry(struct ip6t_entry *e, unsigned int *i) return 1; /* Cleanup all matches */ - IP6T_MATCH_ITERATE(e, cleanup_match, NULL); + IP6T_MATCH_ITERATE(e, cleanup_match, net, NULL); t = ip6t_get_target(e); par.target = t->u.kernel.target; @@ -898,7 +899,7 @@ translate_table(struct net *net, if (ret != 0) { IP6T_ENTRY_ITERATE(entry0, newinfo->size, - cleanup_entry, &i); + cleanup_entry, net, &i); return ret; } @@ -1293,7 +1294,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, - NULL); + net, NULL); xt_free_table_info(oldinfo); if (copy_to_user(counters_ptr, counters, sizeof(struct xt_counters) * num_counters) != 0) @@ -1353,7 +1354,7 @@ do_replace(struct net *net, void __user *user, unsigned int len) return 0; free_newinfo_untrans: - IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); + IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, net, NULL); free_newinfo: xt_free_table_info(newinfo); return ret; @@ -1692,14 +1693,15 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, return ret; } -static int compat_check_entry(struct ip6t_entry *e, const char *name, - unsigned int *i) +static int compat_check_entry(struct ip6t_entry *e, struct net *net, + const char *name, unsigned int *i) { unsigned int j; int ret; struct xt_mtchk_param mtpar; j = 0; + mtpar.net = net; mtpar.table = name; mtpar.entryinfo = &e->ipv6; mtpar.hook_mask = e->comefrom; @@ -1716,12 +1718,13 @@ static int compat_check_entry(struct ip6t_entry *e, const char *name, return 0; cleanup_matches: - IP6T_MATCH_ITERATE(e, cleanup_match, &j); + IP6T_MATCH_ITERATE(e, cleanup_match, net, &j); return ret; } static int -translate_compat_table(const char *name, +translate_compat_table(struct net *net, + const char *name, unsigned int valid_hooks, struct xt_table_info **pinfo, void **pentry0, @@ -1810,12 +1813,12 @@ translate_compat_table(const char *name, i = 0; ret = IP6T_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry, - name, &i); + net, name, &i); if (ret) { j -= i; COMPAT_IP6T_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i, compat_release_entry, &j); - IP6T_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i); + IP6T_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, net, &i); xt_free_table_info(newinfo); return ret; } @@ -1870,7 +1873,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) goto free_newinfo; } - ret = translate_compat_table(tmp.name, tmp.valid_hooks, + ret = translate_compat_table(net, tmp.name, tmp.valid_hooks, &newinfo, &loc_cpu_entry, tmp.size, tmp.num_entries, tmp.hook_entry, tmp.underflow); @@ -1886,7 +1889,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) return 0; free_newinfo_untrans: - IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); + IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, net, NULL); free_newinfo: xt_free_table_info(newinfo); return ret; @@ -2144,7 +2147,7 @@ out: return ERR_PTR(ret); } -void ip6t_unregister_table(struct xt_table *table) +void ip6t_unregister_table(struct net *net, struct xt_table *table) { struct xt_table_info *private; void *loc_cpu_entry; @@ -2154,7 +2157,7 @@ void ip6t_unregister_table(struct xt_table *table) /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries[raw_smp_processor_id()]; - IP6T_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL); + IP6T_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, net, NULL); if (private->number > private->initial_entries) module_put(table_owner); xt_free_table_info(private); diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index ad378efd0eb..33ddfe53e18 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -131,7 +131,7 @@ static int __net_init ip6table_filter_net_init(struct net *net) static void __net_exit ip6table_filter_net_exit(struct net *net) { - ip6t_unregister_table(net->ipv6.ip6table_filter); + ip6t_unregister_table(net, net->ipv6.ip6table_filter); } static struct pernet_operations ip6table_filter_net_ops = { diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index a929c19d30e..9bc483f000e 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -182,7 +182,7 @@ static int __net_init ip6table_mangle_net_init(struct net *net) static void __net_exit ip6table_mangle_net_exit(struct net *net) { - ip6t_unregister_table(net->ipv6.ip6table_mangle); + ip6t_unregister_table(net, net->ipv6.ip6table_mangle); } static struct pernet_operations ip6table_mangle_net_ops = { diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index ed1a1180f3b..4c90b552e43 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -94,7 +94,7 @@ static int __net_init ip6table_raw_net_init(struct net *net) static void __net_exit ip6table_raw_net_exit(struct net *net) { - ip6t_unregister_table(net->ipv6.ip6table_raw); + ip6t_unregister_table(net, net->ipv6.ip6table_raw); } static struct pernet_operations ip6table_raw_net_ops = { diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index 41b444c6093..baa8d4ef3b0 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -134,7 +134,7 @@ static int __net_init ip6table_security_net_init(struct net *net) static void __net_exit ip6table_security_net_exit(struct net *net) { - ip6t_unregister_table(net->ipv6.ip6table_security); + ip6t_unregister_table(net, net->ipv6.ip6table_security); } static struct pernet_operations ip6table_security_net_ops = { -- cgit v1.2.3-70-g09d2 From d2d4e780aff2fab46a792ebc89f80d1a6872b325 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Mon, 18 Jan 2010 07:20:28 +0000 Subject: ide: add drive->pio_mode field Add pio_mode field to ide_drive_t matching pio_mode field used in struct ata_device. The validity of the field is restricted to ->set_pio_mode method only currently in IDE subsystem. Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: David S. Miller --- drivers/ide/ide-devsets.c | 2 ++ drivers/ide/ide-probe.c | 2 ++ drivers/ide/ide-xfer-mode.c | 3 +++ include/linux/ide.h | 1 + 4 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/drivers/ide/ide-devsets.c b/drivers/ide/ide-devsets.c index 1099bf7cf96..cb3341ce655 100644 --- a/drivers/ide/ide-devsets.c +++ b/drivers/ide/ide-devsets.c @@ -105,6 +105,8 @@ static int set_pio_mode(ide_drive_t *drive, int arg) return -ENOSYS; if (set_pio_mode_abuse(drive->hwif, arg)) { + drive->pio_mode = arg + XFER_PIO_0; + if (arg == 8 || arg == 9) { unsigned long flags; diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 4d76ba47309..9a9f10f4cf9 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -1043,6 +1043,8 @@ static void ide_port_init_devices(ide_hwif_t *hwif) if (hwif->host_flags & IDE_HFLAG_NO_UNMASK_IRQS) drive->dev_flags |= IDE_DFLAG_NO_UNMASK; + drive->pio_mode = XFER_PIO_0; + if (port_ops && port_ops->init_dev) port_ops->init_dev(drive); } diff --git a/drivers/ide/ide-xfer-mode.c b/drivers/ide/ide-xfer-mode.c index 46d203ce60c..cdae463f6b4 100644 --- a/drivers/ide/ide-xfer-mode.c +++ b/drivers/ide/ide-xfer-mode.c @@ -135,6 +135,7 @@ int ide_set_pio_mode(ide_drive_t *drive, const u8 mode) * set transfer mode on the device in ->set_pio_mode method... */ if (port_ops->set_dma_mode == NULL) { + drive->pio_mode = mode; port_ops->set_pio_mode(drive, mode - XFER_PIO_0); return 0; } @@ -142,9 +143,11 @@ int ide_set_pio_mode(ide_drive_t *drive, const u8 mode) if (hwif->host_flags & IDE_HFLAG_POST_SET_MODE) { if (ide_config_drive_speed(drive, mode)) return -1; + drive->pio_mode = mode; port_ops->set_pio_mode(drive, mode - XFER_PIO_0); return 0; } else { + drive->pio_mode = mode; port_ops->set_pio_mode(drive, mode - XFER_PIO_0); return ide_config_drive_speed(drive, mode); } diff --git a/include/linux/ide.h b/include/linux/ide.h index 0ec61295904..b5d2e965505 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -515,6 +515,7 @@ struct ide_drive_s { u8 init_speed; /* transfer rate set at boot */ u8 current_speed; /* current transfer rate set */ u8 desired_speed; /* desired transfer rate set */ + u8 pio_mode; /* for ->set_pio_mode _only_ */ u8 dn; /* now wide spread use */ u8 acoustic; /* acoustic management */ u8 media; /* disk, cdrom, tape, floppy, ... */ -- cgit v1.2.3-70-g09d2 From 3fccaa192b9501e79a57e02e62b6bf420d2b461e Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Mon, 18 Jan 2010 07:20:35 +0000 Subject: ide: add drive->dma_mode field Add dma_mode field to ide_drive_t matching dma_mode field used in struct ata_device. The validity of the field is restricted to ->dma_pio_mode method only currently in IDE subsystem. Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: David S. Miller --- drivers/ide/aec62xx.c | 1 + drivers/ide/ide-xfer-mode.c | 2 ++ include/linux/ide.h | 1 + 3 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/drivers/ide/aec62xx.c b/drivers/ide/aec62xx.c index 878f8ec6dbe..4c869872eb9 100644 --- a/drivers/ide/aec62xx.c +++ b/drivers/ide/aec62xx.c @@ -136,6 +136,7 @@ static void aec6260_set_mode(ide_drive_t *drive, const u8 speed) static void aec_set_pio_mode(ide_drive_t *drive, const u8 pio) { + drive->dma_mode = pio + XFER_PIO_0; drive->hwif->port_ops->set_dma_mode(drive, pio + XFER_PIO_0); } diff --git a/drivers/ide/ide-xfer-mode.c b/drivers/ide/ide-xfer-mode.c index cdae463f6b4..c2323869d92 100644 --- a/drivers/ide/ide-xfer-mode.c +++ b/drivers/ide/ide-xfer-mode.c @@ -167,9 +167,11 @@ int ide_set_dma_mode(ide_drive_t *drive, const u8 mode) if (hwif->host_flags & IDE_HFLAG_POST_SET_MODE) { if (ide_config_drive_speed(drive, mode)) return -1; + drive->dma_mode = mode; port_ops->set_dma_mode(drive, mode); return 0; } else { + drive->dma_mode = mode; port_ops->set_dma_mode(drive, mode); return ide_config_drive_speed(drive, mode); } diff --git a/include/linux/ide.h b/include/linux/ide.h index b5d2e965505..746ef9fdabc 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -516,6 +516,7 @@ struct ide_drive_s { u8 current_speed; /* current transfer rate set */ u8 desired_speed; /* desired transfer rate set */ u8 pio_mode; /* for ->set_pio_mode _only_ */ + u8 dma_mode; /* for ->dma_pio_mode _only_ */ u8 dn; /* now wide spread use */ u8 acoustic; /* acoustic management */ u8 media; /* disk, cdrom, tape, floppy, ... */ -- cgit v1.2.3-70-g09d2 From e085b3cae85af47eb0a3eda3186bd898310fb322 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 19 Jan 2010 01:44:41 -0800 Subject: ide: change ->set_pio_mode method parameters Change ->set_pio_mode method parameters to match ->set_piomode method used in struct ata_port_operations. Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: David S. Miller --- drivers/ide/aec62xx.c | 6 +++--- drivers/ide/ali14xx.c | 3 ++- drivers/ide/alim15x3.c | 7 +++---- drivers/ide/amd74xx.c | 4 ++-- drivers/ide/at91_ide.c | 5 +++-- drivers/ide/atiixp.c | 7 ++++--- drivers/ide/au1xxx-ide.c | 5 ++--- drivers/ide/cmd640.c | 3 ++- drivers/ide/cmd64x.c | 4 +++- drivers/ide/cs5520.c | 7 ++++--- drivers/ide/cs5530.c | 7 ++++--- drivers/ide/cs5535.c | 6 +++--- drivers/ide/cs5536.c | 7 ++++--- drivers/ide/cy82c693.c | 5 ++--- drivers/ide/dtc2278.c | 4 ++-- drivers/ide/hpt366.c | 4 ++-- drivers/ide/ht6560b.c | 3 ++- drivers/ide/ide-devsets.c | 4 ++-- drivers/ide/ide-xfer-mode.c | 6 +++--- drivers/ide/it8172.c | 10 +++++----- drivers/ide/it8213.c | 14 +++++++------- drivers/ide/it821x.c | 6 +++--- drivers/ide/jmicron.c | 2 +- drivers/ide/opti621.c | 6 +++--- drivers/ide/palm_bk3710.c | 5 +++-- drivers/ide/pdc202xx_new.c | 4 ++-- drivers/ide/pdc202xx_old.c | 4 ++-- drivers/ide/piix.c | 14 +++++++------- drivers/ide/pmac.c | 5 ++--- drivers/ide/qd65xx.c | 10 ++++------ drivers/ide/sc1200.c | 4 ++-- drivers/ide/scc_pata.c | 6 +++--- drivers/ide/serverworks.c | 5 +++-- drivers/ide/siimage.c | 6 +++--- drivers/ide/sis5513.c | 4 ++-- drivers/ide/sl82c105.c | 5 +++-- drivers/ide/slc90e66.c | 13 +++++++------ drivers/ide/tc86c001.c | 4 ++-- drivers/ide/triflex.c | 4 ++-- drivers/ide/tx4938ide.c | 5 ++--- drivers/ide/tx4939ide.c | 4 ++-- drivers/ide/umc8672.c | 5 +++-- drivers/ide/via82cxxx.c | 6 +++--- include/linux/ide.h | 2 +- 44 files changed, 129 insertions(+), 121 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/aec62xx.c b/drivers/ide/aec62xx.c index 4c869872eb9..3790847361c 100644 --- a/drivers/ide/aec62xx.c +++ b/drivers/ide/aec62xx.c @@ -134,10 +134,10 @@ static void aec6260_set_mode(ide_drive_t *drive, const u8 speed) local_irq_restore(flags); } -static void aec_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void aec_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - drive->dma_mode = pio + XFER_PIO_0; - drive->hwif->port_ops->set_dma_mode(drive, pio + XFER_PIO_0); + drive->dma_mode = drive->pio_mode; + hwif->port_ops->set_dma_mode(drive, drive->dma_mode); } static int init_chipset_aec62xx(struct pci_dev *dev) diff --git a/drivers/ide/ali14xx.c b/drivers/ide/ali14xx.c index 90da1f953ed..25b9fe3a9f8 100644 --- a/drivers/ide/ali14xx.c +++ b/drivers/ide/ali14xx.c @@ -109,13 +109,14 @@ static DEFINE_SPINLOCK(ali14xx_lock); * This function computes timing parameters * and sets controller registers accordingly. */ -static void ali14xx_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void ali14xx_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { int driveNum; int time1, time2; u8 param1, param2, param3, param4; unsigned long flags; int bus_speed = ide_vlb_clk ? ide_vlb_clk : 50; + const u8 pio = drive->pio_mode - XFER_PIO_0; struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio); /* calculate timing, according to PIO mode */ diff --git a/drivers/ide/alim15x3.c b/drivers/ide/alim15x3.c index 8f03cce055f..28cee1055f7 100644 --- a/drivers/ide/alim15x3.c +++ b/drivers/ide/alim15x3.c @@ -63,15 +63,14 @@ static void ali_fifo_control(ide_hwif_t *hwif, ide_drive_t *drive, int on) /** * ali_set_pio_mode - set host controller for PIO mode + * @hwif: port * @drive: drive - * @pio: PIO mode number * * Program the controller for the given PIO mode. */ -static void ali_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void ali_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); int bus_speed = ide_pci_clk ? ide_pci_clk : 33; unsigned long T = 1000000 / bus_speed; /* PCI clock based */ @@ -79,7 +78,7 @@ static void ali_set_pio_mode(ide_drive_t *drive, const u8 pio) u8 unit = drive->dn & 1; struct ide_timing t; - ide_timing_compute(drive, XFER_PIO_0 + pio, &t, T, 1); + ide_timing_compute(drive, drive->pio_mode, &t, T, 1); t.setup = clamp_val(t.setup, 1, 8) & 7; t.active = clamp_val(t.active, 1, 8) & 7; diff --git a/drivers/ide/amd74xx.c b/drivers/ide/amd74xx.c index 108e9b67685..3eee7be7ca6 100644 --- a/drivers/ide/amd74xx.c +++ b/drivers/ide/amd74xx.c @@ -108,9 +108,9 @@ static void amd_set_drive(ide_drive_t *drive, const u8 speed) * amd_set_pio_mode() is a callback from upper layers for PIO-only tuning. */ -static void amd_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void amd_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - amd_set_drive(drive, XFER_PIO_0 + pio); + amd_set_drive(drive, drive->pio_mode); } static void amd7409_cable_detect(struct pci_dev *dev) diff --git a/drivers/ide/at91_ide.c b/drivers/ide/at91_ide.c index 248219a89a6..000a78e5246 100644 --- a/drivers/ide/at91_ide.c +++ b/drivers/ide/at91_ide.c @@ -172,11 +172,12 @@ static void at91_ide_output_data(ide_drive_t *drive, struct ide_cmd *cmd, leave_16bit(chipselect, mode); } -static void at91_ide_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void at91_ide_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { struct ide_timing *timing; - u8 chipselect = drive->hwif->select_data; + u8 chipselect = hwif->select_data; int use_iordy = 0; + const u8 pio = drive->pio_mode - XFER_PIO_0; pdbg("chipselect %u pio %u\n", chipselect, pio); diff --git a/drivers/ide/atiixp.c b/drivers/ide/atiixp.c index 837322b10a4..b6848dfb93b 100644 --- a/drivers/ide/atiixp.c +++ b/drivers/ide/atiixp.c @@ -42,19 +42,20 @@ static DEFINE_SPINLOCK(atiixp_lock); /** * atiixp_set_pio_mode - set host controller for PIO mode + * @hwif: port * @drive: drive - * @pio: PIO mode number * * Set the interface PIO mode. */ -static void atiixp_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void atiixp_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - struct pci_dev *dev = to_pci_dev(drive->hwif->dev); + struct pci_dev *dev = to_pci_dev(hwif->dev); unsigned long flags; int timing_shift = (drive->dn ^ 1) * 8; u32 pio_timing_data; u16 pio_mode_data; + const u8 pio = drive->pio_mode - XFER_PIO_0; spin_lock_irqsave(&atiixp_lock, flags); diff --git a/drivers/ide/au1xxx-ide.c b/drivers/ide/au1xxx-ide.c index 87cef0c440a..c90e9b0a9f6 100644 --- a/drivers/ide/au1xxx-ide.c +++ b/drivers/ide/au1xxx-ide.c @@ -99,12 +99,11 @@ static void au1xxx_output_data(ide_drive_t *drive, struct ide_cmd *cmd, } #endif -static void au1xxx_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void au1xxx_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { int mem_sttime = 0, mem_stcfg = au_readl(MEM_STCFG2); - /* set pio mode! */ - switch(pio) { + switch (drive->pio_mode - XFER_PIO_0) { case 0: mem_sttime = SBC_IDE_TIMING(PIO0); diff --git a/drivers/ide/cmd640.c b/drivers/ide/cmd640.c index 1a32d62ed86..c7d46a3d347 100644 --- a/drivers/ide/cmd640.c +++ b/drivers/ide/cmd640.c @@ -572,9 +572,10 @@ static void cmd640_set_mode(ide_drive_t *drive, unsigned int index, program_drive_counts(drive, index); } -static void cmd640_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void cmd640_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { unsigned int index = 0, cycle_time; + const u8 pio = drive->pio_mode - XFER_PIO_0; u8 b; switch (pio) { diff --git a/drivers/ide/cmd64x.c b/drivers/ide/cmd64x.c index 9f89f3116df..0b11745937e 100644 --- a/drivers/ide/cmd64x.c +++ b/drivers/ide/cmd64x.c @@ -127,8 +127,10 @@ static void cmd64x_program_timings(ide_drive_t *drive, u8 mode) * Special cases are 8: prefetch off, 9: prefetch on (both never worked) */ -static void cmd64x_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void cmd64x_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { + const u8 pio = drive->pio_mode - XFER_PIO_0; + /* * Filter out the prefetch control values * to prevent PIO5 from being programmed diff --git a/drivers/ide/cs5520.c b/drivers/ide/cs5520.c index 09f98ed0731..b8094f049f3 100644 --- a/drivers/ide/cs5520.c +++ b/drivers/ide/cs5520.c @@ -57,11 +57,11 @@ static struct pio_clocks cs5520_pio_clocks[]={ {1, 2, 1} }; -static void cs5520_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void cs5520_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *pdev = to_pci_dev(hwif->dev); int controller = drive->dn > 1 ? 1 : 0; + const u8 pio = drive->pio_mode - XFER_PIO_0; /* 8bit CAT/CRT - 8bit command timing for channel */ pci_write_config_byte(pdev, 0x62 + controller, @@ -85,7 +85,8 @@ static void cs5520_set_dma_mode(ide_drive_t *drive, const u8 speed) { printk(KERN_ERR "cs55x0: bad ide timing.\n"); - cs5520_set_pio_mode(drive, 0); + drive->pio_mode = XFER_PIO_0 + 0; + cs5520_set_pio_mode(drive->hwif, drive); } static const struct ide_port_ops cs5520_port_ops = { diff --git a/drivers/ide/cs5530.c b/drivers/ide/cs5530.c index 40bf05eddf6..4ced40255ad 100644 --- a/drivers/ide/cs5530.c +++ b/drivers/ide/cs5530.c @@ -41,8 +41,8 @@ static unsigned int cs5530_pio_timings[2][5] = { /** * cs5530_set_pio_mode - set host controller for PIO mode + * @hwif: port * @drive: drive - * @pio: PIO mode number * * Handles setting of PIO mode for the chipset. * @@ -50,10 +50,11 @@ static unsigned int cs5530_pio_timings[2][5] = { * will have valid default PIO timings set up before we get here. */ -static void cs5530_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void cs5530_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - unsigned long basereg = CS5530_BASEREG(drive->hwif); + unsigned long basereg = CS5530_BASEREG(hwif); unsigned int format = (inl(basereg + 4) >> 31) & 1; + const u8 pio = drive->pio_mode - XFER_PIO_0; outl(cs5530_pio_timings[format][pio], basereg + ((drive->dn & 1)<<3)); } diff --git a/drivers/ide/cs5535.c b/drivers/ide/cs5535.c index b883838adc2..7974415ea89 100644 --- a/drivers/ide/cs5535.c +++ b/drivers/ide/cs5535.c @@ -142,15 +142,15 @@ static void cs5535_set_dma_mode(ide_drive_t *drive, const u8 speed) /** * cs5535_set_pio_mode - set host controller for PIO mode + * @hwif: port * @drive: drive - * @pio: PIO mode number * * A callback from the upper layers for PIO-only tuning. */ -static void cs5535_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void cs5535_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - cs5535_set_speed(drive, XFER_PIO_0 + pio); + cs5535_set_speed(drive, drive->pio_mode); } static u8 cs5535_cable_detect(ide_hwif_t *hwif) diff --git a/drivers/ide/cs5536.c b/drivers/ide/cs5536.c index 9623b852c61..b518ef0e9a3 100644 --- a/drivers/ide/cs5536.c +++ b/drivers/ide/cs5536.c @@ -125,11 +125,11 @@ static u8 cs5536_cable_detect(ide_hwif_t *hwif) /** * cs5536_set_pio_mode - PIO timing setup + * @hwif: ATA port * @drive: ATA device - * @pio: PIO mode number */ -static void cs5536_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void cs5536_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { static const u8 drv_timings[5] = { 0x98, 0x55, 0x32, 0x21, 0x20, @@ -143,11 +143,12 @@ static void cs5536_set_pio_mode(ide_drive_t *drive, const u8 pio) 0x99, 0x92, 0x90, 0x22, 0x20, }; - struct pci_dev *pdev = to_pci_dev(drive->hwif->dev); + struct pci_dev *pdev = to_pci_dev(hwif->dev); ide_drive_t *pair = ide_get_pair_dev(drive); int cshift = (drive->dn & 1) ? IDE_CAST_D1_SHIFT : IDE_CAST_D0_SHIFT; unsigned long timings = (unsigned long)ide_get_drivedata(drive); u32 cast; + const u8 pio = drive->pio_mode - XFER_PIO_0; u8 cmd_pio = pio; if (pair) diff --git a/drivers/ide/cy82c693.c b/drivers/ide/cy82c693.c index fbf3dcc2657..ead65c394f0 100644 --- a/drivers/ide/cy82c693.c +++ b/drivers/ide/cy82c693.c @@ -80,9 +80,8 @@ static void cy82c693_set_dma_mode(ide_drive_t *drive, const u8 mode) outb(data, CY82_DATA_PORT); } -static void cy82c693_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void cy82c693_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); int bus_speed = ide_pci_clk ? ide_pci_clk : 33; const unsigned long T = 1000000 / bus_speed; @@ -101,7 +100,7 @@ static void cy82c693_set_pio_mode(ide_drive_t *drive, const u8 pio) } } - ide_timing_compute(drive, XFER_PIO_0 + pio, &t, T, 1); + ide_timing_compute(drive, drive->pio_mode, &t, T, 1); time_16 = clamp_val(t.recover - 1, 0, 15) | (clamp_val(t.active - 1, 0, 15) << 4); diff --git a/drivers/ide/dtc2278.c b/drivers/ide/dtc2278.c index c6b13812298..6929f7fce93 100644 --- a/drivers/ide/dtc2278.c +++ b/drivers/ide/dtc2278.c @@ -68,11 +68,11 @@ static void sub22 (char b, char c) static DEFINE_SPINLOCK(dtc2278_lock); -static void dtc2278_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void dtc2278_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { unsigned long flags; - if (pio >= 3) { + if (drive->pio_mode >= XFER_PIO_3) { spin_lock_irqsave(&dtc2278_lock, flags); /* * This enables PIO mode4 (3?) on the first interface diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c index 4d90ac2dbb1..f1dec519a9e 100644 --- a/drivers/ide/hpt366.c +++ b/drivers/ide/hpt366.c @@ -651,9 +651,9 @@ static void hpt3xx_set_mode(ide_drive_t *drive, const u8 speed) pci_write_config_dword(dev, itr_addr, new_itr); } -static void hpt3xx_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void hpt3xx_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - hpt3xx_set_mode(drive, XFER_PIO_0 + pio); + hpt3xx_set_mode(drive, drive->pio_mode); } static void hpt3xx_maskproc(ide_drive_t *drive, int mask) diff --git a/drivers/ide/ht6560b.c b/drivers/ide/ht6560b.c index aafed8060e1..d81e49680c3 100644 --- a/drivers/ide/ht6560b.c +++ b/drivers/ide/ht6560b.c @@ -279,9 +279,10 @@ static void ht_set_prefetch(ide_drive_t *drive, u8 state) #endif } -static void ht6560b_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void ht6560b_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { unsigned long flags, config; + const u8 pio = drive->pio_mode - XFER_PIO_0; u8 timing; switch (pio) { diff --git a/drivers/ide/ide-devsets.c b/drivers/ide/ide-devsets.c index cb3341ce655..c6935c78757 100644 --- a/drivers/ide/ide-devsets.c +++ b/drivers/ide/ide-devsets.c @@ -112,10 +112,10 @@ static int set_pio_mode(ide_drive_t *drive, int arg) /* take lock for IDE_DFLAG_[NO_]UNMASK/[NO_]IO_32BIT */ spin_lock_irqsave(&hwif->lock, flags); - port_ops->set_pio_mode(drive, arg); + port_ops->set_pio_mode(hwif, drive); spin_unlock_irqrestore(&hwif->lock, flags); } else - port_ops->set_pio_mode(drive, arg); + port_ops->set_pio_mode(hwif, drive); } else { int keep_dma = !!(drive->dev_flags & IDE_DFLAG_USING_DMA); diff --git a/drivers/ide/ide-xfer-mode.c b/drivers/ide/ide-xfer-mode.c index c2323869d92..a62fb03fc1c 100644 --- a/drivers/ide/ide-xfer-mode.c +++ b/drivers/ide/ide-xfer-mode.c @@ -136,7 +136,7 @@ int ide_set_pio_mode(ide_drive_t *drive, const u8 mode) */ if (port_ops->set_dma_mode == NULL) { drive->pio_mode = mode; - port_ops->set_pio_mode(drive, mode - XFER_PIO_0); + port_ops->set_pio_mode(hwif, drive); return 0; } @@ -144,11 +144,11 @@ int ide_set_pio_mode(ide_drive_t *drive, const u8 mode) if (ide_config_drive_speed(drive, mode)) return -1; drive->pio_mode = mode; - port_ops->set_pio_mode(drive, mode - XFER_PIO_0); + port_ops->set_pio_mode(hwif, drive); return 0; } else { drive->pio_mode = mode; - port_ops->set_pio_mode(drive, mode - XFER_PIO_0); + port_ops->set_pio_mode(hwif, drive); return ide_config_drive_speed(drive, mode); } } diff --git a/drivers/ide/it8172.c b/drivers/ide/it8172.c index 0d266a5b524..9dfdc8741a7 100644 --- a/drivers/ide/it8172.c +++ b/drivers/ide/it8172.c @@ -37,12 +37,12 @@ #define DRV_NAME "IT8172" -static void it8172_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void it8172_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); u16 drive_enables; u32 drive_timing; + const u8 pio = drive->pio_mode - XFER_PIO_0; /* * The highest value of DIOR/DIOW pulse width and recovery time @@ -98,14 +98,14 @@ static void it8172_set_dma_mode(ide_drive_t *drive, const u8 speed) pci_write_config_byte(dev, 0x4a, reg4a | u_speed); } else { const u8 mwdma_to_pio[] = { 0, 3, 4 }; - u8 pio; pci_write_config_byte(dev, 0x48, reg48 & ~u_flag); pci_write_config_byte(dev, 0x4a, reg4a & ~a_speed); - pio = mwdma_to_pio[speed - XFER_MW_DMA_0]; + drive->pio_mode = + mwdma_to_pio[speed - XFER_MW_DMA_0] + XFER_PIO_0; - it8172_set_pio_mode(drive, pio); + it8172_set_pio_mode(hwif, drive); } } diff --git a/drivers/ide/it8213.c b/drivers/ide/it8213.c index 47976167796..492c07d5f4f 100644 --- a/drivers/ide/it8213.c +++ b/drivers/ide/it8213.c @@ -17,15 +17,14 @@ /** * it8213_set_pio_mode - set host controller for PIO mode + * @hwif: port * @drive: drive - * @pio: PIO mode number * * Set the interface PIO mode. */ -static void it8213_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void it8213_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); int is_slave = drive->dn & 1; int master_port = 0x40; @@ -35,6 +34,7 @@ static void it8213_set_pio_mode(ide_drive_t *drive, const u8 pio) u8 slave_data; static DEFINE_SPINLOCK(tune_lock); int control = 0; + const u8 pio = drive->pio_mode - XFER_PIO_0; static const u8 timings[][2] = { { 0, 0 }, @@ -120,7 +120,6 @@ static void it8213_set_dma_mode(ide_drive_t *drive, const u8 speed) pci_write_config_byte(dev, 0x54, reg54 & ~v_flag); } else { const u8 mwdma_to_pio[] = { 0, 3, 4 }; - u8 pio; if (reg48 & u_flag) pci_write_config_byte(dev, 0x48, reg48 & ~u_flag); @@ -132,11 +131,12 @@ static void it8213_set_dma_mode(ide_drive_t *drive, const u8 speed) pci_write_config_byte(dev, 0x55, (u8) reg55 & ~w_flag); if (speed >= XFER_MW_DMA_0) - pio = mwdma_to_pio[speed - XFER_MW_DMA_0]; + drive->pio_mode = + mwdma_to_pio[speed - XFER_MW_DMA_0] + XFER_PIO_0; else - pio = 2; /* only SWDMA2 is allowed */ + drive->pio_mode = XFER_PIO_2; /* for SWDMA2 */ - it8213_set_pio_mode(drive, pio); + it8213_set_pio_mode(hwif, drive); } } diff --git a/drivers/ide/it821x.c b/drivers/ide/it821x.c index 51aa745246d..69becb7b965 100644 --- a/drivers/ide/it821x.c +++ b/drivers/ide/it821x.c @@ -228,18 +228,18 @@ static void it821x_clock_strategy(ide_drive_t *drive) /** * it821x_set_pio_mode - set host controller for PIO mode + * @hwif: port * @drive: drive - * @pio: PIO mode number * * Tune the host to the desired PIO mode taking into the consideration * the maximum PIO mode supported by the other device on the cable. */ -static void it821x_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void it821x_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct it821x_dev *itdev = ide_get_hwifdata(hwif); ide_drive_t *pair = ide_get_pair_dev(drive); + const u8 pio = drive->pio_mode - XFER_PIO_0; u8 unit = drive->dn & 1, set_pio = pio; /* Spec says 89 ref driver uses 88 */ diff --git a/drivers/ide/jmicron.c b/drivers/ide/jmicron.c index bf2be6431b2..ebffb904ed2 100644 --- a/drivers/ide/jmicron.c +++ b/drivers/ide/jmicron.c @@ -80,7 +80,7 @@ static u8 jmicron_cable_detect(ide_hwif_t *hwif) return ATA_CBL_PATA80; } -static void jmicron_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void jmicron_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { } diff --git a/drivers/ide/opti621.c b/drivers/ide/opti621.c index 2052788fab7..1a53a4c375e 100644 --- a/drivers/ide/opti621.c +++ b/drivers/ide/opti621.c @@ -62,12 +62,12 @@ static u8 read_reg(int reg) return ret; } -static void opti621_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void opti621_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; ide_drive_t *pair = ide_get_pair_dev(drive); unsigned long flags; - unsigned long mode = XFER_PIO_0 + pio, pair_mode; + unsigned long mode = drive->pio_mode, pair_mode; + const u8 pio = mode - XFER_PIO_0; u8 tim, misc, addr_pio = pio, clk; /* DRDY is default 2 (by OPTi Databook) */ diff --git a/drivers/ide/palm_bk3710.c b/drivers/ide/palm_bk3710.c index f8eddf05ecb..0f262d07c37 100644 --- a/drivers/ide/palm_bk3710.c +++ b/drivers/ide/palm_bk3710.c @@ -203,12 +203,13 @@ static void palm_bk3710_set_dma_mode(ide_drive_t *drive, u8 xferspeed) } } -static void palm_bk3710_set_pio_mode(ide_drive_t *drive, u8 pio) +static void palm_bk3710_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { unsigned int cycle_time; int is_slave = drive->dn & 1; ide_drive_t *mate; - void __iomem *base = (void *)drive->hwif->dma_base; + void __iomem *base = (void *)hwif->dma_base; + const u8 pio = drive->pio_mode - XFER_PIO_0; /* * Obtain the drive PIO data for tuning the Palm Chip registers diff --git a/drivers/ide/pdc202xx_new.c b/drivers/ide/pdc202xx_new.c index 65ba8239e7b..874acd2bb6e 100644 --- a/drivers/ide/pdc202xx_new.c +++ b/drivers/ide/pdc202xx_new.c @@ -167,11 +167,11 @@ static void pdcnew_set_dma_mode(ide_drive_t *drive, const u8 speed) } } -static void pdcnew_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void pdcnew_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); u8 adj = (drive->dn & 1) ? 0x08 : 0x00; + const u8 pio = drive->pio_mode - XFER_PIO_0; if (max_dma_rate(dev) == 4) { set_indexed_reg(hwif, 0x0c + adj, pio_timings[pio].reg0c); diff --git a/drivers/ide/pdc202xx_old.c b/drivers/ide/pdc202xx_old.c index 1d20594ee42..402aab7f3ba 100644 --- a/drivers/ide/pdc202xx_old.c +++ b/drivers/ide/pdc202xx_old.c @@ -76,9 +76,9 @@ static void pdc202xx_set_mode(ide_drive_t *drive, const u8 speed) } } -static void pdc202xx_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void pdc202xx_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - pdc202xx_set_mode(drive, XFER_PIO_0 + pio); + pdc202xx_set_mode(drive, drive->pio_mode); } static int pdc202xx_test_irq(ide_hwif_t *hwif) diff --git a/drivers/ide/piix.c b/drivers/ide/piix.c index bf14f39bd3a..64b3041daa6 100644 --- a/drivers/ide/piix.c +++ b/drivers/ide/piix.c @@ -59,15 +59,14 @@ static int no_piix_dma; /** * piix_set_pio_mode - set host controller for PIO mode + * @port: port * @drive: drive - * @pio: PIO mode number * * Set the interface PIO mode based upon the settings done by AMI BIOS. */ -static void piix_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void piix_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); int is_slave = drive->dn & 1; int master_port = hwif->channel ? 0x42 : 0x40; @@ -77,6 +76,7 @@ static void piix_set_pio_mode(ide_drive_t *drive, const u8 pio) u8 slave_data; static DEFINE_SPINLOCK(tune_lock); int control = 0; + const u8 pio = drive->pio_mode - XFER_PIO_0; /* ISP RTC */ static const u8 timings[][2]= { @@ -176,7 +176,6 @@ static void piix_set_dma_mode(ide_drive_t *drive, const u8 speed) pci_write_config_byte(dev, 0x54, reg54 & ~v_flag); } else { const u8 mwdma_to_pio[] = { 0, 3, 4 }; - u8 pio; if (reg48 & u_flag) pci_write_config_byte(dev, 0x48, reg48 & ~u_flag); @@ -188,11 +187,12 @@ static void piix_set_dma_mode(ide_drive_t *drive, const u8 speed) pci_write_config_byte(dev, 0x55, (u8) reg55 & ~w_flag); if (speed >= XFER_MW_DMA_0) - pio = mwdma_to_pio[speed - XFER_MW_DMA_0]; + drive->pio_mode = + mwdma_to_pio[speed - XFER_MW_DMA_0] + XFER_PIO_0; else - pio = 2; /* only SWDMA2 is allowed */ + drive->pio_mode = XFER_PIO_2; /* for SWDMA2 */ - piix_set_pio_mode(drive, pio); + piix_set_pio_mode(hwif, drive); } } diff --git a/drivers/ide/pmac.c b/drivers/ide/pmac.c index 7a4e788cab2..a167968a2d4 100644 --- a/drivers/ide/pmac.c +++ b/drivers/ide/pmac.c @@ -496,12 +496,11 @@ static void pmac_write_devctl(ide_hwif_t *hwif, u8 ctl) /* * Old tuning functions (called on hdparm -p), sets up drive PIO timings */ -static void -pmac_ide_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void pmac_ide_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; pmac_ide_hwif_t *pmif = (pmac_ide_hwif_t *)dev_get_drvdata(hwif->gendev.parent); + const u8 pio = drive->pio_mode - XFER_PIO_0; struct ide_timing *tim = ide_timing_find_mode(XFER_PIO_0 + pio); u32 *timings, t; unsigned accessTicks, recTicks; diff --git a/drivers/ide/qd65xx.c b/drivers/ide/qd65xx.c index 74696edc8d1..3f0244fd8e6 100644 --- a/drivers/ide/qd65xx.c +++ b/drivers/ide/qd65xx.c @@ -189,15 +189,13 @@ static void qd_set_timing (ide_drive_t *drive, u8 timing) printk(KERN_DEBUG "%s: %#x\n", drive->name, timing); } -static void qd6500_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void qd6500_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { u16 *id = drive->id; int active_time = 175; int recovery_time = 415; /* worst case values from the dos driver */ - /* - * FIXME: use "pio" value - */ + /* FIXME: use drive->pio_mode value */ if (!qd_find_disk_type(drive, &active_time, &recovery_time) && (id[ATA_ID_OLD_PIO_MODES] & 0xff) && (id[ATA_ID_FIELD_VALID] & 2) && id[ATA_ID_EIDE_PIO] >= 240) { @@ -211,9 +209,9 @@ static void qd6500_set_pio_mode(ide_drive_t *drive, const u8 pio) active_time, recovery_time)); } -static void qd6580_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void qd6580_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; + const u8 pio = drive->pio_mode - XFER_PIO_0; struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio); unsigned int cycle_time; int active_time = 175; diff --git a/drivers/ide/sc1200.c b/drivers/ide/sc1200.c index d467478d68d..bb0166e460a 100644 --- a/drivers/ide/sc1200.c +++ b/drivers/ide/sc1200.c @@ -193,10 +193,10 @@ static int sc1200_dma_end(ide_drive_t *drive) * will have valid default PIO timings set up before we get here. */ -static void sc1200_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void sc1200_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; int mode = -1; + const u8 pio = drive->pio_mode - XFER_PIO_0; /* * bad abuse of ->set_pio_mode interface diff --git a/drivers/ide/scc_pata.c b/drivers/ide/scc_pata.c index 1104bb301eb..23e16e4460e 100644 --- a/drivers/ide/scc_pata.c +++ b/drivers/ide/scc_pata.c @@ -199,16 +199,15 @@ scc_ide_outsl(unsigned long port, void *addr, u32 count) /** * scc_set_pio_mode - set host controller for PIO mode + * @hwif: port * @drive: drive - * @pio: PIO mode number * * Load the timing settings for this device mode into the * controller. */ -static void scc_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void scc_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct scc_ports *ports = ide_get_hwifdata(hwif); unsigned long ctl_base = ports->ctl; unsigned long cckctrl_port = ctl_base + 0xff0; @@ -216,6 +215,7 @@ static void scc_set_pio_mode(ide_drive_t *drive, const u8 pio) unsigned long pioct_port = ctl_base + 0x004; unsigned long reg; int offset; + const u8 pio = drive->pio_mode - XFER_PIO_0; reg = in_be32((void __iomem *)cckctrl_port); if (reg & CCKCTRL_ATACLKOEN) { diff --git a/drivers/ide/serverworks.c b/drivers/ide/serverworks.c index 657f0433ec5..a56bc51ae03 100644 --- a/drivers/ide/serverworks.c +++ b/drivers/ide/serverworks.c @@ -106,12 +106,13 @@ static u8 svwks_csb_check (struct pci_dev *dev) return 0; } -static void svwks_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void svwks_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { static const u8 pio_modes[] = { 0x5d, 0x47, 0x34, 0x22, 0x20 }; static const u8 drive_pci[] = { 0x41, 0x40, 0x43, 0x42 }; - struct pci_dev *dev = to_pci_dev(drive->hwif->dev); + struct pci_dev *dev = to_pci_dev(hwif->dev); + const u8 pio = drive->pio_mode - XFER_PIO_0; pci_write_config_byte(dev, drive_pci[drive->dn], pio_modes[pio]); diff --git a/drivers/ide/siimage.c b/drivers/ide/siimage.c index d95df528562..97266958f74 100644 --- a/drivers/ide/siimage.c +++ b/drivers/ide/siimage.c @@ -229,19 +229,18 @@ static u8 sil_sata_udma_filter(ide_drive_t *drive) /** * sil_set_pio_mode - set host controller for PIO mode + * @hwif: port * @drive: drive - * @pio: PIO mode number * * Load the timing settings for this device mode into the * controller. */ -static void sil_set_pio_mode(ide_drive_t *drive, u8 pio) +static void sil_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { static const u16 tf_speed[] = { 0x328a, 0x2283, 0x1281, 0x10c3, 0x10c1 }; static const u16 data_speed[] = { 0x328a, 0x2283, 0x1104, 0x10c3, 0x10c1 }; - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); ide_drive_t *pair = ide_get_pair_dev(drive); u32 speedt = 0; @@ -249,6 +248,7 @@ static void sil_set_pio_mode(ide_drive_t *drive, u8 pio) unsigned long addr = siimage_seldev(drive, 0x04); unsigned long tfaddr = siimage_selreg(hwif, 0x02); unsigned long base = (unsigned long)hwif->hwif_data; + const u8 pio = drive->pio_mode - XFER_PIO_0; u8 tf_pio = pio; u8 mmio = (hwif->host_flags & IDE_HFLAG_MMIO) ? 1 : 0; u8 addr_mask = hwif->channel ? (mmio ? 0xF4 : 0x84) diff --git a/drivers/ide/sis5513.c b/drivers/ide/sis5513.c index 468706082fb..5a019206053 100644 --- a/drivers/ide/sis5513.c +++ b/drivers/ide/sis5513.c @@ -290,10 +290,10 @@ static void config_drive_art_rwp(ide_drive_t *drive) pci_write_config_byte(dev, 0x4b, rw_prefetch); } -static void sis_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void sis_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { config_drive_art_rwp(drive); - sis_program_timings(drive, XFER_PIO_0 + pio); + sis_program_timings(drive, drive->pio_mode); } static void sis_ata133_program_udma_timings(ide_drive_t *drive, const u8 mode) diff --git a/drivers/ide/sl82c105.c b/drivers/ide/sl82c105.c index 3c2bbf0057e..419cd3bc6c8 100644 --- a/drivers/ide/sl82c105.c +++ b/drivers/ide/sl82c105.c @@ -63,12 +63,13 @@ static unsigned int get_pio_timings(ide_drive_t *drive, u8 pio) /* * Configure the chipset for PIO mode. */ -static void sl82c105_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void sl82c105_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - struct pci_dev *dev = to_pci_dev(drive->hwif->dev); + struct pci_dev *dev = to_pci_dev(hwif->dev); unsigned long timings = (unsigned long)ide_get_drivedata(drive); int reg = 0x44 + drive->dn * 4; u16 drv_ctrl; + const u8 pio = drive->pio_mode - XFER_PIO_0; drv_ctrl = get_pio_timings(drive, pio); diff --git a/drivers/ide/slc90e66.c b/drivers/ide/slc90e66.c index 1ccfb40e721..019777522cd 100644 --- a/drivers/ide/slc90e66.c +++ b/drivers/ide/slc90e66.c @@ -18,9 +18,8 @@ static DEFINE_SPINLOCK(slc90e66_lock); -static void slc90e66_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void slc90e66_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); int is_slave = drive->dn & 1; int master_port = hwif->channel ? 0x42 : 0x40; @@ -29,6 +28,8 @@ static void slc90e66_set_pio_mode(ide_drive_t *drive, const u8 pio) u16 master_data; u8 slave_data; int control = 0; + const u8 pio = drive->pio_mode - XFER_PIO_0; + /* ISP RTC */ static const u8 timings[][2] = { { 0, 0 }, @@ -98,7 +99,6 @@ static void slc90e66_set_dma_mode(ide_drive_t *drive, const u8 speed) } } else { const u8 mwdma_to_pio[] = { 0, 3, 4 }; - u8 pio; if (reg48 & u_flag) pci_write_config_word(dev, 0x48, reg48 & ~u_flag); @@ -106,11 +106,12 @@ static void slc90e66_set_dma_mode(ide_drive_t *drive, const u8 speed) pci_write_config_word(dev, 0x4a, reg4a & ~a_speed); if (speed >= XFER_MW_DMA_0) - pio = mwdma_to_pio[speed - XFER_MW_DMA_0]; + drive->pio_mode = + mwdma_to_pio[speed - XFER_MW_DMA_0] + XFER_PIO_0; else - pio = 2; /* only SWDMA2 is allowed */ + drive->pio_mode = XFER_PIO_2; /* for SWDMA2 */ - slc90e66_set_pio_mode(drive, pio); + slc90e66_set_pio_mode(hwif, drive); } } diff --git a/drivers/ide/tc86c001.c b/drivers/ide/tc86c001.c index 05a93d6baec..f2cb62bf3f2 100644 --- a/drivers/ide/tc86c001.c +++ b/drivers/ide/tc86c001.c @@ -41,9 +41,9 @@ static void tc86c001_set_mode(ide_drive_t *drive, const u8 speed) outw(scr, scr_port); } -static void tc86c001_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void tc86c001_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - tc86c001_set_mode(drive, XFER_PIO_0 + pio); + tc86c001_set_mode(drive, drive->pio_mode); } /* diff --git a/drivers/ide/triflex.c b/drivers/ide/triflex.c index 8773c3ba746..d34a7eecdea 100644 --- a/drivers/ide/triflex.c +++ b/drivers/ide/triflex.c @@ -82,9 +82,9 @@ static void triflex_set_mode(ide_drive_t *drive, const u8 speed) pci_write_config_dword(dev, channel_offset, triflex_timings); } -static void triflex_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void triflex_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - triflex_set_mode(drive, XFER_PIO_0 + pio); + triflex_set_mode(drive, drive->pio_mode); } static const struct ide_port_ops triflex_port_ops = { diff --git a/drivers/ide/tx4938ide.c b/drivers/ide/tx4938ide.c index fd59c0d235b..326d4683488 100644 --- a/drivers/ide/tx4938ide.c +++ b/drivers/ide/tx4938ide.c @@ -56,11 +56,10 @@ static void tx4938ide_tune_ebusc(unsigned int ebus_ch, &tx4938_ebuscptr->cr[ebus_ch]); } -static void tx4938ide_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void tx4938ide_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct tx4938ide_platform_info *pdata = hwif->dev->platform_data; - u8 safe = pio; + u8 safe = drive->pio_mode - XFER_PIO_0; ide_drive_t *pair; pair = ide_get_pair_dev(drive); diff --git a/drivers/ide/tx4939ide.c b/drivers/ide/tx4939ide.c index 64b58ecc3f0..5228a4786de 100644 --- a/drivers/ide/tx4939ide.c +++ b/drivers/ide/tx4939ide.c @@ -104,11 +104,11 @@ static void tx4939ide_writeb(u8 val, void __iomem *base, u32 reg) #define TX4939IDE_BASE(hwif) ((void __iomem *)(hwif)->extra_base) -static void tx4939ide_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void tx4939ide_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; int is_slave = drive->dn; u32 mask, val; + const u8 pio = drive->pio_mode - XFER_PIO_0; u8 safe = pio; ide_drive_t *pair; diff --git a/drivers/ide/umc8672.c b/drivers/ide/umc8672.c index 60f936e2319..47adcd09cb2 100644 --- a/drivers/ide/umc8672.c +++ b/drivers/ide/umc8672.c @@ -104,10 +104,11 @@ static void umc_set_speeds(u8 speeds[]) speeds[0], speeds[1], speeds[2], speeds[3]); } -static void umc_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void umc_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif, *mate = hwif->mate; + ide_hwif_t *mate = hwif->mate; unsigned long uninitialized_var(flags); + const u8 pio = drive->pio_mode - XFER_PIO_0; printk("%s: setting umc8672 to PIO mode%d (speed %d)\n", drive->name, pio, pio_to_umc[pio]); diff --git a/drivers/ide/via82cxxx.c b/drivers/ide/via82cxxx.c index fbecf8ea820..6d995fc9d4f 100644 --- a/drivers/ide/via82cxxx.c +++ b/drivers/ide/via82cxxx.c @@ -208,15 +208,15 @@ static void via_set_drive(ide_drive_t *drive, const u8 speed) /** * via_set_pio_mode - set host controller for PIO mode + * @hwif: port * @drive: drive - * @pio: PIO mode number * * A callback from the upper layers for PIO-only tuning. */ -static void via_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void via_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - via_set_drive(drive, XFER_PIO_0 + pio); + via_set_drive(drive, drive->pio_mode); } static struct via_isa_bridge *via_config_find(struct pci_dev **isa) diff --git a/include/linux/ide.h b/include/linux/ide.h index 746ef9fdabc..803ec306883 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -624,7 +624,7 @@ extern const struct ide_tp_ops default_tp_ops; */ struct ide_port_ops { void (*init_dev)(ide_drive_t *); - void (*set_pio_mode)(ide_drive_t *, const u8); + void (*set_pio_mode)(struct hwif_s *, ide_drive_t *); void (*set_dma_mode)(ide_drive_t *, const u8); int (*reset_poll)(ide_drive_t *); void (*pre_reset)(ide_drive_t *); -- cgit v1.2.3-70-g09d2 From 8776168ca2151850164af1de5565d01f7b8b2c53 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 19 Jan 2010 01:45:29 -0800 Subject: ide: change ->set_dma_mode method parameters Change ->set_dma_mode method parameters to match ->set_dmamode method used in struct ata_port_operations. Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/aec62xx.c | 10 +++++----- drivers/ide/alim15x3.c | 6 +++--- drivers/ide/amd74xx.c | 7 ++++--- drivers/ide/atiixp.c | 7 ++++--- drivers/ide/au1xxx-ide.c | 4 ++-- drivers/ide/cmd64x.c | 4 ++-- drivers/ide/cs5520.c | 4 ++-- drivers/ide/cs5530.c | 6 +++--- drivers/ide/cs5535.c | 6 +++--- drivers/ide/cs5536.c | 7 ++++--- drivers/ide/cy82c693.c | 4 ++-- drivers/ide/hpt366.c | 7 ++++--- drivers/ide/icside.c | 3 ++- drivers/ide/ide-xfer-mode.c | 4 ++-- drivers/ide/it8172.c | 4 ++-- drivers/ide/it8213.c | 6 +++--- drivers/ide/it821x.c | 6 ++++-- drivers/ide/jmicron.c | 4 ++-- drivers/ide/palm_bk3710.c | 5 +++-- drivers/ide/pdc202xx_new.c | 4 ++-- drivers/ide/pdc202xx_old.c | 7 ++++--- drivers/ide/piix.c | 6 +++--- drivers/ide/pmac.c | 4 ++-- drivers/ide/sc1200.c | 4 ++-- drivers/ide/scc_pata.c | 6 +++--- drivers/ide/serverworks.c | 4 ++-- drivers/ide/sgiioc4.c | 2 +- drivers/ide/siimage.c | 6 +++--- drivers/ide/sis5513.c | 4 +++- drivers/ide/sl82c105.c | 3 ++- drivers/ide/slc90e66.c | 4 ++-- drivers/ide/tc86c001.c | 7 ++++--- drivers/ide/triflex.c | 8 ++++---- drivers/ide/tx4939ide.c | 4 ++-- drivers/ide/via82cxxx.c | 9 +++++---- include/linux/ide.h | 2 +- 36 files changed, 101 insertions(+), 87 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/aec62xx.c b/drivers/ide/aec62xx.c index 3790847361c..57d00caefc8 100644 --- a/drivers/ide/aec62xx.c +++ b/drivers/ide/aec62xx.c @@ -81,15 +81,15 @@ static u8 pci_bus_clock_list_ultra (u8 speed, struct chipset_bus_clock_list_entr return chipset_table->ultra_settings; } -static void aec6210_set_mode(ide_drive_t *drive, const u8 speed) +static void aec6210_set_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); struct ide_host *host = pci_get_drvdata(dev); struct chipset_bus_clock_list_entry *bus_clock = host->host_priv; u16 d_conf = 0; u8 ultra = 0, ultra_conf = 0; u8 tmp0 = 0, tmp1 = 0, tmp2 = 0; + const u8 speed = drive->dma_mode; unsigned long flags; local_irq_save(flags); @@ -109,15 +109,15 @@ static void aec6210_set_mode(ide_drive_t *drive, const u8 speed) local_irq_restore(flags); } -static void aec6260_set_mode(ide_drive_t *drive, const u8 speed) +static void aec6260_set_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); struct ide_host *host = pci_get_drvdata(dev); struct chipset_bus_clock_list_entry *bus_clock = host->host_priv; u8 unit = drive->dn & 1; u8 tmp1 = 0, tmp2 = 0; u8 ultra = 0, drive_conf = 0, ultra_conf = 0; + const u8 speed = drive->dma_mode; unsigned long flags; local_irq_save(flags); @@ -137,7 +137,7 @@ static void aec6260_set_mode(ide_drive_t *drive, const u8 speed) static void aec_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { drive->dma_mode = drive->pio_mode; - hwif->port_ops->set_dma_mode(drive, drive->dma_mode); + hwif->port_ops->set_dma_mode(hwif, drive); } static int init_chipset_aec62xx(struct pci_dev *dev) diff --git a/drivers/ide/alim15x3.c b/drivers/ide/alim15x3.c index 28cee1055f7..6f0debae4e2 100644 --- a/drivers/ide/alim15x3.c +++ b/drivers/ide/alim15x3.c @@ -121,16 +121,16 @@ static u8 ali_udma_filter(ide_drive_t *drive) /** * ali_set_dma_mode - set host controller for DMA mode + * @hwif: port * @drive: drive - * @speed: DMA mode * * Configure the hardware for the desired IDE transfer mode. */ -static void ali_set_dma_mode(ide_drive_t *drive, const u8 speed) +static void ali_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); + const u8 speed = drive->dma_mode; u8 speed1 = speed; u8 unit = drive->dn & 1; u8 tmpbyte = 0x00; diff --git a/drivers/ide/amd74xx.c b/drivers/ide/amd74xx.c index 3eee7be7ca6..b7e10533820 100644 --- a/drivers/ide/amd74xx.c +++ b/drivers/ide/amd74xx.c @@ -79,14 +79,14 @@ static void amd_set_speed(struct pci_dev *dev, u8 dn, u8 udma_mask, * to a desired transfer mode. It also can be called by upper layers. */ -static void amd_set_drive(ide_drive_t *drive, const u8 speed) +static void amd_set_drive(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); ide_drive_t *peer = ide_get_pair_dev(drive); struct ide_timing t, p; int T, UT; u8 udma_mask = hwif->ultra_mask; + const u8 speed = drive->dma_mode; T = 1000000000 / amd_clock; UT = (udma_mask == ATA_UDMA2) ? T : (T / 2); @@ -110,7 +110,8 @@ static void amd_set_drive(ide_drive_t *drive, const u8 speed) static void amd_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - amd_set_drive(drive, drive->pio_mode); + drive->dma_mode = drive->pio_mode; + amd_set_drive(hwif, drive); } static void amd7409_cable_detect(struct pci_dev *dev) diff --git a/drivers/ide/atiixp.c b/drivers/ide/atiixp.c index b6848dfb93b..15f0ead89f5 100644 --- a/drivers/ide/atiixp.c +++ b/drivers/ide/atiixp.c @@ -75,21 +75,22 @@ static void atiixp_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) /** * atiixp_set_dma_mode - set host controller for DMA mode + * @hwif: port * @drive: drive - * @speed: DMA mode * * Set a ATIIXP host controller to the desired DMA mode. This involves * programming the right timing data into the PCI configuration space. */ -static void atiixp_set_dma_mode(ide_drive_t *drive, const u8 speed) +static void atiixp_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - struct pci_dev *dev = to_pci_dev(drive->hwif->dev); + struct pci_dev *dev = to_pci_dev(hwif->dev); unsigned long flags; int timing_shift = (drive->dn ^ 1) * 8; u32 tmp32; u16 tmp16; u16 udma_ctl = 0; + const u8 speed = drive->dma_mode; spin_lock_irqsave(&atiixp_lock, flags); diff --git a/drivers/ide/au1xxx-ide.c b/drivers/ide/au1xxx-ide.c index c90e9b0a9f6..e2fd378ba9d 100644 --- a/drivers/ide/au1xxx-ide.c +++ b/drivers/ide/au1xxx-ide.c @@ -160,11 +160,11 @@ static void au1xxx_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) au_writel(mem_stcfg,MEM_STCFG2); } -static void auide_set_dma_mode(ide_drive_t *drive, const u8 speed) +static void auide_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { int mem_sttime = 0, mem_stcfg = au_readl(MEM_STCFG2); - switch(speed) { + switch (drive->dma_mode) { #ifdef CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA case XFER_MW_DMA_2: mem_sttime = SBC_IDE_TIMING(MDMA2); diff --git a/drivers/ide/cmd64x.c b/drivers/ide/cmd64x.c index 0b11745937e..a65a6917125 100644 --- a/drivers/ide/cmd64x.c +++ b/drivers/ide/cmd64x.c @@ -141,12 +141,12 @@ static void cmd64x_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) cmd64x_program_timings(drive, XFER_PIO_0 + pio); } -static void cmd64x_set_dma_mode(ide_drive_t *drive, const u8 speed) +static void cmd64x_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); u8 unit = drive->dn & 0x01; u8 regU = 0, pciU = hwif->channel ? UDIDETCR1 : UDIDETCR0; + const u8 speed = drive->dma_mode; pci_read_config_byte(dev, pciU, ®U); regU &= ~(unit ? 0xCA : 0x35); diff --git a/drivers/ide/cs5520.c b/drivers/ide/cs5520.c index b8094f049f3..2c1e5f7cd26 100644 --- a/drivers/ide/cs5520.c +++ b/drivers/ide/cs5520.c @@ -81,12 +81,12 @@ static void cs5520_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) (cs5520_pio_clocks[pio].assert)); } -static void cs5520_set_dma_mode(ide_drive_t *drive, const u8 speed) +static void cs5520_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { printk(KERN_ERR "cs55x0: bad ide timing.\n"); drive->pio_mode = XFER_PIO_0 + 0; - cs5520_set_pio_mode(drive->hwif, drive); + cs5520_set_pio_mode(hwif, drive); } static const struct ide_port_ops cs5520_port_ops = { diff --git a/drivers/ide/cs5530.c b/drivers/ide/cs5530.c index 4ced40255ad..4dc4eb92b07 100644 --- a/drivers/ide/cs5530.c +++ b/drivers/ide/cs5530.c @@ -100,12 +100,12 @@ out: return mask; } -static void cs5530_set_dma_mode(ide_drive_t *drive, const u8 mode) +static void cs5530_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { unsigned long basereg; unsigned int reg, timings = 0; - switch (mode) { + switch (drive->dma_mode) { case XFER_UDMA_0: timings = 0x00921250; break; case XFER_UDMA_1: timings = 0x00911140; break; case XFER_UDMA_2: timings = 0x00911030; break; @@ -113,7 +113,7 @@ static void cs5530_set_dma_mode(ide_drive_t *drive, const u8 mode) case XFER_MW_DMA_1: timings = 0x00012121; break; case XFER_MW_DMA_2: timings = 0x00002020; break; } - basereg = CS5530_BASEREG(drive->hwif); + basereg = CS5530_BASEREG(hwif); reg = inl(basereg + 4); /* get drive0 config register */ timings |= reg & 0x80000000; /* preserve PIO format bit */ if ((drive-> dn & 1) == 0) { /* are we configuring drive0? */ diff --git a/drivers/ide/cs5535.c b/drivers/ide/cs5535.c index 7974415ea89..740002b2f3e 100644 --- a/drivers/ide/cs5535.c +++ b/drivers/ide/cs5535.c @@ -129,15 +129,15 @@ static void cs5535_set_speed(ide_drive_t *drive, const u8 speed) /** * cs5535_set_dma_mode - set host controller for DMA mode + * @hwif: port * @drive: drive - * @speed: DMA mode * * Programs the chipset for DMA mode. */ -static void cs5535_set_dma_mode(ide_drive_t *drive, const u8 speed) +static void cs5535_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - cs5535_set_speed(drive, speed); + cs5535_set_speed(drive, drive->dma_mode); } /** diff --git a/drivers/ide/cs5536.c b/drivers/ide/cs5536.c index b518ef0e9a3..70871fbc3c0 100644 --- a/drivers/ide/cs5536.c +++ b/drivers/ide/cs5536.c @@ -173,11 +173,11 @@ static void cs5536_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) /** * cs5536_set_dma_mode - DMA timing setup + * @hwif: ATA port * @drive: ATA device - * @mode: DMA mode */ -static void cs5536_set_dma_mode(ide_drive_t *drive, const u8 mode) +static void cs5536_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { static const u8 udma_timings[6] = { 0xc2, 0xc1, 0xc0, 0xc4, 0xc5, 0xc6, @@ -187,10 +187,11 @@ static void cs5536_set_dma_mode(ide_drive_t *drive, const u8 mode) 0x67, 0x21, 0x20, }; - struct pci_dev *pdev = to_pci_dev(drive->hwif->dev); + struct pci_dev *pdev = to_pci_dev(hwif->dev); int dshift = (drive->dn & 1) ? IDE_D1_SHIFT : IDE_D0_SHIFT; unsigned long timings = (unsigned long)ide_get_drivedata(drive); u32 etc; + const u8 mode = drive->dma_mode; cs5536_read(pdev, ETC, &etc); diff --git a/drivers/ide/cy82c693.c b/drivers/ide/cy82c693.c index ead65c394f0..9383f67deae 100644 --- a/drivers/ide/cy82c693.c +++ b/drivers/ide/cy82c693.c @@ -53,9 +53,9 @@ * set DMA mode a specific channel for CY82C693 */ -static void cy82c693_set_dma_mode(ide_drive_t *drive, const u8 mode) +static void cy82c693_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; + const u8 mode = drive->dma_mode; u8 single = (mode & 0x10) >> 4, index = 0, data = 0; index = hwif->channel ? CY82_INDEX_CHANNEL1 : CY82_INDEX_CHANNEL0; diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c index f1dec519a9e..b885c1d548f 100644 --- a/drivers/ide/hpt366.c +++ b/drivers/ide/hpt366.c @@ -627,14 +627,14 @@ static u32 get_speed_setting(u8 speed, struct hpt_info *info) return info->timings->clock_table[info->clock][i]; } -static void hpt3xx_set_mode(ide_drive_t *drive, const u8 speed) +static void hpt3xx_set_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); struct hpt_info *info = hpt3xx_get_info(hwif->dev); struct hpt_timings *t = info->timings; u8 itr_addr = 0x40 + (drive->dn * 4); u32 old_itr = 0; + const u8 speed = drive->dma_mode; u32 new_itr = get_speed_setting(speed, info); u32 itr_mask = speed < XFER_MW_DMA_0 ? t->pio_mask : (speed < XFER_UDMA_0 ? t->dma_mask : @@ -653,7 +653,8 @@ static void hpt3xx_set_mode(ide_drive_t *drive, const u8 speed) static void hpt3xx_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - hpt3xx_set_mode(drive, drive->pio_mode); + drive->dma_mode = drive->pio_mode; + hpt3xx_set_mode(hwif, drive); } static void hpt3xx_maskproc(ide_drive_t *drive, int mask) diff --git a/drivers/ide/icside.c b/drivers/ide/icside.c index 0f67f1abbbd..26b6c0a1f77 100644 --- a/drivers/ide/icside.c +++ b/drivers/ide/icside.c @@ -185,10 +185,11 @@ static const expansioncard_ops_t icside_ops_arcin_v6 = { * MW1 80 50 50 150 C * MW2 70 25 25 120 C */ -static void icside_set_dma_mode(ide_drive_t *drive, const u8 xfer_mode) +static void icside_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { unsigned long cycle_time; int use_dma_info = 0; + const u8 xfer_mode = drive->dma_mode; switch (xfer_mode) { case XFER_MW_DMA_2: diff --git a/drivers/ide/ide-xfer-mode.c b/drivers/ide/ide-xfer-mode.c index a62fb03fc1c..9b549e4d184 100644 --- a/drivers/ide/ide-xfer-mode.c +++ b/drivers/ide/ide-xfer-mode.c @@ -168,11 +168,11 @@ int ide_set_dma_mode(ide_drive_t *drive, const u8 mode) if (ide_config_drive_speed(drive, mode)) return -1; drive->dma_mode = mode; - port_ops->set_dma_mode(drive, mode); + port_ops->set_dma_mode(hwif, drive); return 0; } else { drive->dma_mode = mode; - port_ops->set_dma_mode(drive, mode); + port_ops->set_dma_mode(hwif, drive); return ide_config_drive_speed(drive, mode); } } diff --git a/drivers/ide/it8172.c b/drivers/ide/it8172.c index 9dfdc8741a7..560e66d0765 100644 --- a/drivers/ide/it8172.c +++ b/drivers/ide/it8172.c @@ -77,14 +77,14 @@ static void it8172_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) pci_write_config_dword(dev, 0x44, drive_timing); } -static void it8172_set_dma_mode(ide_drive_t *drive, const u8 speed) +static void it8172_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); int a_speed = 3 << (drive->dn * 4); int u_flag = 1 << drive->dn; int u_speed = 0; u8 reg48, reg4a; + const u8 speed = drive->dma_mode; pci_read_config_byte(dev, 0x48, ®48); pci_read_config_byte(dev, 0x4a, ®4a); diff --git a/drivers/ide/it8213.c b/drivers/ide/it8213.c index 492c07d5f4f..46816ba2641 100644 --- a/drivers/ide/it8213.c +++ b/drivers/ide/it8213.c @@ -74,15 +74,14 @@ static void it8213_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) /** * it8213_set_dma_mode - set host controller for DMA mode + * @hwif: port * @drive: drive - * @speed: DMA mode * * Tune the ITE chipset for the DMA mode. */ -static void it8213_set_dma_mode(ide_drive_t *drive, const u8 speed) +static void it8213_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); u8 maslave = 0x40; int a_speed = 3 << (drive->dn * 4); @@ -92,6 +91,7 @@ static void it8213_set_dma_mode(ide_drive_t *drive, const u8 speed) int u_speed = 0; u16 reg4042, reg4a; u8 reg48, reg54, reg55; + const u8 speed = drive->dma_mode; pci_read_config_word(dev, maslave, ®4042); pci_read_config_byte(dev, 0x48, ®48); diff --git a/drivers/ide/it821x.c b/drivers/ide/it821x.c index 69becb7b965..56b79194156 100644 --- a/drivers/ide/it821x.c +++ b/drivers/ide/it821x.c @@ -393,14 +393,16 @@ static int it821x_dma_end(ide_drive_t *drive) /** * it821x_set_dma_mode - set host controller for DMA mode + * @hwif: port * @drive: drive - * @speed: DMA mode * * Tune the ITE chipset for the desired DMA mode. */ -static void it821x_set_dma_mode(ide_drive_t *drive, const u8 speed) +static void it821x_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { + const u8 speed = drive->dma_mode; + /* * MWDMA tuning is really hard because our MWDMA and PIO * timings are kept in the same place. We can switch in the diff --git a/drivers/ide/jmicron.c b/drivers/ide/jmicron.c index ebffb904ed2..74c2c4a6d90 100644 --- a/drivers/ide/jmicron.c +++ b/drivers/ide/jmicron.c @@ -86,13 +86,13 @@ static void jmicron_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) /** * jmicron_set_dma_mode - set host controller for DMA mode + * @hwif: port * @drive: drive - * @mode: DMA mode * * As the JMicron snoops for timings we don't need to do anything here. */ -static void jmicron_set_dma_mode(ide_drive_t *drive, const u8 mode) +static void jmicron_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { } diff --git a/drivers/ide/palm_bk3710.c b/drivers/ide/palm_bk3710.c index 0f262d07c37..35448c91b8c 100644 --- a/drivers/ide/palm_bk3710.c +++ b/drivers/ide/palm_bk3710.c @@ -188,10 +188,11 @@ static void palm_bk3710_setpiomode(void __iomem *base, ide_drive_t *mate, writel(val32, base + BK3710_REGRCVR); } -static void palm_bk3710_set_dma_mode(ide_drive_t *drive, u8 xferspeed) +static void palm_bk3710_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { int is_slave = drive->dn & 1; - void __iomem *base = (void *)drive->hwif->dma_base; + void __iomem *base = (void *)hwif->dma_base; + const u8 xferspeed = drive->dma_mode; if (xferspeed >= XFER_UDMA_0) { palm_bk3710_setudmamode(base, is_slave, diff --git a/drivers/ide/pdc202xx_new.c b/drivers/ide/pdc202xx_new.c index 874acd2bb6e..9546fe2a93f 100644 --- a/drivers/ide/pdc202xx_new.c +++ b/drivers/ide/pdc202xx_new.c @@ -129,11 +129,11 @@ static struct udma_timing { { 0x1a, 0x01, 0xcb }, /* UDMA mode 6 */ }; -static void pdcnew_set_dma_mode(ide_drive_t *drive, const u8 speed) +static void pdcnew_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); u8 adj = (drive->dn & 1) ? 0x08 : 0x00; + const u8 speed = drive->dma_mode; /* * IDE core issues SETFEATURES_XFER to the drive first (thanks to diff --git a/drivers/ide/pdc202xx_old.c b/drivers/ide/pdc202xx_old.c index 402aab7f3ba..07cd37516ba 100644 --- a/drivers/ide/pdc202xx_old.c +++ b/drivers/ide/pdc202xx_old.c @@ -21,11 +21,11 @@ #define DRV_NAME "pdc202xx_old" -static void pdc202xx_set_mode(ide_drive_t *drive, const u8 speed) +static void pdc202xx_set_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); u8 drive_pci = 0x60 + (drive->dn << 2); + const u8 speed = drive->dma_mode; u8 AP = 0, BP = 0, CP = 0; u8 TA = 0, TB = 0, TC = 0; @@ -78,7 +78,8 @@ static void pdc202xx_set_mode(ide_drive_t *drive, const u8 speed) static void pdc202xx_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - pdc202xx_set_mode(drive, drive->pio_mode); + drive->dma_mode = drive->pio_mode; + pdc202xx_set_mode(hwif, drive); } static int pdc202xx_test_irq(ide_hwif_t *hwif) diff --git a/drivers/ide/piix.c b/drivers/ide/piix.c index 64b3041daa6..1bdca49e5a0 100644 --- a/drivers/ide/piix.c +++ b/drivers/ide/piix.c @@ -127,16 +127,15 @@ static void piix_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) /** * piix_set_dma_mode - set host controller for DMA mode + * @hwif: port * @drive: drive - * @speed: DMA mode * * Set a PIIX host controller to the desired DMA mode. This involves * programming the right timing data into the PCI configuration space. */ -static void piix_set_dma_mode(ide_drive_t *drive, const u8 speed) +static void piix_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); u8 maslave = hwif->channel ? 0x42 : 0x40; int a_speed = 3 << (drive->dn * 4); @@ -147,6 +146,7 @@ static void piix_set_dma_mode(ide_drive_t *drive, const u8 speed) int sitre; u16 reg4042, reg4a; u8 reg48, reg54, reg55; + const u8 speed = drive->dma_mode; pci_read_config_word(dev, maslave, ®4042); sitre = (reg4042 & 0x4000) ? 1 : 0; diff --git a/drivers/ide/pmac.c b/drivers/ide/pmac.c index a167968a2d4..9fae1fb1468 100644 --- a/drivers/ide/pmac.c +++ b/drivers/ide/pmac.c @@ -777,14 +777,14 @@ set_timings_mdma(ide_drive_t *drive, int intf_type, u32 *timings, u32 *timings2, #endif } -static void pmac_ide_set_dma_mode(ide_drive_t *drive, const u8 speed) +static void pmac_ide_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; pmac_ide_hwif_t *pmif = (pmac_ide_hwif_t *)dev_get_drvdata(hwif->gendev.parent); int ret = 0; u32 *timings, *timings2, tl[2]; u8 unit = drive->dn & 1; + const u8 speed = drive->dma_mode; timings = &pmif->timings[unit]; timings2 = &pmif->timings[unit+2]; diff --git a/drivers/ide/sc1200.c b/drivers/ide/sc1200.c index bb0166e460a..134f1fd1386 100644 --- a/drivers/ide/sc1200.c +++ b/drivers/ide/sc1200.c @@ -122,13 +122,13 @@ out: return mask; } -static void sc1200_set_dma_mode(ide_drive_t *drive, const u8 mode) +static void sc1200_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); unsigned int reg, timings; unsigned short pci_clock; unsigned int basereg = hwif->channel ? 0x50 : 0x40; + const u8 mode = drive->dma_mode; static const u32 udma_timing[3][3] = { { 0x00921250, 0x00911140, 0x00911030 }, diff --git a/drivers/ide/scc_pata.c b/drivers/ide/scc_pata.c index 23e16e4460e..e9d4b441d1c 100644 --- a/drivers/ide/scc_pata.c +++ b/drivers/ide/scc_pata.c @@ -231,16 +231,15 @@ static void scc_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) /** * scc_set_dma_mode - set host controller for DMA mode + * @hwif: port * @drive: drive - * @speed: DMA mode * * Load the timing settings for this device mode into the * controller. */ -static void scc_set_dma_mode(ide_drive_t *drive, const u8 speed) +static void scc_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct scc_ports *ports = ide_get_hwifdata(hwif); unsigned long ctl_base = ports->ctl; unsigned long cckctrl_port = ctl_base + 0xff0; @@ -254,6 +253,7 @@ static void scc_set_dma_mode(ide_drive_t *drive, const u8 speed) int offset, idx; unsigned long reg; unsigned long jcactsel; + const u8 speed = drive->dma_mode; reg = in_be32((void __iomem *)cckctrl_port); if (reg & CCKCTRL_ATACLKOEN) { diff --git a/drivers/ide/serverworks.c b/drivers/ide/serverworks.c index a56bc51ae03..35fb8dabb55 100644 --- a/drivers/ide/serverworks.c +++ b/drivers/ide/serverworks.c @@ -128,14 +128,14 @@ static void svwks_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) } } -static void svwks_set_dma_mode(ide_drive_t *drive, const u8 speed) +static void svwks_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { static const u8 udma_modes[] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05 }; static const u8 dma_modes[] = { 0x77, 0x21, 0x20 }; static const u8 drive_pci2[] = { 0x45, 0x44, 0x47, 0x46 }; - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); + const u8 speed = drive->dma_mode; u8 unit = drive->dn & 1; u8 ultra_enable = 0, ultra_timing = 0, dma_timing = 0; diff --git a/drivers/ide/sgiioc4.c b/drivers/ide/sgiioc4.c index b7d61dc6409..e3ea591f66d 100644 --- a/drivers/ide/sgiioc4.c +++ b/drivers/ide/sgiioc4.c @@ -255,7 +255,7 @@ static int sgiioc4_dma_end(ide_drive_t *drive) return dma_stat; } -static void sgiioc4_set_dma_mode(ide_drive_t *drive, const u8 speed) +static void sgiioc4_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { } diff --git a/drivers/ide/siimage.c b/drivers/ide/siimage.c index 97266958f74..2009ac2ff65 100644 --- a/drivers/ide/siimage.c +++ b/drivers/ide/siimage.c @@ -289,19 +289,18 @@ static void sil_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) /** * sil_set_dma_mode - set host controller for DMA mode + * @hwif: port * @drive: drive - * @speed: DMA mode * * Tune the SiI chipset for the desired DMA mode. */ -static void sil_set_dma_mode(ide_drive_t *drive, const u8 speed) +static void sil_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { static const u8 ultra6[] = { 0x0F, 0x0B, 0x07, 0x05, 0x03, 0x02, 0x01 }; static const u8 ultra5[] = { 0x0C, 0x07, 0x05, 0x04, 0x02, 0x01 }; static const u16 dma[] = { 0x2208, 0x10C2, 0x10C1 }; - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); unsigned long base = (unsigned long)hwif->hwif_data; u16 ultra = 0, multi = 0; @@ -311,6 +310,7 @@ static void sil_set_dma_mode(ide_drive_t *drive, const u8 speed) : (mmio ? 0xB4 : 0x80); unsigned long ma = siimage_seldev(drive, 0x08); unsigned long ua = siimage_seldev(drive, 0x0C); + const u8 speed = drive->dma_mode; scsc = sil_ioread8 (dev, base + (mmio ? 0x4A : 0x8A)); mode = sil_ioread8 (dev, base + addr_mask); diff --git a/drivers/ide/sis5513.c b/drivers/ide/sis5513.c index 5a019206053..db7f4e761db 100644 --- a/drivers/ide/sis5513.c +++ b/drivers/ide/sis5513.c @@ -340,8 +340,10 @@ static void sis_program_udma_timings(ide_drive_t *drive, const u8 mode) sis_ata33_program_udma_timings(drive, mode); } -static void sis_set_dma_mode(ide_drive_t *drive, const u8 speed) +static void sis_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { + const u8 speed = drive->dma_mode; + if (speed >= XFER_UDMA_0) sis_program_udma_timings(drive, speed); else diff --git a/drivers/ide/sl82c105.c b/drivers/ide/sl82c105.c index 419cd3bc6c8..f21dc2ad768 100644 --- a/drivers/ide/sl82c105.c +++ b/drivers/ide/sl82c105.c @@ -92,11 +92,12 @@ static void sl82c105_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) /* * Configure the chipset for DMA mode. */ -static void sl82c105_set_dma_mode(ide_drive_t *drive, const u8 speed) +static void sl82c105_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { static u16 mwdma_timings[] = {0x0707, 0x0201, 0x0200}; unsigned long timings = (unsigned long)ide_get_drivedata(drive); u16 drv_ctrl; + const u8 speed = drive->dma_mode; drv_ctrl = mwdma_timings[speed - XFER_MW_DMA_0]; diff --git a/drivers/ide/slc90e66.c b/drivers/ide/slc90e66.c index 019777522cd..864ffe0e26d 100644 --- a/drivers/ide/slc90e66.c +++ b/drivers/ide/slc90e66.c @@ -72,14 +72,14 @@ static void slc90e66_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) spin_unlock_irqrestore(&slc90e66_lock, flags); } -static void slc90e66_set_dma_mode(ide_drive_t *drive, const u8 speed) +static void slc90e66_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); u8 maslave = hwif->channel ? 0x42 : 0x40; int sitre = 0, a_speed = 7 << (drive->dn * 4); int u_speed = 0, u_flag = 1 << drive->dn; u16 reg4042, reg44, reg48, reg4a; + const u8 speed = drive->dma_mode; pci_read_config_word(dev, maslave, ®4042); sitre = (reg4042 & 0x4000) ? 1 : 0; diff --git a/drivers/ide/tc86c001.c b/drivers/ide/tc86c001.c index f2cb62bf3f2..e444d24934b 100644 --- a/drivers/ide/tc86c001.c +++ b/drivers/ide/tc86c001.c @@ -13,11 +13,11 @@ #define DRV_NAME "tc86c001" -static void tc86c001_set_mode(ide_drive_t *drive, const u8 speed) +static void tc86c001_set_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; unsigned long scr_port = hwif->config_data + (drive->dn ? 0x02 : 0x00); u16 mode, scr = inw(scr_port); + const u8 speed = drive->dma_mode; switch (speed) { case XFER_UDMA_4: mode = 0x00c0; break; @@ -43,7 +43,8 @@ static void tc86c001_set_mode(ide_drive_t *drive, const u8 speed) static void tc86c001_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - tc86c001_set_mode(drive, drive->pio_mode); + drive->dma_mode = drive->pio_mode; + tc86c001_set_mode(hwif, drive); } /* diff --git a/drivers/ide/triflex.c b/drivers/ide/triflex.c index d34a7eecdea..7953447eae0 100644 --- a/drivers/ide/triflex.c +++ b/drivers/ide/triflex.c @@ -34,9 +34,8 @@ #define DRV_NAME "triflex" -static void triflex_set_mode(ide_drive_t *drive, const u8 speed) +static void triflex_set_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); u32 triflex_timings = 0; u16 timing = 0; @@ -44,7 +43,7 @@ static void triflex_set_mode(ide_drive_t *drive, const u8 speed) pci_read_config_dword(dev, channel_offset, &triflex_timings); - switch(speed) { + switch (drive->dma_mode) { case XFER_MW_DMA_2: timing = 0x0103; break; @@ -84,7 +83,8 @@ static void triflex_set_mode(ide_drive_t *drive, const u8 speed) static void triflex_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - triflex_set_mode(drive, drive->pio_mode); + drive->dma_mode = drive->pio_mode; + triflex_set_mode(hwif, drive); } static const struct ide_port_ops triflex_port_ops = { diff --git a/drivers/ide/tx4939ide.c b/drivers/ide/tx4939ide.c index 5228a4786de..f210633a3d5 100644 --- a/drivers/ide/tx4939ide.c +++ b/drivers/ide/tx4939ide.c @@ -125,10 +125,10 @@ static void tx4939ide_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) /* tx4939ide_tf_load_fixup() will set the Sys_Ctl register */ } -static void tx4939ide_set_dma_mode(ide_drive_t *drive, const u8 mode) +static void tx4939ide_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; u32 mask, val; + const u8 mode = drive->dma_mode; /* Update Data Transfer Mode for this drive. */ if (mode >= XFER_UDMA_0) diff --git a/drivers/ide/via82cxxx.c b/drivers/ide/via82cxxx.c index 6d995fc9d4f..6769fe252b0 100644 --- a/drivers/ide/via82cxxx.c +++ b/drivers/ide/via82cxxx.c @@ -169,22 +169,22 @@ static void via_set_speed(ide_hwif_t *hwif, u8 dn, struct ide_timing *timing) /** * via_set_drive - configure transfer mode + * @hwif: port * @drive: Drive to set up - * @speed: desired speed * * via_set_drive() computes timing values configures the chipset to * a desired transfer mode. It also can be called by upper layers. */ -static void via_set_drive(ide_drive_t *drive, const u8 speed) +static void via_set_drive(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; ide_drive_t *peer = ide_get_pair_dev(drive); struct pci_dev *dev = to_pci_dev(hwif->dev); struct ide_host *host = pci_get_drvdata(dev); struct via82cxxx_dev *vdev = host->host_priv; struct ide_timing t, p; unsigned int T, UT; + const u8 speed = drive->dma_mode; T = 1000000000 / via_clock; @@ -216,7 +216,8 @@ static void via_set_drive(ide_drive_t *drive, const u8 speed) static void via_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - via_set_drive(drive, drive->pio_mode); + drive->dma_mode = drive->pio_mode; + via_set_drive(hwif, drive); } static struct via_isa_bridge *via_config_find(struct pci_dev **isa) diff --git a/include/linux/ide.h b/include/linux/ide.h index 803ec306883..53ecdba82d7 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -625,7 +625,7 @@ extern const struct ide_tp_ops default_tp_ops; struct ide_port_ops { void (*init_dev)(ide_drive_t *); void (*set_pio_mode)(struct hwif_s *, ide_drive_t *); - void (*set_dma_mode)(ide_drive_t *, const u8); + void (*set_dma_mode)(struct hwif_s *, ide_drive_t *); int (*reset_poll)(ide_drive_t *); void (*pre_reset)(ide_drive_t *); void (*resetproc)(ide_drive_t *); -- cgit v1.2.3-70-g09d2 From 220c58bc6d1198c4c4e69a385d364602c38b6b1c Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Mon, 18 Jan 2010 07:22:38 +0000 Subject: ide: make ide_get_best_pio_mode() static Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: David S. Miller --- drivers/ide/ide-xfer-mode.c | 3 +-- include/linux/ide.h | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/ide-xfer-mode.c b/drivers/ide/ide-xfer-mode.c index 9b549e4d184..5fc8d5c17de 100644 --- a/drivers/ide/ide-xfer-mode.c +++ b/drivers/ide/ide-xfer-mode.c @@ -58,7 +58,7 @@ EXPORT_SYMBOL(ide_xfer_verbose); * This is used by most chipset support modules when "auto-tuning". */ -u8 ide_get_best_pio_mode(ide_drive_t *drive, u8 mode_wanted, u8 max_mode) +static u8 ide_get_best_pio_mode(ide_drive_t *drive, u8 mode_wanted, u8 max_mode) { u16 *id = drive->id; int pio_mode = -1, overridden = 0; @@ -105,7 +105,6 @@ u8 ide_get_best_pio_mode(ide_drive_t *drive, u8 mode_wanted, u8 max_mode) return pio_mode; } -EXPORT_SYMBOL_GPL(ide_get_best_pio_mode); int ide_pio_need_iordy(ide_drive_t *drive, const u8 pio) { diff --git a/include/linux/ide.h b/include/linux/ide.h index 53ecdba82d7..97e6ab43518 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1496,7 +1496,6 @@ int ide_timing_compute(ide_drive_t *, u8, struct ide_timing *, int, int); #ifdef CONFIG_IDE_XFER_MODE int ide_scan_pio_blacklist(char *); const char *ide_xfer_verbose(u8); -u8 ide_get_best_pio_mode(ide_drive_t *, u8, u8); int ide_pio_need_iordy(ide_drive_t *, const u8); int ide_set_pio_mode(ide_drive_t *, u8); int ide_set_dma_mode(ide_drive_t *, u8); -- cgit v1.2.3-70-g09d2 From 11380a4b2d86fae9a6bce75c9373668cc323fe57 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 19 Jan 2010 13:46:10 -0800 Subject: net: Unexport napi_gro_flush(). Nothing outside of net/core/dev.c uses it. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 - net/core/dev.c | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a3fccc85b1a..468a11dea58 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1527,7 +1527,6 @@ extern int netif_rx(struct sk_buff *skb); extern int netif_rx_ni(struct sk_buff *skb); #define HAVE_NETIF_RECEIVE_SKB 1 extern int netif_receive_skb(struct sk_buff *skb); -extern void napi_gro_flush(struct napi_struct *napi); extern gro_result_t dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb); extern gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb); diff --git a/net/core/dev.c b/net/core/dev.c index a008f6987a9..5747b9edc1b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2582,7 +2582,7 @@ out: return netif_receive_skb(skb); } -void napi_gro_flush(struct napi_struct *napi) +static void napi_gro_flush(struct napi_struct *napi) { struct sk_buff *skb, *next; @@ -2595,7 +2595,6 @@ void napi_gro_flush(struct napi_struct *napi) napi->gro_count = 0; napi->gro_list = NULL; } -EXPORT_SYMBOL(napi_gro_flush); enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { -- cgit v1.2.3-70-g09d2 From 552e450929a7298cc8834fd2824a60b2e914f70e Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Wed, 20 Jan 2010 13:49:45 -0700 Subject: spi/dw_spi: refine the IRQ mode working flow Now dw_spi core fully supports 3 transfer modes: pure polling, DMA and IRQ mode. IRQ mode will use the FIFO half empty as the IRQ trigger, so each interface driver need set the fifo_len, so that core driver can handle it properly Signed-off-by: Feng Tang Signed-off-by: Grant Likely --- drivers/spi/dw_spi.c | 64 ++++++++++++++++++++++++++++------------------ drivers/spi/dw_spi_pci.c | 1 + include/linux/spi/dw_spi.h | 1 + 3 files changed, 41 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/drivers/spi/dw_spi.c b/drivers/spi/dw_spi.c index 521d680af28..1bb709b3920 100644 --- a/drivers/spi/dw_spi.c +++ b/drivers/spi/dw_spi.c @@ -358,6 +358,8 @@ static void transfer_complete(struct dw_spi *dws) static irqreturn_t interrupt_transfer(struct dw_spi *dws) { u16 irq_status, irq_mask = 0x3f; + u32 int_level = dws->fifo_len / 2; + u32 left; irq_status = dw_readw(dws, isr) & irq_mask; /* Error handling */ @@ -369,22 +371,23 @@ static irqreturn_t interrupt_transfer(struct dw_spi *dws) return IRQ_HANDLED; } - /* INT comes from tx */ - if (dws->tx && (irq_status & SPI_INT_TXEI)) { - while (dws->tx < dws->tx_end) + if (irq_status & SPI_INT_TXEI) { + spi_mask_intr(dws, SPI_INT_TXEI); + + left = (dws->tx_end - dws->tx) / dws->n_bytes; + left = (left > int_level) ? int_level : left; + + while (left--) dws->write(dws); + dws->read(dws); - if (dws->tx == dws->tx_end) { - spi_mask_intr(dws, SPI_INT_TXEI); + /* Re-enable the IRQ if there is still data left to tx */ + if (dws->tx_end > dws->tx) + spi_umask_intr(dws, SPI_INT_TXEI); + else transfer_complete(dws); - } } - /* INT comes from rx */ - if (dws->rx && (irq_status & SPI_INT_RXFI)) { - if (dws->read(dws)) - transfer_complete(dws); - } return IRQ_HANDLED; } @@ -428,6 +431,7 @@ static void pump_transfers(unsigned long data) u8 bits = 0; u8 imask = 0; u8 cs_change = 0; + u16 txint_level = 0; u16 clk_div = 0; u32 speed = 0; u32 cr0 = 0; @@ -438,6 +442,9 @@ static void pump_transfers(unsigned long data) chip = dws->cur_chip; spi = message->spi; + if (unlikely(!chip->clk_div)) + chip->clk_div = dws->max_freq / chip->speed_hz; + if (message->state == ERROR_STATE) { message->status = -EIO; goto early_exit; @@ -492,7 +499,7 @@ static void pump_transfers(unsigned long data) /* clk_div doesn't support odd number */ clk_div = dws->max_freq / speed; - clk_div = (clk_div >> 1) << 1; + clk_div = (clk_div + 1) & 0xfffe; chip->speed_hz = speed; chip->clk_div = clk_div; @@ -535,11 +542,16 @@ static void pump_transfers(unsigned long data) /* Check if current transfer is a DMA transaction */ dws->dma_mapped = map_dma_buffers(dws); + /* + * Interrupt mode + * we only need set the TXEI IRQ, as TX/RX always happen syncronizely + */ if (!dws->dma_mapped && !chip->poll_mode) { - if (dws->rx) - imask |= SPI_INT_RXFI; - if (dws->tx) - imask |= SPI_INT_TXEI; + int templen = dws->len / dws->n_bytes; + txint_level = dws->fifo_len / 2; + txint_level = (templen > txint_level) ? txint_level : templen; + + imask |= SPI_INT_TXEI; dws->transfer_handler = interrupt_transfer; } @@ -549,21 +561,23 @@ static void pump_transfers(unsigned long data) * 2. clk_div is changed * 3. control value changes */ - if (dw_readw(dws, ctrl0) != cr0 || cs_change || clk_div) { + if (dw_readw(dws, ctrl0) != cr0 || cs_change || clk_div || imask) { spi_enable_chip(dws, 0); if (dw_readw(dws, ctrl0) != cr0) dw_writew(dws, ctrl0, cr0); + spi_set_clk(dws, clk_div ? clk_div : chip->clk_div); + spi_chip_sel(dws, spi->chip_select); + /* Set the interrupt mask, for poll mode just diable all int */ spi_mask_intr(dws, 0xff); - if (!chip->poll_mode) + if (imask) spi_umask_intr(dws, imask); + if (txint_level) + dw_writew(dws, txfltr, txint_level); - spi_set_clk(dws, clk_div ? clk_div : chip->clk_div); - spi_chip_sel(dws, spi->chip_select); spi_enable_chip(dws, 1); - if (cs_change) dws->prev_chip = chip; } @@ -712,11 +726,11 @@ static int dw_spi_setup(struct spi_device *spi) } chip->bits_per_word = spi->bits_per_word; + if (!spi->max_speed_hz) { + dev_err(&spi->dev, "No max speed HZ parameter\n"); + return -EINVAL; + } chip->speed_hz = spi->max_speed_hz; - if (chip->speed_hz) - chip->clk_div = 25000000 / chip->speed_hz; - else - chip->clk_div = 8; /* default value */ chip->tmode = 0; /* Tx & Rx */ /* Default SPI mode is SCPOL = 0, SCPH = 0 */ diff --git a/drivers/spi/dw_spi_pci.c b/drivers/spi/dw_spi_pci.c index 7980f1443ce..1f0735f9cc7 100644 --- a/drivers/spi/dw_spi_pci.c +++ b/drivers/spi/dw_spi_pci.c @@ -73,6 +73,7 @@ static int __devinit spi_pci_probe(struct pci_dev *pdev, dws->num_cs = 4; dws->max_freq = 25000000; /* for Moorestwon */ dws->irq = pdev->irq; + dws->fifo_len = 40; /* FIFO has 40 words buffer */ ret = dw_spi_add_host(dws); if (ret) diff --git a/include/linux/spi/dw_spi.h b/include/linux/spi/dw_spi.h index 51b3e771a9a..1a127a31e01 100644 --- a/include/linux/spi/dw_spi.h +++ b/include/linux/spi/dw_spi.h @@ -90,6 +90,7 @@ struct dw_spi { unsigned long paddr; u32 iolen; int irq; + u32 fifo_len; /* depth of the FIFO buffer */ u32 max_freq; /* max bus freq supported */ u16 bus_num; -- cgit v1.2.3-70-g09d2 From 3bf127637e22ddf95e67e10a23c339cee3d52429 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Thu, 21 Jan 2010 00:02:36 -0800 Subject: Input: sh_keysc - add mode 4 and mode 5 support Add Mode 4 and Mode 5 support to the SH_KEYSC driver. These modes allow slightly larger key pad matrixes. While at it, make use of resource_size(). Signed-off-by: Magnus Damm Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/sh_keysc.c | 6 +++--- include/linux/input/sh_keysc.h | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/keyboard/sh_keysc.c b/drivers/input/keyboard/sh_keysc.c index 076111fc72d..25706f80225 100644 --- a/drivers/input/keyboard/sh_keysc.c +++ b/drivers/input/keyboard/sh_keysc.c @@ -36,6 +36,8 @@ static const struct { [SH_KEYSC_MODE_1] = { 0, 6, 5 }, [SH_KEYSC_MODE_2] = { 1, 5, 6 }, [SH_KEYSC_MODE_3] = { 2, 4, 7 }, + [SH_KEYSC_MODE_4] = { 3, 6, 6 }, + [SH_KEYSC_MODE_5] = { 4, 6, 7 }, }; struct sh_keysc_priv { @@ -122,8 +124,6 @@ static irqreturn_t sh_keysc_isr(int irq, void *dev_id) return IRQ_HANDLED; } -#define res_size(res) ((res)->end - (res)->start + 1) - static int __devinit sh_keysc_probe(struct platform_device *pdev) { struct sh_keysc_priv *priv; @@ -164,7 +164,7 @@ static int __devinit sh_keysc_probe(struct platform_device *pdev) memcpy(&priv->pdata, pdev->dev.platform_data, sizeof(priv->pdata)); pdata = &priv->pdata; - priv->iomem_base = ioremap_nocache(res->start, res_size(res)); + priv->iomem_base = ioremap_nocache(res->start, resource_size(res)); if (priv->iomem_base == NULL) { dev_err(&pdev->dev, "failed to remap I/O memory\n"); error = -ENXIO; diff --git a/include/linux/input/sh_keysc.h b/include/linux/input/sh_keysc.h index c211b5cf08e..2aff38bcf2b 100644 --- a/include/linux/input/sh_keysc.h +++ b/include/linux/input/sh_keysc.h @@ -1,10 +1,11 @@ #ifndef __SH_KEYSC_H__ #define __SH_KEYSC_H__ -#define SH_KEYSC_MAXKEYS 30 +#define SH_KEYSC_MAXKEYS 42 struct sh_keysc_info { - enum { SH_KEYSC_MODE_1, SH_KEYSC_MODE_2, SH_KEYSC_MODE_3 } mode; + enum { SH_KEYSC_MODE_1, SH_KEYSC_MODE_2, SH_KEYSC_MODE_3, + SH_KEYSC_MODE_4, SH_KEYSC_MODE_5 } mode; int scan_timing; /* 0 -> 7, see KYCR1, SCN[2:0] */ int delay; int kycr2_delay; -- cgit v1.2.3-70-g09d2 From 3d45fd804a95055ecab5b3eed81f5ab2dbb047a2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 17 Dec 2009 17:12:46 +0100 Subject: sched: Remove the sched_class load_balance methods Take out the sched_class methods for load-balancing. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/sched.h | 8 ------ kernel/sched.c | 26 ------------------- kernel/sched_fair.c | 66 +++++++++++++++++++++++++++---------------------- kernel/sched_idletask.c | 21 ---------------- kernel/sched_rt.c | 20 --------------- 5 files changed, 37 insertions(+), 104 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index f2f842db03c..50d685cde70 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1087,14 +1087,6 @@ struct sched_class { #ifdef CONFIG_SMP int (*select_task_rq)(struct task_struct *p, int sd_flag, int flags); - unsigned long (*load_balance) (struct rq *this_rq, int this_cpu, - struct rq *busiest, unsigned long max_load_move, - struct sched_domain *sd, enum cpu_idle_type idle, - int *all_pinned, int *this_best_prio); - - int (*move_one_task) (struct rq *this_rq, int this_cpu, - struct rq *busiest, struct sched_domain *sd, - enum cpu_idle_type idle); void (*pre_schedule) (struct rq *this_rq, struct task_struct *task); void (*post_schedule) (struct rq *this_rq); void (*task_waking) (struct rq *this_rq, struct task_struct *task); diff --git a/kernel/sched.c b/kernel/sched.c index 13a2acf18b2..c0be07932a8 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1390,32 +1390,6 @@ static const u32 prio_to_wmult[40] = { /* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153, }; -static void activate_task(struct rq *rq, struct task_struct *p, int wakeup); - -/* - * runqueue iterator, to support SMP load-balancing between different - * scheduling classes, without having to expose their internal data - * structures to the load-balancing proper: - */ -struct rq_iterator { - void *arg; - struct task_struct *(*start)(void *); - struct task_struct *(*next)(void *); -}; - -#ifdef CONFIG_SMP -static unsigned long -balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, - unsigned long max_load_move, struct sched_domain *sd, - enum cpu_idle_type idle, int *all_pinned, - int *this_best_prio, struct rq_iterator *iterator); - -static int -iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, - struct sched_domain *sd, enum cpu_idle_type idle, - struct rq_iterator *iterator); -#endif - /* Time spent by the tasks of the cpu accounting group executing in ... */ enum cpuacct_stat_index { CPUACCT_STAT_USER, /* ... user mode */ diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 5116b81d772..faf9a2f099a 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1851,6 +1851,24 @@ static struct task_struct *load_balance_next_fair(void *arg) return __load_balance_iterator(cfs_rq, cfs_rq->balance_iterator); } +/* + * runqueue iterator, to support SMP load-balancing between different + * scheduling classes, without having to expose their internal data + * structures to the load-balancing proper: + */ +struct rq_iterator { + void *arg; + struct task_struct *(*start)(void *); + struct task_struct *(*next)(void *); +}; + +static unsigned long +balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, + unsigned long max_load_move, struct sched_domain *sd, + enum cpu_idle_type idle, int *all_pinned, + int *this_best_prio, struct rq_iterator *iterator); + + static unsigned long __load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, unsigned long max_load_move, struct sched_domain *sd, @@ -1929,8 +1947,20 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, #endif static int -move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, - struct sched_domain *sd, enum cpu_idle_type idle) +iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, + struct sched_domain *sd, enum cpu_idle_type idle, + struct rq_iterator *iterator); + +/* + * move_one_task tries to move exactly one task from busiest to this_rq, as + * part of active balancing operations within "domain". + * Returns 1 if successful and 0 otherwise. + * + * Called with both runqueues locked. + */ +static int +move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, + struct sched_domain *sd, enum cpu_idle_type idle) { struct cfs_rq *busy_cfs_rq; struct rq_iterator cfs_rq_iterator; @@ -2094,16 +2124,15 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, struct sched_domain *sd, enum cpu_idle_type idle, int *all_pinned) { - const struct sched_class *class = sched_class_highest; - unsigned long total_load_moved = 0; + unsigned long total_load_moved = 0, load_moved; int this_best_prio = this_rq->curr->prio; do { - total_load_moved += - class->load_balance(this_rq, this_cpu, busiest, + load_moved = load_balance_fair(this_rq, this_cpu, busiest, max_load_move - total_load_moved, sd, idle, all_pinned, &this_best_prio); - class = class->next; + + total_load_moved += load_moved; #ifdef CONFIG_PREEMPT /* @@ -2114,7 +2143,7 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, if (idle == CPU_NEWLY_IDLE && this_rq->nr_running) break; #endif - } while (class && max_load_move > total_load_moved); + } while (load_moved && max_load_move > total_load_moved); return total_load_moved > 0; } @@ -2145,25 +2174,6 @@ iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, return 0; } -/* - * move_one_task tries to move exactly one task from busiest to this_rq, as - * part of active balancing operations within "domain". - * Returns 1 if successful and 0 otherwise. - * - * Called with both runqueues locked. - */ -static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, - struct sched_domain *sd, enum cpu_idle_type idle) -{ - const struct sched_class *class; - - for_each_class(class) { - if (class->move_one_task(this_rq, this_cpu, busiest, sd, idle)) - return 1; - } - - return 0; -} /********** Helpers for find_busiest_group ************************/ /* * sd_lb_stats - Structure to store the statistics of a sched_domain @@ -3873,8 +3883,6 @@ static const struct sched_class fair_sched_class = { #ifdef CONFIG_SMP .select_task_rq = select_task_rq_fair, - .load_balance = load_balance_fair, - .move_one_task = move_one_task_fair, .rq_online = rq_online_fair, .rq_offline = rq_offline_fair, diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index 01332bfc61a..a8a6d8a5094 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c @@ -44,24 +44,6 @@ static void put_prev_task_idle(struct rq *rq, struct task_struct *prev) { } -#ifdef CONFIG_SMP -static unsigned long -load_balance_idle(struct rq *this_rq, int this_cpu, struct rq *busiest, - unsigned long max_load_move, - struct sched_domain *sd, enum cpu_idle_type idle, - int *all_pinned, int *this_best_prio) -{ - return 0; -} - -static int -move_one_task_idle(struct rq *this_rq, int this_cpu, struct rq *busiest, - struct sched_domain *sd, enum cpu_idle_type idle) -{ - return 0; -} -#endif - static void task_tick_idle(struct rq *rq, struct task_struct *curr, int queued) { } @@ -119,9 +101,6 @@ static const struct sched_class idle_sched_class = { #ifdef CONFIG_SMP .select_task_rq = select_task_rq_idle, - - .load_balance = load_balance_idle, - .move_one_task = move_one_task_idle, #endif .set_curr_task = set_curr_task_idle, diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 072b3fcee8d..502bb614e40 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -1481,24 +1481,6 @@ static void task_woken_rt(struct rq *rq, struct task_struct *p) push_rt_tasks(rq); } -static unsigned long -load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest, - unsigned long max_load_move, - struct sched_domain *sd, enum cpu_idle_type idle, - int *all_pinned, int *this_best_prio) -{ - /* don't touch RT tasks */ - return 0; -} - -static int -move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest, - struct sched_domain *sd, enum cpu_idle_type idle) -{ - /* don't touch RT tasks */ - return 0; -} - static void set_cpus_allowed_rt(struct task_struct *p, const struct cpumask *new_mask) { @@ -1746,8 +1728,6 @@ static const struct sched_class rt_sched_class = { #ifdef CONFIG_SMP .select_task_rq = select_task_rq_rt, - .load_balance = load_balance_rt, - .move_one_task = move_one_task_rt, .set_cpus_allowed = set_cpus_allowed_rt, .rq_online = rq_online_rt, .rq_offline = rq_offline_rt, -- cgit v1.2.3-70-g09d2 From 7c9414385ebfdd87cc542d4e7e3bb0dbb2d3ce25 Mon Sep 17 00:00:00 2001 From: Dhaval Giani Date: Wed, 20 Jan 2010 13:26:18 +0100 Subject: sched: Remove USER_SCHED Remove the USER_SCHED feature. It has been scheduled to be removed in 2.6.34 as per http://marc.info/?l=linux-kernel&m=125728479022976&w=2 Signed-off-by: Dhaval Giani Signed-off-by: Peter Zijlstra LKML-Reference: <1263990378.24844.3.camel@localhost> Signed-off-by: Ingo Molnar --- Documentation/feature-removal-schedule.txt | 15 -- include/linux/sched.h | 14 +- init/Kconfig | 81 +++----- kernel/ksysfs.c | 8 - kernel/sched.c | 114 +---------- kernel/sys.c | 5 - kernel/user.c | 305 ----------------------------- 7 files changed, 38 insertions(+), 504 deletions(-) (limited to 'include/linux') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 870d190fe61..04a3fc3d139 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -6,21 +6,6 @@ be removed from this file. --------------------------- -What: USER_SCHED -When: 2.6.34 - -Why: USER_SCHED was implemented as a proof of concept for group scheduling. - The effect of USER_SCHED can already be achieved from userspace with - the help of libcgroup. The removal of USER_SCHED will also simplify - the scheduler code with the removal of one major ifdef. There are also - issues USER_SCHED has with USER_NS. A decision was taken not to fix - those and instead remove USER_SCHED. Also new group scheduling - features will not be implemented for USER_SCHED. - -Who: Dhaval Giani - ---------------------------- - What: PRISM54 When: 2.6.34 diff --git a/include/linux/sched.h b/include/linux/sched.h index 50d685cde70..8b079735ae5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -731,14 +731,6 @@ struct user_struct { uid_t uid; struct user_namespace *user_ns; -#ifdef CONFIG_USER_SCHED - struct task_group *tg; -#ifdef CONFIG_SYSFS - struct kobject kobj; - struct delayed_work work; -#endif -#endif - #ifdef CONFIG_PERF_EVENTS atomic_long_t locked_vm; #endif @@ -2502,13 +2494,9 @@ extern long sched_getaffinity(pid_t pid, struct cpumask *mask); extern void normalize_rt_tasks(void); -#ifdef CONFIG_GROUP_SCHED +#ifdef CONFIG_CGROUP_SCHED extern struct task_group init_task_group; -#ifdef CONFIG_USER_SCHED -extern struct task_group root_task_group; -extern void set_tg_uid(struct user_struct *user); -#endif extern struct task_group *sched_create_group(struct task_group *parent); extern void sched_destroy_group(struct task_group *tg); diff --git a/init/Kconfig b/init/Kconfig index a23da9f0180..e9fa3007a6f 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -435,57 +435,6 @@ config LOG_BUF_SHIFT config HAVE_UNSTABLE_SCHED_CLOCK bool -config GROUP_SCHED - bool "Group CPU scheduler" - depends on EXPERIMENTAL - default n - help - This feature lets CPU scheduler recognize task groups and control CPU - bandwidth allocation to such task groups. - In order to create a group from arbitrary set of processes, use - CONFIG_CGROUPS. (See Control Group support.) - -config FAIR_GROUP_SCHED - bool "Group scheduling for SCHED_OTHER" - depends on GROUP_SCHED - default GROUP_SCHED - -config RT_GROUP_SCHED - bool "Group scheduling for SCHED_RR/FIFO" - depends on EXPERIMENTAL - depends on GROUP_SCHED - default n - help - This feature lets you explicitly allocate real CPU bandwidth - to users or control groups (depending on the "Basis for grouping tasks" - setting below. If enabled, it will also make it impossible to - schedule realtime tasks for non-root users until you allocate - realtime bandwidth for them. - See Documentation/scheduler/sched-rt-group.txt for more information. - -choice - depends on GROUP_SCHED - prompt "Basis for grouping tasks" - default USER_SCHED - -config USER_SCHED - bool "user id" - help - This option will choose userid as the basis for grouping - tasks, thus providing equal CPU bandwidth to each user. - -config CGROUP_SCHED - bool "Control groups" - depends on CGROUPS - help - This option allows you to create arbitrary task groups - using the "cgroup" pseudo filesystem and control - the cpu bandwidth allocated to each such task group. - Refer to Documentation/cgroups/cgroups.txt for more - information on "cgroup" pseudo filesystem. - -endchoice - menuconfig CGROUPS boolean "Control Group support" help @@ -606,6 +555,36 @@ config CGROUP_MEM_RES_CTLR_SWAP Now, memory usage of swap_cgroup is 2 bytes per entry. If swap page size is 4096bytes, 512k per 1Gbytes of swap. +menuconfig CGROUP_SCHED + bool "Group CPU scheduler" + depends on EXPERIMENTAL && CGROUPS + default n + help + This feature lets CPU scheduler recognize task groups and control CPU + bandwidth allocation to such task groups. It uses cgroups to group + tasks. + +if CGROUP_SCHED +config FAIR_GROUP_SCHED + bool "Group scheduling for SCHED_OTHER" + depends on CGROUP_SCHED + default CGROUP_SCHED + +config RT_GROUP_SCHED + bool "Group scheduling for SCHED_RR/FIFO" + depends on EXPERIMENTAL + depends on CGROUP_SCHED + default n + help + This feature lets you explicitly allocate real CPU bandwidth + to users or control groups (depending on the "Basis for grouping tasks" + setting below. If enabled, it will also make it impossible to + schedule realtime tasks for non-root users until you allocate + realtime bandwidth for them. + See Documentation/scheduler/sched-rt-group.txt for more information. + +endif #CGROUP_SCHED + endif # CGROUPS config MM_OWNER diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index 3feaf5a7451..6b1ccc3f020 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -197,16 +197,8 @@ static int __init ksysfs_init(void) goto group_exit; } - /* create the /sys/kernel/uids/ directory */ - error = uids_sysfs_init(); - if (error) - goto notes_exit; - return 0; -notes_exit: - if (notes_size > 0) - sysfs_remove_bin_file(kernel_kobj, ¬es_attr); group_exit: sysfs_remove_group(kernel_kobj, &kernel_attr_group); kset_exit: diff --git a/kernel/sched.c b/kernel/sched.c index c0be07932a8..41e76d32564 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -233,7 +233,7 @@ static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b) */ static DEFINE_MUTEX(sched_domains_mutex); -#ifdef CONFIG_GROUP_SCHED +#ifdef CONFIG_CGROUP_SCHED #include @@ -243,13 +243,7 @@ static LIST_HEAD(task_groups); /* task group related information */ struct task_group { -#ifdef CONFIG_CGROUP_SCHED struct cgroup_subsys_state css; -#endif - -#ifdef CONFIG_USER_SCHED - uid_t uid; -#endif #ifdef CONFIG_FAIR_GROUP_SCHED /* schedulable entities of this group on each cpu */ @@ -274,35 +268,7 @@ struct task_group { struct list_head children; }; -#ifdef CONFIG_USER_SCHED - -/* Helper function to pass uid information to create_sched_user() */ -void set_tg_uid(struct user_struct *user) -{ - user->tg->uid = user->uid; -} - -/* - * Root task group. - * Every UID task group (including init_task_group aka UID-0) will - * be a child to this group. - */ -struct task_group root_task_group; - -#ifdef CONFIG_FAIR_GROUP_SCHED -/* Default task group's sched entity on each cpu */ -static DEFINE_PER_CPU(struct sched_entity, init_sched_entity); -/* Default task group's cfs_rq on each cpu */ -static DEFINE_PER_CPU_SHARED_ALIGNED(struct cfs_rq, init_tg_cfs_rq); -#endif /* CONFIG_FAIR_GROUP_SCHED */ - -#ifdef CONFIG_RT_GROUP_SCHED -static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity); -static DEFINE_PER_CPU_SHARED_ALIGNED(struct rt_rq, init_rt_rq_var); -#endif /* CONFIG_RT_GROUP_SCHED */ -#else /* !CONFIG_USER_SCHED */ #define root_task_group init_task_group -#endif /* CONFIG_USER_SCHED */ /* task_group_lock serializes add/remove of task groups and also changes to * a task group's cpu shares. @@ -318,11 +284,7 @@ static int root_task_group_empty(void) } #endif -#ifdef CONFIG_USER_SCHED -# define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD) -#else /* !CONFIG_USER_SCHED */ # define INIT_TASK_GROUP_LOAD NICE_0_LOAD -#endif /* CONFIG_USER_SCHED */ /* * A weight of 0 or 1 can cause arithmetics problems. @@ -348,11 +310,7 @@ static inline struct task_group *task_group(struct task_struct *p) { struct task_group *tg; -#ifdef CONFIG_USER_SCHED - rcu_read_lock(); - tg = __task_cred(p)->user->tg; - rcu_read_unlock(); -#elif defined(CONFIG_CGROUP_SCHED) +#ifdef CONFIG_CGROUP_SCHED tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id), struct task_group, css); #else @@ -383,7 +341,7 @@ static inline struct task_group *task_group(struct task_struct *p) return NULL; } -#endif /* CONFIG_GROUP_SCHED */ +#endif /* CONFIG_CGROUP_SCHED */ /* CFS-related fields in a runqueue */ struct cfs_rq { @@ -7678,9 +7636,6 @@ void __init sched_init(void) #ifdef CONFIG_RT_GROUP_SCHED alloc_size += 2 * nr_cpu_ids * sizeof(void **); #endif -#ifdef CONFIG_USER_SCHED - alloc_size *= 2; -#endif #ifdef CONFIG_CPUMASK_OFFSTACK alloc_size += num_possible_cpus() * cpumask_size(); #endif @@ -7694,13 +7649,6 @@ void __init sched_init(void) init_task_group.cfs_rq = (struct cfs_rq **)ptr; ptr += nr_cpu_ids * sizeof(void **); -#ifdef CONFIG_USER_SCHED - root_task_group.se = (struct sched_entity **)ptr; - ptr += nr_cpu_ids * sizeof(void **); - - root_task_group.cfs_rq = (struct cfs_rq **)ptr; - ptr += nr_cpu_ids * sizeof(void **); -#endif /* CONFIG_USER_SCHED */ #endif /* CONFIG_FAIR_GROUP_SCHED */ #ifdef CONFIG_RT_GROUP_SCHED init_task_group.rt_se = (struct sched_rt_entity **)ptr; @@ -7709,13 +7657,6 @@ void __init sched_init(void) init_task_group.rt_rq = (struct rt_rq **)ptr; ptr += nr_cpu_ids * sizeof(void **); -#ifdef CONFIG_USER_SCHED - root_task_group.rt_se = (struct sched_rt_entity **)ptr; - ptr += nr_cpu_ids * sizeof(void **); - - root_task_group.rt_rq = (struct rt_rq **)ptr; - ptr += nr_cpu_ids * sizeof(void **); -#endif /* CONFIG_USER_SCHED */ #endif /* CONFIG_RT_GROUP_SCHED */ #ifdef CONFIG_CPUMASK_OFFSTACK for_each_possible_cpu(i) { @@ -7735,22 +7676,13 @@ void __init sched_init(void) #ifdef CONFIG_RT_GROUP_SCHED init_rt_bandwidth(&init_task_group.rt_bandwidth, global_rt_period(), global_rt_runtime()); -#ifdef CONFIG_USER_SCHED - init_rt_bandwidth(&root_task_group.rt_bandwidth, - global_rt_period(), RUNTIME_INF); -#endif /* CONFIG_USER_SCHED */ #endif /* CONFIG_RT_GROUP_SCHED */ -#ifdef CONFIG_GROUP_SCHED +#ifdef CONFIG_CGROUP_SCHED list_add(&init_task_group.list, &task_groups); INIT_LIST_HEAD(&init_task_group.children); -#ifdef CONFIG_USER_SCHED - INIT_LIST_HEAD(&root_task_group.children); - init_task_group.parent = &root_task_group; - list_add(&init_task_group.siblings, &root_task_group.children); -#endif /* CONFIG_USER_SCHED */ -#endif /* CONFIG_GROUP_SCHED */ +#endif /* CONFIG_CGROUP_SCHED */ #if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP update_shares_data = __alloc_percpu(nr_cpu_ids * sizeof(unsigned long), @@ -7790,25 +7722,6 @@ void __init sched_init(void) * directly in rq->cfs (i.e init_task_group->se[] = NULL). */ init_tg_cfs_entry(&init_task_group, &rq->cfs, NULL, i, 1, NULL); -#elif defined CONFIG_USER_SCHED - root_task_group.shares = NICE_0_LOAD; - init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, 0, NULL); - /* - * In case of task-groups formed thr' the user id of tasks, - * init_task_group represents tasks belonging to root user. - * Hence it forms a sibling of all subsequent groups formed. - * In this case, init_task_group gets only a fraction of overall - * system cpu resource, based on the weight assigned to root - * user's cpu share (INIT_TASK_GROUP_LOAD). This is accomplished - * by letting tasks of init_task_group sit in a separate cfs_rq - * (init_tg_cfs_rq) and having one entity represent this group of - * tasks in rq->cfs (i.e init_task_group->se[] != NULL). - */ - init_tg_cfs_entry(&init_task_group, - &per_cpu(init_tg_cfs_rq, i), - &per_cpu(init_sched_entity, i), i, 1, - root_task_group.se[i]); - #endif #endif /* CONFIG_FAIR_GROUP_SCHED */ @@ -7817,12 +7730,6 @@ void __init sched_init(void) INIT_LIST_HEAD(&rq->leaf_rt_rq_list); #ifdef CONFIG_CGROUP_SCHED init_tg_rt_entry(&init_task_group, &rq->rt, NULL, i, 1, NULL); -#elif defined CONFIG_USER_SCHED - init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, 0, NULL); - init_tg_rt_entry(&init_task_group, - &per_cpu(init_rt_rq_var, i), - &per_cpu(init_sched_rt_entity, i), i, 1, - root_task_group.rt_se[i]); #endif #endif @@ -8218,7 +8125,7 @@ static inline void unregister_rt_sched_group(struct task_group *tg, int cpu) } #endif /* CONFIG_RT_GROUP_SCHED */ -#ifdef CONFIG_GROUP_SCHED +#ifdef CONFIG_CGROUP_SCHED static void free_sched_group(struct task_group *tg) { free_fair_sched_group(tg); @@ -8327,7 +8234,7 @@ void sched_move_task(struct task_struct *tsk) task_rq_unlock(rq, &flags); } -#endif /* CONFIG_GROUP_SCHED */ +#endif /* CONFIG_CGROUP_SCHED */ #ifdef CONFIG_FAIR_GROUP_SCHED static void __set_se_shares(struct sched_entity *se, unsigned long shares) @@ -8469,13 +8376,6 @@ static int tg_schedulable(struct task_group *tg, void *data) runtime = d->rt_runtime; } -#ifdef CONFIG_USER_SCHED - if (tg == &root_task_group) { - period = global_rt_period(); - runtime = global_rt_runtime(); - } -#endif - /* * Cannot have more runtime than the period. */ diff --git a/kernel/sys.c b/kernel/sys.c index 26a6b73a6b8..f75bf0936f4 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -569,11 +569,6 @@ static int set_user(struct cred *new) if (!new_user) return -EAGAIN; - if (!task_can_switch_user(new_user, current)) { - free_uid(new_user); - return -EINVAL; - } - if (atomic_read(&new_user->processes) >= current->signal->rlim[RLIMIT_NPROC].rlim_cur && new_user != INIT_USER) { diff --git a/kernel/user.c b/kernel/user.c index 46d0165ca70..766467b3bcb 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -56,9 +56,6 @@ struct user_struct root_user = { .sigpending = ATOMIC_INIT(0), .locked_shm = 0, .user_ns = &init_user_ns, -#ifdef CONFIG_USER_SCHED - .tg = &init_task_group, -#endif }; /* @@ -75,268 +72,6 @@ static void uid_hash_remove(struct user_struct *up) put_user_ns(up->user_ns); } -#ifdef CONFIG_USER_SCHED - -static void sched_destroy_user(struct user_struct *up) -{ - sched_destroy_group(up->tg); -} - -static int sched_create_user(struct user_struct *up) -{ - int rc = 0; - - up->tg = sched_create_group(&root_task_group); - if (IS_ERR(up->tg)) - rc = -ENOMEM; - - set_tg_uid(up); - - return rc; -} - -#else /* CONFIG_USER_SCHED */ - -static void sched_destroy_user(struct user_struct *up) { } -static int sched_create_user(struct user_struct *up) { return 0; } - -#endif /* CONFIG_USER_SCHED */ - -#if defined(CONFIG_USER_SCHED) && defined(CONFIG_SYSFS) - -static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent) -{ - struct user_struct *user; - struct hlist_node *h; - - hlist_for_each_entry(user, h, hashent, uidhash_node) { - if (user->uid == uid) { - /* possibly resurrect an "almost deleted" object */ - if (atomic_inc_return(&user->__count) == 1) - cancel_delayed_work(&user->work); - return user; - } - } - - return NULL; -} - -static struct kset *uids_kset; /* represents the /sys/kernel/uids/ directory */ -static DEFINE_MUTEX(uids_mutex); - -static inline void uids_mutex_lock(void) -{ - mutex_lock(&uids_mutex); -} - -static inline void uids_mutex_unlock(void) -{ - mutex_unlock(&uids_mutex); -} - -/* uid directory attributes */ -#ifdef CONFIG_FAIR_GROUP_SCHED -static ssize_t cpu_shares_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *buf) -{ - struct user_struct *up = container_of(kobj, struct user_struct, kobj); - - return sprintf(buf, "%lu\n", sched_group_shares(up->tg)); -} - -static ssize_t cpu_shares_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t size) -{ - struct user_struct *up = container_of(kobj, struct user_struct, kobj); - unsigned long shares; - int rc; - - sscanf(buf, "%lu", &shares); - - rc = sched_group_set_shares(up->tg, shares); - - return (rc ? rc : size); -} - -static struct kobj_attribute cpu_share_attr = - __ATTR(cpu_share, 0644, cpu_shares_show, cpu_shares_store); -#endif - -#ifdef CONFIG_RT_GROUP_SCHED -static ssize_t cpu_rt_runtime_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *buf) -{ - struct user_struct *up = container_of(kobj, struct user_struct, kobj); - - return sprintf(buf, "%ld\n", sched_group_rt_runtime(up->tg)); -} - -static ssize_t cpu_rt_runtime_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t size) -{ - struct user_struct *up = container_of(kobj, struct user_struct, kobj); - unsigned long rt_runtime; - int rc; - - sscanf(buf, "%ld", &rt_runtime); - - rc = sched_group_set_rt_runtime(up->tg, rt_runtime); - - return (rc ? rc : size); -} - -static struct kobj_attribute cpu_rt_runtime_attr = - __ATTR(cpu_rt_runtime, 0644, cpu_rt_runtime_show, cpu_rt_runtime_store); - -static ssize_t cpu_rt_period_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *buf) -{ - struct user_struct *up = container_of(kobj, struct user_struct, kobj); - - return sprintf(buf, "%lu\n", sched_group_rt_period(up->tg)); -} - -static ssize_t cpu_rt_period_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t size) -{ - struct user_struct *up = container_of(kobj, struct user_struct, kobj); - unsigned long rt_period; - int rc; - - sscanf(buf, "%lu", &rt_period); - - rc = sched_group_set_rt_period(up->tg, rt_period); - - return (rc ? rc : size); -} - -static struct kobj_attribute cpu_rt_period_attr = - __ATTR(cpu_rt_period, 0644, cpu_rt_period_show, cpu_rt_period_store); -#endif - -/* default attributes per uid directory */ -static struct attribute *uids_attributes[] = { -#ifdef CONFIG_FAIR_GROUP_SCHED - &cpu_share_attr.attr, -#endif -#ifdef CONFIG_RT_GROUP_SCHED - &cpu_rt_runtime_attr.attr, - &cpu_rt_period_attr.attr, -#endif - NULL -}; - -/* the lifetime of user_struct is not managed by the core (now) */ -static void uids_release(struct kobject *kobj) -{ - return; -} - -static struct kobj_type uids_ktype = { - .sysfs_ops = &kobj_sysfs_ops, - .default_attrs = uids_attributes, - .release = uids_release, -}; - -/* - * Create /sys/kernel/uids//cpu_share file for this user - * We do not create this file for users in a user namespace (until - * sysfs tagging is implemented). - * - * See Documentation/scheduler/sched-design-CFS.txt for ramifications. - */ -static int uids_user_create(struct user_struct *up) -{ - struct kobject *kobj = &up->kobj; - int error; - - memset(kobj, 0, sizeof(struct kobject)); - if (up->user_ns != &init_user_ns) - return 0; - kobj->kset = uids_kset; - error = kobject_init_and_add(kobj, &uids_ktype, NULL, "%d", up->uid); - if (error) { - kobject_put(kobj); - goto done; - } - - kobject_uevent(kobj, KOBJ_ADD); -done: - return error; -} - -/* create these entries in sysfs: - * "/sys/kernel/uids" directory - * "/sys/kernel/uids/0" directory (for root user) - * "/sys/kernel/uids/0/cpu_share" file (for root user) - */ -int __init uids_sysfs_init(void) -{ - uids_kset = kset_create_and_add("uids", NULL, kernel_kobj); - if (!uids_kset) - return -ENOMEM; - - return uids_user_create(&root_user); -} - -/* delayed work function to remove sysfs directory for a user and free up - * corresponding structures. - */ -static void cleanup_user_struct(struct work_struct *w) -{ - struct user_struct *up = container_of(w, struct user_struct, work.work); - unsigned long flags; - int remove_user = 0; - - /* Make uid_hash_remove() + sysfs_remove_file() + kobject_del() - * atomic. - */ - uids_mutex_lock(); - - spin_lock_irqsave(&uidhash_lock, flags); - if (atomic_read(&up->__count) == 0) { - uid_hash_remove(up); - remove_user = 1; - } - spin_unlock_irqrestore(&uidhash_lock, flags); - - if (!remove_user) - goto done; - - if (up->user_ns == &init_user_ns) { - kobject_uevent(&up->kobj, KOBJ_REMOVE); - kobject_del(&up->kobj); - kobject_put(&up->kobj); - } - - sched_destroy_user(up); - key_put(up->uid_keyring); - key_put(up->session_keyring); - kmem_cache_free(uid_cachep, up); - -done: - uids_mutex_unlock(); -} - -/* IRQs are disabled and uidhash_lock is held upon function entry. - * IRQ state (as stored in flags) is restored and uidhash_lock released - * upon function exit. - */ -static void free_user(struct user_struct *up, unsigned long flags) -{ - INIT_DELAYED_WORK(&up->work, cleanup_user_struct); - schedule_delayed_work(&up->work, msecs_to_jiffies(1000)); - spin_unlock_irqrestore(&uidhash_lock, flags); -} - -#else /* CONFIG_USER_SCHED && CONFIG_SYSFS */ - static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent) { struct user_struct *user; @@ -352,11 +87,6 @@ static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent) return NULL; } -int uids_sysfs_init(void) { return 0; } -static inline int uids_user_create(struct user_struct *up) { return 0; } -static inline void uids_mutex_lock(void) { } -static inline void uids_mutex_unlock(void) { } - /* IRQs are disabled and uidhash_lock is held upon function entry. * IRQ state (as stored in flags) is restored and uidhash_lock released * upon function exit. @@ -365,32 +95,11 @@ static void free_user(struct user_struct *up, unsigned long flags) { uid_hash_remove(up); spin_unlock_irqrestore(&uidhash_lock, flags); - sched_destroy_user(up); key_put(up->uid_keyring); key_put(up->session_keyring); kmem_cache_free(uid_cachep, up); } -#endif - -#if defined(CONFIG_RT_GROUP_SCHED) && defined(CONFIG_USER_SCHED) -/* - * We need to check if a setuid can take place. This function should be called - * before successfully completing the setuid. - */ -int task_can_switch_user(struct user_struct *up, struct task_struct *tsk) -{ - - return sched_rt_can_attach(up->tg, tsk); - -} -#else -int task_can_switch_user(struct user_struct *up, struct task_struct *tsk) -{ - return 1; -} -#endif - /* * Locate the user_struct for the passed UID. If found, take a ref on it. The * caller must undo that ref with free_uid(). @@ -431,8 +140,6 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid) /* Make uid_hash_find() + uids_user_create() + uid_hash_insert() * atomic. */ - uids_mutex_lock(); - spin_lock_irq(&uidhash_lock); up = uid_hash_find(uid, hashent); spin_unlock_irq(&uidhash_lock); @@ -445,14 +152,8 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid) new->uid = uid; atomic_set(&new->__count, 1); - if (sched_create_user(new) < 0) - goto out_free_user; - new->user_ns = get_user_ns(ns); - if (uids_user_create(new)) - goto out_destoy_sched; - /* * Before adding this, check whether we raced * on adding the same user already.. @@ -475,17 +176,11 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid) spin_unlock_irq(&uidhash_lock); } - uids_mutex_unlock(); - return up; -out_destoy_sched: - sched_destroy_user(new); put_user_ns(new->user_ns); -out_free_user: kmem_cache_free(uid_cachep, new); out_unlock: - uids_mutex_unlock(); return NULL; } -- cgit v1.2.3-70-g09d2 From 83fe518a839e317480e50a138ef4acd73510d7ce Mon Sep 17 00:00:00 2001 From: George Shore Date: Thu, 21 Jan 2010 11:40:48 +0000 Subject: spi/dw_spi: enable platform specific chipselect. The driver core allows for a platform-specific chipselect assert/deassert function, however the chipselect function in the core doesn't take advantage of this fact. This enables the use of a custom function, should it be defined. Signed-off-by: George Shore Signed-off-by: Grant Likely --- include/linux/spi/dw_spi.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spi/dw_spi.h b/include/linux/spi/dw_spi.h index 1a127a31e01..cc813f95a2f 100644 --- a/include/linux/spi/dw_spi.h +++ b/include/linux/spi/dw_spi.h @@ -172,6 +172,10 @@ static inline void spi_chip_sel(struct dw_spi *dws, u16 cs) { if (cs > dws->num_cs) return; + + if (dws->cs_control) + dws->cs_control(1); + dw_writel(dws, ser, 1 << cs); } -- cgit v1.2.3-70-g09d2 From ea87bb7853168434f4a82426dd1ea8421f9e604d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 20 Jan 2010 20:58:57 +0000 Subject: sched: Extend enqueue_task to allow head queueing The ability of enqueueing a task to the head of a SCHED_FIFO priority list is required to fix some violations of POSIX scheduling policy. Extend the related functions with a "head" argument. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Tested-by: Carsten Emde Tested-by: Mathias Weber LKML-Reference: <20100120171629.734886007@linutronix.de> --- include/linux/sched.h | 3 ++- kernel/sched.c | 13 +++++++------ kernel/sched_fair.c | 3 ++- kernel/sched_rt.c | 3 ++- 4 files changed, 13 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 8b079735ae5..b35c0c7130c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1067,7 +1067,8 @@ struct sched_domain; struct sched_class { const struct sched_class *next; - void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup); + void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup, + bool head); void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep); void (*yield_task) (struct rq *rq); diff --git a/kernel/sched.c b/kernel/sched.c index 41e76d32564..f47560ff334 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1856,13 +1856,14 @@ static void update_avg(u64 *avg, u64 sample) *avg += diff >> 3; } -static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup) +static void +enqueue_task(struct rq *rq, struct task_struct *p, int wakeup, bool head) { if (wakeup) p->se.start_runtime = p->se.sum_exec_runtime; sched_info_queued(p); - p->sched_class->enqueue_task(rq, p, wakeup); + p->sched_class->enqueue_task(rq, p, wakeup, head); p->se.on_rq = 1; } @@ -1892,7 +1893,7 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup) if (task_contributes_to_load(p)) rq->nr_uninterruptible--; - enqueue_task(rq, p, wakeup); + enqueue_task(rq, p, wakeup, false); inc_nr_running(rq); } @@ -4236,7 +4237,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio) if (running) p->sched_class->set_curr_task(rq); if (on_rq) { - enqueue_task(rq, p, 0); + enqueue_task(rq, p, 0, false); check_class_changed(rq, p, prev_class, oldprio, running); } @@ -4280,7 +4281,7 @@ void set_user_nice(struct task_struct *p, long nice) delta = p->prio - old_prio; if (on_rq) { - enqueue_task(rq, p, 0); + enqueue_task(rq, p, 0, false); /* * If the task increased its priority or is running and * lowered its priority, then reschedule its CPU: @@ -8230,7 +8231,7 @@ void sched_move_task(struct task_struct *tsk) if (unlikely(running)) tsk->sched_class->set_curr_task(rq); if (on_rq) - enqueue_task(rq, tsk, 0); + enqueue_task(rq, tsk, 0, false); task_rq_unlock(rq, &flags); } diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 22231ccb2f9..0e7a7af9cf8 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1053,7 +1053,8 @@ static inline void hrtick_update(struct rq *rq) * increased. Here we update the fair scheduling stats and * then put the task into the rbtree: */ -static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup) +static void +enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup, bool head) { struct cfs_rq *cfs_rq; struct sched_entity *se = &p->se; diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 502bb614e40..38076dabb44 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -878,7 +878,8 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se) /* * Adding/removing a task to/from a priority array: */ -static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup) +static void +enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup, bool head) { struct sched_rt_entity *rt_se = &p->rt; -- cgit v1.2.3-70-g09d2 From d0dd2de0d055f0ffb1e2ecdc21380de9d12a85e2 Mon Sep 17 00:00:00 2001 From: Andriy Tkachuk Date: Wed, 20 Jan 2010 13:55:06 +0200 Subject: mac80211: Account HT Control field in Data frame hdrlen according to 802.11n-2009 ieee80211_hdrlen() should account account new HT Control field in 802.11 data frame header introduced by IEEE 802.11n standard. According to 802.11n-2009 HT Control field is present in data frames when both of following are met: 1. It is QoS data frame. 2. Order bit is set in Frame Control field. The change might be totally compatible with legacy non-11n aware frames, because 802.11-2007 standard states that "all QoS STAs set this subfield to 0". Signed-off-by: Andriy V. Tkachuk Acked-by : Benoit Papillault Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 2 ++ net/wireless/util.c | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 842701906ae..19984958ab7 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -138,6 +138,8 @@ #define IEEE80211_WMM_IE_STA_QOSINFO_SP_MASK 0x03 #define IEEE80211_WMM_IE_STA_QOSINFO_SP_SHIFT 5 +#define IEEE80211_HT_CTL_LEN 4 + struct ieee80211_hdr { __le16 frame_control; __le16 duration_id; diff --git a/net/wireless/util.c b/net/wireless/util.c index 23557c1d0a9..be2ab8c59e3 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -227,8 +227,11 @@ unsigned int ieee80211_hdrlen(__le16 fc) if (ieee80211_is_data(fc)) { if (ieee80211_has_a4(fc)) hdrlen = 30; - if (ieee80211_is_data_qos(fc)) + if (ieee80211_is_data_qos(fc)) { hdrlen += IEEE80211_QOS_CTL_LEN; + if (ieee80211_has_order(fc)) + hdrlen += IEEE80211_HT_CTL_LEN; + } goto out; } -- cgit v1.2.3-70-g09d2 From a271623f871dda970319ca15dfad3a8c8c36249f Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 22 Jan 2010 10:13:10 +0000 Subject: netdev: remove certain HAVE_ macros After netdev_ops compat code HAVE_* macros aren't needed, in fact they _will_ result in compile breakage for out of tree drivers. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/linux/netdevice.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 468a11dea58..b5fb51d0b8b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -621,30 +621,21 @@ struct net_device_ops { struct net_device *dev); u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb); -#define HAVE_CHANGE_RX_FLAGS void (*ndo_change_rx_flags)(struct net_device *dev, int flags); -#define HAVE_SET_RX_MODE void (*ndo_set_rx_mode)(struct net_device *dev); -#define HAVE_MULTICAST void (*ndo_set_multicast_list)(struct net_device *dev); -#define HAVE_SET_MAC_ADDR int (*ndo_set_mac_address)(struct net_device *dev, void *addr); -#define HAVE_VALIDATE_ADDR int (*ndo_validate_addr)(struct net_device *dev); -#define HAVE_PRIVATE_IOCTL int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd); -#define HAVE_SET_CONFIG int (*ndo_set_config)(struct net_device *dev, struct ifmap *map); -#define HAVE_CHANGE_MTU int (*ndo_change_mtu)(struct net_device *dev, int new_mtu); int (*ndo_neigh_setup)(struct net_device *dev, struct neigh_parms *); -#define HAVE_TX_TIMEOUT void (*ndo_tx_timeout) (struct net_device *dev); struct net_device_stats* (*ndo_get_stats)(struct net_device *dev); @@ -656,7 +647,6 @@ struct net_device_ops { void (*ndo_vlan_rx_kill_vid)(struct net_device *dev, unsigned short vid); #ifdef CONFIG_NET_POLL_CONTROLLER -#define HAVE_NETDEV_POLL void (*ndo_poll_controller)(struct net_device *dev); #endif #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) -- cgit v1.2.3-70-g09d2 From 9df5f74194871ebd0e51ef5ad2eca5084acaaaba Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Mon, 25 Jan 2010 11:42:20 -0600 Subject: mm: add coherence API for DMA to vmalloc/vmap areas On Virtually Indexed architectures (which don't do automatic alias resolution in their caches), we have to flush via the correct virtual address to prepare pages for DMA. On some architectures (like arm) we cannot prevent the CPU from doing data movein along the alias (and thus giving stale read data), so we not only have to introduce a flush API to push dirty cache lines out, but also an invalidate API to kill inconsistent cache lines that may have moved in before DMA changed the data Signed-off-by: James Bottomley --- Documentation/cachetlb.txt | 24 ++++++++++++++++++++++++ include/linux/highmem.h | 6 ++++++ 2 files changed, 30 insertions(+) (limited to 'include/linux') diff --git a/Documentation/cachetlb.txt b/Documentation/cachetlb.txt index da42ab414c4..b231414bb8b 100644 --- a/Documentation/cachetlb.txt +++ b/Documentation/cachetlb.txt @@ -377,3 +377,27 @@ maps this page at its virtual address. All the functionality of flush_icache_page can be implemented in flush_dcache_page and update_mmu_cache. In 2.7 the hope is to remove this interface completely. + +The final category of APIs is for I/O to deliberately aliased address +ranges inside the kernel. Such aliases are set up by use of the +vmap/vmalloc API. Since kernel I/O goes via physical pages, the I/O +subsystem assumes that the user mapping and kernel offset mapping are +the only aliases. This isn't true for vmap aliases, so anything in +the kernel trying to do I/O to vmap areas must manually manage +coherency. It must do this by flushing the vmap range before doing +I/O and invalidating it after the I/O returns. + + void flush_kernel_vmap_range(void *vaddr, int size) + flushes the kernel cache for a given virtual address range in + the vmap area. This is to make sure that any data the kernel + modified in the vmap range is made visible to the physical + page. The design is to make this area safe to perform I/O on. + Note that this API does *not* also flush the offset map alias + of the area. + + void invalidate_kernel_vmap_range(void *vaddr, int size) invalidates + the cache for a given virtual address range in the vmap area + which prevents the processor from making the cache stale by + speculatively reading data while the I/O was occurring to the + physical pages. This is only necessary for data reads into the + vmap area. diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 211ff449726..adfe1013b2b 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -17,6 +17,12 @@ static inline void flush_anon_page(struct vm_area_struct *vma, struct page *page static inline void flush_kernel_dcache_page(struct page *page) { } +static inline void flush_kernel_vmap_range(void *vaddr, int size) +{ +} +static inline void invalidate_kernel_vmap_range(void *vaddr, int size) +{ +} #endif #include -- cgit v1.2.3-70-g09d2 From 32e7bfc41110bc8f29ec0f293c3bcee6645fef34 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 25 Jan 2010 13:36:10 -0800 Subject: net: use helpers to access uc list V2 This patch introduces three macros to work with uc list from net drivers. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/bnx2.c | 5 ++--- drivers/net/e1000/e1000_main.c | 4 ++-- drivers/net/igb/igb_main.c | 7 ++++--- drivers/net/ixgbe/ixgbe_common.c | 7 +++---- drivers/net/ixgbe/ixgbe_common.h | 2 +- drivers/net/ixgbe/ixgbe_main.c | 2 +- drivers/net/ixgbe/ixgbe_type.h | 4 ++-- drivers/net/mv643xx_eth.c | 3 +-- drivers/net/niu.c | 4 ++-- drivers/net/stmmac/dwmac1000_core.c | 10 +++++----- drivers/net/virtio_net.c | 12 +++++++----- drivers/s390/net/qeth_l2_main.c | 2 +- include/linux/netdevice.h | 5 +++++ net/core/dev.c | 4 ++-- 14 files changed, 38 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index d83512d3e02..a7b6b12c1c0 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -48,7 +48,6 @@ #include #include #include -#include #if defined(CONFIG_CNIC) || defined(CONFIG_CNIC_MODULE) #define BCM_CNIC 1 @@ -3579,14 +3578,14 @@ bnx2_set_rx_mode(struct net_device *dev) sort_mode |= BNX2_RPM_SORT_USER0_MC_HSH_EN; } - if (dev->uc.count > BNX2_MAX_UNICAST_ADDRESSES) { + if (netdev_uc_count(dev) > BNX2_MAX_UNICAST_ADDRESSES) { rx_mode |= BNX2_EMAC_RX_MODE_PROMISCUOUS; sort_mode |= BNX2_RPM_SORT_USER0_PROM_EN | BNX2_RPM_SORT_USER0_PROM_VLAN; } else if (!(dev->flags & IFF_PROMISC)) { /* Add all entries into to the match filter list */ i = 0; - list_for_each_entry(ha, &dev->uc.list, list) { + netdev_for_each_uc_addr(ha, dev) { bnx2_set_mac_addr(bp, ha->addr, i + BNX2_START_UNICAST_ADDRESS_INDEX); sort_mode |= (1 << diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index 87f575ca427..2ce88c5f75c 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -2139,7 +2139,7 @@ static void e1000_set_rx_mode(struct net_device *netdev) rctl |= E1000_RCTL_VFE; } - if (netdev->uc.count > rar_entries - 1) { + if (netdev_uc_count(netdev) > rar_entries - 1) { rctl |= E1000_RCTL_UPE; } else if (!(netdev->flags & IFF_PROMISC)) { rctl &= ~E1000_RCTL_UPE; @@ -2162,7 +2162,7 @@ static void e1000_set_rx_mode(struct net_device *netdev) */ i = 1; if (use_uc) - list_for_each_entry(ha, &netdev->uc.list, list) { + netdev_for_each_uc_addr(ha, netdev) { if (i == rar_entries) break; e1000_rar_set(hw, ha->addr, i++); diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c index d9679493c63..01cc29483e2 100644 --- a/drivers/net/igb/igb_main.c +++ b/drivers/net/igb/igb_main.c @@ -2905,12 +2905,13 @@ static int igb_write_uc_addr_list(struct net_device *netdev) int count = 0; /* return ENOMEM indicating insufficient memory for addresses */ - if (netdev->uc.count > rar_entries) + if (netdev_uc_count(netdev) > rar_entries) return -ENOMEM; - if (netdev->uc.count && rar_entries) { + if (!netdev_uc_empty(netdev) && rar_entries) { struct netdev_hw_addr *ha; - list_for_each_entry(ha, &netdev->uc.list, list) { + + netdev_for_each_uc_addr(ha, netdev) { if (!rar_entries) break; igb_rar_set_qsel(adapter, ha->addr, diff --git a/drivers/net/ixgbe/ixgbe_common.c b/drivers/net/ixgbe/ixgbe_common.c index 276c2aaa800..eb49020903c 100644 --- a/drivers/net/ixgbe/ixgbe_common.c +++ b/drivers/net/ixgbe/ixgbe_common.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include "ixgbe.h" @@ -1347,7 +1346,7 @@ static void ixgbe_add_uc_addr(struct ixgbe_hw *hw, u8 *addr, u32 vmdq) /** * ixgbe_update_uc_addr_list_generic - Updates MAC list of secondary addresses * @hw: pointer to hardware structure - * @uc_list: the list of new addresses + * @netdev: pointer to net device structure * * The given list replaces any existing list. Clears the secondary addrs from * receive address registers. Uses unused receive address registers for the @@ -1357,7 +1356,7 @@ static void ixgbe_add_uc_addr(struct ixgbe_hw *hw, u8 *addr, u32 vmdq) * manually putting the device into promiscuous mode. **/ s32 ixgbe_update_uc_addr_list_generic(struct ixgbe_hw *hw, - struct list_head *uc_list) + struct net_device *netdev) { u32 i; u32 old_promisc_setting = hw->addr_ctrl.overflow_promisc; @@ -1381,7 +1380,7 @@ s32 ixgbe_update_uc_addr_list_generic(struct ixgbe_hw *hw, } /* Add the new addresses */ - list_for_each_entry(ha, uc_list, list) { + netdev_for_each_uc_addr(ha, netdev) { hw_dbg(hw, " Adding the secondary addresses:\n"); ixgbe_add_uc_addr(hw, ha->addr, 0); } diff --git a/drivers/net/ixgbe/ixgbe_common.h b/drivers/net/ixgbe/ixgbe_common.h index dfff0ffaa50..13606d4809c 100644 --- a/drivers/net/ixgbe/ixgbe_common.h +++ b/drivers/net/ixgbe/ixgbe_common.h @@ -60,7 +60,7 @@ s32 ixgbe_update_mc_addr_list_generic(struct ixgbe_hw *hw, u8 *mc_addr_list, u32 mc_addr_count, ixgbe_mc_addr_itr func); s32 ixgbe_update_uc_addr_list_generic(struct ixgbe_hw *hw, - struct list_head *uc_list); + struct net_device *netdev); s32 ixgbe_enable_mc_generic(struct ixgbe_hw *hw); s32 ixgbe_disable_mc_generic(struct ixgbe_hw *hw); s32 ixgbe_enable_rx_dma_generic(struct ixgbe_hw *hw, u32 regval); diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index ee41d331a35..439645d2aee 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -2568,7 +2568,7 @@ void ixgbe_set_rx_mode(struct net_device *netdev) IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); /* reprogram secondary unicast list */ - hw->mac.ops.update_uc_addr_list(hw, &netdev->uc.list); + hw->mac.ops.update_uc_addr_list(hw, netdev); /* reprogram multicast list */ addr_count = netdev->mc_count; diff --git a/drivers/net/ixgbe/ixgbe_type.h b/drivers/net/ixgbe/ixgbe_type.h index b4caa7011a2..0db67c19b2c 100644 --- a/drivers/net/ixgbe/ixgbe_type.h +++ b/drivers/net/ixgbe/ixgbe_type.h @@ -30,7 +30,7 @@ #include #include -#include +#include /* Vendor ID */ #define IXGBE_INTEL_VENDOR_ID 0x8086 @@ -2405,7 +2405,7 @@ struct ixgbe_mac_operations { s32 (*set_vmdq)(struct ixgbe_hw *, u32, u32); s32 (*clear_vmdq)(struct ixgbe_hw *, u32, u32); s32 (*init_rx_addrs)(struct ixgbe_hw *); - s32 (*update_uc_addr_list)(struct ixgbe_hw *, struct list_head *); + s32 (*update_uc_addr_list)(struct ixgbe_hw *, struct net_device *); s32 (*update_mc_addr_list)(struct ixgbe_hw *, u8 *, u32, ixgbe_mc_addr_itr); s32 (*enable_mc)(struct ixgbe_hw *); diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c index af67af55efe..e24072a9a97 100644 --- a/drivers/net/mv643xx_eth.c +++ b/drivers/net/mv643xx_eth.c @@ -55,7 +55,6 @@ #include #include #include -#include static char mv643xx_eth_driver_name[] = "mv643xx_eth"; static char mv643xx_eth_driver_version[] = "1.4"; @@ -1697,7 +1696,7 @@ static u32 uc_addr_filter_mask(struct net_device *dev) return 0; nibbles = 1 << (dev->dev_addr[5] & 0x0f); - list_for_each_entry(ha, &dev->uc.list, list) { + netdev_for_each_uc_addr(ha, dev) { if (memcmp(dev->dev_addr, ha->addr, 5)) return 0; if ((dev->dev_addr[5] ^ ha->addr[5]) & 0xf0) diff --git a/drivers/net/niu.c b/drivers/net/niu.c index 0e260cfbff7..af9a8647c7e 100644 --- a/drivers/net/niu.c +++ b/drivers/net/niu.c @@ -6372,7 +6372,7 @@ static void niu_set_rx_mode(struct net_device *dev) if ((dev->flags & IFF_ALLMULTI) || (dev->mc_count > 0)) np->flags |= NIU_FLAGS_MCAST; - alt_cnt = dev->uc.count; + alt_cnt = netdev_uc_count(dev); if (alt_cnt > niu_num_alt_addr(np)) { alt_cnt = 0; np->flags |= NIU_FLAGS_PROMISC; @@ -6381,7 +6381,7 @@ static void niu_set_rx_mode(struct net_device *dev) if (alt_cnt) { int index = 0; - list_for_each_entry(ha, &dev->uc.list, list) { + netdev_for_each_uc_addr(ha, dev) { err = niu_set_alt_mac(np, index, ha->addr); if (err) printk(KERN_WARNING PFX "%s: Error %d " diff --git a/drivers/net/stmmac/dwmac1000_core.c b/drivers/net/stmmac/dwmac1000_core.c index 928eac05b91..d812e9cdb3d 100644 --- a/drivers/net/stmmac/dwmac1000_core.c +++ b/drivers/net/stmmac/dwmac1000_core.c @@ -83,7 +83,7 @@ static void dwmac1000_set_filter(struct net_device *dev) unsigned int value = 0; DBG(KERN_INFO "%s: # mcasts %d, # unicast %d\n", - __func__, dev->mc_count, dev->uc.count); + __func__, dev->mc_count, netdev_uc_count(dev)); if (dev->flags & IFF_PROMISC) value = GMAC_FRAME_FILTER_PR; @@ -117,7 +117,7 @@ static void dwmac1000_set_filter(struct net_device *dev) } /* Handle multiple unicast addresses (perfect filtering)*/ - if (dev->uc.count > GMAC_MAX_UNICAST_ADDRESSES) + if (netdev_uc_count(dev) > GMAC_MAX_UNICAST_ADDRESSES) /* Switch to promiscuous mode is more than 16 addrs are required */ value |= GMAC_FRAME_FILTER_PR; @@ -125,9 +125,9 @@ static void dwmac1000_set_filter(struct net_device *dev) int reg = 1; struct netdev_hw_addr *ha; - list_for_each_entry(ha, &dev->uc.list, list) { - dwmac1000_set_umac_addr(ioaddr, ha->addr, reg); - reg++; + netdev_for_each_uc_addr(ha, dev) { + dwmac1000_set_umac_addr(ioaddr, ha->addr, reg); + reg++; } } diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index c708ecc3cb2..088332a943f 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -675,6 +675,7 @@ static void virtnet_set_rx_mode(struct net_device *dev) struct virtio_net_ctrl_mac *mac_data; struct dev_addr_list *addr; struct netdev_hw_addr *ha; + int uc_count; void *buf; int i; @@ -701,8 +702,9 @@ static void virtnet_set_rx_mode(struct net_device *dev) dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n", allmulti ? "en" : "dis"); + uc_count = netdev_uc_count(dev); /* MAC filter - use one buffer for both lists */ - mac_data = buf = kzalloc(((dev->uc.count + dev->mc_count) * ETH_ALEN) + + mac_data = buf = kzalloc(((uc_count + dev->mc_count) * ETH_ALEN) + (2 * sizeof(mac_data->entries)), GFP_ATOMIC); if (!buf) { dev_warn(&dev->dev, "No memory for MAC address buffer\n"); @@ -712,16 +714,16 @@ static void virtnet_set_rx_mode(struct net_device *dev) sg_init_table(sg, 2); /* Store the unicast list and count in the front of the buffer */ - mac_data->entries = dev->uc.count; + mac_data->entries = uc_count; i = 0; - list_for_each_entry(ha, &dev->uc.list, list) + netdev_for_each_uc_addr(ha, dev) memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); sg_set_buf(&sg[0], mac_data, - sizeof(mac_data->entries) + (dev->uc.count * ETH_ALEN)); + sizeof(mac_data->entries) + (uc_count * ETH_ALEN)); /* multicast list and count fill the end */ - mac_data = (void *)&mac_data->macs[dev->uc.count][0]; + mac_data = (void *)&mac_data->macs[uc_count][0]; mac_data->entries = dev->mc_count; addr = dev->mc_list; diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index c3258b0dd64..51fde6f2e0b 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -622,7 +622,7 @@ static void qeth_l2_set_multicast_list(struct net_device *dev) for (dm = dev->mc_list; dm; dm = dm->next) qeth_l2_add_mc(card, dm->da_addr, 0); - list_for_each_entry(ha, &dev->uc.list, list) + netdev_for_each_uc_addr(ha, dev) qeth_l2_add_mc(card, ha->addr, 1); spin_unlock_bh(&card->mclock); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b5fb51d0b8b..93a32a5ca74 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -263,6 +263,11 @@ struct netdev_hw_addr_list { int count; }; +#define netdev_uc_count(dev) ((dev)->uc.count) +#define netdev_uc_empty(dev) ((dev)->uc.count == 0) +#define netdev_for_each_uc_addr(ha, dev) \ + list_for_each_entry(ha, &dev->uc.list, list) + struct hh_cache { struct hh_cache *hh_next; /* Next entry */ atomic_t hh_refcnt; /* number of users */ diff --git a/net/core/dev.c b/net/core/dev.c index 4fad9db417b..2cba5c521e5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3665,10 +3665,10 @@ void __dev_set_rx_mode(struct net_device *dev) /* Unicast addresses changes may only happen under the rtnl, * therefore calling __dev_set_promiscuity here is safe. */ - if (dev->uc.count > 0 && !dev->uc_promisc) { + if (!netdev_uc_empty(dev) && !dev->uc_promisc) { __dev_set_promiscuity(dev, 1); dev->uc_promisc = 1; - } else if (dev->uc.count == 0 && dev->uc_promisc) { + } else if (netdev_uc_empty(dev) && dev->uc_promisc) { __dev_set_promiscuity(dev, -1); dev->uc_promisc = 0; } -- cgit v1.2.3-70-g09d2 From abd50713944c8ea9e0af5b7bffa0aacae21cc91a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 Jan 2010 18:50:16 +0100 Subject: perf: Reimplement frequency driven sampling There was a bug in the old period code that caused intel_pmu_enable_all() or native_write_msr_safe() to show up quite high in the profiles. In staring at that code it made my head hurt, so I rewrote it in a hopefully simpler fashion. Its now fully symetric between tick and overflow driven adjustments and uses less data to boot. The only complication is that it basically wants to do a u128 division. The code approximates that in a rather simple truncate until it fits fashion, taking care to balance the terms while truncating. This version does not generate that sampling artefact. Signed-off-by: Peter Zijlstra LKML-Reference: Cc: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 5 +- kernel/perf_event.c | 132 +++++++++++++++++++++++++++++++-------------- 2 files changed, 94 insertions(+), 43 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index c6f812e4d05..72b2615600d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -498,9 +498,8 @@ struct hw_perf_event { atomic64_t period_left; u64 interrupts; - u64 freq_count; - u64 freq_interrupts; - u64 freq_stamp; + u64 freq_time_stamp; + u64 freq_count_stamp; #endif }; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index edc46b92b50..251fb955249 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1423,14 +1423,83 @@ void perf_event_task_sched_in(struct task_struct *task) static void perf_log_throttle(struct perf_event *event, int enable); -static void perf_adjust_period(struct perf_event *event, u64 events) +static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) +{ + u64 frequency = event->attr.sample_freq; + u64 sec = NSEC_PER_SEC; + u64 divisor, dividend; + + int count_fls, nsec_fls, frequency_fls, sec_fls; + + count_fls = fls64(count); + nsec_fls = fls64(nsec); + frequency_fls = fls64(frequency); + sec_fls = 30; + + /* + * We got @count in @nsec, with a target of sample_freq HZ + * the target period becomes: + * + * @count * 10^9 + * period = ------------------- + * @nsec * sample_freq + * + */ + + /* + * Reduce accuracy by one bit such that @a and @b converge + * to a similar magnitude. + */ +#define REDUCE_FLS(a, b) \ +do { \ + if (a##_fls > b##_fls) { \ + a >>= 1; \ + a##_fls--; \ + } else { \ + b >>= 1; \ + b##_fls--; \ + } \ +} while (0) + + /* + * Reduce accuracy until either term fits in a u64, then proceed with + * the other, so that finally we can do a u64/u64 division. + */ + while (count_fls + sec_fls > 64 && nsec_fls + frequency_fls > 64) { + REDUCE_FLS(nsec, frequency); + REDUCE_FLS(sec, count); + } + + if (count_fls + sec_fls > 64) { + divisor = nsec * frequency; + + while (count_fls + sec_fls > 64) { + REDUCE_FLS(count, sec); + divisor >>= 1; + } + + dividend = count * sec; + } else { + dividend = count * sec; + + while (nsec_fls + frequency_fls > 64) { + REDUCE_FLS(nsec, frequency); + dividend >>= 1; + } + + divisor = nsec * frequency; + } + + return div64_u64(dividend, divisor); +} + +static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) { struct hw_perf_event *hwc = &event->hw; u64 period, sample_period; s64 delta; - events *= hwc->sample_period; - period = div64_u64(events, event->attr.sample_freq); + period = perf_calculate_period(event, nsec, count); delta = (s64)(period - hwc->sample_period); delta = (delta + 7) / 8; /* low pass filter */ @@ -1441,13 +1510,22 @@ static void perf_adjust_period(struct perf_event *event, u64 events) sample_period = 1; hwc->sample_period = sample_period; + + if (atomic64_read(&hwc->period_left) > 8*sample_period) { + perf_disable(); + event->pmu->disable(event); + atomic64_set(&hwc->period_left, 0); + event->pmu->enable(event); + perf_enable(); + } } static void perf_ctx_adjust_freq(struct perf_event_context *ctx) { struct perf_event *event; struct hw_perf_event *hwc; - u64 interrupts, freq; + u64 interrupts, now; + s64 delta; raw_spin_lock(&ctx->lock); list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { @@ -1468,44 +1546,18 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) if (interrupts == MAX_INTERRUPTS) { perf_log_throttle(event, 1); event->pmu->unthrottle(event); - interrupts = 2*sysctl_perf_event_sample_rate/HZ; } if (!event->attr.freq || !event->attr.sample_freq) continue; - /* - * if the specified freq < HZ then we need to skip ticks - */ - if (event->attr.sample_freq < HZ) { - freq = event->attr.sample_freq; - - hwc->freq_count += freq; - hwc->freq_interrupts += interrupts; - - if (hwc->freq_count < HZ) - continue; - - interrupts = hwc->freq_interrupts; - hwc->freq_interrupts = 0; - hwc->freq_count -= HZ; - } else - freq = HZ; - - perf_adjust_period(event, freq * interrupts); + event->pmu->read(event); + now = atomic64_read(&event->count); + delta = now - hwc->freq_count_stamp; + hwc->freq_count_stamp = now; - /* - * In order to avoid being stalled by an (accidental) huge - * sample period, force reset the sample period if we didn't - * get any events in this freq period. - */ - if (!interrupts) { - perf_disable(); - event->pmu->disable(event); - atomic64_set(&hwc->period_left, 0); - event->pmu->enable(event); - perf_enable(); - } + if (delta > 0) + perf_adjust_period(event, TICK_NSEC, delta); } raw_spin_unlock(&ctx->lock); } @@ -3768,12 +3820,12 @@ static int __perf_event_overflow(struct perf_event *event, int nmi, if (event->attr.freq) { u64 now = perf_clock(); - s64 delta = now - hwc->freq_stamp; + s64 delta = now - hwc->freq_time_stamp; - hwc->freq_stamp = now; + hwc->freq_time_stamp = now; - if (delta > 0 && delta < TICK_NSEC) - perf_adjust_period(event, NSEC_PER_SEC / (int)delta); + if (delta > 0 && delta < 2*TICK_NSEC) + perf_adjust_period(event, delta, hwc->last_period); } /* -- cgit v1.2.3-70-g09d2 From 6016a363f6b56b46b24655bcfc0499b715851cf3 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Thu, 28 Jan 2010 14:06:53 -0700 Subject: of: unify phandle name in struct device_node In struct device_node, the phandle is named 'linux_phandle' for PowerPC and MicroBlaze, and 'node' for SPARC. There is no good reason for the difference, it is just an artifact of the code diverging over a couple of years. This patch renames both to simply .phandle. Note: the .node also existed in PowerPC/MicroBlaze, but the only user seems to be arch/powerpc/platforms/powermac/pfunc_core.c. It doesn't look like the assignment between .linux_phandle and .node is significantly different enough to warrant the separate code paths unless ibm,phandle properties actually appear in Apple device trees. I think it is safe to eliminate the old .node property and use phandle everywhere. Signed-off-by: Grant Likely Acked-by: David S. Miller Tested-by: Wolfram Sang Acked-by: Benjamin Herrenschmidt --- arch/microblaze/kernel/of_platform.c | 2 +- arch/microblaze/kernel/prom.c | 2 +- arch/powerpc/kernel/of_platform.c | 2 +- arch/powerpc/kernel/prom.c | 6 +++--- arch/powerpc/platforms/cell/spu_manage.c | 6 +++--- arch/powerpc/platforms/powermac/pfunc_core.c | 2 +- arch/sparc/kernel/devices.c | 2 +- arch/sparc/kernel/of_device_32.c | 2 +- arch/sparc/kernel/of_device_64.c | 2 +- arch/sparc/kernel/prom_common.c | 8 ++++---- arch/sparc/kernel/smp_64.c | 2 +- drivers/of/fdt.c | 7 +++---- drivers/sbus/char/openprom.c | 10 +++++----- drivers/video/aty/atyfb_base.c | 2 +- include/linux/of.h | 5 +---- sound/aoa/fabrics/layout.c | 2 +- 16 files changed, 29 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/arch/microblaze/kernel/of_platform.c b/arch/microblaze/kernel/of_platform.c index acf4574d0f1..1c6d684996d 100644 --- a/arch/microblaze/kernel/of_platform.c +++ b/arch/microblaze/kernel/of_platform.c @@ -185,7 +185,7 @@ EXPORT_SYMBOL(of_find_device_by_node); static int of_dev_phandle_match(struct device *dev, void *data) { phandle *ph = data; - return to_of_device(dev)->node->linux_phandle == *ph; + return to_of_device(dev)->node->phandle == *ph; } struct of_device *of_find_device_by_phandle(phandle ph) diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c index 46407e64392..6eff83a7121 100644 --- a/arch/microblaze/kernel/prom.c +++ b/arch/microblaze/kernel/prom.c @@ -342,7 +342,7 @@ struct device_node *of_find_node_by_phandle(phandle handle) read_lock(&devtree_lock); for (np = allnodes; np != NULL; np = np->allnext) - if (np->linux_phandle == handle) + if (np->phandle == handle) break; of_node_get(np); read_unlock(&devtree_lock); diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c index 1a4fc0d11a0..666d08db319 100644 --- a/arch/powerpc/kernel/of_platform.c +++ b/arch/powerpc/kernel/of_platform.c @@ -214,7 +214,7 @@ EXPORT_SYMBOL(of_find_device_by_node); static int of_dev_phandle_match(struct device *dev, void *data) { phandle *ph = data; - return to_of_device(dev)->node->linux_phandle == *ph; + return to_of_device(dev)->node->phandle == *ph; } struct of_device *of_find_device_by_phandle(phandle ph) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index deccd91d7e8..1ed2ec2ea05 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -778,7 +778,7 @@ struct device_node *of_find_node_by_phandle(phandle handle) read_lock(&devtree_lock); for (np = allnodes; np != 0; np = np->allnext) - if (np->linux_phandle == handle) + if (np->phandle == handle) break; of_node_get(np); read_unlock(&devtree_lock); @@ -907,9 +907,9 @@ static int of_finish_dynamic_node(struct device_node *node) if (machine_is(powermac)) return -ENODEV; - /* fix up new node's linux_phandle field */ + /* fix up new node's phandle field */ if ((ibm_phandle = of_get_property(node, "ibm,phandle", NULL))) - node->linux_phandle = *ibm_phandle; + node->phandle = *ibm_phandle; out: of_node_put(parent); diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c index 4c506c1463c..891f18e337a 100644 --- a/arch/powerpc/platforms/cell/spu_manage.c +++ b/arch/powerpc/platforms/cell/spu_manage.c @@ -457,7 +457,7 @@ neighbour_spu(int cbe, struct device_node *target, struct device_node *avoid) continue; vic_handles = of_get_property(spu_dn, "vicinity", &lenp); for (i=0; i < (lenp / sizeof(phandle)); i++) { - if (vic_handles[i] == target->linux_phandle) + if (vic_handles[i] == target->phandle) return spu; } } @@ -499,7 +499,7 @@ static void init_affinity_node(int cbe) if (strcmp(name, "spe") == 0) { spu = devnode_spu(cbe, vic_dn); - avoid_ph = last_spu_dn->linux_phandle; + avoid_ph = last_spu_dn->phandle; } else { /* * "mic-tm" and "bif0" nodes do not have @@ -514,7 +514,7 @@ static void init_affinity_node(int cbe) last_spu->has_mem_affinity = 1; spu->has_mem_affinity = 1; } - avoid_ph = vic_dn->linux_phandle; + avoid_ph = vic_dn->phandle; } list_add_tail(&spu->aff_list, &last_spu->aff_list); diff --git a/arch/powerpc/platforms/powermac/pfunc_core.c b/arch/powerpc/platforms/powermac/pfunc_core.c index 96d5ce50364..ede49e78a8d 100644 --- a/arch/powerpc/platforms/powermac/pfunc_core.c +++ b/arch/powerpc/platforms/powermac/pfunc_core.c @@ -842,7 +842,7 @@ struct pmf_function *__pmf_find_function(struct device_node *target, list_for_each_entry(func, &dev->functions, link) { if (name && strcmp(name, func->name)) continue; - if (func->phandle && target->node != func->phandle) + if (func->phandle && target->phandle != func->phandle) continue; if ((func->flags & flags) == 0) continue; diff --git a/arch/sparc/kernel/devices.c b/arch/sparc/kernel/devices.c index b171ae8de90..b062de9424a 100644 --- a/arch/sparc/kernel/devices.c +++ b/arch/sparc/kernel/devices.c @@ -59,7 +59,7 @@ static int __cpu_find_by(int (*compare)(int, int, void *), void *compare_arg, cur_inst = 0; for_each_node_by_type(dp, "cpu") { - int err = check_cpu_node(dp->node, &cur_inst, + int err = check_cpu_node(dp->phandle, &cur_inst, compare, compare_arg, prom_node, mid); if (!err) { diff --git a/arch/sparc/kernel/of_device_32.c b/arch/sparc/kernel/of_device_32.c index 4c26eb59e74..09138d403c7 100644 --- a/arch/sparc/kernel/of_device_32.c +++ b/arch/sparc/kernel/of_device_32.c @@ -433,7 +433,7 @@ build_resources: if (!parent) dev_set_name(&op->dev, "root"); else - dev_set_name(&op->dev, "%08x", dp->node); + dev_set_name(&op->dev, "%08x", dp->phandle); if (of_device_register(op)) { printk("%s: Could not register of device.\n", diff --git a/arch/sparc/kernel/of_device_64.c b/arch/sparc/kernel/of_device_64.c index 881947e59e9..036f18ae59a 100644 --- a/arch/sparc/kernel/of_device_64.c +++ b/arch/sparc/kernel/of_device_64.c @@ -666,7 +666,7 @@ static struct of_device * __init scan_one_device(struct device_node *dp, if (!parent) dev_set_name(&op->dev, "root"); else - dev_set_name(&op->dev, "%08x", dp->node); + dev_set_name(&op->dev, "%08x", dp->phandle); if (of_device_register(op)) { printk("%s: Could not register of device.\n", diff --git a/arch/sparc/kernel/prom_common.c b/arch/sparc/kernel/prom_common.c index d80a65d9e89..5832e13dfee 100644 --- a/arch/sparc/kernel/prom_common.c +++ b/arch/sparc/kernel/prom_common.c @@ -42,7 +42,7 @@ struct device_node *of_find_node_by_phandle(phandle handle) struct device_node *np; for (np = allnodes; np; np = np->allnext) - if (np->node == handle) + if (np->phandle == handle) break; return np; @@ -89,7 +89,7 @@ int of_set_property(struct device_node *dp, const char *name, void *val, int len void *old_val = prop->value; int ret; - ret = prom_setprop(dp->node, name, val, len); + ret = prom_setprop(dp->phandle, name, val, len); err = -EINVAL; if (ret >= 0) { @@ -236,7 +236,7 @@ static struct device_node * __init prom_create_node(phandle node, dp->name = get_one_property(node, "name"); dp->type = get_one_property(node, "device_type"); - dp->node = node; + dp->phandle = node; dp->properties = build_prop_list(node); @@ -313,7 +313,7 @@ void __init prom_build_devicetree(void) nextp = &allnodes->allnext; allnodes->child = prom_build_tree(allnodes, - prom_getchild(allnodes->node), + prom_getchild(allnodes->phandle), &nextp); of_console_init(); diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index aa36223497b..eb14844a002 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -370,7 +370,7 @@ static int __cpuinit smp_boot_one_cpu(unsigned int cpu) } else { struct device_node *dp = of_find_node_by_cpuid(cpu); - prom_startcpu(dp->node, entry, cookie); + prom_startcpu(dp->phandle, entry, cookie); } for (timeout = 0; timeout < 50000; timeout++) { diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 616a4767a95..7f8861121a3 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -310,12 +310,11 @@ unsigned long __init unflatten_dt_node(unsigned long mem, __alignof__(struct property)); if (allnextpp) { if (strcmp(pname, "linux,phandle") == 0) { - np->node = *((u32 *)*p); - if (np->linux_phandle == 0) - np->linux_phandle = np->node; + if (np->phandle == 0) + np->phandle = *((u32 *)*p); } if (strcmp(pname, "ibm,phandle") == 0) - np->linux_phandle = *((u32 *)*p); + np->phandle = *((u32 *)*p); pp->name = pname; pp->length = sz; pp->value = (void *)*p; diff --git a/drivers/sbus/char/openprom.c b/drivers/sbus/char/openprom.c index 75ac19b1192..fc2f676e984 100644 --- a/drivers/sbus/char/openprom.c +++ b/drivers/sbus/char/openprom.c @@ -233,7 +233,7 @@ static int opromnext(void __user *argp, unsigned int cmd, struct device_node *dp ph = 0; if (dp) - ph = dp->node; + ph = dp->phandle; data->current_node = dp; *((int *) op->oprom_array) = ph; @@ -256,7 +256,7 @@ static int oprompci2node(void __user *argp, struct device_node *dp, struct openp dp = pci_device_to_OF_node(pdev); data->current_node = dp; - *((int *)op->oprom_array) = dp->node; + *((int *)op->oprom_array) = dp->phandle; op->oprom_size = sizeof(int); err = copyout(argp, op, bufsize + sizeof(int)); @@ -273,7 +273,7 @@ static int oprompath2node(void __user *argp, struct device_node *dp, struct open dp = of_find_node_by_path(op->oprom_array); if (dp) - ph = dp->node; + ph = dp->phandle; data->current_node = dp; *((int *)op->oprom_array) = ph; op->oprom_size = sizeof(int); @@ -540,7 +540,7 @@ static int opiocgetnext(unsigned int cmd, void __user *argp) } } if (dp) - nd = dp->node; + nd = dp->phandle; if (copy_to_user(argp, &nd, sizeof(phandle))) return -EFAULT; @@ -570,7 +570,7 @@ static int openprom_bsd_ioctl(struct inode * inode, struct file * file, case OPIOCGETOPTNODE: BUILD_BUG_ON(sizeof(phandle) != sizeof(int)); - if (copy_to_user(argp, &options_node->node, sizeof(phandle))) + if (copy_to_user(argp, &options_node->phandle, sizeof(phandle))) return -EFAULT; return 0; diff --git a/drivers/video/aty/atyfb_base.c b/drivers/video/aty/atyfb_base.c index 913b4a47ae5..bb20987d58a 100644 --- a/drivers/video/aty/atyfb_base.c +++ b/drivers/video/aty/atyfb_base.c @@ -3104,7 +3104,7 @@ static int __devinit atyfb_setup_sparc(struct pci_dev *pdev, } dp = pci_device_to_OF_node(pdev); - if (node == dp->node) { + if (node == dp->phandle) { struct fb_var_screeninfo *var = &default_var; unsigned int N, P, Q, M, T, R; u32 v_total, h_total; diff --git a/include/linux/of.h b/include/linux/of.h index d4c014a35ea..dbabf86e0b7 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -39,10 +39,7 @@ struct of_irq_controller; struct device_node { const char *name; const char *type; - phandle node; -#if !defined(CONFIG_SPARC) - phandle linux_phandle; -#endif + phandle phandle; char *full_name; struct property *properties; diff --git a/sound/aoa/fabrics/layout.c b/sound/aoa/fabrics/layout.c index 586965f9605..7a437da0564 100644 --- a/sound/aoa/fabrics/layout.c +++ b/sound/aoa/fabrics/layout.c @@ -768,7 +768,7 @@ static int check_codec(struct aoa_codec *codec, "required property %s not present\n", propname); return -ENODEV; } - if (*ref != codec->node->linux_phandle) { + if (*ref != codec->node->phandle) { printk(KERN_INFO "snd-aoa-fabric-layout: " "%s doesn't match!\n", propname); return -ENODEV; -- cgit v1.2.3-70-g09d2 From 430ad5a600a83956749307b13257c464c3826b55 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Thu, 28 Jan 2010 09:32:29 +0800 Subject: perf: Factorize trace events raw sample buffer operations Introduce ftrace_perf_buf_prepare() and ftrace_perf_buf_submit() to gather the common code that operates on raw events sampling buffer. This cleans up redundant code between regular trace events, syscall events and kprobe events. Changelog v1->v2: - Rename function name as per Masami and Frederic's suggestion - Add __kprobes for ftrace_perf_buf_prepare() and make ftrace_perf_buf_submit() inline as per Masami's suggestion - Export ftrace_perf_buf_prepare since modules will use it Signed-off-by: Xiao Guangrong Acked-by: Masami Hiramatsu Cc: Ingo Molnar Cc: Steven Rostedt Cc: Paul Mackerras Cc: Jason Baron Cc: Peter Zijlstra LKML-Reference: <4B60E92D.9000808@cn.fujitsu.com> Signed-off-by: Frederic Weisbecker --- include/linux/ftrace_event.h | 18 ++++++-- include/trace/ftrace.h | 48 +++------------------ kernel/trace/trace_event_profile.c | 52 ++++++++++++++++++++--- kernel/trace/trace_kprobe.c | 86 +++++--------------------------------- kernel/trace/trace_syscalls.c | 71 +++++-------------------------- 5 files changed, 88 insertions(+), 187 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 0a09e758c7d..cd95919d9ff 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -5,6 +5,7 @@ #include #include #include +#include struct trace_array; struct tracer; @@ -138,9 +139,6 @@ struct ftrace_event_call { #define FTRACE_MAX_PROFILE_SIZE 2048 -extern char *perf_trace_buf; -extern char *perf_trace_buf_nmi; - #define MAX_FILTER_PRED 32 #define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ @@ -195,6 +193,20 @@ extern void ftrace_profile_disable(int event_id); extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, char *filter_str); extern void ftrace_profile_free_filter(struct perf_event *event); +extern void * +ftrace_perf_buf_prepare(int size, unsigned short type, int *rctxp, + unsigned long *irq_flags); + +static inline void +ftrace_perf_buf_submit(void *raw_data, int size, int rctx, u64 addr, + u64 count, unsigned long irq_flags) +{ + struct trace_entry *entry = raw_data; + + perf_tp_event(entry->type, addr, count, raw_data, size); + perf_swevent_put_recursion_context(rctx); + local_irq_restore(irq_flags); +} #endif #endif /* _LINUX_FTRACE_EVENT_H */ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 4a46a60c207..f2c09e4d656 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -850,22 +850,12 @@ ftrace_profile_templ_##call(struct ftrace_event_call *event_call, \ proto) \ { \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ - extern int perf_swevent_get_recursion_context(void); \ - extern void perf_swevent_put_recursion_context(int rctx); \ - extern void perf_tp_event(int, u64, u64, void *, int); \ struct ftrace_raw_##call *entry; \ u64 __addr = 0, __count = 1; \ unsigned long irq_flags; \ - struct trace_entry *ent; \ int __entry_size; \ int __data_size; \ - char *trace_buf; \ - char *raw_data; \ - int __cpu; \ int rctx; \ - int pc; \ - \ - pc = preempt_count(); \ \ __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\ @@ -875,42 +865,16 @@ ftrace_profile_templ_##call(struct ftrace_event_call *event_call, \ if (WARN_ONCE(__entry_size > FTRACE_MAX_PROFILE_SIZE, \ "profile buffer not large enough")) \ return; \ - \ - local_irq_save(irq_flags); \ - \ - rctx = perf_swevent_get_recursion_context(); \ - if (rctx < 0) \ - goto end_recursion; \ - \ - __cpu = smp_processor_id(); \ - \ - if (in_nmi()) \ - trace_buf = rcu_dereference(perf_trace_buf_nmi); \ - else \ - trace_buf = rcu_dereference(perf_trace_buf); \ - \ - if (!trace_buf) \ - goto end; \ - \ - raw_data = per_cpu_ptr(trace_buf, __cpu); \ - \ - *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \ - entry = (struct ftrace_raw_##call *)raw_data; \ - ent = &entry->ent; \ - tracing_generic_entry_update(ent, irq_flags, pc); \ - ent->type = event_call->id; \ - \ + entry = (struct ftrace_raw_##call *)ftrace_perf_buf_prepare( \ + __entry_size, event_call->id, &rctx, &irq_flags); \ + if (!entry) \ + return; \ tstruct \ \ { assign; } \ \ - perf_tp_event(event_call->id, __addr, __count, entry, \ - __entry_size); \ - \ -end: \ - perf_swevent_put_recursion_context(rctx); \ -end_recursion: \ - local_irq_restore(irq_flags); \ + ftrace_perf_buf_submit(entry, __entry_size, rctx, __addr, \ + __count, irq_flags); \ } #undef DEFINE_EVENT diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index 9e25573242c..f0d69300507 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c @@ -6,14 +6,12 @@ */ #include +#include #include "trace.h" -char *perf_trace_buf; -EXPORT_SYMBOL_GPL(perf_trace_buf); - -char *perf_trace_buf_nmi; -EXPORT_SYMBOL_GPL(perf_trace_buf_nmi); +static char *perf_trace_buf; +static char *perf_trace_buf_nmi; typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ; @@ -120,3 +118,47 @@ void ftrace_profile_disable(int event_id) } mutex_unlock(&event_mutex); } + +__kprobes void *ftrace_perf_buf_prepare(int size, unsigned short type, + int *rctxp, unsigned long *irq_flags) +{ + struct trace_entry *entry; + char *trace_buf, *raw_data; + int pc, cpu; + + pc = preempt_count(); + + /* Protect the per cpu buffer, begin the rcu read side */ + local_irq_save(*irq_flags); + + *rctxp = perf_swevent_get_recursion_context(); + if (*rctxp < 0) + goto err_recursion; + + cpu = smp_processor_id(); + + if (in_nmi()) + trace_buf = rcu_dereference(perf_trace_buf_nmi); + else + trace_buf = rcu_dereference(perf_trace_buf); + + if (!trace_buf) + goto err; + + raw_data = per_cpu_ptr(trace_buf, cpu); + + /* zero the dead bytes from align to not leak stack to user */ + *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; + + entry = (struct trace_entry *)raw_data; + tracing_generic_entry_update(entry, *irq_flags, pc); + entry->type = type; + + return raw_data; +err: + perf_swevent_put_recursion_context(*rctxp); +err_recursion: + local_irq_restore(*irq_flags); + return NULL; +} +EXPORT_SYMBOL_GPL(ftrace_perf_buf_prepare); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index d6266cad695..2e28ee36646 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1243,14 +1243,10 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); struct ftrace_event_call *call = &tp->call; struct kprobe_trace_entry *entry; - struct trace_entry *ent; - int size, __size, i, pc, __cpu; + int size, __size, i; unsigned long irq_flags; - char *trace_buf; - char *raw_data; int rctx; - pc = preempt_count(); __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); size = ALIGN(__size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); @@ -1258,45 +1254,16 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, "profile buffer not large enough")) return 0; - /* - * Protect the non nmi buffer - * This also protects the rcu read side - */ - local_irq_save(irq_flags); - - rctx = perf_swevent_get_recursion_context(); - if (rctx < 0) - goto end_recursion; - - __cpu = smp_processor_id(); - - if (in_nmi()) - trace_buf = rcu_dereference(perf_trace_buf_nmi); - else - trace_buf = rcu_dereference(perf_trace_buf); - - if (!trace_buf) - goto end; - - raw_data = per_cpu_ptr(trace_buf, __cpu); - - /* Zero dead bytes from alignment to avoid buffer leak to userspace */ - *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; - entry = (struct kprobe_trace_entry *)raw_data; - ent = &entry->ent; + entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags); + if (!entry) + return 0; - tracing_generic_entry_update(ent, irq_flags, pc); - ent->type = call->id; entry->nargs = tp->nr_args; entry->ip = (unsigned long)kp->addr; for (i = 0; i < tp->nr_args; i++) entry->args[i] = call_fetch(&tp->args[i].fetch, regs); - perf_tp_event(call->id, entry->ip, 1, entry, size); -end: - perf_swevent_put_recursion_context(rctx); -end_recursion: - local_irq_restore(irq_flags); + ftrace_perf_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags); return 0; } @@ -1308,14 +1275,10 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); struct ftrace_event_call *call = &tp->call; struct kretprobe_trace_entry *entry; - struct trace_entry *ent; - int size, __size, i, pc, __cpu; + int size, __size, i; unsigned long irq_flags; - char *trace_buf; - char *raw_data; int rctx; - pc = preempt_count(); __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); size = ALIGN(__size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); @@ -1323,46 +1286,17 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, "profile buffer not large enough")) return 0; - /* - * Protect the non nmi buffer - * This also protects the rcu read side - */ - local_irq_save(irq_flags); - - rctx = perf_swevent_get_recursion_context(); - if (rctx < 0) - goto end_recursion; - - __cpu = smp_processor_id(); - - if (in_nmi()) - trace_buf = rcu_dereference(perf_trace_buf_nmi); - else - trace_buf = rcu_dereference(perf_trace_buf); - - if (!trace_buf) - goto end; - - raw_data = per_cpu_ptr(trace_buf, __cpu); - - /* Zero dead bytes from alignment to avoid buffer leak to userspace */ - *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; - entry = (struct kretprobe_trace_entry *)raw_data; - ent = &entry->ent; + entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags); + if (!entry) + return 0; - tracing_generic_entry_update(ent, irq_flags, pc); - ent->type = call->id; entry->nargs = tp->nr_args; entry->func = (unsigned long)tp->rp.kp.addr; entry->ret_ip = (unsigned long)ri->ret_addr; for (i = 0; i < tp->nr_args; i++) entry->args[i] = call_fetch(&tp->args[i].fetch, regs); - perf_tp_event(call->id, entry->ret_ip, 1, entry, size); -end: - perf_swevent_put_recursion_context(rctx); -end_recursion: - local_irq_restore(irq_flags); + ftrace_perf_buf_submit(entry, size, rctx, entry->ret_ip, 1, irq_flags); return 0; } diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index f694f66d75b..4e332b9e449 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -433,12 +433,9 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) struct syscall_metadata *sys_data; struct syscall_trace_enter *rec; unsigned long flags; - char *trace_buf; - char *raw_data; int syscall_nr; int rctx; int size; - int cpu; syscall_nr = syscall_get_nr(current, regs); if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) @@ -457,37 +454,15 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) "profile buffer not large enough")) return; - /* Protect the per cpu buffer, begin the rcu read side */ - local_irq_save(flags); - - rctx = perf_swevent_get_recursion_context(); - if (rctx < 0) - goto end_recursion; - - cpu = smp_processor_id(); - - trace_buf = rcu_dereference(perf_trace_buf); - - if (!trace_buf) - goto end; - - raw_data = per_cpu_ptr(trace_buf, cpu); - - /* zero the dead bytes from align to not leak stack to user */ - *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; + rec = (struct syscall_trace_enter *)ftrace_perf_buf_prepare(size, + sys_data->enter_event->id, &rctx, &flags); + if (!rec) + return; - rec = (struct syscall_trace_enter *) raw_data; - tracing_generic_entry_update(&rec->ent, 0, 0); - rec->ent.type = sys_data->enter_event->id; rec->nr = syscall_nr; syscall_get_arguments(current, regs, 0, sys_data->nb_args, (unsigned long *)&rec->args); - perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size); - -end: - perf_swevent_put_recursion_context(rctx); -end_recursion: - local_irq_restore(flags); + ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags); } int prof_sysenter_enable(struct ftrace_event_call *call) @@ -531,11 +506,8 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) struct syscall_trace_exit *rec; unsigned long flags; int syscall_nr; - char *trace_buf; - char *raw_data; int rctx; int size; - int cpu; syscall_nr = syscall_get_nr(current, regs); if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) @@ -557,38 +529,15 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) "exit event has grown above profile buffer size")) return; - /* Protect the per cpu buffer, begin the rcu read side */ - local_irq_save(flags); - - rctx = perf_swevent_get_recursion_context(); - if (rctx < 0) - goto end_recursion; - - cpu = smp_processor_id(); - - trace_buf = rcu_dereference(perf_trace_buf); - - if (!trace_buf) - goto end; - - raw_data = per_cpu_ptr(trace_buf, cpu); - - /* zero the dead bytes from align to not leak stack to user */ - *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; - - rec = (struct syscall_trace_exit *)raw_data; + rec = (struct syscall_trace_exit *)ftrace_perf_buf_prepare(size, + sys_data->exit_event->id, &rctx, &flags); + if (!rec) + return; - tracing_generic_entry_update(&rec->ent, 0, 0); - rec->ent.type = sys_data->exit_event->id; rec->nr = syscall_nr; rec->ret = syscall_get_return_value(current, regs); - perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size); - -end: - perf_swevent_put_recursion_context(rctx); -end_recursion: - local_irq_restore(flags); + ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags); } int prof_sysexit_enable(struct ftrace_event_call *call) -- cgit v1.2.3-70-g09d2 From 9f41699ed067fa695faff8e2e9981b2550abec62 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 22 Jan 2010 15:59:29 +0100 Subject: bitops: Provide compile time HWEIGHT{8,16,32,64} Provide compile time versions of hweight. Signed-off-by: Peter Zijlstra Cc: Stephane Eranian Cc: Linus Torvalds Cc: Andrew Morton Cc: Thomas Gleixner LKML-Reference: <20100122155535.797688466@chello.nl> [ Remove some whitespace damage while we are at it ] Signed-off-by: Ingo Molnar --- include/linux/bitops.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index c05a29cb9bb..ba0fd1eb4af 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -25,7 +25,7 @@ static __inline__ int get_bitmask_order(unsigned int count) { int order; - + order = fls(count); return order; /* We could be slightly more clever with -1 here... */ } @@ -33,7 +33,7 @@ static __inline__ int get_bitmask_order(unsigned int count) static __inline__ int get_count_order(unsigned int count) { int order; - + order = fls(count) - 1; if (count & (count - 1)) order++; @@ -45,6 +45,20 @@ static inline unsigned long hweight_long(unsigned long w) return sizeof(w) == 4 ? hweight32(w) : hweight64(w); } +#define HWEIGHT8(w) \ + ( (!!((w) & (1ULL << 0))) + \ + (!!((w) & (1ULL << 1))) + \ + (!!((w) & (1ULL << 2))) + \ + (!!((w) & (1ULL << 3))) + \ + (!!((w) & (1ULL << 4))) + \ + (!!((w) & (1ULL << 5))) + \ + (!!((w) & (1ULL << 6))) + \ + (!!((w) & (1ULL << 7))) ) + +#define HWEIGHT16(w) (HWEIGHT8(w) + HWEIGHT8(w >> 8)) +#define HWEIGHT32(w) (HWEIGHT16(w) + HWEIGHT16(w >> 16)) +#define HWEIGHT64(w) (HWEIGHT32(w) + HWEIGHT32(w >> 32)) + /** * rol32 - rotate a 32-bit value left * @word: value to rotate -- cgit v1.2.3-70-g09d2 From 184f412c3341cd24fbd26604634a5800b83dbdc3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 27 Jan 2010 08:39:39 +0100 Subject: perf, x86: Clean up event constraints code a bit - Remove stray debug code - Improve ugly macros a bit - Remove some whitespace damage - (Also fix up some accumulated damage in perf_event.h) Signed-off-by: Ingo Molnar Cc: Stephane Eranian Cc: Peter Zijlstra LKML-Reference: --- arch/x86/kernel/cpu/perf_event.c | 37 ++++++++----------------------------- include/linux/perf_event.h | 24 +++++++++++------------- 2 files changed, 19 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 66de282ad2f..fdbe2484227 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -93,24 +93,19 @@ struct cpu_hw_events { struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ }; -#define EVENT_CONSTRAINT(c, n, m) { \ +#define EVENT_CONSTRAINT(c, n, m) { \ { .idxmsk64[0] = (n) }, \ .code = (c), \ .cmask = (m), \ .weight = HWEIGHT64((u64)(n)), \ } -#define INTEL_EVENT_CONSTRAINT(c, n) \ - EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) +#define INTEL_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) +#define FIXED_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, INTEL_ARCH_FIXED_MASK) -#define FIXED_EVENT_CONSTRAINT(c, n) \ - EVENT_CONSTRAINT(c, n, INTEL_ARCH_FIXED_MASK) +#define EVENT_CONSTRAINT_END EVENT_CONSTRAINT(0, 0, 0) -#define EVENT_CONSTRAINT_END \ - EVENT_CONSTRAINT(0, 0, 0) - -#define for_each_event_constraint(e, c) \ - for ((e) = (c); (e)->cmask; (e)++) +#define for_each_event_constraint(e, c) for ((e) = (c); (e)->cmask; (e)++) /* * struct x86_pmu - generic x86 pmu @@ -1276,14 +1271,6 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) if (test_bit(hwc->idx, used_mask)) break; -#if 0 - pr_debug("CPU%d fast config=0x%llx idx=%d assign=%c\n", - smp_processor_id(), - hwc->config, - hwc->idx, - assign ? 'y' : 'n'); -#endif - set_bit(hwc->idx, used_mask); if (assign) assign[i] = hwc->idx; @@ -1333,14 +1320,6 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) if (j == X86_PMC_IDX_MAX) break; -#if 0 - pr_debug("CPU%d slow config=0x%llx idx=%d assign=%c\n", - smp_processor_id(), - hwc->config, - j, - assign ? 'y' : 'n'); -#endif - set_bit(j, used_mask); if (assign) @@ -2596,9 +2575,9 @@ static const struct pmu pmu = { * validate a single event group * * validation include: - * - check events are compatible which each other - * - events do not compete for the same counter - * - number of events <= number of counters + * - check events are compatible which each other + * - events do not compete for the same counter + * - number of events <= number of counters * * validation ensures the group can be loaded onto the * PMU if it was the only group available. diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 72b2615600d..953c17731e0 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -290,7 +290,7 @@ struct perf_event_mmap_page { }; #define PERF_RECORD_MISC_CPUMODE_MASK (3 << 0) -#define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0) +#define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0) #define PERF_RECORD_MISC_KERNEL (1 << 0) #define PERF_RECORD_MISC_USER (2 << 0) #define PERF_RECORD_MISC_HYPERVISOR (3 << 0) @@ -356,8 +356,8 @@ enum perf_event_type { * u64 stream_id; * }; */ - PERF_RECORD_THROTTLE = 5, - PERF_RECORD_UNTHROTTLE = 6, + PERF_RECORD_THROTTLE = 5, + PERF_RECORD_UNTHROTTLE = 6, /* * struct { @@ -371,10 +371,10 @@ enum perf_event_type { /* * struct { - * struct perf_event_header header; - * u32 pid, tid; + * struct perf_event_header header; + * u32 pid, tid; * - * struct read_format values; + * struct read_format values; * }; */ PERF_RECORD_READ = 8, @@ -412,7 +412,7 @@ enum perf_event_type { * char data[size];}&& PERF_SAMPLE_RAW * }; */ - PERF_RECORD_SAMPLE = 9, + PERF_RECORD_SAMPLE = 9, PERF_RECORD_MAX, /* non-ABI */ }; @@ -752,8 +752,7 @@ extern int perf_max_events; extern const struct pmu *hw_perf_event_init(struct perf_event *event); extern void perf_event_task_sched_in(struct task_struct *task); -extern void perf_event_task_sched_out(struct task_struct *task, - struct task_struct *next); +extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); extern void perf_event_task_tick(struct task_struct *task); extern int perf_event_init_task(struct task_struct *child); extern void perf_event_exit_task(struct task_struct *child); @@ -853,8 +852,7 @@ extern int sysctl_perf_event_mlock; extern int sysctl_perf_event_sample_rate; extern void perf_event_init(void); -extern void perf_tp_event(int event_id, u64 addr, u64 count, - void *record, int entry_size); +extern void perf_tp_event(int event_id, u64 addr, u64 count, void *record, int entry_size); extern void perf_bp_event(struct perf_event *event, void *data); #ifndef perf_misc_flags @@ -895,13 +893,13 @@ static inline void perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) { } static inline void -perf_bp_event(struct perf_event *event, void *data) { } +perf_bp_event(struct perf_event *event, void *data) { } static inline void perf_event_mmap(struct vm_area_struct *vma) { } static inline void perf_event_comm(struct task_struct *tsk) { } static inline void perf_event_fork(struct task_struct *tsk) { } static inline void perf_event_init(void) { } -static inline int perf_swevent_get_recursion_context(void) { return -1; } +static inline int perf_swevent_get_recursion_context(void) { return -1; } static inline void perf_swevent_put_recursion_context(int rctx) { } static inline void perf_event_enable(struct perf_event *event) { } static inline void perf_event_disable(struct perf_event *event) { } -- cgit v1.2.3-70-g09d2 From 488991e28e55b4fbca8067edf0259f69d1a6f92c Mon Sep 17 00:00:00 2001 From: "Alan D. Brunelle" Date: Fri, 29 Jan 2010 09:04:08 +0100 Subject: block: Added in stricter no merge semantics for block I/O Updated 'nomerges' tunable to accept a value of '2' - indicating that _no_ merges at all are to be attempted (not even the simple one-hit cache). The following table illustrates the additional benefit - 5 minute runs of a random I/O load were applied to a dozen devices on a 16-way x86_64 system. nomerges Throughput %System Improvement (tput / %sys) -------- ------------ ----------- ------------------------- 0 12.45 MB/sec 0.669365609 1 12.50 MB/sec 0.641519199 0.40% / 2.71% 2 12.52 MB/sec 0.639849750 0.56% / 2.96% Signed-off-by: Alan D. Brunelle Signed-off-by: Jens Axboe --- Documentation/ABI/testing/sysfs-block | 14 ++++++++++++++ Documentation/block/queue-sysfs.txt | 10 +++++----- block/blk-sysfs.c | 11 +++++++---- block/elevator.c | 11 ++++++++++- include/linux/blkdev.h | 3 +++ 5 files changed, 39 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block index d2f90334bb9..4873c759d53 100644 --- a/Documentation/ABI/testing/sysfs-block +++ b/Documentation/ABI/testing/sysfs-block @@ -128,3 +128,17 @@ Description: preferred request size for workloads where sustained throughput is desired. If no optimal I/O size is reported this file contains 0. + +What: /sys/block//queue/nomerges +Date: January 2010 +Contact: +Description: + Standard I/O elevator operations include attempts to + merge contiguous I/Os. For known random I/O loads these + attempts will always fail and result in extra cycles + being spent in the kernel. This allows one to turn off + this behavior on one of two ways: When set to 1, complex + merge checks are disabled, but the simple one-shot merges + with the previous I/O request are enabled. When set to 2, + all merge tries are disabled. The default value is 0 - + which enables all types of merge tries. diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt index e164403f60e..f65274081c8 100644 --- a/Documentation/block/queue-sysfs.txt +++ b/Documentation/block/queue-sysfs.txt @@ -25,11 +25,11 @@ size allowed by the hardware. nomerges (RW) ------------- -This enables the user to disable the lookup logic involved with IO merging -requests in the block layer. Merging may still occur through a direct -1-hit cache, since that comes for (almost) free. The IO scheduler will not -waste cycles doing tree/hash lookups for merges if nomerges is 1. Defaults -to 0, enabling all merges. +This enables the user to disable the lookup logic involved with IO +merging requests in the block layer. By default (0) all merges are +enabled. When set to 1 only simple one-hit merges will be tried. When +set to 2 no merge algorithms will be tried (including one-hit or more +complex tree/hash lookups). nr_requests (RW) ---------------- diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 8606c9543fd..e85442415db 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -189,7 +189,8 @@ static ssize_t queue_nonrot_store(struct request_queue *q, const char *page, static ssize_t queue_nomerges_show(struct request_queue *q, char *page) { - return queue_var_show(blk_queue_nomerges(q), page); + return queue_var_show((blk_queue_nomerges(q) << 1) | + blk_queue_noxmerges(q), page); } static ssize_t queue_nomerges_store(struct request_queue *q, const char *page, @@ -199,10 +200,12 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page, ssize_t ret = queue_var_store(&nm, page, count); spin_lock_irq(q->queue_lock); - if (nm) + queue_flag_clear(QUEUE_FLAG_NOMERGES, q); + queue_flag_clear(QUEUE_FLAG_NOXMERGES, q); + if (nm == 2) queue_flag_set(QUEUE_FLAG_NOMERGES, q); - else - queue_flag_clear(QUEUE_FLAG_NOMERGES, q); + else if (nm) + queue_flag_set(QUEUE_FLAG_NOXMERGES, q); spin_unlock_irq(q->queue_lock); return ret; diff --git a/block/elevator.c b/block/elevator.c index 9ad5ccc4c5e..ee3a883840f 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -473,6 +473,15 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio) struct request *__rq; int ret; + /* + * Levels of merges: + * nomerges: No merges at all attempted + * noxmerges: Only simple one-hit cache try + * merges: All merge tries attempted + */ + if (blk_queue_nomerges(q)) + return ELEVATOR_NO_MERGE; + /* * First try one-hit cache. */ @@ -484,7 +493,7 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio) } } - if (blk_queue_nomerges(q)) + if (blk_queue_noxmerges(q)) return ELEVATOR_NO_MERGE; /* diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ffb13ad3571..f71f5c58620 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -463,6 +463,7 @@ struct request_queue #define QUEUE_FLAG_IO_STAT 15 /* do IO stats */ #define QUEUE_FLAG_CQ 16 /* hardware does queuing */ #define QUEUE_FLAG_DISCARD 17 /* supports DISCARD */ +#define QUEUE_FLAG_NOXMERGES 18 /* No extended merges */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_CLUSTER) | \ @@ -589,6 +590,8 @@ enum { #define blk_queue_queuing(q) test_bit(QUEUE_FLAG_CQ, &(q)->queue_flags) #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) +#define blk_queue_noxmerges(q) \ + test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) #define blk_queue_flushing(q) ((q)->ordseq) -- cgit v1.2.3-70-g09d2 From 1f5b8f8a2031ae9507eb67743cad4d424739bfff Mon Sep 17 00:00:00 2001 From: john stultz Date: Thu, 28 Jan 2010 15:02:41 -0800 Subject: ntp: Make time_esterror and time_maxerror static Make time_esterror and time_maxerror static as no one uses them outside of ntp.c Signed-off-by: John Stultz Cc: richard@rsk.demon.co.uk LKML-Reference: <1264719761.3437.47.camel@localhost.localdomain> Signed-off-by: Thomas Gleixner --- include/linux/timex.h | 3 --- kernel/time/ntp.c | 4 ++-- 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timex.h b/include/linux/timex.h index 94f8faecdcb..7a082b32d8e 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -238,9 +238,6 @@ extern int tickadj; /* amount of adjustment per tick */ * phase-lock loop variables */ extern int time_status; /* clock synchronization status bits */ -extern long time_maxerror; /* maximum error */ -extern long time_esterror; /* estimated error */ - extern long time_adjust; /* The amount of adjtime left */ extern void ntp_init(void); diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 4800f933910..74b1b37b159 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -58,10 +58,10 @@ static s64 time_offset; static long time_constant = 2; /* maximum error (usecs): */ -long time_maxerror = NTP_PHASE_LIMIT; +static long time_maxerror = NTP_PHASE_LIMIT; /* estimated error (usecs): */ -long time_esterror = NTP_PHASE_LIMIT; +static long time_esterror = NTP_PHASE_LIMIT; /* frequency offset (scaled nsecs/secs): */ static s64 time_freq; -- cgit v1.2.3-70-g09d2 From ef7995f4e46b1677f3eaaf547316e1a910b38dcb Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 29 Jan 2010 23:59:12 -0800 Subject: Input: implement input filters Sometimes it is desirable to suppress certain events from reaching input handlers and thus user space. One such example is Mac mouse button emulation code which catches certain key presses and converts them into button clicks as if they were emitted by a virtual mouse. The original key press events should be completely suppressed, otherwise user space will be confused, and while keyboard driver does it on its own evdev is blissfully unaware of this arrangement. This patch adds notion of 'filter' to the standard input handlers, which may flag event as filtered thus preventing it from reaching other input handlers. Filters don't (nor will they ever) have a notion of priority relative to each other, input core will run all of them first and any one of them may mark event as filtered. This patch is inspired by similar patch by Matthew Garret but the implementation and intended usage are quite different. Signed-off-by: Dmitry Torokhov --- drivers/input/input.c | 41 ++++++++++++++++++++++++++++++++++------- include/linux/input.h | 8 ++++++++ 2 files changed, 42 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/input.c b/drivers/input/input.c index 6c161e22086..7080a9d4b84 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -86,12 +86,14 @@ static int input_defuzz_abs_event(int value, int old_val, int fuzz) } /* - * Pass event through all open handles. This function is called with + * Pass event first through all filters and then, if event has not been + * filtered out, through all open handles. This function is called with * dev->event_lock held and interrupts disabled. */ static void input_pass_event(struct input_dev *dev, unsigned int type, unsigned int code, int value) { + struct input_handler *handler; struct input_handle *handle; rcu_read_lock(); @@ -99,11 +101,25 @@ static void input_pass_event(struct input_dev *dev, handle = rcu_dereference(dev->grab); if (handle) handle->handler->event(handle, type, code, value); - else - list_for_each_entry_rcu(handle, &dev->h_list, d_node) - if (handle->open) - handle->handler->event(handle, - type, code, value); + else { + bool filtered = false; + + list_for_each_entry_rcu(handle, &dev->h_list, d_node) { + if (!handle->open) + continue; + + handler = handle->handler; + if (!handler->filter) { + if (filtered) + break; + + handler->event(handle, type, code, value); + + } else if (handler->filter(handle, type, code, value)) + filtered = true; + } + } + rcu_read_unlock(); } @@ -990,6 +1006,8 @@ static int input_handlers_seq_show(struct seq_file *seq, void *v) union input_seq_state *state = (union input_seq_state *)&seq->private; seq_printf(seq, "N: Number=%u Name=%s", state->pos, handler->name); + if (handler->filter) + seq_puts(seq, " (filter)"); if (handler->fops) seq_printf(seq, " Minor=%d", handler->minor); seq_putc(seq, '\n'); @@ -1803,7 +1821,16 @@ int input_register_handle(struct input_handle *handle) error = mutex_lock_interruptible(&dev->mutex); if (error) return error; - list_add_tail_rcu(&handle->d_node, &dev->h_list); + + /* + * Filters go to the head of the list, normal handlers + * to the tail. + */ + if (handler->filter) + list_add_rcu(&handle->d_node, &dev->h_list); + else + list_add_tail_rcu(&handle->d_node, &dev->h_list); + mutex_unlock(&dev->mutex); /* diff --git a/include/linux/input.h b/include/linux/input.h index 7be8a6537b5..6c9d3d49fa9 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -1198,6 +1198,8 @@ struct input_handle; * @event: event handler. This method is being called by input core with * interrupts disabled and dev->event_lock spinlock held and so * it may not sleep + * @filter: similar to @event; separates normal event handlers from + * "filters". * @connect: called when attaching a handler to an input device * @disconnect: disconnects a handler from input device * @start: starts handler for given handle. This function is called by @@ -1219,6 +1221,11 @@ struct input_handle; * same time. All of them will get their copy of input event generated by * the device. * + * The very same structure is used to implement input filters. Input core + * allows filters to run first and will not pass event to regular handlers + * if any of the filters indicate that the event should be filtered (by + * returning %true from their filter() method). + * * Note that input core serializes calls to connect() and disconnect() * methods. */ @@ -1227,6 +1234,7 @@ struct input_handler { void *private; void (*event)(struct input_handle *handle, unsigned int type, unsigned int code, int value); + bool (*filter)(struct input_handle *handle, unsigned int type, unsigned int code, int value); int (*connect)(struct input_handler *handler, struct input_dev *dev, const struct input_device_id *id); void (*disconnect)(struct input_handle *handle); void (*start)(struct input_handle *handle); -- cgit v1.2.3-70-g09d2 From 99b089c3c38a83ebaeb1cc4584ddcde841626467 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sat, 30 Jan 2010 00:53:29 -0800 Subject: Input: Mac button emulation - implement as an input filter Current implementation of Mac mouse button emulation plugs into legacy keyboard driver, converts certain keys into button events on a separate device, and suppresses the real events from reaching tty. This worked well enough until user space started using evdev which was completely unaware of this arrangement and kept sending original key presses to its users. Change the implementation to use newly added input filter framework so that original key presses are not transmitted to any handlers. As a bonus remove SYSCTL dependencies from the code and use Kconfig instead; also do not create the emulated mouse device until user activates emulation. Signed-off-by: Dmitry Torokhov --- drivers/char/keyboard.c | 5 - drivers/macintosh/Kconfig | 1 + drivers/macintosh/mac_hid.c | 257 ++++++++++++++++++++++++++++++++------------ include/linux/kbd_kern.h | 3 - 4 files changed, 188 insertions(+), 78 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/keyboard.c b/drivers/char/keyboard.c index f706b1dffdb..cbf64b985ef 100644 --- a/drivers/char/keyboard.c +++ b/drivers/char/keyboard.c @@ -1185,11 +1185,6 @@ static void kbd_keycode(unsigned int keycode, int down, int hw_raw) rep = (down == 2); -#ifdef CONFIG_MAC_EMUMOUSEBTN - if (mac_hid_mouse_emulate_buttons(1, keycode, down)) - return; -#endif /* CONFIG_MAC_EMUMOUSEBTN */ - if ((raw_mode = (kbd->kbdmode == VC_RAW)) && !hw_raw) if (emulate_raw(vc, keycode, !down << 7)) if (keycode < BTN_MISC && printk_ratelimit()) diff --git a/drivers/macintosh/Kconfig b/drivers/macintosh/Kconfig index 3d906833948..aa3c27e5255 100644 --- a/drivers/macintosh/Kconfig +++ b/drivers/macintosh/Kconfig @@ -172,6 +172,7 @@ config INPUT_ADBHID config MAC_EMUMOUSEBTN bool "Support for mouse button 2+3 emulation" + depends on SYSCTL select INPUT help This provides generic support for emulating the 2nd and 3rd mouse diff --git a/drivers/macintosh/mac_hid.c b/drivers/macintosh/mac_hid.c index 7b4ef5bb556..0b210a90aef 100644 --- a/drivers/macintosh/mac_hid.c +++ b/drivers/macintosh/mac_hid.c @@ -13,17 +13,195 @@ #include #include #include -#include - -static struct input_dev *emumousebtn; -static int emumousebtn_input_register(void); static int mouse_emulate_buttons; static int mouse_button2_keycode = KEY_RIGHTCTRL; /* right control key */ static int mouse_button3_keycode = KEY_RIGHTALT; /* right option key */ -static int mouse_last_keycode; -#if defined(CONFIG_SYSCTL) +static struct input_dev *mac_hid_emumouse_dev; + +static int mac_hid_create_emumouse(void) +{ + static struct lock_class_key mac_hid_emumouse_dev_event_class; + static struct lock_class_key mac_hid_emumouse_dev_mutex_class; + int err; + + mac_hid_emumouse_dev = input_allocate_device(); + if (!mac_hid_emumouse_dev) + return -ENOMEM; + + lockdep_set_class(&mac_hid_emumouse_dev->event_lock, + &mac_hid_emumouse_dev_event_class); + lockdep_set_class(&mac_hid_emumouse_dev->mutex, + &mac_hid_emumouse_dev_mutex_class); + + mac_hid_emumouse_dev->name = "Macintosh mouse button emulation"; + mac_hid_emumouse_dev->id.bustype = BUS_ADB; + mac_hid_emumouse_dev->id.vendor = 0x0001; + mac_hid_emumouse_dev->id.product = 0x0001; + mac_hid_emumouse_dev->id.version = 0x0100; + + mac_hid_emumouse_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL); + mac_hid_emumouse_dev->keybit[BIT_WORD(BTN_MOUSE)] = + BIT_MASK(BTN_LEFT) | BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT); + mac_hid_emumouse_dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y); + + err = input_register_device(mac_hid_emumouse_dev); + if (err) { + input_free_device(mac_hid_emumouse_dev); + mac_hid_emumouse_dev = NULL; + return err; + } + + return 0; +} + +static void mac_hid_destroy_emumouse(void) +{ + input_unregister_device(mac_hid_emumouse_dev); + mac_hid_emumouse_dev = NULL; +} + +static bool mac_hid_emumouse_filter(struct input_handle *handle, + unsigned int type, unsigned int code, + int value) +{ + unsigned int btn; + + if (type != EV_KEY) + return false; + + if (code == mouse_button2_keycode) + btn = BTN_MIDDLE; + else if (code == mouse_button3_keycode) + btn = BTN_RIGHT; + else + return false; + + input_report_key(mac_hid_emumouse_dev, btn, value); + input_sync(mac_hid_emumouse_dev); + + return true; +} + +static int mac_hid_emumouse_connect(struct input_handler *handler, + struct input_dev *dev, + const struct input_device_id *id) +{ + struct input_handle *handle; + int error; + + /* Don't bind to ourselves */ + if (dev == mac_hid_emumouse_dev) + return -ENODEV; + + handle = kzalloc(sizeof(struct input_handle), GFP_KERNEL); + if (!handle) + return -ENOMEM; + + handle->dev = dev; + handle->handler = handler; + handle->name = "mac-button-emul"; + + error = input_register_handle(handle); + if (error) { + printk(KERN_ERR + "mac_hid: Failed to register button emulation handle, " + "error %d\n", error); + goto err_free; + } + + error = input_open_device(handle); + if (error) { + printk(KERN_ERR + "mac_hid: Failed to open input device, error %d\n", + error); + goto err_unregister; + } + + return 0; + + err_unregister: + input_unregister_handle(handle); + err_free: + kfree(handle); + return error; +} + +static void mac_hid_emumouse_disconnect(struct input_handle *handle) +{ + input_close_device(handle); + input_unregister_handle(handle); + kfree(handle); +} + +static const struct input_device_id mac_hid_emumouse_ids[] = { + { + .flags = INPUT_DEVICE_ID_MATCH_EVBIT, + .evbit = { BIT_MASK(EV_KEY) }, + }, + { }, +}; + +MODULE_DEVICE_TABLE(input, mac_hid_emumouse_ids); + +static struct input_handler mac_hid_emumouse_handler = { + .filter = mac_hid_emumouse_filter, + .connect = mac_hid_emumouse_connect, + .disconnect = mac_hid_emumouse_disconnect, + .name = "mac-button-emul", + .id_table = mac_hid_emumouse_ids, +}; + +static int mac_hid_start_emulation(void) +{ + int err; + + err = mac_hid_create_emumouse(); + if (err) + return err; + + err = input_register_handler(&mac_hid_emumouse_handler); + if (err) { + mac_hid_destroy_emumouse(); + return err; + } + + return 0; +} + +static void mac_hid_stop_emulation(void) +{ + input_unregister_handler(&mac_hid_emumouse_handler); + mac_hid_destroy_emumouse(); +} + +static int mac_hid_toggle_emumouse(ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int *valp = table->data; + int old_val = *valp; + int rc; + + rc = proc_dointvec(table, write, buffer, lenp, ppos); + + if (rc == 0 && write && *valp != old_val) { + if (*valp == 1) + rc = mac_hid_start_emulation(); + else if (*valp == 0) + mac_hid_stop_emulation(); + else + rc = -EINVAL; + } + + /* Restore the old value in case of error */ + if (rc) + *valp = old_val; + + return rc; +} + /* file(s) in /proc/sys/dev/mac_hid */ static ctl_table mac_hid_files[] = { { @@ -31,7 +209,7 @@ static ctl_table mac_hid_files[] = { .data = &mouse_emulate_buttons, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = mac_hid_toggle_emumouse, }, { .procname = "mouse_button2_keycode", @@ -74,73 +252,12 @@ static ctl_table mac_hid_root_dir[] = { static struct ctl_table_header *mac_hid_sysctl_header; -#endif /* endif CONFIG_SYSCTL */ - -int mac_hid_mouse_emulate_buttons(int caller, unsigned int keycode, int down) -{ - switch (caller) { - case 1: - /* Called from keyboard.c */ - if (mouse_emulate_buttons - && (keycode == mouse_button2_keycode - || keycode == mouse_button3_keycode)) { - if (mouse_emulate_buttons == 1) { - input_report_key(emumousebtn, - keycode == mouse_button2_keycode ? BTN_MIDDLE : BTN_RIGHT, - down); - input_sync(emumousebtn); - return 1; - } - mouse_last_keycode = down ? keycode : 0; - } - break; - } - return 0; -} - -static struct lock_class_key emumousebtn_event_class; -static struct lock_class_key emumousebtn_mutex_class; - -static int emumousebtn_input_register(void) -{ - int ret; - - emumousebtn = input_allocate_device(); - if (!emumousebtn) - return -ENOMEM; - - lockdep_set_class(&emumousebtn->event_lock, &emumousebtn_event_class); - lockdep_set_class(&emumousebtn->mutex, &emumousebtn_mutex_class); - - emumousebtn->name = "Macintosh mouse button emulation"; - emumousebtn->id.bustype = BUS_ADB; - emumousebtn->id.vendor = 0x0001; - emumousebtn->id.product = 0x0001; - emumousebtn->id.version = 0x0100; - - emumousebtn->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL); - emumousebtn->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) | - BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT); - emumousebtn->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y); - - ret = input_register_device(emumousebtn); - if (ret) - input_free_device(emumousebtn); - - return ret; -} static int __init mac_hid_init(void) { - int err; - - err = emumousebtn_input_register(); - if (err) - return err; - -#if defined(CONFIG_SYSCTL) mac_hid_sysctl_header = register_sysctl_table(mac_hid_root_dir); -#endif /* CONFIG_SYSCTL */ + if (!mac_hid_sysctl_header) + return -ENOMEM; return 0; } diff --git a/include/linux/kbd_kern.h b/include/linux/kbd_kern.h index 8bdb16bfe5f..506ad20c18f 100644 --- a/include/linux/kbd_kern.h +++ b/include/linux/kbd_kern.h @@ -161,7 +161,4 @@ static inline void con_schedule_flip(struct tty_struct *t) schedule_delayed_work(&t->buf.work, 0); } -/* mac_hid.c */ -extern int mac_hid_mouse_emulate_buttons(int, unsigned int, int); - #endif -- cgit v1.2.3-70-g09d2 From 61ef2489dbf587258526cfd4ebf4bba3b079f401 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Fri, 22 Jan 2010 16:16:19 +0800 Subject: resources: introduce generic page_is_ram() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's based on walk_system_ram_range(), for archs that don't have their own page_is_ram(). The static verions in MIPS and SCORE are also made global. v4: prefer plain 1 instead of PAGE_IS_RAM (H. Peter Anvin) v3: add comment (KAMEZAWA Hiroyuki) "AFAIK, this "System RAM" information has been used for kdump to grab valid memory area and seems good for the kernel itself." v2: add PAGE_IS_RAM macro (Américo Wang) Cc: Chen Liqin Cc: Lennox Wu Cc: Américo Wang Cc: linux-mips@linux-mips.org Cc: Yinghai Lu Acked-by: Ralf Baechle Reviewed-by: KAMEZAWA Hiroyuki Signed-off-by: Wu Fengguang LKML-Reference: <20100122081619.GA6431@localhost> Cc: Andrew Morton Signed-off-by: H. Peter Anvin --- arch/mips/mm/init.c | 2 +- arch/score/mm/init.c | 2 +- include/linux/ioport.h | 2 ++ kernel/resource.c | 13 +++++++++++++ 4 files changed, 17 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 15aa1902a78..4d72aabe835 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -294,7 +294,7 @@ void __init fixrange_init(unsigned long start, unsigned long end, } #ifndef CONFIG_NEED_MULTIPLE_NODES -static int __init page_is_ram(unsigned long pagenr) +int page_is_ram(unsigned long pagenr) { int i; diff --git a/arch/score/mm/init.c b/arch/score/mm/init.c index 4e3dcd0c471..f684a590c21 100644 --- a/arch/score/mm/init.c +++ b/arch/score/mm/init.c @@ -59,7 +59,7 @@ static unsigned long setup_zero_page(void) } #ifndef CONFIG_NEED_MULTIPLE_NODES -static int __init page_is_ram(unsigned long pagenr) +int page_is_ram(unsigned long pagenr) { if (pagenr >= min_low_pfn && pagenr < max_low_pfn) return 1; diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 83aa81297ea..11ef7952b63 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -188,5 +188,7 @@ extern int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, void *arg, int (*func)(unsigned long, unsigned long, void *)); +extern int page_is_ram(unsigned long pfn); + #endif /* __ASSEMBLY__ */ #endif /* _LINUX_IOPORT_H */ diff --git a/kernel/resource.c b/kernel/resource.c index fb11a58b959..b4d637a5525 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -297,6 +297,19 @@ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, #endif +static int __is_ram(unsigned long pfn, unsigned long nr_pages, void *arg) +{ + return 1; +} +/* + * This generic page_is_ram() returns true if specified address is + * registered as "System RAM" in iomem_resource list. + */ +int __attribute__((weak)) page_is_ram(unsigned long pfn) +{ + return walk_system_ram_range(pfn, 1, NULL, __is_ram) == 1; +} + /* * Find empty slot in the resource tree given range and alignment. */ -- cgit v1.2.3-70-g09d2 From 53df8fdc15fb646b0219e43c989c2cdab1ab100c Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 27 Jan 2010 11:06:39 +0800 Subject: Move page_is_ram() declaration to mm.h Move page_is_ram() declaration to mm.h, it makes no sense in . Signed-off-by: Wu Fengguang LKML-Reference: <20100127030639.GD8132@localhost> Signed-off-by: H. Peter Anvin --- include/linux/ioport.h | 2 -- include/linux/mm.h | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 11ef7952b63..83aa81297ea 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -188,7 +188,5 @@ extern int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, void *arg, int (*func)(unsigned long, unsigned long, void *)); -extern int page_is_ram(unsigned long pfn); - #endif /* __ASSEMBLY__ */ #endif /* _LINUX_IOPORT_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 24c395694f4..bad433fdbfc 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -265,6 +265,8 @@ static inline int get_page_unless_zero(struct page *page) return atomic_inc_not_zero(&page->_count); } +extern int page_is_ram(unsigned long pfn); + /* Support for virtually mapped pages */ struct page *vmalloc_to_page(const void *addr); unsigned long vmalloc_to_pfn(const void *addr); -- cgit v1.2.3-70-g09d2 From b79c7adf82e8b8a6d6ad1dadf7e687a4a030cf8c Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 2 Feb 2010 13:01:25 +0900 Subject: mtd: trivial sh_flctl changes This patch contains a few changes for the sh_flctl driver: - not sh7723-only driver - get rid of kconfig dependency - use dev_err() instead of printk() - use __devinit and __devexit for probe()/remove() - fix probe() return values Signed-off-by: Magnus Damm Acked-by: Yoshihiro Shimoda Signed-off-by: Paul Mundt --- drivers/mtd/nand/Kconfig | 4 ++-- drivers/mtd/nand/sh_flctl.c | 42 +++++++++++++++++++++++------------------- include/linux/mtd/sh_flctl.h | 1 + 3 files changed, 26 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig index 677cd53f18c..bb646560423 100644 --- a/drivers/mtd/nand/Kconfig +++ b/drivers/mtd/nand/Kconfig @@ -457,10 +457,10 @@ config MTD_NAND_NOMADIK config MTD_NAND_SH_FLCTL tristate "Support for NAND on Renesas SuperH FLCTL" - depends on MTD_NAND && SUPERH && CPU_SUBTYPE_SH7723 + depends on MTD_NAND && SUPERH help Several Renesas SuperH CPU has FLCTL. This option enables support - for NAND Flash using FLCTL. This driver support SH7723. + for NAND Flash using FLCTL. config MTD_NAND_DAVINCI tristate "Support NAND on DaVinci SoC" diff --git a/drivers/mtd/nand/sh_flctl.c b/drivers/mtd/nand/sh_flctl.c index 02bef21f2e4..ab068a503b2 100644 --- a/drivers/mtd/nand/sh_flctl.c +++ b/drivers/mtd/nand/sh_flctl.c @@ -1,10 +1,10 @@ /* * SuperH FLCTL nand controller * - * Copyright © 2008 Renesas Solutions Corp. - * Copyright © 2008 Atom Create Engineering Co., Ltd. + * Copyright (c) 2008 Renesas Solutions Corp. + * Copyright (c) 2008 Atom Create Engineering Co., Ltd. * - * Based on fsl_elbc_nand.c, Copyright © 2006-2007 Freescale Semiconductor + * Based on fsl_elbc_nand.c, Copyright (c) 2006-2007 Freescale Semiconductor * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -75,6 +75,11 @@ static void start_translation(struct sh_flctl *flctl) writeb(TRSTRT, FLTRCR(flctl)); } +static void timeout_error(struct sh_flctl *flctl, const char *str) +{ + dev_err(&flctl->pdev->dev, "Timeout occured in %s\n", str); +} + static void wait_completion(struct sh_flctl *flctl) { uint32_t timeout = LOOP_TIMEOUT_MAX; @@ -87,7 +92,7 @@ static void wait_completion(struct sh_flctl *flctl) udelay(1); } - printk(KERN_ERR "wait_completion(): Timeout occured \n"); + timeout_error(flctl, __func__); writeb(0x0, FLTRCR(flctl)); } @@ -132,7 +137,7 @@ static void wait_rfifo_ready(struct sh_flctl *flctl) return; udelay(1); } - printk(KERN_ERR "wait_rfifo_ready(): Timeout occured \n"); + timeout_error(flctl, __func__); } static void wait_wfifo_ready(struct sh_flctl *flctl) @@ -146,7 +151,7 @@ static void wait_wfifo_ready(struct sh_flctl *flctl) return; udelay(1); } - printk(KERN_ERR "wait_wfifo_ready(): Timeout occured \n"); + timeout_error(flctl, __func__); } static int wait_recfifo_ready(struct sh_flctl *flctl, int sector_number) @@ -198,7 +203,7 @@ static int wait_recfifo_ready(struct sh_flctl *flctl, int sector_number) writel(0, FL4ECCCR(flctl)); } - printk(KERN_ERR "wait_recfifo_ready(): Timeout occured \n"); + timeout_error(flctl, __func__); return 1; /* timeout */ } @@ -214,7 +219,7 @@ static void wait_wecfifo_ready(struct sh_flctl *flctl) return; udelay(1); } - printk(KERN_ERR "wait_wecfifo_ready(): Timeout occured \n"); + timeout_error(flctl, __func__); } static void read_datareg(struct sh_flctl *flctl, int offset) @@ -769,38 +774,36 @@ static int flctl_chip_init_tail(struct mtd_info *mtd) return 0; } -static int __init flctl_probe(struct platform_device *pdev) +static int __devinit flctl_probe(struct platform_device *pdev) { struct resource *res; struct sh_flctl *flctl; struct mtd_info *flctl_mtd; struct nand_chip *nand; struct sh_flctl_platform_data *pdata; - int ret; + int ret = -ENXIO; pdata = pdev->dev.platform_data; if (pdata == NULL) { - printk(KERN_ERR "sh_flctl platform_data not found.\n"); - return -ENODEV; + dev_err(&pdev->dev, "no platform data defined\n"); + return -EINVAL; } flctl = kzalloc(sizeof(struct sh_flctl), GFP_KERNEL); if (!flctl) { - printk(KERN_ERR "Unable to allocate NAND MTD dev structure.\n"); + dev_err(&pdev->dev, "failed to allocate driver data\n"); return -ENOMEM; } res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) { - printk(KERN_ERR "%s: resource not found.\n", __func__); - ret = -ENODEV; + dev_err(&pdev->dev, "failed to get I/O memory\n"); goto err; } - flctl->reg = ioremap(res->start, res->end - res->start + 1); + flctl->reg = ioremap(res->start, resource_size(res)); if (flctl->reg == NULL) { - printk(KERN_ERR "%s: ioremap error.\n", __func__); - ret = -ENOMEM; + dev_err(&pdev->dev, "failed to remap I/O memory\n"); goto err; } @@ -808,6 +811,7 @@ static int __init flctl_probe(struct platform_device *pdev) flctl_mtd = &flctl->mtd; nand = &flctl->chip; flctl_mtd->priv = nand; + flctl->pdev = pdev; flctl->hwecc = pdata->has_hwecc; flctl_register_init(flctl, pdata->flcmncr_val); @@ -846,7 +850,7 @@ err: return ret; } -static int __exit flctl_remove(struct platform_device *pdev) +static int __devexit flctl_remove(struct platform_device *pdev) { struct sh_flctl *flctl = platform_get_drvdata(pdev); diff --git a/include/linux/mtd/sh_flctl.h b/include/linux/mtd/sh_flctl.h index e77c1cea404..164c9d4013c 100644 --- a/include/linux/mtd/sh_flctl.h +++ b/include/linux/mtd/sh_flctl.h @@ -96,6 +96,7 @@ struct sh_flctl { struct mtd_info mtd; struct nand_chip chip; + struct platform_device *pdev; void __iomem *reg; uint8_t done_buff[2048 + 64]; /* max size 2048 + 64 */ -- cgit v1.2.3-70-g09d2 From 010ab820582d03bcd3648416b5837107e8a9c5f3 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 27 Jan 2010 09:17:21 +0000 Subject: mtd: sh_flctl SHBUSSEL and SEL_16BIT support This patch extends the sh_flctl driver with support for 16-bit bus configuration using SEL_16BIT and support for multiplexed pins using SHBUSSEL. Signed-off-by: Magnus Damm Acked-by: Yoshihiro Shimoda Signed-off-by: Paul Mundt --- drivers/mtd/nand/sh_flctl.c | 27 ++++++++++++++++++++++++++- include/linux/mtd/sh_flctl.h | 2 ++ 2 files changed, 28 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/sh_flctl.c b/drivers/mtd/nand/sh_flctl.c index ab068a503b2..1842df8bdd9 100644 --- a/drivers/mtd/nand/sh_flctl.c +++ b/drivers/mtd/nand/sh_flctl.c @@ -105,6 +105,8 @@ static void set_addr(struct mtd_info *mtd, int column, int page_addr) addr = page_addr; /* ERASE1 */ } else if (page_addr != -1) { /* SEQIN, READ0, etc.. */ + if (flctl->chip.options & NAND_BUSWIDTH_16) + column >>= 1; if (flctl->page_size) { addr = column & 0x0FFF; addr |= (page_addr & 0xff) << 16; @@ -280,7 +282,7 @@ static void write_fiforeg(struct sh_flctl *flctl, int rlen, int offset) static void set_cmd_regs(struct mtd_info *mtd, uint32_t cmd, uint32_t flcmcdr_val) { struct sh_flctl *flctl = mtd_to_flctl(mtd); - uint32_t flcmncr_val = readl(FLCMNCR(flctl)); + uint32_t flcmncr_val = readl(FLCMNCR(flctl)) & ~SEL_16BIT; uint32_t flcmdcr_val, addr_len_bytes = 0; /* Set SNAND bit if page size is 2048byte */ @@ -302,6 +304,8 @@ static void set_cmd_regs(struct mtd_info *mtd, uint32_t cmd, uint32_t flcmcdr_va case NAND_CMD_READOOB: addr_len_bytes = flctl->rw_ADRCNT; flcmdcr_val |= CDSRC_E; + if (flctl->chip.options & NAND_BUSWIDTH_16) + flcmncr_val |= SEL_16BIT; break; case NAND_CMD_SEQIN: /* This case is that cmd is READ0 or READ1 or READ00 */ @@ -310,6 +314,8 @@ static void set_cmd_regs(struct mtd_info *mtd, uint32_t cmd, uint32_t flcmcdr_va case NAND_CMD_PAGEPROG: addr_len_bytes = flctl->rw_ADRCNT; flcmdcr_val |= DOCMD2_E | CDSRC_E | SELRW; + if (flctl->chip.options & NAND_BUSWIDTH_16) + flcmncr_val |= SEL_16BIT; break; case NAND_CMD_READID: flcmncr_val &= ~SNAND_E; @@ -528,6 +534,8 @@ static void flctl_cmdfunc(struct mtd_info *mtd, unsigned int command, set_addr(mtd, 0, page_addr); flctl->read_bytes = mtd->writesize + mtd->oobsize; + if (flctl->chip.options & NAND_BUSWIDTH_16) + column >>= 1; flctl->index += column; goto read_normal_exit; @@ -691,6 +699,18 @@ static uint8_t flctl_read_byte(struct mtd_info *mtd) return data; } +static uint16_t flctl_read_word(struct mtd_info *mtd) +{ + struct sh_flctl *flctl = mtd_to_flctl(mtd); + int index = flctl->index; + uint16_t data; + uint16_t *buf = (uint16_t *)&flctl->done_buff[index]; + + data = *buf; + flctl->index += 2; + return data; +} + static void flctl_read_buf(struct mtd_info *mtd, uint8_t *buf, int len) { int i; @@ -829,6 +849,11 @@ static int __devinit flctl_probe(struct platform_device *pdev) nand->select_chip = flctl_select_chip; nand->cmdfunc = flctl_cmdfunc; + if (pdata->flcmncr_val & SEL_16BIT) { + nand->options |= NAND_BUSWIDTH_16; + nand->read_word = flctl_read_word; + } + ret = nand_scan_ident(flctl_mtd, 1); if (ret) goto err; diff --git a/include/linux/mtd/sh_flctl.h b/include/linux/mtd/sh_flctl.h index 164c9d4013c..ab77609ec33 100644 --- a/include/linux/mtd/sh_flctl.h +++ b/include/linux/mtd/sh_flctl.h @@ -51,6 +51,8 @@ #define _4ECCCNTEN (0x1 << 24) #define _4ECCEN (0x1 << 23) #define _4ECCCORRECT (0x1 << 22) +#define SHBUSSEL (0x1 << 20) +#define SEL_16BIT (0x1 << 19) #define SNAND_E (0x1 << 18) /* SNAND (0=512 1=2048)*/ #define QTSEL_E (0x1 << 17) #define ENDIAN (0x1 << 16) /* 1 = little endian */ -- cgit v1.2.3-70-g09d2 From c30f540b63047437ffa894b5353216410c480d1a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 2 Feb 2010 15:03:24 +0100 Subject: netfilter: xtables: CONFIG_COMPAT redux Ifdef out struct nf_sockopt_ops::compat_set struct nf_sockopt_ops::compat_get struct xt_match::compat_from_user struct xt_match::compat_to_user struct xt_match::compatsize to make structures smaller on COMPAT=n kernels. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/linux/netfilter.h | 9 ++++++--- include/linux/netfilter/x_tables.h | 12 ++++++++---- 2 files changed, 14 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 48c54960773..78f33d22368 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -114,15 +114,17 @@ struct nf_sockopt_ops { int set_optmin; int set_optmax; int (*set)(struct sock *sk, int optval, void __user *user, unsigned int len); +#ifdef CONFIG_COMPAT int (*compat_set)(struct sock *sk, int optval, void __user *user, unsigned int len); - +#endif int get_optmin; int get_optmax; int (*get)(struct sock *sk, int optval, void __user *user, int *len); +#ifdef CONFIG_COMPAT int (*compat_get)(struct sock *sk, int optval, void __user *user, int *len); - +#endif /* Use the module struct to lock set/get code in place */ struct module *owner; }; @@ -222,11 +224,12 @@ int nf_setsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, unsigned int len); int nf_getsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, int *len); - +#ifdef CONFIG_COMPAT int compat_nf_setsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, unsigned int len); int compat_nf_getsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, int *len); +#endif /* Call this before modifying an existing packet: ensures it is modifiable and linear to the point you care about (writable_len). diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 3caf5e15110..026eb78ee83 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -283,11 +283,11 @@ struct xt_match { /* Called when entry of this type deleted. */ void (*destroy)(const struct xt_mtdtor_param *); - +#ifdef CONFIG_COMPAT /* Called when userspace align differs from kernel space one */ void (*compat_from_user)(void *dst, void *src); int (*compat_to_user)(void __user *dst, void *src); - +#endif /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; @@ -296,7 +296,9 @@ struct xt_match { const char *table; unsigned int matchsize; +#ifdef CONFIG_COMPAT unsigned int compatsize; +#endif unsigned int hooks; unsigned short proto; @@ -323,17 +325,19 @@ struct xt_target { /* Called when entry of this type deleted. */ void (*destroy)(const struct xt_tgdtor_param *); - +#ifdef CONFIG_COMPAT /* Called when userspace align differs from kernel space one */ void (*compat_from_user)(void *dst, void *src); int (*compat_to_user)(void __user *dst, void *src); - +#endif /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; const char *table; unsigned int targetsize; +#ifdef CONFIG_COMPAT unsigned int compatsize; +#endif unsigned int hooks; unsigned short proto; -- cgit v1.2.3-70-g09d2 From c85bb41e93184bf5494dde6d8fe5a81b564c84c8 Mon Sep 17 00:00:00 2001 From: Flavio Leitner Date: Tue, 2 Feb 2010 07:32:29 -0800 Subject: igmp: fix ip_mc_sf_allow race [v5] Almost all igmp functions accessing inet->mc_list are protected by rtnl_lock(), but there is one exception which is ip_mc_sf_allow(), so there is a chance of either ip_mc_drop_socket or ip_mc_leave_group remove an entry while ip_mc_sf_allow is running causing a crash. Signed-off-by: Flavio Leitner Signed-off-by: David S. Miller --- include/linux/igmp.h | 2 ++ net/ipv4/igmp.c | 83 +++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 64 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 724c27e5d17..93fc2449af1 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -153,6 +153,7 @@ extern int sysctl_igmp_max_msf; struct ip_sf_socklist { unsigned int sl_max; unsigned int sl_count; + struct rcu_head rcu; __be32 sl_addr[0]; }; @@ -170,6 +171,7 @@ struct ip_mc_socklist { struct ip_mreqn multi; unsigned int sfmode; /* MCAST_{INCLUDE,EXCLUDE} */ struct ip_sf_socklist *sflist; + struct rcu_head rcu; }; struct ip_sf_list { diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 8f5468393f0..d2836399874 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1799,7 +1799,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) iml->next = inet->mc_list; iml->sflist = NULL; iml->sfmode = MCAST_EXCLUDE; - inet->mc_list = iml; + rcu_assign_pointer(inet->mc_list, iml); ip_mc_inc_group(in_dev, addr); err = 0; done: @@ -1807,24 +1807,46 @@ done: return err; } +static void ip_sf_socklist_reclaim(struct rcu_head *rp) +{ + struct ip_sf_socklist *psf; + + psf = container_of(rp, struct ip_sf_socklist, rcu); + /* sk_omem_alloc should have been decreased by the caller*/ + kfree(psf); +} + static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, struct in_device *in_dev) { + struct ip_sf_socklist *psf = iml->sflist; int err; - if (iml->sflist == NULL) { + if (psf == NULL) { /* any-source empty exclude case */ return ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr, iml->sfmode, 0, NULL, 0); } err = ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr, - iml->sfmode, iml->sflist->sl_count, - iml->sflist->sl_addr, 0); - sock_kfree_s(sk, iml->sflist, IP_SFLSIZE(iml->sflist->sl_max)); - iml->sflist = NULL; + iml->sfmode, psf->sl_count, psf->sl_addr, 0); + rcu_assign_pointer(iml->sflist, NULL); + /* decrease mem now to avoid the memleak warning */ + atomic_sub(IP_SFLSIZE(psf->sl_max), &sk->sk_omem_alloc); + call_rcu(&psf->rcu, ip_sf_socklist_reclaim); return err; } + +static void ip_mc_socklist_reclaim(struct rcu_head *rp) +{ + struct ip_mc_socklist *iml; + + iml = container_of(rp, struct ip_mc_socklist, rcu); + /* sk_omem_alloc should have been decreased by the caller*/ + kfree(iml); +} + + /* * Ask a socket to leave a group. */ @@ -1854,12 +1876,14 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) (void) ip_mc_leave_src(sk, iml, in_dev); - *imlp = iml->next; + rcu_assign_pointer(*imlp, iml->next); if (in_dev) ip_mc_dec_group(in_dev, group); rtnl_unlock(); - sock_kfree_s(sk, iml, sizeof(*iml)); + /* decrease mem now to avoid the memleak warning */ + atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); + call_rcu(&iml->rcu, ip_mc_socklist_reclaim); return 0; } if (!in_dev) @@ -1974,9 +1998,12 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct if (psl) { for (i=0; isl_count; i++) newpsl->sl_addr[i] = psl->sl_addr[i]; - sock_kfree_s(sk, psl, IP_SFLSIZE(psl->sl_max)); + /* decrease mem now to avoid the memleak warning */ + atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); + call_rcu(&psl->rcu, ip_sf_socklist_reclaim); } - pmc->sflist = psl = newpsl; + rcu_assign_pointer(pmc->sflist, newpsl); + psl = newpsl; } rv = 1; /* > 0 for insert logic below if sl_count is 0 */ for (i=0; isl_count; i++) { @@ -2072,11 +2099,13 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) if (psl) { (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, psl->sl_count, psl->sl_addr, 0); - sock_kfree_s(sk, psl, IP_SFLSIZE(psl->sl_max)); + /* decrease mem now to avoid the memleak warning */ + atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); + call_rcu(&psl->rcu, ip_sf_socklist_reclaim); } else (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, 0, NULL, 0); - pmc->sflist = newpsl; + rcu_assign_pointer(pmc->sflist, newpsl); pmc->sfmode = msf->imsf_fmode; err = 0; done: @@ -2209,30 +2238,40 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif) struct ip_mc_socklist *pmc; struct ip_sf_socklist *psl; int i; + int ret; + ret = 1; if (!ipv4_is_multicast(loc_addr)) - return 1; + goto out; - for (pmc=inet->mc_list; pmc; pmc=pmc->next) { + rcu_read_lock(); + for (pmc=rcu_dereference(inet->mc_list); pmc; pmc=rcu_dereference(pmc->next)) { if (pmc->multi.imr_multiaddr.s_addr == loc_addr && pmc->multi.imr_ifindex == dif) break; } + ret = inet->mc_all; if (!pmc) - return inet->mc_all; + goto unlock; psl = pmc->sflist; + ret = (pmc->sfmode == MCAST_EXCLUDE); if (!psl) - return pmc->sfmode == MCAST_EXCLUDE; + goto unlock; for (i=0; isl_count; i++) { if (psl->sl_addr[i] == rmt_addr) break; } + ret = 0; if (pmc->sfmode == MCAST_INCLUDE && i >= psl->sl_count) - return 0; + goto unlock; if (pmc->sfmode == MCAST_EXCLUDE && i < psl->sl_count) - return 0; - return 1; + goto unlock; + ret = 1; +unlock: + rcu_read_unlock(); +out: + return ret; } /* @@ -2251,7 +2290,7 @@ void ip_mc_drop_socket(struct sock *sk) rtnl_lock(); while ((iml = inet->mc_list) != NULL) { struct in_device *in_dev; - inet->mc_list = iml->next; + rcu_assign_pointer(inet->mc_list, iml->next); in_dev = inetdev_by_index(net, iml->multi.imr_ifindex); (void) ip_mc_leave_src(sk, iml, in_dev); @@ -2259,7 +2298,9 @@ void ip_mc_drop_socket(struct sock *sk) ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr); in_dev_put(in_dev); } - sock_kfree_s(sk, iml, sizeof(*iml)); + /* decrease mem now to avoid the memleak warning */ + atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); + call_rcu(&iml->rcu, ip_mc_socklist_reclaim); } rtnl_unlock(); } -- cgit v1.2.3-70-g09d2 From f9bfbebf34eab707b065116cdc9699d25ba4252a Mon Sep 17 00:00:00 2001 From: Shirley Ma Date: Fri, 29 Jan 2010 03:19:05 +0000 Subject: virtio: Add ability to detach unused buffers from vrings There's currently no way for a virtio driver to ask for unused buffers, so it has to keep a list itself to reclaim them at shutdown. This is redundant, since virtio_ring stores that information. So add a new hook to do this. Signed-off-by: Shirley Ma Signed-off-by: Amit Shah Signed-off-by: Rusty Russell Signed-off-by: David S. Miller --- drivers/virtio/virtio_ring.c | 25 +++++++++++++++++++++++++ include/linux/virtio.h | 4 ++++ 2 files changed, 29 insertions(+) (limited to 'include/linux') diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index fbd2ecde93e..71929ee00d6 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -334,6 +334,30 @@ static bool vring_enable_cb(struct virtqueue *_vq) return true; } +static void *vring_detach_unused_buf(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + unsigned int i; + void *buf; + + START_USE(vq); + + for (i = 0; i < vq->vring.num; i++) { + if (!vq->data[i]) + continue; + /* detach_buf clears data, so grab it now. */ + buf = vq->data[i]; + detach_buf(vq, i); + END_USE(vq); + return buf; + } + /* That should have freed everything. */ + BUG_ON(vq->num_free != vq->vring.num); + + END_USE(vq); + return NULL; +} + irqreturn_t vring_interrupt(int irq, void *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); @@ -360,6 +384,7 @@ static struct virtqueue_ops vring_vq_ops = { .kick = vring_kick, .disable_cb = vring_disable_cb, .enable_cb = vring_enable_cb, + .detach_unused_buf = vring_detach_unused_buf, }; struct virtqueue *vring_new_virtqueue(unsigned int num, diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 057a2e01075..f508c651e53 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -51,6 +51,9 @@ struct virtqueue { * This re-enables callbacks; it returns "false" if there are pending * buffers in the queue, to detect a possible race between the driver * checking for more work, and enabling callbacks. + * @detach_unused_buf: detach first unused buffer + * vq: the struct virtqueue we're talking about. + * Returns NULL or the "data" token handed to add_buf * * Locking rules are straightforward: the driver is responsible for * locking. No two operations may be invoked simultaneously, with the exception @@ -71,6 +74,7 @@ struct virtqueue_ops { void (*disable_cb)(struct virtqueue *vq); bool (*enable_cb)(struct virtqueue *vq); + void *(*detach_unused_buf)(struct virtqueue *vq); }; /** -- cgit v1.2.3-70-g09d2 From 24551f64d47af9539a7f324343bffeea09d9dcfa Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 12 Jan 2010 21:25:24 +0000 Subject: lmb: Add lmb_free() We can free memory allocated with lmb_alloc() by removing it from the list of reserved LMBs. Rework lmb_remove() to allow that possibility and add lmb_free() which exploits it. BenH: Removed some useless parenthesis Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- include/linux/lmb.h | 1 + lib/lmb.c | 13 +++++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lmb.h b/include/linux/lmb.h index ef82b8fcbdd..f3d14333ebe 100644 --- a/include/linux/lmb.h +++ b/include/linux/lmb.h @@ -42,6 +42,7 @@ extern void __init lmb_init(void); extern void __init lmb_analyze(void); extern long lmb_add(u64 base, u64 size); extern long lmb_remove(u64 base, u64 size); +extern long __init lmb_free(u64 base, u64 size); extern long __init lmb_reserve(u64 base, u64 size); extern u64 __init lmb_alloc_nid(u64 size, u64 align, int nid, u64 (*nid_range)(u64, u64, int *)); diff --git a/lib/lmb.c b/lib/lmb.c index 9cee17142b2..b1fc5260652 100644 --- a/lib/lmb.c +++ b/lib/lmb.c @@ -205,9 +205,8 @@ long lmb_add(u64 base, u64 size) } -long lmb_remove(u64 base, u64 size) +static long __lmb_remove(struct lmb_region *rgn, u64 base, u64 size) { - struct lmb_region *rgn = &(lmb.memory); u64 rgnbegin, rgnend; u64 end = base + size; int i; @@ -254,6 +253,16 @@ long lmb_remove(u64 base, u64 size) return lmb_add_region(rgn, end, rgnend - end); } +long lmb_remove(u64 base, u64 size) +{ + return __lmb_remove(&lmb.memory, base, size); +} + +long __init lmb_free(u64 base, u64 size) +{ + return __lmb_remove(&lmb.reserved, base, size); +} + long __init lmb_reserve(u64 base, u64 size) { struct lmb_region *_rgn = &lmb.reserved; -- cgit v1.2.3-70-g09d2 From add67461240c1dadc7c8d97e66f8f92b556ca523 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 3 Feb 2010 13:45:12 +0100 Subject: netfilter: add struct net * to target parameters Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 2 ++ net/bridge/netfilter/ebtables.c | 10 ++++++---- net/ipv4/netfilter/ip_tables.c | 8 +++++--- net/ipv6/netfilter/ip6_tables.c | 8 +++++--- 4 files changed, 18 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 026eb78ee83..365fabe1b16 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -249,6 +249,7 @@ struct xt_target_param { * Other fields see above. */ struct xt_tgchk_param { + struct net *net; const char *table; const void *entryinfo; const struct xt_target *target; @@ -259,6 +260,7 @@ struct xt_tgchk_param { /* Target destructor parameters */ struct xt_tgdtor_param { + struct net *net; const struct xt_target *target; void *targinfo; u_int8_t family; diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 1aa0e4c1f52..12beb580aa2 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -579,13 +579,14 @@ ebt_cleanup_match(struct ebt_entry_match *m, struct net *net, unsigned int *i) } static inline int -ebt_cleanup_watcher(struct ebt_entry_watcher *w, unsigned int *i) +ebt_cleanup_watcher(struct ebt_entry_watcher *w, struct net *net, unsigned int *i) { struct xt_tgdtor_param par; if (i && (*i)-- == 0) return 1; + par.net = net; par.target = w->u.watcher; par.targinfo = w->data; par.family = NFPROTO_BRIDGE; @@ -606,10 +607,11 @@ ebt_cleanup_entry(struct ebt_entry *e, struct net *net, unsigned int *cnt) /* we're done */ if (cnt && (*cnt)-- == 0) return 1; - EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, NULL); + EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, net, NULL); EBT_MATCH_ITERATE(e, ebt_cleanup_match, net, NULL); t = (struct ebt_entry_target *)(((char *)e) + e->target_offset); + par.net = net; par.target = t->u.target; par.targinfo = t->data; par.family = NFPROTO_BRIDGE; @@ -674,7 +676,7 @@ ebt_check_entry(struct ebt_entry *e, } i = 0; - mtpar.net = net; + mtpar.net = tgpar.net = net; mtpar.table = tgpar.table = name; mtpar.entryinfo = tgpar.entryinfo = e; mtpar.hook_mask = tgpar.hook_mask = hookmask; @@ -730,7 +732,7 @@ ebt_check_entry(struct ebt_entry *e, (*cnt)++; return 0; cleanup_watchers: - EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, &j); + EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, net, &j); cleanup_matches: EBT_MATCH_ITERATE(e, ebt_cleanup_match, net, &i); return ret; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index cfaba0e2e6f..7fde8f6950d 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -638,10 +638,11 @@ err: return ret; } -static int check_target(struct ipt_entry *e, const char *name) +static int check_target(struct ipt_entry *e, struct net *net, const char *name) { struct ipt_entry_target *t = ipt_get_target(e); struct xt_tgchk_param par = { + .net = net, .table = name, .entryinfo = e, .target = t->u.kernel.target, @@ -697,7 +698,7 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name, } t->u.kernel.target = target; - ret = check_target(e, name); + ret = check_target(e, net, name); if (ret) goto err; @@ -788,6 +789,7 @@ cleanup_entry(struct ipt_entry *e, struct net *net, unsigned int *i) IPT_MATCH_ITERATE(e, cleanup_match, net, NULL); t = ipt_get_target(e); + par.net = net; par.target = t->u.kernel.target; par.targinfo = t->data; par.family = NFPROTO_IPV4; @@ -1675,7 +1677,7 @@ compat_check_entry(struct ipt_entry *e, struct net *net, const char *name, if (ret) goto cleanup_matches; - ret = check_target(e, name); + ret = check_target(e, net, name); if (ret) goto cleanup_matches; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 9f1d45f2ba8..0376ed6d559 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -669,10 +669,11 @@ err: return ret; } -static int check_target(struct ip6t_entry *e, const char *name) +static int check_target(struct ip6t_entry *e, struct net *net, const char *name) { struct ip6t_entry_target *t = ip6t_get_target(e); struct xt_tgchk_param par = { + .net = net, .table = name, .entryinfo = e, .target = t->u.kernel.target, @@ -729,7 +730,7 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, } t->u.kernel.target = target; - ret = check_target(e, name); + ret = check_target(e, net, name); if (ret) goto err; @@ -820,6 +821,7 @@ cleanup_entry(struct ip6t_entry *e, struct net *net, unsigned int *i) IP6T_MATCH_ITERATE(e, cleanup_match, net, NULL); t = ip6t_get_target(e); + par.net = net; par.target = t->u.kernel.target; par.targinfo = t->data; par.family = NFPROTO_IPV6; @@ -1710,7 +1712,7 @@ static int compat_check_entry(struct ip6t_entry *e, struct net *net, if (ret) goto cleanup_matches; - ret = check_target(e, name); + ret = check_target(e, net, name); if (ret) goto cleanup_matches; -- cgit v1.2.3-70-g09d2 From 0cebe4b4163b6373c9d24c1a192939777bc27e55 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 3 Feb 2010 13:51:51 +0100 Subject: netfilter: ctnetlink: support selective event delivery Add two masks for conntrack end expectation events to struct nf_conntrack_ecache and use them to filter events. Their default value is "all events" when the event sysctl is on and "no events" when it is off. A following patch will add specific initializations. Expectation events depend on the ecache struct of their master conntrack. Signed-off-by: Patrick McHardy --- include/linux/netfilter/nf_conntrack_common.h | 18 ++++++++ include/net/netfilter/nf_conntrack_ecache.h | 59 +++++++++++++-------------- net/netfilter/nf_conntrack_core.c | 2 +- net/netfilter/nf_conntrack_netlink.c | 2 +- 4 files changed, 48 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index a374787ed9b..ebfed90733f 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -74,6 +74,24 @@ enum ip_conntrack_status { IPS_FIXED_TIMEOUT = (1 << IPS_FIXED_TIMEOUT_BIT), }; +/* Connection tracking event types */ +enum ip_conntrack_events { + IPCT_NEW, /* new conntrack */ + IPCT_RELATED, /* related conntrack */ + IPCT_DESTROY, /* destroyed conntrack */ + IPCT_REPLY, /* connection has seen two-way traffic */ + IPCT_ASSURED, /* connection status has changed to assured */ + IPCT_PROTOINFO, /* protocol information has changed */ + IPCT_HELPER, /* new helper has been set */ + IPCT_MARK, /* new mark has been set */ + IPCT_NATSEQADJ, /* NAT is doing sequence adjustment */ + IPCT_SECMARK, /* new security mark has been set */ +}; + +enum ip_conntrack_expect_events { + IPEXP_NEW, /* new expectation */ +}; + #ifdef __KERNEL__ struct ip_conntrack_stat { unsigned int searched; diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 5e05fb883ab..96ba5f7dcab 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -12,28 +12,12 @@ #include #include -/* Connection tracking event types */ -enum ip_conntrack_events { - IPCT_NEW, /* new conntrack */ - IPCT_RELATED, /* related conntrack */ - IPCT_DESTROY, /* destroyed conntrack */ - IPCT_REPLY, /* connection has seen two-way traffic */ - IPCT_ASSURED, /* connection status has changed to assured */ - IPCT_PROTOINFO, /* protocol information has changed */ - IPCT_HELPER, /* new helper has been set */ - IPCT_MARK, /* new mark has been set */ - IPCT_NATSEQADJ, /* NAT is doing sequence adjustment */ - IPCT_SECMARK, /* new security mark has been set */ -}; - -enum ip_conntrack_expect_events { - IPEXP_NEW, /* new expectation */ -}; - struct nf_conntrack_ecache { - unsigned long cache; /* bitops want long */ - unsigned long missed; /* missed events */ - u32 pid; /* netlink pid of destroyer */ + unsigned long cache; /* bitops want long */ + unsigned long missed; /* missed events */ + u16 ctmask; /* bitmask of ct events to be delivered */ + u16 expmask; /* bitmask of expect events to be delivered */ + u32 pid; /* netlink pid of destroyer */ }; static inline struct nf_conntrack_ecache * @@ -43,14 +27,24 @@ nf_ct_ecache_find(const struct nf_conn *ct) } static inline struct nf_conntrack_ecache * -nf_ct_ecache_ext_add(struct nf_conn *ct, gfp_t gfp) +nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp) { struct net *net = nf_ct_net(ct); + struct nf_conntrack_ecache *e; - if (!net->ct.sysctl_events) + if (!ctmask && !expmask && net->ct.sysctl_events) { + ctmask = ~0; + expmask = ~0; + } + if (!ctmask && !expmask) return NULL; - return nf_ct_ext_add(ct, NF_CT_EXT_ECACHE, gfp); + e = nf_ct_ext_add(ct, NF_CT_EXT_ECACHE, gfp); + if (e) { + e->ctmask = ctmask; + e->expmask = expmask; + } + return e; }; #ifdef CONFIG_NF_CONNTRACK_EVENTS @@ -83,6 +77,9 @@ nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct) if (e == NULL) return; + if (!(e->ctmask & (1 << event))) + return; + set_bit(event, &e->cache); } @@ -93,7 +90,6 @@ nf_conntrack_eventmask_report(unsigned int eventmask, int report) { int ret = 0; - struct net *net = nf_ct_net(ct); struct nf_ct_event_notifier *notify; struct nf_conntrack_ecache *e; @@ -102,9 +98,6 @@ nf_conntrack_eventmask_report(unsigned int eventmask, if (notify == NULL) goto out_unlock; - if (!net->ct.sysctl_events) - goto out_unlock; - e = nf_ct_ecache_find(ct); if (e == NULL) goto out_unlock; @@ -118,6 +111,9 @@ nf_conntrack_eventmask_report(unsigned int eventmask, /* This is a resent of a destroy event? If so, skip missed */ unsigned long missed = e->pid ? 0 : e->missed; + if (!((eventmask | missed) & e->ctmask)) + goto out_unlock; + ret = notify->fcn(eventmask | missed, &item); if (unlikely(ret < 0 || missed)) { spin_lock_bh(&ct->lock); @@ -173,18 +169,19 @@ nf_ct_expect_event_report(enum ip_conntrack_expect_events event, u32 pid, int report) { - struct net *net = nf_ct_exp_net(exp); struct nf_exp_event_notifier *notify; + struct nf_conntrack_ecache *e; rcu_read_lock(); notify = rcu_dereference(nf_expect_event_cb); if (notify == NULL) goto out_unlock; - if (!net->ct.sysctl_events) + e = nf_ct_ecache_find(exp->master); + if (e == NULL) goto out_unlock; - { + if (e->expmask & (1 << event)) { struct nf_exp_event item = { .exp = exp, .pid = pid, diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 091ff770eb7..53b8da6ad6b 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -648,7 +648,7 @@ init_conntrack(struct net *net, } nf_ct_acct_ext_add(ct, GFP_ATOMIC); - nf_ct_ecache_ext_add(ct, GFP_ATOMIC); + nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC); spin_lock_bh(&nf_conntrack_lock); exp = nf_ct_find_expectation(net, tuple); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index ff594eb138c..f5c0b09e12f 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1281,7 +1281,7 @@ ctnetlink_create_conntrack(struct net *net, } nf_ct_acct_ext_add(ct, GFP_ATOMIC); - nf_ct_ecache_ext_add(ct, GFP_ATOMIC); + nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC); #if defined(CONFIG_NF_CONNTRACK_MARK) if (cda[CTA_MARK]) -- cgit v1.2.3-70-g09d2 From b2a15a604d379af323645e330638e2cfcc696aff Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 3 Feb 2010 14:13:03 +0100 Subject: netfilter: nf_conntrack: support conntrack templates Support initializing selected parameters of new conntrack entries from a "conntrack template", which is a specially marked conntrack entry attached to the skb. Currently the helper and the event delivery masks can be initialized this way. Signed-off-by: Patrick McHardy --- include/linux/netfilter/nf_conntrack_common.h | 4 +++ include/net/netfilter/nf_conntrack.h | 5 +++ include/net/netfilter/nf_conntrack_helper.h | 3 +- net/ipv4/netfilter/nf_defrag_ipv4.c | 2 +- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 2 +- net/netfilter/nf_conntrack_core.c | 50 +++++++++++++++++--------- net/netfilter/nf_conntrack_helper.c | 17 ++++++--- net/netfilter/nf_conntrack_netlink.c | 2 +- 8 files changed, 61 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index ebfed90733f..c608677dda6 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -72,6 +72,10 @@ enum ip_conntrack_status { /* Connection has fixed timeout. */ IPS_FIXED_TIMEOUT_BIT = 10, IPS_FIXED_TIMEOUT = (1 << IPS_FIXED_TIMEOUT_BIT), + + /* Conntrack is a template */ + IPS_TEMPLATE_BIT = 11, + IPS_TEMPLATE = (1 << IPS_TEMPLATE_BIT), }; /* Connection tracking event types */ diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index a0904adfb8f..5043d61c99a 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -272,6 +272,11 @@ nf_conntrack_alloc(struct net *net, const struct nf_conntrack_tuple *repl, gfp_t gfp); +static inline int nf_ct_is_template(const struct nf_conn *ct) +{ + return test_bit(IPS_TEMPLATE_BIT, &ct->status); +} + /* It's confirmed if it is, or has been in the hash table. */ static inline int nf_ct_is_confirmed(struct nf_conn *ct) { diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 86be7c4816d..e17aaa3e19f 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -47,7 +47,8 @@ extern void nf_conntrack_helper_unregister(struct nf_conntrack_helper *); extern struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp); -extern int __nf_ct_try_assign_helper(struct nf_conn *ct, gfp_t flags); +extern int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, + gfp_t flags); extern void nf_ct_helper_destroy(struct nf_conn *ct); diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 331ead3ebd1..77627fa8056 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -59,7 +59,7 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, #if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE) /* Previously seen (loopback)? Ignore. Do this before fragment check. */ - if (skb->nfct) + if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct)) return NF_ACCEPT; #endif #endif diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 0956ebabbff..55ce22e5de4 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -212,7 +212,7 @@ static unsigned int ipv6_defrag(unsigned int hooknum, struct sk_buff *reasm; /* Previously seen (loopback)? */ - if (skb->nfct) + if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct)) return NF_ACCEPT; reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb)); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 53b8da6ad6b..471e2a79d26 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -618,7 +618,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free); /* Allocate a new conntrack: we return -ENOMEM if classification failed due to stress. Otherwise it really is unclassifiable. */ static struct nf_conntrack_tuple_hash * -init_conntrack(struct net *net, +init_conntrack(struct net *net, struct nf_conn *tmpl, const struct nf_conntrack_tuple *tuple, struct nf_conntrack_l3proto *l3proto, struct nf_conntrack_l4proto *l4proto, @@ -628,6 +628,7 @@ init_conntrack(struct net *net, struct nf_conn *ct; struct nf_conn_help *help; struct nf_conntrack_tuple repl_tuple; + struct nf_conntrack_ecache *ecache; struct nf_conntrack_expect *exp; if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) { @@ -648,7 +649,11 @@ init_conntrack(struct net *net, } nf_ct_acct_ext_add(ct, GFP_ATOMIC); - nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC); + + ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL; + nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0, + ecache ? ecache->expmask : 0, + GFP_ATOMIC); spin_lock_bh(&nf_conntrack_lock); exp = nf_ct_find_expectation(net, tuple); @@ -673,7 +678,7 @@ init_conntrack(struct net *net, nf_conntrack_get(&ct->master->ct_general); NF_CT_STAT_INC(net, expect_new); } else { - __nf_ct_try_assign_helper(ct, GFP_ATOMIC); + __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC); NF_CT_STAT_INC(net, new); } @@ -694,7 +699,7 @@ init_conntrack(struct net *net, /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */ static inline struct nf_conn * -resolve_normal_ct(struct net *net, +resolve_normal_ct(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, u_int16_t l3num, @@ -718,7 +723,8 @@ resolve_normal_ct(struct net *net, /* look for tuple match */ h = nf_conntrack_find_get(net, &tuple); if (!h) { - h = init_conntrack(net, &tuple, l3proto, l4proto, skb, dataoff); + h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto, + skb, dataoff); if (!h) return NULL; if (IS_ERR(h)) @@ -755,7 +761,7 @@ unsigned int nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, struct sk_buff *skb) { - struct nf_conn *ct; + struct nf_conn *ct, *tmpl = NULL; enum ip_conntrack_info ctinfo; struct nf_conntrack_l3proto *l3proto; struct nf_conntrack_l4proto *l4proto; @@ -764,10 +770,14 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, int set_reply = 0; int ret; - /* Previously seen (loopback or untracked)? Ignore. */ if (skb->nfct) { - NF_CT_STAT_INC_ATOMIC(net, ignore); - return NF_ACCEPT; + /* Previously seen (loopback or untracked)? Ignore. */ + tmpl = (struct nf_conn *)skb->nfct; + if (!nf_ct_is_template(tmpl)) { + NF_CT_STAT_INC_ATOMIC(net, ignore); + return NF_ACCEPT; + } + skb->nfct = NULL; } /* rcu_read_lock()ed by nf_hook_slow */ @@ -778,7 +788,8 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, pr_debug("not prepared to track yet or error occured\n"); NF_CT_STAT_INC_ATOMIC(net, error); NF_CT_STAT_INC_ATOMIC(net, invalid); - return -ret; + ret = -ret; + goto out; } l4proto = __nf_ct_l4proto_find(pf, protonum); @@ -791,22 +802,25 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, if (ret <= 0) { NF_CT_STAT_INC_ATOMIC(net, error); NF_CT_STAT_INC_ATOMIC(net, invalid); - return -ret; + ret = -ret; + goto out; } } - ct = resolve_normal_ct(net, skb, dataoff, pf, protonum, + ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, l3proto, l4proto, &set_reply, &ctinfo); if (!ct) { /* Not valid part of a connection */ NF_CT_STAT_INC_ATOMIC(net, invalid); - return NF_ACCEPT; + ret = NF_ACCEPT; + goto out; } if (IS_ERR(ct)) { /* Too stressed to deal. */ NF_CT_STAT_INC_ATOMIC(net, drop); - return NF_DROP; + ret = NF_DROP; + goto out; } NF_CT_ASSERT(skb->nfct); @@ -821,11 +835,15 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, NF_CT_STAT_INC_ATOMIC(net, invalid); if (ret == -NF_DROP) NF_CT_STAT_INC_ATOMIC(net, drop); - return -ret; + ret = -ret; + goto out; } if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) nf_conntrack_event_cache(IPCT_REPLY, ct); +out: + if (tmpl) + nf_ct_put(tmpl); return ret; } @@ -864,7 +882,7 @@ void nf_conntrack_alter_reply(struct nf_conn *ct, return; rcu_read_lock(); - __nf_ct_try_assign_helper(ct, GFP_ATOMIC); + __nf_ct_try_assign_helper(ct, NULL, GFP_ATOMIC); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply); diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index c0e461f466a..8144b0da551 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -96,13 +96,22 @@ struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp) } EXPORT_SYMBOL_GPL(nf_ct_helper_ext_add); -int __nf_ct_try_assign_helper(struct nf_conn *ct, gfp_t flags) +int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, + gfp_t flags) { + struct nf_conntrack_helper *helper = NULL; + struct nf_conn_help *help; int ret = 0; - struct nf_conntrack_helper *helper; - struct nf_conn_help *help = nfct_help(ct); - helper = __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); + if (tmpl != NULL) { + help = nfct_help(tmpl); + if (help != NULL) + helper = help->helper; + } + + help = nfct_help(ct); + if (helper == NULL) + helper = __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); if (helper == NULL) { if (help) rcu_assign_pointer(help->helper, NULL); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index f5c0b09e12f..09044f9f4b2 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1249,7 +1249,7 @@ ctnetlink_create_conntrack(struct net *net, } } else { /* try an implicit helper assignation */ - err = __nf_ct_try_assign_helper(ct, GFP_ATOMIC); + err = __nf_ct_try_assign_helper(ct, NULL, GFP_ATOMIC); if (err < 0) goto err2; } -- cgit v1.2.3-70-g09d2 From d4bfa033ed84e0ae446eff445d107ffd5ee78df3 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Fri, 29 Jan 2010 15:03:36 +0100 Subject: HID: make raw reports possible for both feature and output reports In commit 2da31939a42 ("Bluetooth: Implement raw output support for HIDP layer"), support for Bluetooth hid_output_raw_report was added, but it pushes the data to the intr socket instead of the ctrl one. This has been fixed by 6bf8268f9a91f1 ("Bluetooth: Use the control channel for raw HID reports") Still, it is necessary to distinguish whether the report in question should be either FEATURE or OUTPUT. For this, we have to extend the generic HID API, so that hid_output_raw_report() callback provides means to specify this value so that it can be passed down to lower level hardware drivers (currently Bluetooth and USB). Based on original patch by Bastien Nocera Acked-by: Marcel Holtmann Signed-off-by: Jiri Kosina --- drivers/hid/hidraw.c | 2 +- drivers/hid/usbhid/hid-core.c | 5 +++-- include/linux/hid.h | 2 +- net/bluetooth/hidp/core.c | 17 ++++++++++++++--- 4 files changed, 19 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c index cdd136942bc..d04476700b7 100644 --- a/drivers/hid/hidraw.c +++ b/drivers/hid/hidraw.c @@ -134,7 +134,7 @@ static ssize_t hidraw_write(struct file *file, const char __user *buffer, size_t goto out; } - ret = dev->hid_output_raw_report(dev, buf, count); + ret = dev->hid_output_raw_report(dev, buf, count, HID_OUTPUT_REPORT); out: kfree(buf); return ret; diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c index e2997a8d5e1..caa16c057ce 100644 --- a/drivers/hid/usbhid/hid-core.c +++ b/drivers/hid/usbhid/hid-core.c @@ -774,7 +774,8 @@ static int hid_alloc_buffers(struct usb_device *dev, struct hid_device *hid) return 0; } -static int usbhid_output_raw_report(struct hid_device *hid, __u8 *buf, size_t count) +static int usbhid_output_raw_report(struct hid_device *hid, __u8 *buf, size_t count, + unsigned char report_type) { struct usbhid_device *usbhid = hid->driver_data; struct usb_device *dev = hid_to_usb_dev(hid); @@ -785,7 +786,7 @@ static int usbhid_output_raw_report(struct hid_device *hid, __u8 *buf, size_t co ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), HID_REQ_SET_REPORT, USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE, - ((HID_OUTPUT_REPORT + 1) << 8) | *buf, + ((report_type + 1) << 8) | *buf, interface->desc.bInterfaceNumber, buf + 1, count - 1, USB_CTRL_SET_TIMEOUT); diff --git a/include/linux/hid.h b/include/linux/hid.h index 87093652dda..3661a626941 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -501,7 +501,7 @@ struct hid_device { /* device report descriptor */ void (*hiddev_report_event) (struct hid_device *, struct hid_report *); /* handler for raw output data, used by hidraw */ - int (*hid_output_raw_report) (struct hid_device *, __u8 *, size_t); + int (*hid_output_raw_report) (struct hid_device *, __u8 *, size_t, unsigned char); /* debugging support via debugfs */ unsigned short debug; diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 6cf526d06e2..37ba153c4cd 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -313,10 +313,21 @@ static int hidp_send_report(struct hidp_session *session, struct hid_report *rep return hidp_queue_report(session, buf, rsize); } -static int hidp_output_raw_report(struct hid_device *hid, unsigned char *data, size_t count) +static int hidp_output_raw_report(struct hid_device *hid, unsigned char *data, size_t count, + unsigned char report_type) { - if (hidp_send_ctrl_message(hid->driver_data, - HIDP_TRANS_SET_REPORT | HIDP_DATA_RTYPE_FEATURE, + switch (report_type) { + case HID_FEATURE_REPORT: + report_type = HIDP_TRANS_SET_REPORT | HIDP_DATA_RTYPE_FEATURE; + break; + case HID_OUTPUT_REPORT: + report_type = HIDP_TRANS_DATA | HIDP_DATA_RTYPE_OUPUT; + break; + default: + return -EINVAL; + } + + if (hidp_send_ctrl_message(hid->driver_data, report_type, data, count)) return -ENOMEM; return count; -- cgit v1.2.3-70-g09d2 From 84f3bb9ae9db90f7fb15d98b55279a58ab1b2363 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 3 Feb 2010 17:17:06 +0100 Subject: netfilter: xtables: add CT target Add a new target for the raw table, which can be used to specify conntrack parameters for specific connections, f.i. the conntrack helper. The target attaches a "template" connection tracking entry to the skb, which is used by the conntrack core when initializing a new conntrack. Signed-off-by: Patrick McHardy --- include/linux/netfilter/Kbuild | 1 + include/linux/netfilter/xt_CT.h | 17 +++ include/net/netfilter/nf_conntrack_helper.h | 3 + net/netfilter/Kconfig | 12 +++ net/netfilter/Makefile | 1 + net/netfilter/nf_conntrack_helper.c | 19 ++++ net/netfilter/xt_CT.c | 158 ++++++++++++++++++++++++++++ 7 files changed, 211 insertions(+) create mode 100644 include/linux/netfilter/xt_CT.h create mode 100644 net/netfilter/xt_CT.c (limited to 'include/linux') diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index 2aea50399c0..a5a63e41b8a 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -6,6 +6,7 @@ header-y += nfnetlink_queue.h header-y += xt_CLASSIFY.h header-y += xt_CONNMARK.h header-y += xt_CONNSECMARK.h +header-y += xt_CT.h header-y += xt_DSCP.h header-y += xt_LED.h header-y += xt_MARK.h diff --git a/include/linux/netfilter/xt_CT.h b/include/linux/netfilter/xt_CT.h new file mode 100644 index 00000000000..7fd0effe131 --- /dev/null +++ b/include/linux/netfilter/xt_CT.h @@ -0,0 +1,17 @@ +#ifndef _XT_CT_H +#define _XT_CT_H + +#define XT_CT_NOTRACK 0x1 + +struct xt_ct_target_info { + u_int16_t flags; + u_int16_t __unused; + u_int32_t ct_events; + u_int32_t exp_events; + char helper[16]; + + /* Used internally by the kernel */ + struct nf_conn *ct __attribute__((aligned(8))); +}; + +#endif /* _XT_CT_H */ diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index e17aaa3e19f..32c305dbdab 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -42,6 +42,9 @@ struct nf_conntrack_helper { extern struct nf_conntrack_helper * __nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum); +extern struct nf_conntrack_helper * +nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum); + extern int nf_conntrack_helper_register(struct nf_conntrack_helper *); extern void nf_conntrack_helper_unregister(struct nf_conntrack_helper *); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 634d14affc8..4469d45261f 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -341,6 +341,18 @@ config NETFILTER_XT_TARGET_CONNSECMARK To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_TARGET_CT + tristate '"CT" target support' + depends on NF_CONNTRACK + depends on IP_NF_RAW || IP6_NF_RAW + depends on NETFILTER_ADVANCED + help + This options adds a `CT' target, which allows to specify initial + connection tracking parameters like events to be delivered and + the helper to be used. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_TARGET_DSCP tristate '"DSCP" and "TOS" target support' depends on IP_NF_MANGLE || IP6_NF_MANGLE diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 49f62ee4e9f..f873644f02f 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -44,6 +44,7 @@ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o +obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 8144b0da551..a74a5769877 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -83,6 +83,25 @@ __nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum) } EXPORT_SYMBOL_GPL(__nf_conntrack_helper_find); +struct nf_conntrack_helper * +nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum) +{ + struct nf_conntrack_helper *h; + + h = __nf_conntrack_helper_find(name, l3num, protonum); +#ifdef CONFIG_MODULES + if (h == NULL) { + if (request_module("nfct-helper-%s", name) == 0) + h = __nf_conntrack_helper_find(name, l3num, protonum); + } +#endif + if (h != NULL && !try_module_get(h->me)) + h = NULL; + + return h; +} +EXPORT_SYMBOL_GPL(nf_conntrack_helper_try_module_get); + struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp) { struct nf_conn_help *help; diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c new file mode 100644 index 00000000000..8183a054256 --- /dev/null +++ b/net/netfilter/xt_CT.c @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2010 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned int xt_ct_target(struct sk_buff *skb, + const struct xt_target_param *par) +{ + const struct xt_ct_target_info *info = par->targinfo; + struct nf_conn *ct = info->ct; + + /* Previously seen (loopback)? Ignore. */ + if (skb->nfct != NULL) + return XT_CONTINUE; + + atomic_inc(&ct->ct_general.use); + skb->nfct = &ct->ct_general; + skb->nfctinfo = IP_CT_NEW; + + return XT_CONTINUE; +} + +static u8 xt_ct_find_proto(const struct xt_tgchk_param *par) +{ + if (par->family == AF_INET) { + const struct ipt_entry *e = par->entryinfo; + + if (e->ip.invflags & IPT_INV_PROTO) + return 0; + return e->ip.proto; + } else if (par->family == AF_INET6) { + const struct ip6t_entry *e = par->entryinfo; + + if (e->ipv6.invflags & IP6T_INV_PROTO) + return 0; + return e->ipv6.proto; + } else + return 0; +} + +static bool xt_ct_tg_check(const struct xt_tgchk_param *par) +{ + struct xt_ct_target_info *info = par->targinfo; + struct nf_conntrack_tuple t; + struct nf_conn_help *help; + struct nf_conn *ct; + u8 proto; + + if (info->flags & ~XT_CT_NOTRACK) + return false; + + if (info->flags & XT_CT_NOTRACK) { + ct = &nf_conntrack_untracked; + atomic_inc(&ct->ct_general.use); + goto out; + } + + if (nf_ct_l3proto_try_module_get(par->family) < 0) + goto err1; + + memset(&t, 0, sizeof(t)); + ct = nf_conntrack_alloc(par->net, &t, &t, GFP_KERNEL); + if (IS_ERR(ct)) + goto err2; + + if ((info->ct_events || info->exp_events) && + !nf_ct_ecache_ext_add(ct, info->ct_events, info->exp_events, + GFP_KERNEL)) + goto err3; + + if (info->helper[0]) { + proto = xt_ct_find_proto(par); + if (!proto) + goto err3; + + help = nf_ct_helper_ext_add(ct, GFP_KERNEL); + if (help == NULL) + goto err3; + + help->helper = nf_conntrack_helper_try_module_get(info->helper, + par->family, + proto); + if (help->helper == NULL) + goto err3; + } + + __set_bit(IPS_TEMPLATE_BIT, &ct->status); + __set_bit(IPS_CONFIRMED_BIT, &ct->status); +out: + info->ct = ct; + return true; + +err3: + nf_conntrack_free(ct); +err2: + nf_ct_l3proto_module_put(par->family); +err1: + return false; +} + +static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par) +{ + struct xt_ct_target_info *info = par->targinfo; + struct nf_conn *ct = info->ct; + struct nf_conn_help *help; + + if (ct != &nf_conntrack_untracked) { + help = nfct_help(ct); + if (help) + module_put(help->helper->me); + + nf_ct_l3proto_module_put(par->family); + } + nf_ct_put(info->ct); +} + +static struct xt_target xt_ct_tg __read_mostly = { + .name = "CT", + .family = NFPROTO_UNSPEC, + .targetsize = XT_ALIGN(sizeof(struct xt_ct_target_info)), + .checkentry = xt_ct_tg_check, + .destroy = xt_ct_tg_destroy, + .target = xt_ct_target, + .table = "raw", + .me = THIS_MODULE, +}; + +static int __init xt_ct_tg_init(void) +{ + return xt_register_target(&xt_ct_tg); +} + +static void __exit xt_ct_tg_exit(void) +{ + xt_unregister_target(&xt_ct_tg); +} + +module_init(xt_ct_tg_init); +module_exit(xt_ct_tg_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Xtables: connection tracking target"); +MODULE_ALIAS("ipt_CT"); +MODULE_ALIAS("ip6t_CT"); -- cgit v1.2.3-70-g09d2 From cd757645fbdc34a8343c04bb0e74e06fccc2cb10 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Sat, 30 Jan 2010 10:25:18 +0530 Subject: perf: Make bp_len type to u64 generic across the arch Change 'bp_len' type to __u64 to make it work across archs as the s390 architecture watch point length can be upto 2^64. reference: http://lkml.org/lkml/2010/1/25/212 This is an ABI change that is not backward compatible with the previous hardware breakpoint info layout integrated in this development cycle, a rebuilt of perf tools is necessary for versions based on 2.6.33-rc1 - 2.6.33-rc6 to work with a kernel based on this patch. Signed-off-by: Mahesh Salgaonkar Acked-by: Peter Zijlstra Cc: Ananth N Mavinakayanahalli Cc: "K. Prasad" Cc: Maneesh Soni Cc: Heiko Carstens Cc: Martin LKML-Reference: <20100130045518.GA20776@in.ibm.com> Signed-off-by: Frederic Weisbecker --- include/linux/hw_breakpoint.h | 2 +- include/linux/perf_event.h | 6 ++---- kernel/hw_breakpoint.c | 2 +- kernel/perf_event.c | 2 +- 4 files changed, 5 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 070ba062173..5977b724f7c 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -44,7 +44,7 @@ static inline int hw_breakpoint_type(struct perf_event *bp) return bp->attr.bp_type; } -static inline int hw_breakpoint_len(struct perf_event *bp) +static inline unsigned long hw_breakpoint_len(struct perf_event *bp) { return bp->attr.bp_len; } diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 8fa71874113..a177698d95e 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -211,11 +211,9 @@ struct perf_event_attr { __u32 wakeup_watermark; /* bytes before wakeup */ }; - __u32 __reserved_2; - - __u64 bp_addr; __u32 bp_type; - __u32 bp_len; + __u64 bp_addr; + __u64 bp_len; }; /* diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 8a5c7d55ac9..967e66143e1 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -360,8 +360,8 @@ EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr) { u64 old_addr = bp->attr.bp_addr; + u64 old_len = bp->attr.bp_len; int old_type = bp->attr.bp_type; - int old_len = bp->attr.bp_len; int err = 0; perf_event_disable(bp); diff --git a/kernel/perf_event.c b/kernel/perf_event.c index d27746bd3a0..2b19297742c 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4580,7 +4580,7 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, if (attr->type >= PERF_TYPE_MAX) return -EINVAL; - if (attr->__reserved_1 || attr->__reserved_2) + if (attr->__reserved_1) return -EINVAL; if (attr->sample_type & ~(PERF_SAMPLE_MAX-1)) -- cgit v1.2.3-70-g09d2 From 002345925e6c45861f60db6f4fc6236713fd8847 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 3 Feb 2010 15:36:43 -0800 Subject: syslog: distinguish between /proc/kmsg and syscalls This allows the LSM to distinguish between syslog functions originating from /proc/kmsg access and direct syscalls. By default, the commoncaps will now no longer require CAP_SYS_ADMIN to read an opened /proc/kmsg file descriptor. For example the kernel syslog reader can now drop privileges after opening /proc/kmsg, instead of staying privileged with CAP_SYS_ADMIN. MAC systems that implement security_syslog have unchanged behavior. Signed-off-by: Kees Cook Acked-by: Serge Hallyn Acked-by: John Johansen Signed-off-by: James Morris --- fs/proc/kmsg.c | 14 +++++++------- include/linux/security.h | 11 ++++++----- include/linux/syslog.h | 29 +++++++++++++++++++++++++++++ kernel/printk.c | 7 ++++--- security/commoncap.c | 7 ++++++- security/security.c | 4 ++-- security/selinux/hooks.c | 5 +++-- security/smack/smack_lsm.c | 4 ++-- 8 files changed, 59 insertions(+), 22 deletions(-) create mode 100644 include/linux/syslog.h (limited to 'include/linux') diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c index 7ca78346d3f..6a3d843a108 100644 --- a/fs/proc/kmsg.c +++ b/fs/proc/kmsg.c @@ -12,37 +12,37 @@ #include #include #include +#include #include #include extern wait_queue_head_t log_wait; -extern int do_syslog(int type, char __user *bug, int count); - static int kmsg_open(struct inode * inode, struct file * file) { - return do_syslog(1,NULL,0); + return do_syslog(1, NULL, 0, SYSLOG_FROM_FILE); } static int kmsg_release(struct inode * inode, struct file * file) { - (void) do_syslog(0,NULL,0); + (void) do_syslog(0, NULL, 0, SYSLOG_FROM_FILE); return 0; } static ssize_t kmsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - if ((file->f_flags & O_NONBLOCK) && !do_syslog(9, NULL, 0)) + if ((file->f_flags & O_NONBLOCK) && + !do_syslog(9, NULL, 0, SYSLOG_FROM_FILE)) return -EAGAIN; - return do_syslog(2, buf, count); + return do_syslog(2, buf, count, SYSLOG_FROM_FILE); } static unsigned int kmsg_poll(struct file *file, poll_table *wait) { poll_wait(file, &log_wait, wait); - if (do_syslog(9, NULL, 0)) + if (do_syslog(9, NULL, 0, SYSLOG_FROM_FILE)) return POLLIN | POLLRDNORM; return 0; } diff --git a/include/linux/security.h b/include/linux/security.h index 26eca85b241..a4dc74d86ac 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -76,7 +76,7 @@ extern int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, extern int cap_task_setscheduler(struct task_struct *p, int policy, struct sched_param *lp); extern int cap_task_setioprio(struct task_struct *p, int ioprio); extern int cap_task_setnice(struct task_struct *p, int nice); -extern int cap_syslog(int type); +extern int cap_syslog(int type, bool from_file); extern int cap_vm_enough_memory(struct mm_struct *mm, long pages); struct msghdr; @@ -1349,6 +1349,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * logging to the console. * See the syslog(2) manual page for an explanation of the @type values. * @type contains the type of action. + * @from_file indicates the context of action (if it came from /proc). * Return 0 if permission is granted. * @settime: * Check permission to change the system time. @@ -1463,7 +1464,7 @@ struct security_operations { int (*sysctl) (struct ctl_table *table, int op); int (*quotactl) (int cmds, int type, int id, struct super_block *sb); int (*quota_on) (struct dentry *dentry); - int (*syslog) (int type); + int (*syslog) (int type, bool from_file); int (*settime) (struct timespec *ts, struct timezone *tz); int (*vm_enough_memory) (struct mm_struct *mm, long pages); @@ -1762,7 +1763,7 @@ int security_acct(struct file *file); int security_sysctl(struct ctl_table *table, int op); int security_quotactl(int cmds, int type, int id, struct super_block *sb); int security_quota_on(struct dentry *dentry); -int security_syslog(int type); +int security_syslog(int type, bool from_file); int security_settime(struct timespec *ts, struct timezone *tz); int security_vm_enough_memory(long pages); int security_vm_enough_memory_mm(struct mm_struct *mm, long pages); @@ -2008,9 +2009,9 @@ static inline int security_quota_on(struct dentry *dentry) return 0; } -static inline int security_syslog(int type) +static inline int security_syslog(int type, bool from_file) { - return cap_syslog(type); + return cap_syslog(type, from_file); } static inline int security_settime(struct timespec *ts, struct timezone *tz) diff --git a/include/linux/syslog.h b/include/linux/syslog.h new file mode 100644 index 00000000000..5f02b1817be --- /dev/null +++ b/include/linux/syslog.h @@ -0,0 +1,29 @@ +/* Syslog internals + * + * Copyright 2010 Canonical, Ltd. + * Author: Kees Cook + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef _LINUX_SYSLOG_H +#define _LINUX_SYSLOG_H + +#define SYSLOG_FROM_CALL 0 +#define SYSLOG_FROM_FILE 1 + +int do_syslog(int type, char __user *buf, int count, bool from_file); + +#endif /* _LINUX_SYSLOG_H */ diff --git a/kernel/printk.c b/kernel/printk.c index 17463ca2e22..809cf9a258a 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -35,6 +35,7 @@ #include #include #include +#include #include @@ -273,14 +274,14 @@ static inline void boot_delay_msec(void) * 9 -- Return number of unread characters in the log buffer * 10 -- Return size of the log buffer */ -int do_syslog(int type, char __user *buf, int len) +int do_syslog(int type, char __user *buf, int len, bool from_file) { unsigned i, j, limit, count; int do_clear = 0; char c; int error = 0; - error = security_syslog(type); + error = security_syslog(type, from_file); if (error) return error; @@ -417,7 +418,7 @@ out: SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) { - return do_syslog(type, buf, len); + return do_syslog(type, buf, len, SYSLOG_FROM_CALL); } /* diff --git a/security/commoncap.c b/security/commoncap.c index f800fdb3de9..677fad9d5cb 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -27,6 +27,7 @@ #include #include #include +#include /* * If a non-root user executes a setuid-root binary in @@ -888,12 +889,16 @@ error: /** * cap_syslog - Determine whether syslog function is permitted * @type: Function requested + * @from_file: Whether this request came from an open file (i.e. /proc) * * Determine whether the current process is permitted to use a particular * syslog function, returning 0 if permission is granted, -ve if not. */ -int cap_syslog(int type) +int cap_syslog(int type, bool from_file) { + /* /proc/kmsg can open be opened by CAP_SYS_ADMIN */ + if (type != 1 && from_file) + return 0; if ((type != 3 && type != 10) && !capable(CAP_SYS_ADMIN)) return -EPERM; return 0; diff --git a/security/security.c b/security/security.c index 440afe5eb54..971092c06f3 100644 --- a/security/security.c +++ b/security/security.c @@ -203,9 +203,9 @@ int security_quota_on(struct dentry *dentry) return security_ops->quota_on(dentry); } -int security_syslog(int type) +int security_syslog(int type, bool from_file) { - return security_ops->syslog(type); + return security_ops->syslog(type, from_file); } int security_settime(struct timespec *ts, struct timezone *tz) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 9a2ee845e9d..a4862a0730f 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -76,6 +76,7 @@ #include #include #include +#include #include "avc.h" #include "objsec.h" @@ -2049,11 +2050,11 @@ static int selinux_quota_on(struct dentry *dentry) return dentry_has_perm(cred, NULL, dentry, FILE__QUOTAON); } -static int selinux_syslog(int type) +static int selinux_syslog(int type, bool from_file) { int rc; - rc = cap_syslog(type); + rc = cap_syslog(type, from_file); if (rc) return rc; diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 529c9ca6587..a5721b373f5 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -157,12 +157,12 @@ static int smack_ptrace_traceme(struct task_struct *ptp) * * Returns 0 on success, error code otherwise. */ -static int smack_syslog(int type) +static int smack_syslog(int type, bool from_file) { int rc; char *sp = current_security(); - rc = cap_syslog(type); + rc = cap_syslog(type, from_file); if (rc != 0) return rc; -- cgit v1.2.3-70-g09d2 From d78ca3cd733d8a2c3dcd88471beb1a15d973eed8 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 3 Feb 2010 15:37:13 -0800 Subject: syslog: use defined constants instead of raw numbers Right now the syslog "type" action are just raw numbers which makes the source difficult to follow. This patch replaces the raw numbers with defined constants for some level of sanity. Signed-off-by: Kees Cook Acked-by: John Johansen Acked-by: Serge Hallyn Signed-off-by: James Morris --- fs/proc/kmsg.c | 10 +++++----- include/linux/syslog.h | 23 +++++++++++++++++++++++ kernel/printk.c | 45 +++++++++++++++++++-------------------------- security/commoncap.c | 5 +++-- security/selinux/hooks.c | 21 +++++++++++---------- 5 files changed, 61 insertions(+), 43 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c index 6a3d843a108..cfe90a48a6e 100644 --- a/fs/proc/kmsg.c +++ b/fs/proc/kmsg.c @@ -21,12 +21,12 @@ extern wait_queue_head_t log_wait; static int kmsg_open(struct inode * inode, struct file * file) { - return do_syslog(1, NULL, 0, SYSLOG_FROM_FILE); + return do_syslog(SYSLOG_ACTION_OPEN, NULL, 0, SYSLOG_FROM_FILE); } static int kmsg_release(struct inode * inode, struct file * file) { - (void) do_syslog(0, NULL, 0, SYSLOG_FROM_FILE); + (void) do_syslog(SYSLOG_ACTION_CLOSE, NULL, 0, SYSLOG_FROM_FILE); return 0; } @@ -34,15 +34,15 @@ static ssize_t kmsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { if ((file->f_flags & O_NONBLOCK) && - !do_syslog(9, NULL, 0, SYSLOG_FROM_FILE)) + !do_syslog(SYSLOG_ACTION_SIZE_UNREAD, NULL, 0, SYSLOG_FROM_FILE)) return -EAGAIN; - return do_syslog(2, buf, count, SYSLOG_FROM_FILE); + return do_syslog(SYSLOG_ACTION_READ, buf, count, SYSLOG_FROM_FILE); } static unsigned int kmsg_poll(struct file *file, poll_table *wait) { poll_wait(file, &log_wait, wait); - if (do_syslog(9, NULL, 0, SYSLOG_FROM_FILE)) + if (do_syslog(SYSLOG_ACTION_SIZE_UNREAD, NULL, 0, SYSLOG_FROM_FILE)) return POLLIN | POLLRDNORM; return 0; } diff --git a/include/linux/syslog.h b/include/linux/syslog.h index 5f02b1817be..38911391a13 100644 --- a/include/linux/syslog.h +++ b/include/linux/syslog.h @@ -21,6 +21,29 @@ #ifndef _LINUX_SYSLOG_H #define _LINUX_SYSLOG_H +/* Close the log. Currently a NOP. */ +#define SYSLOG_ACTION_CLOSE 0 +/* Open the log. Currently a NOP. */ +#define SYSLOG_ACTION_OPEN 1 +/* Read from the log. */ +#define SYSLOG_ACTION_READ 2 +/* Read all messages remaining in the ring buffer. */ +#define SYSLOG_ACTION_READ_ALL 3 +/* Read and clear all messages remaining in the ring buffer */ +#define SYSLOG_ACTION_READ_CLEAR 4 +/* Clear ring buffer. */ +#define SYSLOG_ACTION_CLEAR 5 +/* Disable printk's to console */ +#define SYSLOG_ACTION_CONSOLE_OFF 6 +/* Enable printk's to console */ +#define SYSLOG_ACTION_CONSOLE_ON 7 +/* Set level of messages printed to console */ +#define SYSLOG_ACTION_CONSOLE_LEVEL 8 +/* Return number of unread characters in the log buffer */ +#define SYSLOG_ACTION_SIZE_UNREAD 9 +/* Return size of the log buffer */ +#define SYSLOG_ACTION_SIZE_BUFFER 10 + #define SYSLOG_FROM_CALL 0 #define SYSLOG_FROM_FILE 1 diff --git a/kernel/printk.c b/kernel/printk.c index 809cf9a258a..3e162d86709 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -259,21 +259,6 @@ static inline void boot_delay_msec(void) } #endif -/* - * Commands to do_syslog: - * - * 0 -- Close the log. Currently a NOP. - * 1 -- Open the log. Currently a NOP. - * 2 -- Read from the log. - * 3 -- Read all messages remaining in the ring buffer. - * 4 -- Read and clear all messages remaining in the ring buffer - * 5 -- Clear ring buffer. - * 6 -- Disable printk's to console - * 7 -- Enable printk's to console - * 8 -- Set level of messages printed to console - * 9 -- Return number of unread characters in the log buffer - * 10 -- Return size of the log buffer - */ int do_syslog(int type, char __user *buf, int len, bool from_file) { unsigned i, j, limit, count; @@ -286,11 +271,11 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) return error; switch (type) { - case 0: /* Close log */ + case SYSLOG_ACTION_CLOSE: /* Close log */ break; - case 1: /* Open log */ + case SYSLOG_ACTION_OPEN: /* Open log */ break; - case 2: /* Read from log */ + case SYSLOG_ACTION_READ: /* Read from log */ error = -EINVAL; if (!buf || len < 0) goto out; @@ -321,10 +306,12 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) if (!error) error = i; break; - case 4: /* Read/clear last kernel messages */ + /* Read/clear last kernel messages */ + case SYSLOG_ACTION_READ_CLEAR: do_clear = 1; /* FALL THRU */ - case 3: /* Read last kernel messages */ + /* Read last kernel messages */ + case SYSLOG_ACTION_READ_ALL: error = -EINVAL; if (!buf || len < 0) goto out; @@ -377,21 +364,25 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) } } break; - case 5: /* Clear ring buffer */ + /* Clear ring buffer */ + case SYSLOG_ACTION_CLEAR: logged_chars = 0; break; - case 6: /* Disable logging to console */ + /* Disable logging to console */ + case SYSLOG_ACTION_CONSOLE_OFF: if (saved_console_loglevel == -1) saved_console_loglevel = console_loglevel; console_loglevel = minimum_console_loglevel; break; - case 7: /* Enable logging to console */ + /* Enable logging to console */ + case SYSLOG_ACTION_CONSOLE_ON: if (saved_console_loglevel != -1) { console_loglevel = saved_console_loglevel; saved_console_loglevel = -1; } break; - case 8: /* Set level of messages printed to console */ + /* Set level of messages printed to console */ + case SYSLOG_ACTION_CONSOLE_LEVEL: error = -EINVAL; if (len < 1 || len > 8) goto out; @@ -402,10 +393,12 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) saved_console_loglevel = -1; error = 0; break; - case 9: /* Number of chars in the log buffer */ + /* Number of chars in the log buffer */ + case SYSLOG_ACTION_SIZE_UNREAD: error = log_end - log_start; break; - case 10: /* Size of the log buffer */ + /* Size of the log buffer */ + case SYSLOG_ACTION_SIZE_BUFFER: error = log_buf_len; break; default: diff --git a/security/commoncap.c b/security/commoncap.c index 677fad9d5cb..cf01b2eebb6 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -897,9 +897,10 @@ error: int cap_syslog(int type, bool from_file) { /* /proc/kmsg can open be opened by CAP_SYS_ADMIN */ - if (type != 1 && from_file) + if (type != SYSLOG_ACTION_OPEN && from_file) return 0; - if ((type != 3 && type != 10) && !capable(CAP_SYS_ADMIN)) + if ((type != SYSLOG_ACTION_READ_ALL && + type != SYSLOG_ACTION_SIZE_BUFFER) && !capable(CAP_SYS_ADMIN)) return -EPERM; return 0; } diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index a4862a0730f..6b36ce2eef2 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2059,20 +2059,21 @@ static int selinux_syslog(int type, bool from_file) return rc; switch (type) { - case 3: /* Read last kernel messages */ - case 10: /* Return size of the log buffer */ + case SYSLOG_ACTION_READ_ALL: /* Read last kernel messages */ + case SYSLOG_ACTION_SIZE_BUFFER: /* Return size of the log buffer */ rc = task_has_system(current, SYSTEM__SYSLOG_READ); break; - case 6: /* Disable logging to console */ - case 7: /* Enable logging to console */ - case 8: /* Set level of messages printed to console */ + case SYSLOG_ACTION_CONSOLE_OFF: /* Disable logging to console */ + case SYSLOG_ACTION_CONSOLE_ON: /* Enable logging to console */ + /* Set level of messages printed to console */ + case SYSLOG_ACTION_CONSOLE_LEVEL: rc = task_has_system(current, SYSTEM__SYSLOG_CONSOLE); break; - case 0: /* Close log */ - case 1: /* Open log */ - case 2: /* Read from log */ - case 4: /* Read/clear last kernel messages */ - case 5: /* Clear ring buffer */ + case SYSLOG_ACTION_CLOSE: /* Close log */ + case SYSLOG_ACTION_OPEN: /* Open log */ + case SYSLOG_ACTION_READ: /* Read from log */ + case SYSLOG_ACTION_READ_CLEAR: /* Read/clear last kernel messages */ + case SYSLOG_ACTION_CLEAR: /* Clear ring buffer */ default: rc = task_has_system(current, SYSTEM__SYSLOG_MOD); break; -- cgit v1.2.3-70-g09d2 From 8a83a00b0735190384a348156837918271034144 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 30 Jan 2010 12:23:03 +0000 Subject: net: maintain namespace isolation between vlan and real device In the vlan and macvlan drivers, the start_xmit function forwards data to the dev_queue_xmit function for another device, which may potentially belong to a different namespace. To make sure that classification stays within a single namespace, this resets the potentially critical fields. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 2 +- include/linux/netdevice.h | 9 +++++++++ net/8021q/vlan_dev.c | 2 +- net/core/dev.c | 35 +++++++++++++++++++++++++++++++---- 4 files changed, 42 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index fa0dc514dba..d32e0bdfc5e 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -269,7 +269,7 @@ static int macvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev) } xmit_world: - skb->dev = vlan->lowerdev; + skb_set_dev(skb, vlan->lowerdev); return dev_queue_xmit(skb); } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 93a32a5ca74..622ba5aa93c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1004,6 +1004,15 @@ static inline bool netdev_uses_dsa_tags(struct net_device *dev) return 0; } +#ifndef CONFIG_NET_NS +static inline void skb_set_dev(struct sk_buff *skb, struct net_device *dev) +{ + skb->dev = dev; +} +#else /* CONFIG_NET_NS */ +void skb_set_dev(struct sk_buff *skb, struct net_device *dev); +#endif + static inline bool netdev_uses_trailer_tags(struct net_device *dev) { #ifdef CONFIG_NET_DSA_TAG_TRAILER diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index a9e1f178561..9e83272fc5b 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -322,7 +322,7 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, } - skb->dev = vlan_dev_info(dev)->real_dev; + skb_set_dev(skb, vlan_dev_info(dev)->real_dev); len = skb->len; ret = dev_queue_xmit(skb); diff --git a/net/core/dev.c b/net/core/dev.c index 2cba5c521e5..94c1eeed25e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1448,13 +1448,10 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) if (skb->len > (dev->mtu + dev->hard_header_len)) return NET_RX_DROP; - skb_dst_drop(skb); + skb_set_dev(skb, dev); skb->tstamp.tv64 = 0; skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, dev); - skb->mark = 0; - secpath_reset(skb); - nf_reset(skb); return netif_rx(skb); } EXPORT_SYMBOL_GPL(dev_forward_skb); @@ -1614,6 +1611,36 @@ static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb) return false; } +/** + * skb_dev_set -- assign a new device to a buffer + * @skb: buffer for the new device + * @dev: network device + * + * If an skb is owned by a device already, we have to reset + * all data private to the namespace a device belongs to + * before assigning it a new device. + */ +#ifdef CONFIG_NET_NS +void skb_set_dev(struct sk_buff *skb, struct net_device *dev) +{ + skb_dst_drop(skb); + if (skb->dev && !net_eq(dev_net(skb->dev), dev_net(dev))) { + secpath_reset(skb); + nf_reset(skb); + skb_init_secmark(skb); + skb->mark = 0; + skb->priority = 0; + skb->nf_trace = 0; + skb->ipvs_property = 0; +#ifdef CONFIG_NET_SCHED + skb->tc_index = 0; +#endif + } + skb->dev = dev; +} +EXPORT_SYMBOL(skb_set_dev); +#endif /* CONFIG_NET_NS */ + /* * Invalidate hardware checksum when packet is to be mangled, and * complete checksum manually on outgoing path. -- cgit v1.2.3-70-g09d2 From fc0663d6b5e6d8e9b57f872a644c0aafd82361b7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 30 Jan 2010 12:23:40 +0000 Subject: macvlan: allow multiple driver backends This makes it possible to hook into the macvlan driver from another kernel module. In particular, the goal is to extend it with the macvtap backend that provides a tun/tap compatible interface directly on the macvlan device. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 113 ++++++++++++++++++++------------------------- include/linux/if_macvlan.h | 70 ++++++++++++++++++++++++++++ 2 files changed, 119 insertions(+), 64 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index d32e0bdfc5e..40faa368b07 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -39,31 +39,6 @@ struct macvlan_port { struct list_head vlans; }; -/** - * struct macvlan_rx_stats - MACVLAN percpu rx stats - * @rx_packets: number of received packets - * @rx_bytes: number of received bytes - * @multicast: number of received multicast packets - * @rx_errors: number of errors - */ -struct macvlan_rx_stats { - unsigned long rx_packets; - unsigned long rx_bytes; - unsigned long multicast; - unsigned long rx_errors; -}; - -struct macvlan_dev { - struct net_device *dev; - struct list_head list; - struct hlist_node hlist; - struct macvlan_port *port; - struct net_device *lowerdev; - struct macvlan_rx_stats *rx_stats; - enum macvlan_mode mode; -}; - - static struct macvlan_dev *macvlan_hash_lookup(const struct macvlan_port *port, const unsigned char *addr) { @@ -118,31 +93,17 @@ static int macvlan_addr_busy(const struct macvlan_port *port, return 0; } -static inline void macvlan_count_rx(const struct macvlan_dev *vlan, - unsigned int len, bool success, - bool multicast) -{ - struct macvlan_rx_stats *rx_stats; - - rx_stats = per_cpu_ptr(vlan->rx_stats, smp_processor_id()); - if (likely(success)) { - rx_stats->rx_packets++;; - rx_stats->rx_bytes += len; - if (multicast) - rx_stats->multicast++; - } else { - rx_stats->rx_errors++; - } -} -static int macvlan_broadcast_one(struct sk_buff *skb, struct net_device *dev, +static int macvlan_broadcast_one(struct sk_buff *skb, + const struct macvlan_dev *vlan, const struct ethhdr *eth, bool local) { + struct net_device *dev = vlan->dev; if (!skb) return NET_RX_DROP; if (local) - return dev_forward_skb(dev, skb); + return vlan->forward(dev, skb); skb->dev = dev; if (!compare_ether_addr_64bits(eth->h_dest, @@ -151,7 +112,7 @@ static int macvlan_broadcast_one(struct sk_buff *skb, struct net_device *dev, else skb->pkt_type = PACKET_MULTICAST; - return netif_rx(skb); + return vlan->receive(skb); } static void macvlan_broadcast(struct sk_buff *skb, @@ -175,7 +136,7 @@ static void macvlan_broadcast(struct sk_buff *skb, continue; nskb = skb_clone(skb, GFP_ATOMIC); - err = macvlan_broadcast_one(nskb, vlan->dev, eth, + err = macvlan_broadcast_one(nskb, vlan, eth, mode == MACVLAN_MODE_BRIDGE); macvlan_count_rx(vlan, skb->len + ETH_HLEN, err == NET_RX_SUCCESS, 1); @@ -238,7 +199,7 @@ static struct sk_buff *macvlan_handle_frame(struct sk_buff *skb) skb->dev = dev; skb->pkt_type = PACKET_HOST; - netif_rx(skb); + vlan->receive(skb); return NULL; } @@ -260,7 +221,7 @@ static int macvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev) dest = macvlan_hash_lookup(port, eth->h_dest); if (dest && dest->mode == MACVLAN_MODE_BRIDGE) { unsigned int length = skb->len + ETH_HLEN; - int ret = dev_forward_skb(dest->dev, skb); + int ret = dest->forward(dest->dev, skb); macvlan_count_rx(dest, length, ret == NET_RX_SUCCESS, 0); @@ -273,8 +234,8 @@ xmit_world: return dev_queue_xmit(skb); } -static netdev_tx_t macvlan_start_xmit(struct sk_buff *skb, - struct net_device *dev) +netdev_tx_t macvlan_start_xmit(struct sk_buff *skb, + struct net_device *dev) { int i = skb_get_queue_mapping(skb); struct netdev_queue *txq = netdev_get_tx_queue(dev, i); @@ -290,6 +251,7 @@ static netdev_tx_t macvlan_start_xmit(struct sk_buff *skb, return ret; } +EXPORT_SYMBOL_GPL(macvlan_start_xmit); static int macvlan_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, @@ -623,8 +585,11 @@ static int macvlan_get_tx_queues(struct net *net, return 0; } -static int macvlan_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[]) +int macvlan_common_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[], + int (*receive)(struct sk_buff *skb), + int (*forward)(struct net_device *dev, + struct sk_buff *skb)) { struct macvlan_dev *vlan = netdev_priv(dev); struct macvlan_port *port; @@ -664,6 +629,8 @@ static int macvlan_newlink(struct net *src_net, struct net_device *dev, vlan->lowerdev = lowerdev; vlan->dev = dev; vlan->port = port; + vlan->receive = receive; + vlan->forward = forward; vlan->mode = MACVLAN_MODE_VEPA; if (data && data[IFLA_MACVLAN_MODE]) @@ -677,8 +644,17 @@ static int macvlan_newlink(struct net *src_net, struct net_device *dev, netif_stacked_transfer_operstate(lowerdev, dev); return 0; } +EXPORT_SYMBOL_GPL(macvlan_common_newlink); -static void macvlan_dellink(struct net_device *dev, struct list_head *head) +static int macvlan_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + return macvlan_common_newlink(src_net, dev, tb, data, + netif_rx, + dev_forward_skb); +} + +void macvlan_dellink(struct net_device *dev, struct list_head *head) { struct macvlan_dev *vlan = netdev_priv(dev); struct macvlan_port *port = vlan->port; @@ -689,6 +665,7 @@ static void macvlan_dellink(struct net_device *dev, struct list_head *head) if (list_empty(&port->vlans)) macvlan_port_destroy(port->dev); } +EXPORT_SYMBOL_GPL(macvlan_dellink); static int macvlan_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) @@ -720,19 +697,27 @@ static const struct nla_policy macvlan_policy[IFLA_MACVLAN_MAX + 1] = { [IFLA_MACVLAN_MODE] = { .type = NLA_U32 }, }; -static struct rtnl_link_ops macvlan_link_ops __read_mostly = { +int macvlan_link_register(struct rtnl_link_ops *ops) +{ + /* common fields */ + ops->priv_size = sizeof(struct macvlan_dev); + ops->get_tx_queues = macvlan_get_tx_queues; + ops->setup = macvlan_setup; + ops->validate = macvlan_validate; + ops->maxtype = IFLA_MACVLAN_MAX; + ops->policy = macvlan_policy; + ops->changelink = macvlan_changelink; + ops->get_size = macvlan_get_size; + ops->fill_info = macvlan_fill_info; + + return rtnl_link_register(ops); +}; +EXPORT_SYMBOL_GPL(macvlan_link_register); + +static struct rtnl_link_ops macvlan_link_ops = { .kind = "macvlan", - .priv_size = sizeof(struct macvlan_dev), - .get_tx_queues = macvlan_get_tx_queues, - .setup = macvlan_setup, - .validate = macvlan_validate, .newlink = macvlan_newlink, .dellink = macvlan_dellink, - .maxtype = IFLA_MACVLAN_MAX, - .policy = macvlan_policy, - .changelink = macvlan_changelink, - .get_size = macvlan_get_size, - .fill_info = macvlan_fill_info, }; static int macvlan_device_event(struct notifier_block *unused, @@ -761,7 +746,7 @@ static int macvlan_device_event(struct notifier_block *unused, break; case NETDEV_UNREGISTER: list_for_each_entry_safe(vlan, next, &port->vlans, list) - macvlan_dellink(vlan->dev, NULL); + vlan->dev->rtnl_link_ops->dellink(vlan->dev, NULL); break; } return NOTIFY_DONE; @@ -778,7 +763,7 @@ static int __init macvlan_init_module(void) register_netdevice_notifier(&macvlan_notifier_block); macvlan_handle_frame_hook = macvlan_handle_frame; - err = rtnl_link_register(&macvlan_link_ops); + err = macvlan_link_register(&macvlan_link_ops); if (err < 0) goto err1; return 0; diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index 5f200bac374..9a11544bb0b 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -1,6 +1,76 @@ #ifndef _LINUX_IF_MACVLAN_H #define _LINUX_IF_MACVLAN_H +#include +#include +#include +#include +#include + +struct macvlan_port; +struct macvtap_queue; + +/** + * struct macvlan_rx_stats - MACVLAN percpu rx stats + * @rx_packets: number of received packets + * @rx_bytes: number of received bytes + * @multicast: number of received multicast packets + * @rx_errors: number of errors + */ +struct macvlan_rx_stats { + unsigned long rx_packets; + unsigned long rx_bytes; + unsigned long multicast; + unsigned long rx_errors; +}; + +struct macvlan_dev { + struct net_device *dev; + struct list_head list; + struct hlist_node hlist; + struct macvlan_port *port; + struct net_device *lowerdev; + struct macvlan_rx_stats *rx_stats; + enum macvlan_mode mode; + int (*receive)(struct sk_buff *skb); + int (*forward)(struct net_device *dev, struct sk_buff *skb); +}; + +static inline void macvlan_count_rx(const struct macvlan_dev *vlan, + unsigned int len, bool success, + bool multicast) +{ + struct macvlan_rx_stats *rx_stats; + + rx_stats = per_cpu_ptr(vlan->rx_stats, smp_processor_id()); + if (likely(success)) { + rx_stats->rx_packets++;; + rx_stats->rx_bytes += len; + if (multicast) + rx_stats->multicast++; + } else { + rx_stats->rx_errors++; + } +} + +extern int macvlan_common_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[], + int (*receive)(struct sk_buff *skb), + int (*forward)(struct net_device *dev, + struct sk_buff *skb)); + +extern void macvlan_count_rx(const struct macvlan_dev *vlan, + unsigned int len, bool success, + bool multicast); + +extern void macvlan_dellink(struct net_device *dev, struct list_head *head); + +extern int macvlan_link_register(struct rtnl_link_ops *ops); + +extern netdev_tx_t macvlan_start_xmit(struct sk_buff *skb, + struct net_device *dev); + + extern struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *); #endif /* _LINUX_IF_MACVLAN_H */ -- cgit v1.2.3-70-g09d2 From 20d29d7a916a47bf533b5709437fe735b6b5b79e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 30 Jan 2010 12:24:26 +0000 Subject: net: macvtap driver In order to use macvlan with qemu and other tools that require a tap file descriptor, the macvtap driver adds a small backend with a character device with the same interface as the tun driver, with a minimum set of features. Macvtap interfaces are created in the same way as macvlan interfaces using ip link, but the netif is just used as a handle for configuration and accounting, while the data goes through the chardev. Each macvtap interface has its own character device, simplifying permission management significantly over the generic tun/tap driver. Cc: Patrick McHardy Cc: Stephen Hemminger Cc: David S. Miller" Cc: "Michael S. Tsirkin" Cc: Herbert Xu Cc: Or Gerlitz Cc: netdev@vger.kernel.org Cc: bridge@lists.linux-foundation.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/Kconfig | 12 + drivers/net/Makefile | 1 + drivers/net/macvtap.c | 581 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/if_macvlan.h | 1 + 4 files changed, 595 insertions(+) create mode 100644 drivers/net/macvtap.c (limited to 'include/linux') diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index cb0e534418e..411e2070311 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -90,6 +90,18 @@ config MACVLAN To compile this driver as a module, choose M here: the module will be called macvlan. +config MACVTAP + tristate "MAC-VLAN based tap driver (EXPERIMENTAL)" + depends on MACVLAN + help + This adds a specialized tap character device driver that is based + on the MAC-VLAN network interface, called macvtap. A macvtap device + can be added in the same way as a macvlan device, using 'type + macvlan', and then be accessed through the tap user space interface. + + To compile this driver as a module, choose M here: the module + will be called macvtap. + config EQUALIZER tristate "EQL (serial line load balancing) support" ---help--- diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 0b763cbe9b1..95958032cd3 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -169,6 +169,7 @@ obj-$(CONFIG_XEN_NETDEV_FRONTEND) += xen-netfront.o obj-$(CONFIG_DUMMY) += dummy.o obj-$(CONFIG_IFB) += ifb.o obj-$(CONFIG_MACVLAN) += macvlan.o +obj-$(CONFIG_MACVTAP) += macvtap.o obj-$(CONFIG_DE600) += de600.o obj-$(CONFIG_DE620) += de620.o obj-$(CONFIG_LANCE) += lance.o diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c new file mode 100644 index 00000000000..ad1f6ef8930 --- /dev/null +++ b/drivers/net/macvtap.c @@ -0,0 +1,581 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +/* + * A macvtap queue is the central object of this driver, it connects + * an open character device to a macvlan interface. There can be + * multiple queues on one interface, which map back to queues + * implemented in hardware on the underlying device. + * + * macvtap_proto is used to allocate queues through the sock allocation + * mechanism. + * + * TODO: multiqueue support is currently not implemented, even though + * macvtap is basically prepared for that. We will need to add this + * here as well as in virtio-net and qemu to get line rate on 10gbit + * adapters from a guest. + */ +struct macvtap_queue { + struct sock sk; + struct socket sock; + struct macvlan_dev *vlan; + struct file *file; +}; + +static struct proto macvtap_proto = { + .name = "macvtap", + .owner = THIS_MODULE, + .obj_size = sizeof (struct macvtap_queue), +}; + +/* + * Minor number matches netdev->ifindex, so need a potentially + * large value. This also makes it possible to split the + * tap functionality out again in the future by offering it + * from other drivers besides macvtap. As long as every device + * only has one tap, the interface numbers assure that the + * device nodes are unique. + */ +static unsigned int macvtap_major; +#define MACVTAP_NUM_DEVS 65536 +static struct class *macvtap_class; +static struct cdev macvtap_cdev; + +/* + * RCU usage: + * The macvtap_queue is referenced both from the chardev struct file + * and from the struct macvlan_dev using rcu_read_lock. + * + * We never actually update the contents of a macvtap_queue atomically + * with RCU but it is used for race-free destruction of a queue when + * either the file or the macvlan_dev goes away. Pointers back to + * the dev and the file are implicitly valid as long as the queue + * exists. + * + * The callbacks from macvlan are always done with rcu_read_lock held + * already, while in the file_operations, we get it ourselves. + * + * When destroying a queue, we remove the pointers from the file and + * from the dev and then synchronize_rcu to make sure no thread is + * still using the queue. There may still be references to the struct + * sock inside of the queue from outbound SKBs, but these never + * reference back to the file or the dev. The data structure is freed + * through __sk_free when both our references and any pending SKBs + * are gone. + * + * macvtap_lock is only used to prevent multiple concurrent open() + * calls to assign a new vlan->tap pointer. It could be moved into + * the macvlan_dev itself but is extremely rarely used. + */ +static DEFINE_SPINLOCK(macvtap_lock); + +/* + * Choose the next free queue, for now there is only one + */ +static int macvtap_set_queue(struct net_device *dev, struct file *file, + struct macvtap_queue *q) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + int err = -EBUSY; + + spin_lock(&macvtap_lock); + if (rcu_dereference(vlan->tap)) + goto out; + + err = 0; + q->vlan = vlan; + rcu_assign_pointer(vlan->tap, q); + + q->file = file; + rcu_assign_pointer(file->private_data, q); + +out: + spin_unlock(&macvtap_lock); + return err; +} + +/* + * We must destroy each queue exactly once, when either + * the netdev or the file go away. + * + * Using the spinlock makes sure that we don't get + * to the queue again after destroying it. + * + * synchronize_rcu serializes with the packet flow + * that uses rcu_read_lock. + */ +static void macvtap_del_queue(struct macvtap_queue **qp) +{ + struct macvtap_queue *q; + + spin_lock(&macvtap_lock); + q = rcu_dereference(*qp); + if (!q) { + spin_unlock(&macvtap_lock); + return; + } + + rcu_assign_pointer(q->vlan->tap, NULL); + rcu_assign_pointer(q->file->private_data, NULL); + spin_unlock(&macvtap_lock); + + synchronize_rcu(); + sock_put(&q->sk); +} + +/* + * Since we only support one queue, just dereference the pointer. + */ +static struct macvtap_queue *macvtap_get_queue(struct net_device *dev, + struct sk_buff *skb) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + + return rcu_dereference(vlan->tap); +} + +static void macvtap_del_queues(struct net_device *dev) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + macvtap_del_queue(&vlan->tap); +} + +static inline struct macvtap_queue *macvtap_file_get_queue(struct file *file) +{ + rcu_read_lock_bh(); + return rcu_dereference(file->private_data); +} + +static inline void macvtap_file_put_queue(void) +{ + rcu_read_unlock_bh(); +} + +/* + * Forward happens for data that gets sent from one macvlan + * endpoint to another one in bridge mode. We just take + * the skb and put it into the receive queue. + */ +static int macvtap_forward(struct net_device *dev, struct sk_buff *skb) +{ + struct macvtap_queue *q = macvtap_get_queue(dev, skb); + if (!q) + return -ENOLINK; + + skb_queue_tail(&q->sk.sk_receive_queue, skb); + wake_up(q->sk.sk_sleep); + return 0; +} + +/* + * Receive is for data from the external interface (lowerdev), + * in case of macvtap, we can treat that the same way as + * forward, which macvlan cannot. + */ +static int macvtap_receive(struct sk_buff *skb) +{ + skb_push(skb, ETH_HLEN); + return macvtap_forward(skb->dev, skb); +} + +static int macvtap_newlink(struct net *src_net, + struct net_device *dev, + struct nlattr *tb[], + struct nlattr *data[]) +{ + struct device *classdev; + dev_t devt; + int err; + + err = macvlan_common_newlink(src_net, dev, tb, data, + macvtap_receive, macvtap_forward); + if (err) + goto out; + + devt = MKDEV(MAJOR(macvtap_major), dev->ifindex); + + classdev = device_create(macvtap_class, &dev->dev, devt, + dev, "tap%d", dev->ifindex); + if (IS_ERR(classdev)) { + err = PTR_ERR(classdev); + macvtap_del_queues(dev); + } + +out: + return err; +} + +static void macvtap_dellink(struct net_device *dev, + struct list_head *head) +{ + device_destroy(macvtap_class, + MKDEV(MAJOR(macvtap_major), dev->ifindex)); + + macvtap_del_queues(dev); + macvlan_dellink(dev, head); +} + +static struct rtnl_link_ops macvtap_link_ops __read_mostly = { + .kind = "macvtap", + .newlink = macvtap_newlink, + .dellink = macvtap_dellink, +}; + + +static void macvtap_sock_write_space(struct sock *sk) +{ + if (!sock_writeable(sk) || + !test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags)) + return; + + if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + wake_up_interruptible_sync(sk->sk_sleep); +} + +static int macvtap_open(struct inode *inode, struct file *file) +{ + struct net *net = current->nsproxy->net_ns; + struct net_device *dev = dev_get_by_index(net, iminor(inode)); + struct macvtap_queue *q; + int err; + + err = -ENODEV; + if (!dev) + goto out; + + /* check if this is a macvtap device */ + err = -EINVAL; + if (dev->rtnl_link_ops != &macvtap_link_ops) + goto out; + + err = -ENOMEM; + q = (struct macvtap_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, + &macvtap_proto); + if (!q) + goto out; + + init_waitqueue_head(&q->sock.wait); + q->sock.type = SOCK_RAW; + q->sock.state = SS_CONNECTED; + sock_init_data(&q->sock, &q->sk); + q->sk.sk_allocation = GFP_ATOMIC; /* for now */ + q->sk.sk_write_space = macvtap_sock_write_space; + + err = macvtap_set_queue(dev, file, q); + if (err) + sock_put(&q->sk); + +out: + if (dev) + dev_put(dev); + + return err; +} + +static int macvtap_release(struct inode *inode, struct file *file) +{ + macvtap_del_queue((struct macvtap_queue **)&file->private_data); + return 0; +} + +static unsigned int macvtap_poll(struct file *file, poll_table * wait) +{ + struct macvtap_queue *q = macvtap_file_get_queue(file); + unsigned int mask = POLLERR; + + if (!q) + goto out; + + mask = 0; + poll_wait(file, &q->sock.wait, wait); + + if (!skb_queue_empty(&q->sk.sk_receive_queue)) + mask |= POLLIN | POLLRDNORM; + + if (sock_writeable(&q->sk) || + (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &q->sock.flags) && + sock_writeable(&q->sk))) + mask |= POLLOUT | POLLWRNORM; + +out: + macvtap_file_put_queue(); + return mask; +} + +/* Get packet from user space buffer */ +static ssize_t macvtap_get_user(struct macvtap_queue *q, + const struct iovec *iv, size_t count, + int noblock) +{ + struct sk_buff *skb; + size_t len = count; + int err; + + if (unlikely(len < ETH_HLEN)) + return -EINVAL; + + skb = sock_alloc_send_skb(&q->sk, NET_IP_ALIGN + len, noblock, &err); + + if (!skb) { + macvlan_count_rx(q->vlan, 0, false, false); + return err; + } + + skb_reserve(skb, NET_IP_ALIGN); + skb_put(skb, count); + + if (skb_copy_datagram_from_iovec(skb, 0, iv, 0, len)) { + macvlan_count_rx(q->vlan, 0, false, false); + kfree_skb(skb); + return -EFAULT; + } + + skb_set_network_header(skb, ETH_HLEN); + + macvlan_start_xmit(skb, q->vlan->dev); + + return count; +} + +static ssize_t macvtap_aio_write(struct kiocb *iocb, const struct iovec *iv, + unsigned long count, loff_t pos) +{ + struct file *file = iocb->ki_filp; + ssize_t result = -ENOLINK; + struct macvtap_queue *q = macvtap_file_get_queue(file); + + if (!q) + goto out; + + result = macvtap_get_user(q, iv, iov_length(iv, count), + file->f_flags & O_NONBLOCK); +out: + macvtap_file_put_queue(); + return result; +} + +/* Put packet to the user space buffer */ +static ssize_t macvtap_put_user(struct macvtap_queue *q, + const struct sk_buff *skb, + const struct iovec *iv, int len) +{ + struct macvlan_dev *vlan = q->vlan; + int ret; + + len = min_t(int, skb->len, len); + + ret = skb_copy_datagram_const_iovec(skb, 0, iv, 0, len); + + macvlan_count_rx(vlan, len, ret == 0, 0); + + return ret ? ret : len; +} + +static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv, + unsigned long count, loff_t pos) +{ + struct file *file = iocb->ki_filp; + struct macvtap_queue *q = macvtap_file_get_queue(file); + + DECLARE_WAITQUEUE(wait, current); + struct sk_buff *skb; + ssize_t len, ret = 0; + + if (!q) { + ret = -ENOLINK; + goto out; + } + + len = iov_length(iv, count); + if (len < 0) { + ret = -EINVAL; + goto out; + } + + add_wait_queue(q->sk.sk_sleep, &wait); + while (len) { + current->state = TASK_INTERRUPTIBLE; + + /* Read frames from the queue */ + skb = skb_dequeue(&q->sk.sk_receive_queue); + if (!skb) { + if (file->f_flags & O_NONBLOCK) { + ret = -EAGAIN; + break; + } + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + /* Nothing to read, let's sleep */ + schedule(); + continue; + } + ret = macvtap_put_user(q, skb, iv, len); + kfree_skb(skb); + break; + } + + current->state = TASK_RUNNING; + remove_wait_queue(q->sk.sk_sleep, &wait); + +out: + macvtap_file_put_queue(); + return ret; +} + +/* + * provide compatibility with generic tun/tap interface + */ +static long macvtap_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct macvtap_queue *q; + void __user *argp = (void __user *)arg; + struct ifreq __user *ifr = argp; + unsigned int __user *up = argp; + unsigned int u; + char devname[IFNAMSIZ]; + + switch (cmd) { + case TUNSETIFF: + /* ignore the name, just look at flags */ + if (get_user(u, &ifr->ifr_flags)) + return -EFAULT; + if (u != (IFF_TAP | IFF_NO_PI)) + return -EINVAL; + return 0; + + case TUNGETIFF: + q = macvtap_file_get_queue(file); + if (!q) + return -ENOLINK; + memcpy(devname, q->vlan->dev->name, sizeof(devname)); + macvtap_file_put_queue(); + + if (copy_to_user(&ifr->ifr_name, q->vlan->dev->name, IFNAMSIZ) || + put_user((TUN_TAP_DEV | TUN_NO_PI), &ifr->ifr_flags)) + return -EFAULT; + return 0; + + case TUNGETFEATURES: + if (put_user((IFF_TAP | IFF_NO_PI), up)) + return -EFAULT; + return 0; + + case TUNSETSNDBUF: + if (get_user(u, up)) + return -EFAULT; + + q = macvtap_file_get_queue(file); + q->sk.sk_sndbuf = u; + macvtap_file_put_queue(); + return 0; + + case TUNSETOFFLOAD: + /* let the user check for future flags */ + if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | + TUN_F_TSO_ECN | TUN_F_UFO)) + return -EINVAL; + + /* TODO: add support for these, so far we don't + support any offload */ + if (arg & (TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | + TUN_F_TSO_ECN | TUN_F_UFO)) + return -EINVAL; + + return 0; + + default: + return -EINVAL; + } +} + +#ifdef CONFIG_COMPAT +static long macvtap_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + return macvtap_ioctl(file, cmd, (unsigned long)compat_ptr(arg)); +} +#endif + +static const struct file_operations macvtap_fops = { + .owner = THIS_MODULE, + .open = macvtap_open, + .release = macvtap_release, + .aio_read = macvtap_aio_read, + .aio_write = macvtap_aio_write, + .poll = macvtap_poll, + .llseek = no_llseek, + .unlocked_ioctl = macvtap_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = macvtap_compat_ioctl, +#endif +}; + +static int macvtap_init(void) +{ + int err; + + err = alloc_chrdev_region(&macvtap_major, 0, + MACVTAP_NUM_DEVS, "macvtap"); + if (err) + goto out1; + + cdev_init(&macvtap_cdev, &macvtap_fops); + err = cdev_add(&macvtap_cdev, macvtap_major, MACVTAP_NUM_DEVS); + if (err) + goto out2; + + macvtap_class = class_create(THIS_MODULE, "macvtap"); + if (IS_ERR(macvtap_class)) { + err = PTR_ERR(macvtap_class); + goto out3; + } + + err = macvlan_link_register(&macvtap_link_ops); + if (err) + goto out4; + + return 0; + +out4: + class_unregister(macvtap_class); +out3: + cdev_del(&macvtap_cdev); +out2: + unregister_chrdev_region(macvtap_major, MACVTAP_NUM_DEVS); +out1: + return err; +} +module_init(macvtap_init); + +static void macvtap_exit(void) +{ + rtnl_link_unregister(&macvtap_link_ops); + class_unregister(macvtap_class); + cdev_del(&macvtap_cdev); + unregister_chrdev_region(macvtap_major, MACVTAP_NUM_DEVS); +} +module_exit(macvtap_exit); + +MODULE_ALIAS_RTNL_LINK("macvtap"); +MODULE_AUTHOR("Arnd Bergmann "); +MODULE_LICENSE("GPL"); diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index 9a11544bb0b..51f1512045e 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -34,6 +34,7 @@ struct macvlan_dev { enum macvlan_mode mode; int (*receive)(struct sk_buff *skb); int (*forward)(struct net_device *dev, struct sk_buff *skb); + struct macvtap_queue *tap; }; static inline void macvlan_count_rx(const struct macvlan_dev *vlan, -- cgit v1.2.3-70-g09d2 From 1621e0940294c20e302faf401f41204de7252e22 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 1 Feb 2010 09:44:19 +0000 Subject: net: CONFIG_COMPAT redux Ifdef out struct proto_ops::compat_ioctl struct proto_ops::compat_setsockopt struct proto_ops::compat_getsockopt to make structures smaller on COMPAT=n kernels. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/linux/net.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index 5e8083cacc8..4157b5d42bd 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -174,18 +174,22 @@ struct proto_ops { struct poll_table_struct *wait); int (*ioctl) (struct socket *sock, unsigned int cmd, unsigned long arg); +#ifdef CONFIG_COMPAT int (*compat_ioctl) (struct socket *sock, unsigned int cmd, unsigned long arg); +#endif int (*listen) (struct socket *sock, int len); int (*shutdown) (struct socket *sock, int flags); int (*setsockopt)(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen); int (*getsockopt)(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen); +#ifdef CONFIG_COMPAT int (*compat_setsockopt)(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen); int (*compat_getsockopt)(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen); +#endif int (*sendmsg) (struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t total_len); int (*recvmsg) (struct kiocb *iocb, struct socket *sock, -- cgit v1.2.3-70-g09d2 From 0b7024ac4df5821347141c18e680b7166bc1cb20 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 2 Feb 2010 21:08:26 -0800 Subject: Input: add match() method to input hanlders Get rid of blacklist in input handler structure and instead allow handlers to define their own match() method to perform fine-grained filtering of supported devices. Signed-off-by: Dmitry Torokhov --- drivers/char/keyboard.c | 24 ++++++++++++++++-------- drivers/input/input.c | 13 ++++++------- drivers/input/joydev.c | 32 +++++++++++++++----------------- include/linux/input.h | 6 +++--- 4 files changed, 40 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/keyboard.c b/drivers/char/keyboard.c index cbf64b985ef..ada25bb8941 100644 --- a/drivers/char/keyboard.c +++ b/drivers/char/keyboard.c @@ -1323,6 +1323,21 @@ static void kbd_event(struct input_handle *handle, unsigned int event_type, schedule_console_callback(); } +static bool kbd_match(struct input_handler *handler, struct input_dev *dev) +{ + int i; + + if (test_bit(EV_SND, dev->evbit)) + return true; + + if (test_bit(EV_KEY, dev->evbit)) + for (i = KEY_RESERVED; i < BTN_MISC; i++) + if (test_bit(i, dev->keybit)) + return true; + + return false; +} + /* * When a keyboard (or other input device) is found, the kbd_connect * function is called. The function then looks at the device, and if it @@ -1334,14 +1349,6 @@ static int kbd_connect(struct input_handler *handler, struct input_dev *dev, { struct input_handle *handle; int error; - int i; - - for (i = KEY_RESERVED; i < BTN_MISC; i++) - if (test_bit(i, dev->keybit)) - break; - - if (i == BTN_MISC && !test_bit(EV_SND, dev->evbit)) - return -ENODEV; handle = kzalloc(sizeof(struct input_handle), GFP_KERNEL); if (!handle) @@ -1407,6 +1414,7 @@ MODULE_DEVICE_TABLE(input, kbd_ids); static struct input_handler kbd_handler = { .event = kbd_event, + .match = kbd_match, .connect = kbd_connect, .disconnect = kbd_disconnect, .start = kbd_start, diff --git a/drivers/input/input.c b/drivers/input/input.c index 7080a9d4b84..dae49eba6cc 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -723,12 +723,13 @@ EXPORT_SYMBOL(input_set_keycode); if (i != BITS_TO_LONGS(max)) \ continue; -static const struct input_device_id *input_match_device(const struct input_device_id *id, +static const struct input_device_id *input_match_device(struct input_handler *handler, struct input_dev *dev) { + const struct input_device_id *id; int i; - for (; id->flags || id->driver_info; id++) { + for (id = handler->id_table; id->flags || id->driver_info; id++) { if (id->flags & INPUT_DEVICE_ID_MATCH_BUS) if (id->bustype != dev->id.bustype) @@ -756,7 +757,8 @@ static const struct input_device_id *input_match_device(const struct input_devic MATCH_BIT(ffbit, FF_MAX); MATCH_BIT(swbit, SW_MAX); - return id; + if (!handler->match || handler->match(handler, dev)) + return id; } return NULL; @@ -767,10 +769,7 @@ static int input_attach_handler(struct input_dev *dev, struct input_handler *han const struct input_device_id *id; int error; - if (handler->blacklist && input_match_device(handler->blacklist, dev)) - return -ENODEV; - - id = input_match_device(handler->id_table, dev); + id = input_match_device(handler, dev); if (!id) return -ENODEV; diff --git a/drivers/input/joydev.c b/drivers/input/joydev.c index b1bd6dd3228..63e71f2a7ac 100644 --- a/drivers/input/joydev.c +++ b/drivers/input/joydev.c @@ -775,6 +775,20 @@ static void joydev_cleanup(struct joydev *joydev) input_close_device(handle); } + +static bool joydev_match(struct input_handler *handler, struct input_dev *dev) +{ + /* Avoid touchpads and touchscreens */ + if (test_bit(EV_KEY, dev->evbit) && test_bit(BTN_TOUCH, dev->keybit)) + return false; + + /* Avoid tablets, digitisers and similar devices */ + if (test_bit(EV_KEY, dev->evbit) && test_bit(BTN_DIGI, dev->keybit)) + return false; + + return true; +} + static int joydev_connect(struct input_handler *handler, struct input_dev *dev, const struct input_device_id *id) { @@ -894,22 +908,6 @@ static void joydev_disconnect(struct input_handle *handle) put_device(&joydev->dev); } -static const struct input_device_id joydev_blacklist[] = { - { - .flags = INPUT_DEVICE_ID_MATCH_EVBIT | - INPUT_DEVICE_ID_MATCH_KEYBIT, - .evbit = { BIT_MASK(EV_KEY) }, - .keybit = { [BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH) }, - }, /* Avoid itouchpads and touchscreens */ - { - .flags = INPUT_DEVICE_ID_MATCH_EVBIT | - INPUT_DEVICE_ID_MATCH_KEYBIT, - .evbit = { BIT_MASK(EV_KEY) }, - .keybit = { [BIT_WORD(BTN_DIGI)] = BIT_MASK(BTN_DIGI) }, - }, /* Avoid tablets, digitisers and similar devices */ - { } /* Terminating entry */ -}; - static const struct input_device_id joydev_ids[] = { { .flags = INPUT_DEVICE_ID_MATCH_EVBIT | @@ -936,13 +934,13 @@ MODULE_DEVICE_TABLE(input, joydev_ids); static struct input_handler joydev_handler = { .event = joydev_event, + .match = joydev_match, .connect = joydev_connect, .disconnect = joydev_disconnect, .fops = &joydev_fops, .minor = JOYDEV_MINOR_BASE, .name = "joydev", .id_table = joydev_ids, - .blacklist = joydev_blacklist, }; static int __init joydev_init(void) diff --git a/include/linux/input.h b/include/linux/input.h index 6c9d3d49fa9..8dc5d724c70 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -1200,6 +1200,8 @@ struct input_handle; * it may not sleep * @filter: similar to @event; separates normal event handlers from * "filters". + * @match: called after comparing device's id with handler's id_table + * to perform fine-grained matching between device and handler * @connect: called when attaching a handler to an input device * @disconnect: disconnects a handler from input device * @start: starts handler for given handle. This function is called by @@ -1211,8 +1213,6 @@ struct input_handle; * @name: name of the handler, to be shown in /proc/bus/input/handlers * @id_table: pointer to a table of input_device_ids this driver can * handle - * @blacklist: pointer to a table of input_device_ids this driver should - * ignore even if they match @id_table * @h_list: list of input handles associated with the handler * @node: for placing the driver onto input_handler_list * @@ -1235,6 +1235,7 @@ struct input_handler { void (*event)(struct input_handle *handle, unsigned int type, unsigned int code, int value); bool (*filter)(struct input_handle *handle, unsigned int type, unsigned int code, int value); + bool (*match)(struct input_handler *handler, struct input_dev *dev); int (*connect)(struct input_handler *handler, struct input_dev *dev, const struct input_device_id *id); void (*disconnect)(struct input_handle *handle); void (*start)(struct input_handle *handle); @@ -1244,7 +1245,6 @@ struct input_handler { const char *name; const struct input_device_id *id_table; - const struct input_device_id *blacklist; struct list_head h_list; struct list_head node; -- cgit v1.2.3-70-g09d2 From 2cfa19780d61740f65790c5bae363b759d7c96fa Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 2 Feb 2010 16:49:11 -0500 Subject: ftrace/alternatives: Introducing *_text_reserved functions Introducing *_text_reserved functions for checking the text address range is partially reserved or not. This patch provides checking routines for x86 smp alternatives and dynamic ftrace. Since both functions modify fixed pieces of kernel text, they should reserve and protect those from other dynamic text modifier, like kprobes. This will also be extended when introducing other subsystems which modify fixed pieces of kernel text. Dynamic text modifiers should avoid those. Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Steven Rostedt Cc: przemyslaw@pawelczyk.it Cc: Frederic Weisbecker Cc: Ananth N Mavinakayanahalli Cc: Jim Keniston Cc: Mathieu Desnoyers Cc: Jason Baron LKML-Reference: <20100202214911.4694.16587.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/alternative.h | 5 +++++ arch/x86/kernel/alternative.c | 16 ++++++++++++++++ include/linux/ftrace.h | 6 ++++++ kernel/trace/ftrace.c | 15 +++++++++++++++ 4 files changed, 42 insertions(+) (limited to 'include/linux') diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 69b74a7b877..ac80b7d7001 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -65,12 +65,17 @@ extern void alternatives_smp_module_add(struct module *mod, char *name, void *text, void *text_end); extern void alternatives_smp_module_del(struct module *mod); extern void alternatives_smp_switch(int smp); +extern int alternatives_text_reserved(void *start, void *end); #else static inline void alternatives_smp_module_add(struct module *mod, char *name, void *locks, void *locks_end, void *text, void *text_end) {} static inline void alternatives_smp_module_del(struct module *mod) {} static inline void alternatives_smp_switch(int smp) {} +static inline int alternatives_text_reserved(void *start, void *end) +{ + return 0; +} #endif /* CONFIG_SMP */ /* alternative assembly primitive: */ diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index de7353c0ce9..3c13284ff86 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -390,6 +390,22 @@ void alternatives_smp_switch(int smp) mutex_unlock(&smp_alt); } +/* Return 1 if the address range is reserved for smp-alternatives */ +int alternatives_text_reserved(void *start, void *end) +{ + struct smp_alt_module *mod; + u8 **ptr; + + list_for_each_entry(mod, &smp_alt_modules, next) { + if (mod->text > end || mod->text_end < start) + continue; + for (ptr = mod->locks; ptr < mod->locks_end; ptr++) + if (start <= *ptr && end >= *ptr) + return 1; + } + + return 0; +} #endif #ifdef CONFIG_PARAVIRT diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 0b4f97d24d7..9d127efed43 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -134,6 +134,8 @@ extern void unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops); extern void unregister_ftrace_function_probe_all(char *glob); +extern int ftrace_text_reserved(void *start, void *end); + enum { FTRACE_FL_FREE = (1 << 0), FTRACE_FL_FAILED = (1 << 1), @@ -250,6 +252,10 @@ static inline int unregister_ftrace_command(char *cmd_name) { return -EINVAL; } +static inline int ftrace_text_reserved(void *start, void *end) +{ + return 0; +} #endif /* CONFIG_DYNAMIC_FTRACE */ /* totally disable ftrace - can not re-enable after this */ diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 1e6640f8045..3d90661a5f4 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1025,6 +1025,21 @@ static void ftrace_bug(int failed, unsigned long ip) } +/* Return 1 if the address range is reserved for ftrace */ +int ftrace_text_reserved(void *start, void *end) +{ + struct dyn_ftrace *rec; + struct ftrace_page *pg; + + do_for_each_ftrace_rec(pg, rec) { + if (rec->ip <= (unsigned long)end && + rec->ip + MCOUNT_INSN_SIZE > (unsigned long)start) + return 1; + } while_for_each_ftrace_rec(); + return 0; +} + + static int __ftrace_replace_code(struct dyn_ftrace *rec, int enable) { -- cgit v1.2.3-70-g09d2 From f24bb999d2b9f2950e5cac5b69bffedf73c24ea4 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 2 Feb 2010 16:49:25 -0500 Subject: ftrace: Remove record freezing Remove record freezing. Because kprobes never puts probe on ftrace's mcount call anymore, it doesn't need ftrace to check whether kprobes on it. Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Steven Rostedt Cc: przemyslaw@pawelczyk.it Cc: Frederic Weisbecker LKML-Reference: <20100202214925.4694.73469.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 1 - kernel/trace/ftrace.c | 39 --------------------------------------- 2 files changed, 40 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 9d127efed43..eb054ae9560 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -143,7 +143,6 @@ enum { FTRACE_FL_ENABLED = (1 << 3), FTRACE_FL_NOTRACE = (1 << 4), FTRACE_FL_CONVERTED = (1 << 5), - FTRACE_FL_FROZEN = (1 << 6), }; struct dyn_ftrace { diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 3d90661a5f4..1904797f4a8 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include @@ -898,36 +897,6 @@ static struct dyn_ftrace *ftrace_free_records; } \ } -#ifdef CONFIG_KPROBES - -static int frozen_record_count; - -static inline void freeze_record(struct dyn_ftrace *rec) -{ - if (!(rec->flags & FTRACE_FL_FROZEN)) { - rec->flags |= FTRACE_FL_FROZEN; - frozen_record_count++; - } -} - -static inline void unfreeze_record(struct dyn_ftrace *rec) -{ - if (rec->flags & FTRACE_FL_FROZEN) { - rec->flags &= ~FTRACE_FL_FROZEN; - frozen_record_count--; - } -} - -static inline int record_frozen(struct dyn_ftrace *rec) -{ - return rec->flags & FTRACE_FL_FROZEN; -} -#else -# define freeze_record(rec) ({ 0; }) -# define unfreeze_record(rec) ({ 0; }) -# define record_frozen(rec) ({ 0; }) -#endif /* CONFIG_KPROBES */ - static void ftrace_free_rec(struct dyn_ftrace *rec) { rec->freelist = ftrace_free_records; @@ -1091,14 +1060,6 @@ static void ftrace_replace_code(int enable) !(rec->flags & FTRACE_FL_CONVERTED)) continue; - /* ignore updates to this record's mcount site */ - if (get_kprobe((void *)rec->ip)) { - freeze_record(rec); - continue; - } else { - unfreeze_record(rec); - } - failed = __ftrace_replace_code(rec, enable); if (failed) { rec->flags |= FTRACE_FL_FAILED; -- cgit v1.2.3-70-g09d2 From 9e3af04f8787315f63f55b191bb9a06741dbf183 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 4 Feb 2010 00:48:00 -0800 Subject: Input: gpio-keys - add support for disabling gpios through sysfs Now gpio-keys input driver exports 4 new attributes to userland through sysfs: /sys/devices/platform/gpio-keys/keys [ro] /sys/devices/platform/gpio-keys/switches [ro] /sys/devices/platform/gpio-keys/disabled_keys [rw] /sys/devices/platform/gpio-keys/disables_switches [rw] With these attributes, userland program can read which keys and switches can be disabled and then disable/enable them as needed. Keys and switches are exported as stringified bitmap of codes (keycodes or switch codes). For example keys 15, 89, 100, 101, 102 are exported as: '15,89,100-102'. Description of the attributes: keys - bitmap of keys which can be disabled switches - bitmap of switches which can be disabled disabled_keys - bitmap of currently disabled keys (bit 1 means disabled, 0 enabled) disabled_switches - bitmap of currently disabled switches (bit 1 means disabled, 0 enabled) Signed-off-by: Mika Westerberg Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/gpio_keys.c | 318 +++++++++++++++++++++++++++++++++++-- include/linux/gpio_keys.h | 1 + 2 files changed, 308 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c index 1aff3b76eff..2b708aa8555 100644 --- a/drivers/input/keyboard/gpio_keys.c +++ b/drivers/input/keyboard/gpio_keys.c @@ -30,13 +30,289 @@ struct gpio_button_data { struct input_dev *input; struct timer_list timer; struct work_struct work; + bool disabled; }; struct gpio_keys_drvdata { struct input_dev *input; + struct mutex disable_lock; + unsigned int n_buttons; struct gpio_button_data data[0]; }; +/* + * SYSFS interface for enabling/disabling keys and switches: + * + * There are 4 attributes under /sys/devices/platform/gpio-keys/ + * keys [ro] - bitmap of keys (EV_KEY) which can be + * disabled + * switches [ro] - bitmap of switches (EV_SW) which can be + * disabled + * disabled_keys [rw] - bitmap of keys currently disabled + * disabled_switches [rw] - bitmap of switches currently disabled + * + * Userland can change these values and hence disable event generation + * for each key (or switch). Disabling a key means its interrupt line + * is disabled. + * + * For example, if we have following switches set up as gpio-keys: + * SW_DOCK = 5 + * SW_CAMERA_LENS_COVER = 9 + * SW_KEYPAD_SLIDE = 10 + * SW_FRONT_PROXIMITY = 11 + * This is read from switches: + * 11-9,5 + * Next we want to disable proximity (11) and dock (5), we write: + * 11,5 + * to file disabled_switches. Now proximity and dock IRQs are disabled. + * This can be verified by reading the file disabled_switches: + * 11,5 + * If we now want to enable proximity (11) switch we write: + * 5 + * to disabled_switches. + * + * We can disable only those keys which don't allow sharing the irq. + */ + +/** + * get_n_events_by_type() - returns maximum number of events per @type + * @type: type of button (%EV_KEY, %EV_SW) + * + * Return value of this function can be used to allocate bitmap + * large enough to hold all bits for given type. + */ +static inline int get_n_events_by_type(int type) +{ + BUG_ON(type != EV_SW && type != EV_KEY); + + return (type == EV_KEY) ? KEY_CNT : SW_CNT; +} + +/** + * gpio_keys_disable_button() - disables given GPIO button + * @bdata: button data for button to be disabled + * + * Disables button pointed by @bdata. This is done by masking + * IRQ line. After this function is called, button won't generate + * input events anymore. Note that one can only disable buttons + * that don't share IRQs. + * + * Make sure that @bdata->disable_lock is locked when entering + * this function to avoid races when concurrent threads are + * disabling buttons at the same time. + */ +static void gpio_keys_disable_button(struct gpio_button_data *bdata) +{ + if (!bdata->disabled) { + /* + * Disable IRQ and possible debouncing timer. + */ + disable_irq(gpio_to_irq(bdata->button->gpio)); + if (bdata->button->debounce_interval) + del_timer_sync(&bdata->timer); + + bdata->disabled = true; + } +} + +/** + * gpio_keys_enable_button() - enables given GPIO button + * @bdata: button data for button to be disabled + * + * Enables given button pointed by @bdata. + * + * Make sure that @bdata->disable_lock is locked when entering + * this function to avoid races with concurrent threads trying + * to enable the same button at the same time. + */ +static void gpio_keys_enable_button(struct gpio_button_data *bdata) +{ + if (bdata->disabled) { + enable_irq(gpio_to_irq(bdata->button->gpio)); + bdata->disabled = false; + } +} + +/** + * gpio_keys_attr_show_helper() - fill in stringified bitmap of buttons + * @ddata: pointer to drvdata + * @buf: buffer where stringified bitmap is written + * @type: button type (%EV_KEY, %EV_SW) + * @only_disabled: does caller want only those buttons that are + * currently disabled or all buttons that can be + * disabled + * + * This function writes buttons that can be disabled to @buf. If + * @only_disabled is true, then @buf contains only those buttons + * that are currently disabled. Returns 0 on success or negative + * errno on failure. + */ +static ssize_t gpio_keys_attr_show_helper(struct gpio_keys_drvdata *ddata, + char *buf, unsigned int type, + bool only_disabled) +{ + int n_events = get_n_events_by_type(type); + unsigned long *bits; + ssize_t ret; + int i; + + bits = kcalloc(BITS_TO_LONGS(n_events), sizeof(*bits), GFP_KERNEL); + if (!bits) + return -ENOMEM; + + for (i = 0; i < ddata->n_buttons; i++) { + struct gpio_button_data *bdata = &ddata->data[i]; + + if (bdata->button->type != type) + continue; + + if (only_disabled && !bdata->disabled) + continue; + + __set_bit(bdata->button->code, bits); + } + + ret = bitmap_scnlistprintf(buf, PAGE_SIZE - 2, bits, n_events); + buf[ret++] = '\n'; + buf[ret] = '\0'; + + kfree(bits); + + return ret; +} + +/** + * gpio_keys_attr_store_helper() - enable/disable buttons based on given bitmap + * @ddata: pointer to drvdata + * @buf: buffer from userspace that contains stringified bitmap + * @type: button type (%EV_KEY, %EV_SW) + * + * This function parses stringified bitmap from @buf and disables/enables + * GPIO buttons accordinly. Returns 0 on success and negative error + * on failure. + */ +static ssize_t gpio_keys_attr_store_helper(struct gpio_keys_drvdata *ddata, + const char *buf, unsigned int type) +{ + int n_events = get_n_events_by_type(type); + unsigned long *bits; + ssize_t error; + int i; + + bits = kcalloc(BITS_TO_LONGS(n_events), sizeof(*bits), GFP_KERNEL); + if (!bits) + return -ENOMEM; + + error = bitmap_parselist(buf, bits, n_events); + if (error) + goto out; + + /* First validate */ + for (i = 0; i < ddata->n_buttons; i++) { + struct gpio_button_data *bdata = &ddata->data[i]; + + if (bdata->button->type != type) + continue; + + if (test_bit(bdata->button->code, bits) && + !bdata->button->can_disable) { + error = -EINVAL; + goto out; + } + } + + mutex_lock(&ddata->disable_lock); + + for (i = 0; i < ddata->n_buttons; i++) { + struct gpio_button_data *bdata = &ddata->data[i]; + + if (bdata->button->type != type) + continue; + + if (test_bit(bdata->button->code, bits)) + gpio_keys_disable_button(bdata); + else + gpio_keys_enable_button(bdata); + } + + mutex_unlock(&ddata->disable_lock); + +out: + kfree(bits); + return error; +} + +#define ATTR_SHOW_FN(name, type, only_disabled) \ +static ssize_t gpio_keys_show_##name(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + struct platform_device *pdev = to_platform_device(dev); \ + struct gpio_keys_drvdata *ddata = platform_get_drvdata(pdev); \ + \ + return gpio_keys_attr_show_helper(ddata, buf, \ + type, only_disabled); \ +} + +ATTR_SHOW_FN(keys, EV_KEY, false); +ATTR_SHOW_FN(switches, EV_SW, false); +ATTR_SHOW_FN(disabled_keys, EV_KEY, true); +ATTR_SHOW_FN(disabled_switches, EV_SW, true); + +/* + * ATTRIBUTES: + * + * /sys/devices/platform/gpio-keys/keys [ro] + * /sys/devices/platform/gpio-keys/switches [ro] + */ +static DEVICE_ATTR(keys, S_IRUGO, gpio_keys_show_keys, NULL); +static DEVICE_ATTR(switches, S_IRUGO, gpio_keys_show_switches, NULL); + +#define ATTR_STORE_FN(name, type) \ +static ssize_t gpio_keys_store_##name(struct device *dev, \ + struct device_attribute *attr, \ + const char *buf, \ + size_t count) \ +{ \ + struct platform_device *pdev = to_platform_device(dev); \ + struct gpio_keys_drvdata *ddata = platform_get_drvdata(pdev); \ + ssize_t error; \ + \ + error = gpio_keys_attr_store_helper(ddata, buf, type); \ + if (error) \ + return error; \ + \ + return count; \ +} + +ATTR_STORE_FN(disabled_keys, EV_KEY); +ATTR_STORE_FN(disabled_switches, EV_SW); + +/* + * ATTRIBUTES: + * + * /sys/devices/platform/gpio-keys/disabled_keys [rw] + * /sys/devices/platform/gpio-keys/disables_switches [rw] + */ +static DEVICE_ATTR(disabled_keys, S_IWUSR | S_IRUGO, + gpio_keys_show_disabled_keys, + gpio_keys_store_disabled_keys); +static DEVICE_ATTR(disabled_switches, S_IWUSR | S_IRUGO, + gpio_keys_show_disabled_switches, + gpio_keys_store_disabled_switches); + +static struct attribute *gpio_keys_attrs[] = { + &dev_attr_keys.attr, + &dev_attr_switches.attr, + &dev_attr_disabled_keys.attr, + &dev_attr_disabled_switches.attr, + NULL, +}; + +static struct attribute_group gpio_keys_attr_group = { + .attrs = gpio_keys_attrs, +}; + static void gpio_keys_report_event(struct gpio_button_data *bdata) { struct gpio_keys_button *button = bdata->button; @@ -79,11 +355,13 @@ static irqreturn_t gpio_keys_isr(int irq, void *dev_id) return IRQ_HANDLED; } -static int __devinit gpio_keys_setup_key(struct device *dev, +static int __devinit gpio_keys_setup_key(struct platform_device *pdev, struct gpio_button_data *bdata, struct gpio_keys_button *button) { char *desc = button->desc ? button->desc : "gpio_keys"; + struct device *dev = &pdev->dev; + unsigned long irqflags; int irq, error; setup_timer(&bdata->timer, gpio_keys_timer, (unsigned long)bdata); @@ -112,10 +390,15 @@ static int __devinit gpio_keys_setup_key(struct device *dev, goto fail3; } - error = request_irq(irq, gpio_keys_isr, - IRQF_SHARED | - IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING, - desc, bdata); + irqflags = IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING; + /* + * If platform has specified that the button can be disabled, + * we don't want it to share the interrupt line. + */ + if (!button->can_disable) + irqflags |= IRQF_SHARED; + + error = request_irq(irq, gpio_keys_isr, irqflags, desc, bdata); if (error) { dev_err(dev, "Unable to claim irq %d; error %d\n", irq, error); @@ -149,6 +432,10 @@ static int __devinit gpio_keys_probe(struct platform_device *pdev) goto fail1; } + ddata->input = input; + ddata->n_buttons = pdata->nbuttons; + mutex_init(&ddata->disable_lock); + platform_set_drvdata(pdev, ddata); input->name = pdev->name; @@ -164,8 +451,6 @@ static int __devinit gpio_keys_probe(struct platform_device *pdev) if (pdata->rep) __set_bit(EV_REP, input->evbit); - ddata->input = input; - for (i = 0; i < pdata->nbuttons; i++) { struct gpio_keys_button *button = &pdata->buttons[i]; struct gpio_button_data *bdata = &ddata->data[i]; @@ -174,7 +459,7 @@ static int __devinit gpio_keys_probe(struct platform_device *pdev) bdata->input = input; bdata->button = button; - error = gpio_keys_setup_key(dev, bdata, button); + error = gpio_keys_setup_key(pdev, bdata, button); if (error) goto fail2; @@ -184,13 +469,20 @@ static int __devinit gpio_keys_probe(struct platform_device *pdev) input_set_capability(input, type, button->code); } - error = input_register_device(input); + error = sysfs_create_group(&pdev->dev.kobj, &gpio_keys_attr_group); if (error) { - dev_err(dev, "Unable to register input device, " - "error: %d\n", error); + dev_err(dev, "Unable to export keys/switches, error: %d\n", + error); goto fail2; } + error = input_register_device(input); + if (error) { + dev_err(dev, "Unable to register input device, error: %d\n", + error); + goto fail3; + } + /* get current state of buttons */ for (i = 0; i < pdata->nbuttons; i++) gpio_keys_report_event(&ddata->data[i]); @@ -200,6 +492,8 @@ static int __devinit gpio_keys_probe(struct platform_device *pdev) return 0; + fail3: + sysfs_remove_group(&pdev->dev.kobj, &gpio_keys_attr_group); fail2: while (--i >= 0) { free_irq(gpio_to_irq(pdata->buttons[i].gpio), &ddata->data[i]); @@ -224,6 +518,8 @@ static int __devexit gpio_keys_remove(struct platform_device *pdev) struct input_dev *input = ddata->input; int i; + sysfs_remove_group(&pdev->dev.kobj, &gpio_keys_attr_group); + device_init_wakeup(&pdev->dev, 0); for (i = 0; i < pdata->nbuttons; i++) { diff --git a/include/linux/gpio_keys.h b/include/linux/gpio_keys.h index 1289fa7623c..cd0b3f30f48 100644 --- a/include/linux/gpio_keys.h +++ b/include/linux/gpio_keys.h @@ -10,6 +10,7 @@ struct gpio_keys_button { int type; /* input event type (EV_KEY, EV_SW) */ int wakeup; /* configure the button as a wake-up source */ int debounce_interval; /* debounce ticks interval in msecs */ + bool can_disable; }; struct gpio_keys_platform_data { -- cgit v1.2.3-70-g09d2 From fce877e3a429940a986e085a41e8b57f2d922e36 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 29 Jan 2010 13:25:12 +0100 Subject: bitops: Ensure the compile time HWEIGHT is only used for such Avoid accidental misuse by failing to compile things Suggested-by: Andrew Morton Signed-off-by: Peter Zijlstra Cc: Linus Torvalds LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 10 +++++++--- include/linux/bitops.h | 33 ++++++++++++++++++++++----------- 2 files changed, 29 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 5b91992b6b2..96cfc1a4fe9 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -93,13 +93,16 @@ struct cpu_hw_events { struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ }; -#define EVENT_CONSTRAINT(c, n, m) { \ +#define __EVENT_CONSTRAINT(c, n, m, w) {\ { .idxmsk64[0] = (n) }, \ .code = (c), \ .cmask = (m), \ - .weight = HWEIGHT64((u64)(n)), \ + .weight = (w), \ } +#define EVENT_CONSTRAINT(c, n, m) \ + __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) + #define INTEL_EVENT_CONSTRAINT(c, n) \ EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK) @@ -2622,7 +2625,8 @@ void __init init_hw_perf_events(void) register_die_notifier(&perf_event_nmi_notifier); unconstrained = (struct event_constraint) - EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1, 0); + __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1, + 0, x86_pmu.num_events); pr_info("... version: %d\n", x86_pmu.version); pr_info("... bit width: %d\n", x86_pmu.event_bits); diff --git a/include/linux/bitops.h b/include/linux/bitops.h index ba0fd1eb4af..25b8b2f33ae 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -45,19 +45,30 @@ static inline unsigned long hweight_long(unsigned long w) return sizeof(w) == 4 ? hweight32(w) : hweight64(w); } -#define HWEIGHT8(w) \ - ( (!!((w) & (1ULL << 0))) + \ - (!!((w) & (1ULL << 1))) + \ - (!!((w) & (1ULL << 2))) + \ - (!!((w) & (1ULL << 3))) + \ - (!!((w) & (1ULL << 4))) + \ - (!!((w) & (1ULL << 5))) + \ - (!!((w) & (1ULL << 6))) + \ +/* + * Clearly slow versions of the hweightN() functions, their benefit is + * of course compile time evaluation of constant arguments. + */ +#define HWEIGHT8(w) \ + ( BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + \ + (!!((w) & (1ULL << 0))) + \ + (!!((w) & (1ULL << 1))) + \ + (!!((w) & (1ULL << 2))) + \ + (!!((w) & (1ULL << 3))) + \ + (!!((w) & (1ULL << 4))) + \ + (!!((w) & (1ULL << 5))) + \ + (!!((w) & (1ULL << 6))) + \ (!!((w) & (1ULL << 7))) ) -#define HWEIGHT16(w) (HWEIGHT8(w) + HWEIGHT8(w >> 8)) -#define HWEIGHT32(w) (HWEIGHT16(w) + HWEIGHT16(w >> 16)) -#define HWEIGHT64(w) (HWEIGHT32(w) + HWEIGHT32(w >> 32)) +#define HWEIGHT16(w) (HWEIGHT8(w) + HWEIGHT8((w) >> 8)) +#define HWEIGHT32(w) (HWEIGHT16(w) + HWEIGHT16((w) >> 16)) +#define HWEIGHT64(w) (HWEIGHT32(w) + HWEIGHT32((w) >> 32)) + +/* + * Type invariant version that simply casts things to the + * largest type. + */ +#define HWEIGHT(w) HWEIGHT64((u64)(w)) /** * rol32 - rotate a 32-bit value left -- cgit v1.2.3-70-g09d2 From 447a194b393f32699607fd99617a40abd6a95114 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Mon, 1 Feb 2010 14:50:01 +0200 Subject: perf_events, x86: Fix bug in hw_perf_enable() We cannot assume that because hwc->idx == assign[i], we can avoid reprogramming the counter in hw_perf_enable(). The event may have been scheduled out and another event may have been programmed into this counter. Thus, we need a more robust way of verifying if the counter still contains config/data related to an event. This patch adds a generation number to each counter on each cpu. Using this mechanism we can verify reliabilty whether the content of a counter corresponds to an event. Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <4b66dc67.0b38560a.1635.ffffae18@mx.google.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 34 ++++++++++++++++++++++++++++------ include/linux/perf_event.h | 2 ++ 2 files changed, 30 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 96cfc1a4fe9..a920f173a22 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -90,6 +90,7 @@ struct cpu_hw_events { int n_events; int n_added; int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ + u64 tags[X86_PMC_IDX_MAX]; struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ }; @@ -1142,6 +1143,8 @@ static int __hw_perf_event_init(struct perf_event *event) hwc->config = ARCH_PERFMON_EVENTSEL_INT; hwc->idx = -1; + hwc->last_cpu = -1; + hwc->last_tag = ~0ULL; /* * Count user and OS events unless requested not to. @@ -1457,11 +1460,14 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, return n; } - static inline void x86_assign_hw_event(struct perf_event *event, - struct hw_perf_event *hwc, int idx) + struct cpu_hw_events *cpuc, int i) { - hwc->idx = idx; + struct hw_perf_event *hwc = &event->hw; + + hwc->idx = cpuc->assign[i]; + hwc->last_cpu = smp_processor_id(); + hwc->last_tag = ++cpuc->tags[i]; if (hwc->idx == X86_PMC_IDX_FIXED_BTS) { hwc->config_base = 0; @@ -1480,6 +1486,15 @@ static inline void x86_assign_hw_event(struct perf_event *event, } } +static inline int match_prev_assignment(struct hw_perf_event *hwc, + struct cpu_hw_events *cpuc, + int i) +{ + return hwc->idx == cpuc->assign[i] && + hwc->last_cpu == smp_processor_id() && + hwc->last_tag == cpuc->tags[i]; +} + static void __x86_pmu_disable(struct perf_event *event, struct cpu_hw_events *cpuc); void hw_perf_enable(void) @@ -1508,7 +1523,14 @@ void hw_perf_enable(void) event = cpuc->event_list[i]; hwc = &event->hw; - if (hwc->idx == -1 || hwc->idx == cpuc->assign[i]) + /* + * we can avoid reprogramming counter if: + * - assigned same counter as last time + * - running on same CPU as last time + * - no other event has used the counter since + */ + if (hwc->idx == -1 || + match_prev_assignment(hwc, cpuc, i)) continue; __x86_pmu_disable(event, cpuc); @@ -1522,12 +1544,12 @@ void hw_perf_enable(void) hwc = &event->hw; if (hwc->idx == -1) { - x86_assign_hw_event(event, hwc, cpuc->assign[i]); + x86_assign_hw_event(event, cpuc, i); x86_perf_event_set_period(event, hwc, hwc->idx); } /* * need to mark as active because x86_pmu_disable() - * clear active_mask and eventsp[] yet it preserves + * clear active_mask and events[] yet it preserves * idx */ set_bit(hwc->idx, cpuc->active_mask); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 556b0f4a668..071a7db5254 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -478,9 +478,11 @@ struct hw_perf_event { union { struct { /* hardware */ u64 config; + u64 last_tag; unsigned long config_base; unsigned long event_base; int idx; + int last_cpu; }; struct { /* software */ s64 remaining; -- cgit v1.2.3-70-g09d2 From 6683ece36e3531fc8c75f69e7165c5f20930be88 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 4 Feb 2010 10:22:25 -0800 Subject: net: use helpers to access mc list V2 This patch introduces the similar helpers as those already done for uc list. However multicast lists are no list_head lists but "mademanually". The three macros added by this patch will make the transition of mc_list to list_head smooth in two steps: 1) convert all drivers to use these macros (with the original iterator of type "struct dev_mc_list") 2) once all drivers are converted, convert list type and iterators to "struct netdev_hw_addr" in one patch. >From now on, drivers can (and should) use "netdev_for_each_mc_addr" to iterate over the addresses with iterator of type "struct netdev_hw_addr". Also macros "netdev_mc_count" and "netdev_mc_empty" to read list's length. This is the state which should be reached in all drivers. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 622ba5aa93c..e535700a3b7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -268,6 +268,12 @@ struct netdev_hw_addr_list { #define netdev_for_each_uc_addr(ha, dev) \ list_for_each_entry(ha, &dev->uc.list, list) +#define netdev_mc_count(dev) ((dev)->mc_count) +#define netdev_mc_empty(dev) (netdev_mc_count(dev) == 0) + +#define netdev_for_each_mc_addr(mclist, dev) \ + for (mclist = dev->mc_list; mclist; mclist = mclist->next) + struct hh_cache { struct hh_cache *hh_next; /* Next entry */ atomic_t hh_refcnt; /* number of users */ -- cgit v1.2.3-70-g09d2 From f8f76db1db369f3a130ac3fd33e2eee5f1610d9c Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 4 Feb 2010 10:23:02 -0800 Subject: libphy: add phy_find_first function Many drivers do this in them manually. Now they can use this function. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 16 ++++++++++++++++ include/linux/phy.h | 1 + 2 files changed, 17 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index adbc0fded13..db1794546c5 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -276,6 +276,22 @@ int phy_device_register(struct phy_device *phydev) } EXPORT_SYMBOL(phy_device_register); +/** + * phy_find_first - finds the first PHY device on the bus + * @bus: the target MII bus + */ +struct phy_device *phy_find_first(struct mii_bus *bus) +{ + int addr; + + for (addr = 0; addr < PHY_MAX_ADDR; addr++) { + if (bus->phy_map[addr]) + return bus->phy_map[addr]; + } + return NULL; +} +EXPORT_SYMBOL(phy_find_first); + /** * phy_prepare_link - prepares the PHY layer to monitor link status * @phydev: target phy_device struct diff --git a/include/linux/phy.h b/include/linux/phy.h index 6a7eb402165..14d7fdf6a90 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -452,6 +452,7 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, u32 flags, phy_interface_t interface); struct phy_device * phy_attach(struct net_device *dev, const char *bus_id, u32 flags, phy_interface_t interface); +struct phy_device *phy_find_first(struct mii_bus *bus); int phy_connect_direct(struct net_device *dev, struct phy_device *phydev, void (*handler)(struct net_device *), u32 flags, phy_interface_t interface); -- cgit v1.2.3-70-g09d2 From 8ee2bf9ab792d0c02b13ca3acbd036debb7745d9 Mon Sep 17 00:00:00 2001 From: Sriramakrishnan Date: Thu, 19 Nov 2009 15:58:25 +0530 Subject: TI Davinci EMAC : Re-use driver for other platforms. The davinci EMAC peripheral is also available on other TI platforms -notably TI AM3517 SoC. This patch modifies the config option and the platform structure header files so that the driver can be reused on non-davinci platforms as well. Signed-off-by: Sriramakrishnan Acked-by: Chaithrika U S Acked-by: David S. Miller Signed-off-by: Kevin Hilman --- arch/arm/mach-davinci/common.c | 2 +- arch/arm/mach-davinci/include/mach/da8xx.h | 2 +- arch/arm/mach-davinci/include/mach/dm365.h | 2 +- arch/arm/mach-davinci/include/mach/dm644x.h | 2 +- arch/arm/mach-davinci/include/mach/dm646x.h | 2 +- arch/arm/mach-davinci/include/mach/emac.h | 36 ----------------------------- drivers/net/Kconfig | 2 +- drivers/net/davinci_emac.c | 3 +-- include/linux/davinci_emac.h | 36 +++++++++++++++++++++++++++++ 9 files changed, 43 insertions(+), 44 deletions(-) delete mode 100644 arch/arm/mach-davinci/include/mach/emac.h create mode 100644 include/linux/davinci_emac.h (limited to 'include/linux') diff --git a/arch/arm/mach-davinci/common.c b/arch/arm/mach-davinci/common.c index c2de94cde56..94f27cbcd55 100644 --- a/arch/arm/mach-davinci/common.c +++ b/arch/arm/mach-davinci/common.c @@ -11,13 +11,13 @@ #include #include #include +#include #include #include #include #include -#include #include "clock.h" diff --git a/arch/arm/mach-davinci/include/mach/da8xx.h b/arch/arm/mach-davinci/include/mach/da8xx.h index d43a4b6b6d7..d9a7f11894c 100644 --- a/arch/arm/mach-davinci/include/mach/da8xx.h +++ b/arch/arm/mach-davinci/include/mach/da8xx.h @@ -13,10 +13,10 @@ #include