From d0959024d8fb6555ba8bfdc6624cc7b7c2e675fd Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Wed, 20 Oct 2010 15:57:30 -0700 Subject: timer_list: Remove alignment padding on 64 bit when CONFIG_TIMER_STATS Reorder struct timer_list to remove 8 bytes of alignment padding on 64 bit builds when CONFIG_TIMER_STATS is selected. timer_list is widely used across the kernel so many structures will benefit and shrink in size. For example, with my config on x86_64 per_cpu_dm_data shrinks from 136 to 128 bytes and ahci_port_priv shrinks from 1032 to 968 bytes. Signed-off-by: Richard Kennedy Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- include/linux/timer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index 38cf093ef62..f3dccdb44f9 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -24,9 +24,9 @@ struct timer_list { int slack; #ifdef CONFIG_TIMER_STATS + int start_pid; void *start_site; char start_comm[16]; - int start_pid; #endif #ifdef CONFIG_LOCKDEP struct lockdep_map lockdep_map; -- cgit v1.2.3-70-g09d2 From aaabe31c25a439b92cc281b14ca18b85bae7e7a6 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Wed, 20 Oct 2010 15:57:30 -0700 Subject: timer: Initialize the field slack of timer_list TIMER_INITIALIZER() should initialize the field slack of timer_list as __init_timer() does. Signed-off-by: Changli Gao Cc: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- include/linux/timer.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index f3dccdb44f9..1794674c1a5 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -54,6 +54,7 @@ extern struct tvec_base boot_tvec_bases; .expires = (_expires), \ .data = (_data), \ .base = &boot_tvec_bases, \ + .slack = -1, \ __TIMER_LOCKDEP_MAP_INITIALIZER( \ __FILE__ ":" __stringify(__LINE__)) \ } -- cgit v1.2.3-70-g09d2 From dd6414b50fa2b1cd247a8aa8f8bd42414b7453e1 Mon Sep 17 00:00:00 2001 From: Phil Carmody Date: Wed, 20 Oct 2010 15:57:33 -0700 Subject: timer: Permit statically-declared work with deferrable timers Currently, you have to just define a delayed_work uninitialised, and then initialise it before first use. That's a tad clumsy. At risk of playing mind-games with the compiler, fooling it into doing pointer arithmetic with compile-time-constants, this lets clients properly initialise delayed work with deferrable timers statically. This patch was inspired by the issues which lead Artem Bityutskiy to commit 8eab945c5616fc984 ("sunrpc: make the cache cleaner workqueue deferrable"). Signed-off-by: Phil Carmody Acked-by: Artem Bityutskiy Cc: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- include/linux/timer.h | 25 +++++++++++++++++++++++++ include/linux/workqueue.h | 8 ++++++++ kernel/timer.c | 15 +-------------- 3 files changed, 34 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index 1794674c1a5..cbfb7a355d3 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -48,6 +48,18 @@ extern struct tvec_base boot_tvec_bases; #define __TIMER_LOCKDEP_MAP_INITIALIZER(_kn) #endif +/* + * Note that all tvec_bases are 2 byte aligned and lower bit of + * base in timer_list is guaranteed to be zero. Use the LSB to + * indicate whether the timer is deferrable. + * + * A deferrable timer will work normally when the system is busy, but + * will not cause a CPU to come out of idle just to service it; instead, + * the timer will be serviced when the CPU eventually wakes up with a + * subsequent non-deferrable timer. + */ +#define TBASE_DEFERRABLE_FLAG (0x1) + #define TIMER_INITIALIZER(_function, _expires, _data) { \ .entry = { .prev = TIMER_ENTRY_STATIC }, \ .function = (_function), \ @@ -59,6 +71,19 @@ extern struct tvec_base boot_tvec_bases; __FILE__ ":" __stringify(__LINE__)) \ } +#define TBASE_MAKE_DEFERRED(ptr) ((struct tvec_base *) \ + ((unsigned char *)(ptr) + TBASE_DEFERRABLE_FLAG)) + +#define TIMER_DEFERRED_INITIALIZER(_function, _expires, _data) {\ + .entry = { .prev = TIMER_ENTRY_STATIC }, \ + .function = (_function), \ + .expires = (_expires), \ + .data = (_data), \ + .base = TBASE_MAKE_DEFERRED(&boot_tvec_bases), \ + __TIMER_LOCKDEP_MAP_INITIALIZER( \ + __FILE__ ":" __stringify(__LINE__)) \ + } + #define DEFINE_TIMER(_name, _function, _expires, _data) \ struct timer_list _name = \ TIMER_INITIALIZER(_function, _expires, _data) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index f11100f9648..88238c15ec3 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -127,12 +127,20 @@ struct execute_work { .timer = TIMER_INITIALIZER(NULL, 0, 0), \ } +#define __DEFERRED_WORK_INITIALIZER(n, f) { \ + .work = __WORK_INITIALIZER((n).work, (f)), \ + .timer = TIMER_DEFERRED_INITIALIZER(NULL, 0, 0), \ + } + #define DECLARE_WORK(n, f) \ struct work_struct n = __WORK_INITIALIZER(n, f) #define DECLARE_DELAYED_WORK(n, f) \ struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f) +#define DECLARE_DEFERRED_WORK(n, f) \ + struct delayed_work n = __DEFERRED_WORK_INITIALIZER(n, f) + /* * initialize a work item's function pointer */ diff --git a/kernel/timer.c b/kernel/timer.c index 97bf05baade..72853b256ff 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -88,18 +88,6 @@ struct tvec_base boot_tvec_bases; EXPORT_SYMBOL(boot_tvec_bases); static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases; -/* - * Note that all tvec_bases are 2 byte aligned and lower bit of - * base in timer_list is guaranteed to be zero. Use the LSB to - * indicate whether the timer is deferrable. - * - * A deferrable timer will work normally when the system is busy, but - * will not cause a CPU to come out of idle just to service it; instead, - * the timer will be serviced when the CPU eventually wakes up with a - * subsequent non-deferrable timer. - */ -#define TBASE_DEFERRABLE_FLAG (0x1) - /* Functions below help us manage 'deferrable' flag */ static inline unsigned int tbase_get_deferrable(struct tvec_base *base) { @@ -113,8 +101,7 @@ static inline struct tvec_base *tbase_get_base(struct tvec_base *base) static inline void timer_set_deferrable(struct timer_list *timer) { - timer->base = ((struct tvec_base *)((unsigned long)(timer->base) | - TBASE_DEFERRABLE_FLAG)); + timer->base = TBASE_MAKE_DEFERRED(timer->base); } static inline void -- cgit v1.2.3-70-g09d2 From 6f1bc451e6a79470b122a37ee1fc6bbca450f444 Mon Sep 17 00:00:00 2001 From: Yong Zhang Date: Wed, 20 Oct 2010 15:57:31 -0700 Subject: timer: Make try_to_del_timer_sync() the same on SMP and UP On UP try_to_del_timer_sync() is mapped to del_timer() which does not take the running timer callback into account, so it has different semantics. Remove the SMP dependency of try_to_del_timer_sync() by using base->running_timer in the UP case as well. [ tglx: Removed set_running_timer() inline and tweaked the changelog ] Signed-off-by: Yong Zhang Cc: Ingo Molnar Cc: Peter Zijlstra Acked-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- include/linux/timer.h | 4 ++-- kernel/timer.c | 17 +++-------------- 2 files changed, 5 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index cbfb7a355d3..6abd9138bed 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -274,11 +274,11 @@ static inline void timer_stats_timer_clear_start_info(struct timer_list *timer) extern void add_timer(struct timer_list *timer); +extern int try_to_del_timer_sync(struct timer_list *timer); + #ifdef CONFIG_SMP - extern int try_to_del_timer_sync(struct timer_list *timer); extern int del_timer_sync(struct timer_list *timer); #else -# define try_to_del_timer_sync(t) del_timer(t) # define del_timer_sync(t) del_timer(t) #endif diff --git a/kernel/timer.c b/kernel/timer.c index 72853b256ff..47b86c1e322 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -330,15 +330,6 @@ void set_timer_slack(struct timer_list *timer, int slack_hz) } EXPORT_SYMBOL_GPL(set_timer_slack); - -static inline void set_running_timer(struct tvec_base *base, - struct timer_list *timer) -{ -#ifdef CONFIG_SMP - base->running_timer = timer; -#endif -} - static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) { unsigned long expires = timer->expires; @@ -923,15 +914,12 @@ int del_timer(struct timer_list *timer) } EXPORT_SYMBOL(del_timer); -#ifdef CONFIG_SMP /** * try_to_del_timer_sync - Try to deactivate a timer * @timer: timer do del * * This function tries to deactivate a timer. Upon successful (ret >= 0) * exit the timer is not queued and the handler is not running on any CPU. - * - * It must not be called from interrupt contexts. */ int try_to_del_timer_sync(struct timer_list *timer) { @@ -960,6 +948,7 @@ out: } EXPORT_SYMBOL(try_to_del_timer_sync); +#ifdef CONFIG_SMP /** * del_timer_sync - deactivate a timer and wait for the handler to finish. * @timer: the timer to be deactivated @@ -1098,7 +1087,7 @@ static inline void __run_timers(struct tvec_base *base) timer_stats_account_timer(timer); - set_running_timer(base, timer); + base->running_timer = timer; detach_timer(timer, 1); spin_unlock_irq(&base->lock); @@ -1106,7 +1095,7 @@ static inline void __run_timers(struct tvec_base *base) spin_lock_irq(&base->lock); } } - set_running_timer(base, NULL); + base->running_timer = NULL; spin_unlock_irq(&base->lock); } -- cgit v1.2.3-70-g09d2 From fe7de49f9d4e53f24ec9ef762a503f70b562341c Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 20 Oct 2010 16:01:12 -0700 Subject: sched: Make sched_param argument static in sched_setscheduler() callers Andrew Morton pointed out almost all sched_setscheduler() callers are using fixed parameters and can be converted to static. It reduces runtime memory use a little. Signed-off-by: KOSAKI Motohiro Reported-by: Andrew Morton Acked-by: James Morris Cc: Ingo Molnar Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 5 +++-- kernel/irq/manage.c | 4 +++- kernel/kthread.c | 2 +- kernel/sched.c | 6 +++--- kernel/softirq.c | 4 +++- kernel/trace/trace_selftest.c | 2 +- kernel/watchdog.c | 2 +- 7 files changed, 15 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 0383601a927..849c8670583 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1942,9 +1942,10 @@ extern int task_nice(const struct task_struct *p); extern int can_nice(const struct task_struct *p, const int nice); extern int task_curr(const struct task_struct *p); extern int idle_cpu(int cpu); -extern int sched_setscheduler(struct task_struct *, int, struct sched_param *); +extern int sched_setscheduler(struct task_struct *, int, + const struct sched_param *); extern int sched_setscheduler_nocheck(struct task_struct *, int, - struct sched_param *); + const struct sched_param *); extern struct task_struct *idle_task(int cpu); extern struct task_struct *curr_task(int cpu); extern void set_curr_task(int cpu, struct task_struct *p); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 644e8d5fa36..850f030fa0c 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -573,7 +573,9 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { } */ static int irq_thread(void *data) { - struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2, }; + static struct sched_param param = { + .sched_priority = MAX_USER_RT_PRIO/2, + }; struct irqaction *action = data; struct irq_desc *desc = irq_to_desc(action->irq); int wake, oneshot = desc->status & IRQ_ONESHOT; diff --git a/kernel/kthread.c b/kernel/kthread.c index 2dc3786349d..74cf6f5e7ad 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -148,7 +148,7 @@ struct task_struct *kthread_create(int (*threadfn)(void *data), wait_for_completion(&create.done); if (!IS_ERR(create.result)) { - struct sched_param param = { .sched_priority = 0 }; + static struct sched_param param = { .sched_priority = 0 }; va_list args; va_start(args, namefmt); diff --git a/kernel/sched.c b/kernel/sched.c index d42992bccdf..51944e8c38a 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4701,7 +4701,7 @@ static bool check_same_owner(struct task_struct *p) } static int __sched_setscheduler(struct task_struct *p, int policy, - struct sched_param *param, bool user) + const struct sched_param *param, bool user) { int retval, oldprio, oldpolicy = -1, on_rq, running; unsigned long flags; @@ -4856,7 +4856,7 @@ recheck: * NOTE that the task may be already dead. */ int sched_setscheduler(struct task_struct *p, int policy, - struct sched_param *param) + const struct sched_param *param) { return __sched_setscheduler(p, policy, param, true); } @@ -4874,7 +4874,7 @@ EXPORT_SYMBOL_GPL(sched_setscheduler); * but our caller might not have that capability. */ int sched_setscheduler_nocheck(struct task_struct *p, int policy, - struct sched_param *param) + const struct sched_param *param) { return __sched_setscheduler(p, policy, param, false); } diff --git a/kernel/softirq.c b/kernel/softirq.c index fc978889b19..081869ed3a9 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -851,7 +851,9 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb, cpumask_any(cpu_online_mask)); case CPU_DEAD: case CPU_DEAD_FROZEN: { - struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; + static struct sched_param param = { + .sched_priority = MAX_RT_PRIO-1 + }; p = per_cpu(ksoftirqd, hotcpu); per_cpu(ksoftirqd, hotcpu) = NULL; diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 155a415b320..562c56e048f 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -558,7 +558,7 @@ trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr) static int trace_wakeup_test_thread(void *data) { /* Make this a RT thread, doesn't need to be too high */ - struct sched_param param = { .sched_priority = 5 }; + static struct sched_param param = { .sched_priority = 5 }; struct completion *x = data; sched_setscheduler(current, SCHED_FIFO, ¶m); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index bafba687a6d..94ca779aa9c 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -307,7 +307,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) */ static int watchdog(void *unused) { - struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; + static struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); sched_setscheduler(current, SCHED_FIFO, ¶m); -- cgit v1.2.3-70-g09d2 From fc766e4c4965915ab52a1d1fa3c7a7b3e7bc07f0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 29 Oct 2010 03:09:24 +0000 Subject: decnet: RCU conversion and get rid of dev_base_lock While tracking dev_base_lock users, I found decnet used it in dnet_select_source(), but for a wrong purpose: Writers only hold RTNL, not dev_base_lock, so readers must use RCU if they cannot use RTNL. Adds an rcu_head in struct dn_ifaddr and handle proper RCU management. Adds __rcu annotation in dn_route as well. Signed-off-by: Eric Dumazet Acked-by: Steven Whitehouse Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- include/net/dn_dev.h | 27 ++++++++----- include/net/dst.h | 8 ++-- net/decnet/af_decnet.c | 2 +- net/decnet/dn_dev.c | 100 +++++++++++++++++++++++++++------------------- net/decnet/dn_fib.c | 6 ++- net/decnet/dn_neigh.c | 2 +- net/decnet/dn_route.c | 68 +++++++++++++++++-------------- 8 files changed, 127 insertions(+), 88 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d8fd2c23a1b..578debb801f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -951,7 +951,7 @@ struct net_device { #endif void *atalk_ptr; /* AppleTalk link */ struct in_device __rcu *ip_ptr; /* IPv4 specific data */ - void *dn_ptr; /* DECnet specific data */ + struct dn_dev __rcu *dn_ptr; /* DECnet specific data */ struct inet6_dev __rcu *ip6_ptr; /* IPv6 specific data */ void *ec_ptr; /* Econet specific data */ void *ax25_ptr; /* AX.25 specific data */ diff --git a/include/net/dn_dev.h b/include/net/dn_dev.h index 0916bbf3bdf..b9e32db03f2 100644 --- a/include/net/dn_dev.h +++ b/include/net/dn_dev.h @@ -5,13 +5,14 @@ struct dn_dev; struct dn_ifaddr { - struct dn_ifaddr *ifa_next; + struct dn_ifaddr __rcu *ifa_next; struct dn_dev *ifa_dev; __le16 ifa_local; __le16 ifa_address; __u8 ifa_flags; __u8 ifa_scope; char ifa_label[IFNAMSIZ]; + struct rcu_head rcu; }; #define DN_DEV_S_RU 0 /* Run - working normally */ @@ -83,7 +84,7 @@ struct dn_dev_parms { struct dn_dev { - struct dn_ifaddr *ifa_list; + struct dn_ifaddr __rcu *ifa_list; struct net_device *dev; struct dn_dev_parms parms; char use_long; @@ -171,19 +172,27 @@ extern int unregister_dnaddr_notifier(struct notifier_block *nb); static inline int dn_dev_islocal(struct net_device *dev, __le16 addr) { - struct dn_dev *dn_db = dev->dn_ptr; + struct dn_dev *dn_db; struct dn_ifaddr *ifa; + int res = 0; + rcu_read_lock(); + dn_db = rcu_dereference(dev->dn_ptr); if (dn_db == NULL) { printk(KERN_DEBUG "dn_dev_islocal: Called for non DECnet device\n"); - return 0; + goto out; } - for(ifa = dn_db->ifa_list; ifa; ifa = ifa->ifa_next) - if ((addr ^ ifa->ifa_local) == 0) - return 1; - - return 0; + for (ifa = rcu_dereference(dn_db->ifa_list); + ifa != NULL; + ifa = rcu_dereference(ifa->ifa_next)) + if ((addr ^ ifa->ifa_local) == 0) { + res = 1; + break; + } +out: + rcu_read_unlock(); + return res; } #endif /* _NET_DN_DEV_H */ diff --git a/include/net/dst.h b/include/net/dst.h index ffe9cb719c0..a5bd72646d6 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -94,10 +94,10 @@ struct dst_entry { int __use; unsigned long lastuse; union { - struct dst_entry *next; - struct rtable __rcu *rt_next; - struct rt6_info *rt6_next; - struct dn_route *dn_next; + struct dst_entry *next; + struct rtable __rcu *rt_next; + struct rt6_info *rt6_next; + struct dn_route __rcu *dn_next; }; }; diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index d6b93d19790..18b8a2cbdf7 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1848,7 +1848,7 @@ unsigned dn_mss_from_pmtu(struct net_device *dev, int mtu) { unsigned mss = 230 - DN_MAX_NSP_DATA_HEADER; if (dev) { - struct dn_dev *dn_db = dev->dn_ptr; + struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr); mtu -= LL_RESERVED_SPACE(dev); if (dn_db->use_long) mtu -= 21; diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 4c409b46aa3..0ba15633c41 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -267,7 +267,7 @@ static int dn_forwarding_proc(ctl_table *table, int write, if (table->extra1 == NULL) return -EINVAL; - dn_db = dev->dn_ptr; + dn_db = rcu_dereference_raw(dev->dn_ptr); old = dn_db->parms.forwarding; err = proc_dointvec(table, write, buffer, lenp, ppos); @@ -332,14 +332,19 @@ static struct dn_ifaddr *dn_dev_alloc_ifa(void) return ifa; } -static __inline__ void dn_dev_free_ifa(struct dn_ifaddr *ifa) +static void dn_dev_free_ifa_rcu(struct rcu_head *head) { - kfree(ifa); + kfree(container_of(head, struct dn_ifaddr, rcu)); } -static void dn_dev_del_ifa(struct dn_dev *dn_db, struct dn_ifaddr **ifap, int destroy) +static void dn_dev_free_ifa(struct dn_ifaddr *ifa) { - struct dn_ifaddr *ifa1 = *ifap; + call_rcu(&ifa->rcu, dn_dev_free_ifa_rcu); +} + +static void dn_dev_del_ifa(struct dn_dev *dn_db, struct dn_ifaddr __rcu **ifap, int destroy) +{ + struct dn_ifaddr *ifa1 = rtnl_dereference(*ifap); unsigned char mac_addr[6]; struct net_device *dev = dn_db->dev; @@ -373,7 +378,9 @@ static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa) ASSERT_RTNL(); /* Check for duplicates */ - for(ifa1 = dn_db->ifa_list; ifa1; ifa1 = ifa1->ifa_next) { + for (ifa1 = rtnl_dereference(dn_db->ifa_list); + ifa1 != NULL; + ifa1 = rtnl_dereference(ifa1->ifa_next)) { if (ifa1->ifa_local == ifa->ifa_local) return -EEXIST; } @@ -386,7 +393,7 @@ static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa) } ifa->ifa_next = dn_db->ifa_list; - dn_db->ifa_list = ifa; + rcu_assign_pointer(dn_db->ifa_list, ifa); dn_ifaddr_notify(RTM_NEWADDR, ifa); blocking_notifier_call_chain(&dnaddr_chain, NETDEV_UP, ifa); @@ -396,7 +403,7 @@ static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa) static int dn_dev_set_ifa(struct net_device *dev, struct dn_ifaddr *ifa) { - struct dn_dev *dn_db = dev->dn_ptr; + struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr); int rv; if (dn_db == NULL) { @@ -425,7 +432,8 @@ int dn_dev_ioctl(unsigned int cmd, void __user *arg) struct sockaddr_dn *sdn = (struct sockaddr_dn *)&ifr->ifr_addr; struct dn_dev *dn_db; struct net_device *dev; - struct dn_ifaddr *ifa = NULL, **ifap = NULL; + struct dn_ifaddr *ifa = NULL; + struct dn_ifaddr __rcu **ifap = NULL; int ret = 0; if (copy_from_user(ifr, arg, DN_IFREQ_SIZE)) @@ -454,8 +462,10 @@ int dn_dev_ioctl(unsigned int cmd, void __user *arg) goto done; } - if ((dn_db = dev->dn_ptr) != NULL) { - for (ifap = &dn_db->ifa_list; (ifa=*ifap) != NULL; ifap = &ifa->ifa_next) + if ((dn_db = rtnl_dereference(dev->dn_ptr)) != NULL) { + for (ifap = &dn_db->ifa_list; + (ifa = rtnl_dereference(*ifap)) != NULL; + ifap = &ifa->ifa_next) if (strcmp(ifr->ifr_name, ifa->ifa_label) == 0) break; } @@ -558,7 +568,7 @@ static struct dn_dev *dn_dev_by_index(int ifindex) dev = __dev_get_by_index(&init_net, ifindex); if (dev) - dn_dev = dev->dn_ptr; + dn_dev = rtnl_dereference(dev->dn_ptr); return dn_dev; } @@ -576,7 +586,8 @@ static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct nlattr *tb[IFA_MAX+1]; struct dn_dev *dn_db; struct ifaddrmsg *ifm; - struct dn_ifaddr *ifa, **ifap; + struct dn_ifaddr *ifa; + struct dn_ifaddr __rcu **ifap; int err = -EINVAL; if (!net_eq(net, &init_net)) @@ -592,7 +603,9 @@ static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) goto errout; err = -EADDRNOTAVAIL; - for (ifap = &dn_db->ifa_list; (ifa = *ifap); ifap = &ifa->ifa_next) { + for (ifap = &dn_db->ifa_list; + (ifa = rtnl_dereference(*ifap)) != NULL; + ifap = &ifa->ifa_next) { if (tb[IFA_LOCAL] && nla_memcmp(tb[IFA_LOCAL], &ifa->ifa_local, 2)) continue; @@ -632,7 +645,7 @@ static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if ((dev = __dev_get_by_index(&init_net, ifm->ifa_index)) == NULL) return -ENODEV; - if ((dn_db = dev->dn_ptr) == NULL) { + if ((dn_db = rtnl_dereference(dev->dn_ptr)) == NULL) { dn_db = dn_dev_create(dev, &err); if (!dn_db) return err; @@ -748,11 +761,11 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) skip_naddr = 0; } - if ((dn_db = dev->dn_ptr) == NULL) + if ((dn_db = rtnl_dereference(dev->dn_ptr)) == NULL) goto cont; - for (ifa = dn_db->ifa_list, dn_idx = 0; ifa; - ifa = ifa->ifa_next, dn_idx++) { + for (ifa = rtnl_dereference(dn_db->ifa_list), dn_idx = 0; ifa; + ifa = rtnl_dereference(ifa->ifa_next), dn_idx++) { if (dn_idx < skip_naddr) continue; @@ -773,21 +786,22 @@ done: static int dn_dev_get_first(struct net_device *dev, __le16 *addr) { - struct dn_dev *dn_db = (struct dn_dev *)dev->dn_ptr; + struct dn_dev *dn_db; struct dn_ifaddr *ifa; int rv = -ENODEV; + rcu_read_lock(); + dn_db = rcu_dereference(dev->dn_ptr); if (dn_db == NULL) goto out; - rtnl_lock(); - ifa = dn_db->ifa_list; + ifa = rcu_dereference(dn_db->ifa_list); if (ifa != NULL) { *addr = ifa->ifa_local; rv = 0; } - rtnl_unlock(); out: + rcu_read_unlock(); return rv; } @@ -823,7 +837,7 @@ static void dn_send_endnode_hello(struct net_device *dev, struct dn_ifaddr *ifa) struct endnode_hello_message *msg; struct sk_buff *skb = NULL; __le16 *pktlen; - struct dn_dev *dn_db = (struct dn_dev *)dev->dn_ptr; + struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr); if ((skb = dn_alloc_skb(NULL, sizeof(*msg), GFP_ATOMIC)) == NULL) return; @@ -889,7 +903,7 @@ static int dn_am_i_a_router(struct dn_neigh *dn, struct dn_dev *dn_db, struct dn static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa) { int n; - struct dn_dev *dn_db = dev->dn_ptr; + struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr); struct dn_neigh *dn = (struct dn_neigh *)dn_db->router; struct sk_buff *skb; size_t size; @@ -960,7 +974,7 @@ static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa) static void dn_send_brd_hello(struct net_device *dev, struct dn_ifaddr *ifa) { - struct dn_dev *dn_db = (struct dn_dev *)dev->dn_ptr; + struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr); if (dn_db->parms.forwarding == 0) dn_send_endnode_hello(dev, ifa); @@ -998,7 +1012,7 @@ static void dn_send_ptp_hello(struct net_device *dev, struct dn_ifaddr *ifa) static int dn_eth_up(struct net_device *dev) { - struct dn_dev *dn_db = dev->dn_ptr; + struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr); if (dn_db->parms.forwarding == 0) dev_mc_add(dev, dn_rt_all_end_mcast); @@ -1012,7 +1026,7 @@ static int dn_eth_up(struct net_device *dev) static void dn_eth_down(struct net_device *dev) { - struct dn_dev *dn_db = dev->dn_ptr; + struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr); if (dn_db->parms.forwarding == 0) dev_mc_del(dev, dn_rt_all_end_mcast); @@ -1025,12 +1039,16 @@ static void dn_dev_set_timer(struct net_device *dev); static void dn_dev_timer_func(unsigned long arg) { struct net_device *dev = (struct net_device *)arg; - struct dn_dev *dn_db = dev->dn_ptr; + struct dn_dev *dn_db; struct dn_ifaddr *ifa; + rcu_read_lock(); + dn_db = rcu_dereference(dev->dn_ptr); if (dn_db->t3 <= dn_db->parms.t2) { if (dn_db->parms.timer3) { - for(ifa = dn_db->ifa_list; ifa; ifa = ifa->ifa_next) { + for (ifa = rcu_dereference(dn_db->ifa_list); + ifa; + ifa = rcu_dereference(ifa->ifa_next)) { if (!(ifa->ifa_flags & IFA_F_SECONDARY)) dn_db->parms.timer3(dev, ifa); } @@ -1039,13 +1057,13 @@ static void dn_dev_timer_func(unsigned long arg) } else { dn_db->t3 -= dn_db->parms.t2; } - + rcu_read_unlock(); dn_dev_set_timer(dev); } static void dn_dev_set_timer(struct net_device *dev) { - struct dn_dev *dn_db = dev->dn_ptr; + struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr); if (dn_db->parms.t2 > dn_db->parms.t3) dn_db->parms.t2 = dn_db->parms.t3; @@ -1077,8 +1095,8 @@ static struct dn_dev *dn_dev_create(struct net_device *dev, int *err) return NULL; memcpy(&dn_db->parms, p, sizeof(struct dn_dev_parms)); - smp_wmb(); - dev->dn_ptr = dn_db; + + rcu_assign_pointer(dev->dn_ptr, dn_db); dn_db->dev = dev; init_timer(&dn_db->timer); @@ -1086,7 +1104,7 @@ static struct dn_dev *dn_dev_create(struct net_device *dev, int *err) dn_db->neigh_parms = neigh_parms_alloc(dev, &dn_neigh_table); if (!dn_db->neigh_parms) { - dev->dn_ptr = NULL; + rcu_assign_pointer(dev->dn_ptr, NULL); kfree(dn_db); return NULL; } @@ -1125,7 +1143,7 @@ void dn_dev_up(struct net_device *dev) struct dn_ifaddr *ifa; __le16 addr = decnet_address; int maybe_default = 0; - struct dn_dev *dn_db = (struct dn_dev *)dev->dn_ptr; + struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr); if ((dev->type != ARPHRD_ETHER) && (dev->type != ARPHRD_LOOPBACK)) return; @@ -1176,7 +1194,7 @@ void dn_dev_up(struct net_device *dev) static void dn_dev_delete(struct net_device *dev) { - struct dn_dev *dn_db = dev->dn_ptr; + struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr); if (dn_db == NULL) return; @@ -1204,13 +1222,13 @@ static void dn_dev_delete(struct net_device *dev) void dn_dev_down(struct net_device *dev) { - struct dn_dev *dn_db = dev->dn_ptr; + struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr); struct dn_ifaddr *ifa; if (dn_db == NULL) return; - while((ifa = dn_db->ifa_list) != NULL) { + while ((ifa = rtnl_dereference(dn_db->ifa_list)) != NULL) { dn_dev_del_ifa(dn_db, &dn_db->ifa_list, 0); dn_dev_free_ifa(ifa); } @@ -1270,7 +1288,7 @@ static inline int is_dn_dev(struct net_device *dev) } static void *dn_dev_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(rcu) + __acquires(RCU) { int i; struct net_device *dev; @@ -1313,7 +1331,7 @@ static void *dn_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void dn_dev_seq_stop(struct seq_file *seq, void *v) - __releases(rcu) + __releases(RCU) { rcu_read_unlock(); } @@ -1340,7 +1358,7 @@ static int dn_dev_seq_show(struct seq_file *seq, void *v) struct net_device *dev = v; char peer_buf[DN_ASCBUF_LEN]; char router_buf[DN_ASCBUF_LEN]; - struct dn_dev *dn_db = dev->dn_ptr; + struct dn_dev *dn_db = rcu_dereference(dev->dn_ptr); seq_printf(seq, "%-8s %1s %04u %04u %04lu %04lu" " %04hu %03d %02x %-10s %-7s %-7s\n", diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index 4ab96c15166..0ef0a81bcd7 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -610,10 +610,12 @@ static void dn_fib_del_ifaddr(struct dn_ifaddr *ifa) /* Scan device list */ rcu_read_lock(); for_each_netdev_rcu(&init_net, dev) { - dn_db = dev->dn_ptr; + dn_db = rcu_dereference(dev->dn_ptr); if (dn_db == NULL) continue; - for(ifa2 = dn_db->ifa_list; ifa2; ifa2 = ifa2->ifa_next) { + for (ifa2 = rcu_dereference(dn_db->ifa_list); + ifa2 != NULL; + ifa2 = rcu_dereference(ifa2->ifa_next)) { if (ifa2->ifa_local == ifa->ifa_local) { found_it = 1; break; diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index a085dbcf5c7..602dade7e9a 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -391,7 +391,7 @@ int dn_neigh_router_hello(struct sk_buff *skb) write_lock(&neigh->lock); neigh->used = jiffies; - dn_db = (struct dn_dev *)neigh->dev->dn_ptr; + dn_db = rcu_dereference(neigh->dev->dn_ptr); if (!(neigh->nud_state & NUD_PERMANENT)) { neigh->updated = jiffies; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index df0f3e54ff8..94a9eb1d313 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -93,7 +93,7 @@ struct dn_rt_hash_bucket { - struct dn_route *chain; + struct dn_route __rcu *chain; spinlock_t lock; }; @@ -157,15 +157,17 @@ static inline void dnrt_drop(struct dn_route *rt) static void dn_dst_check_expire(unsigned long dummy) { int i; - struct dn_route *rt, **rtp; + struct dn_route *rt; + struct dn_route __rcu **rtp; unsigned long now = jiffies; unsigned long expire = 120 * HZ; - for(i = 0; i <= dn_rt_hash_mask; i++) { + for (i = 0; i <= dn_rt_hash_mask; i++) { rtp = &dn_rt_hash_table[i].chain; spin_lock(&dn_rt_hash_table[i].lock); - while((rt=*rtp) != NULL) { + while ((rt = rcu_dereference_protected(*rtp, + lockdep_is_held(&dn_rt_hash_table[i].lock))) != NULL) { if (atomic_read(&rt->dst.__refcnt) || (now - rt->dst.lastuse) < expire) { rtp = &rt->dst.dn_next; @@ -186,17 +188,19 @@ static void dn_dst_check_expire(unsigned long dummy) static int dn_dst_gc(struct dst_ops *ops) { - struct dn_route *rt, **rtp; + struct dn_route *rt; + struct dn_route __rcu **rtp; int i; unsigned long now = jiffies; unsigned long expire = 10 * HZ; - for(i = 0; i <= dn_rt_hash_mask; i++) { + for (i = 0; i <= dn_rt_hash_mask; i++) { spin_lock_bh(&dn_rt_hash_table[i].lock); rtp = &dn_rt_hash_table[i].chain; - while((rt=*rtp) != NULL) { + while ((rt = rcu_dereference_protected(*rtp, + lockdep_is_held(&dn_rt_hash_table[i].lock))) != NULL) { if (atomic_read(&rt->dst.__refcnt) || (now - rt->dst.lastuse) < expire) { rtp = &rt->dst.dn_next; @@ -227,7 +231,7 @@ static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu) { u32 min_mtu = 230; struct dn_dev *dn = dst->neighbour ? - (struct dn_dev *)dst->neighbour->dev->dn_ptr : NULL; + rcu_dereference_raw(dst->neighbour->dev->dn_ptr) : NULL; if (dn && dn->use_long == 0) min_mtu -= 6; @@ -277,13 +281,15 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) static int dn_insert_route(struct dn_route *rt, unsigned hash, struct dn_route **rp) { - struct dn_route *rth, **rthp; + struct dn_route *rth; + struct dn_route __rcu **rthp; unsigned long now = jiffies; rthp = &dn_rt_hash_table[hash].chain; spin_lock_bh(&dn_rt_hash_table[hash].lock); - while((rth = *rthp) != NULL) { + while ((rth = rcu_dereference_protected(*rthp, + lockdep_is_held(&dn_rt_hash_table[hash].lock))) != NULL) { if (compare_keys(&rth->fl, &rt->fl)) { /* Put it first */ *rthp = rth->dst.dn_next; @@ -315,15 +321,15 @@ static void dn_run_flush(unsigned long dummy) int i; struct dn_route *rt, *next; - for(i = 0; i < dn_rt_hash_mask; i++) { + for (i = 0; i < dn_rt_hash_mask; i++) { spin_lock_bh(&dn_rt_hash_table[i].lock); - if ((rt = xchg(&dn_rt_hash_table[i].chain, NULL)) == NULL) + if ((rt = xchg((struct dn_route **)&dn_rt_hash_table[i].chain, NULL)) == NULL) goto nothing_to_declare; - for(; rt; rt=next) { - next = rt->dst.dn_next; - rt->dst.dn_next = NULL; + for(; rt; rt = next) { + next = rcu_dereference_raw(rt->dst.dn_next); + RCU_INIT_POINTER(rt->dst.dn_next, NULL); dst_free((struct dst_entry *)rt); } @@ -458,15 +464,16 @@ static int dn_return_long(struct sk_buff *skb) */ static int dn_route_rx_packet(struct sk_buff *skb) { - struct dn_skb_cb *cb = DN_SKB_CB(skb); + struct dn_skb_cb *cb; int err; if ((err = dn_route_input(skb)) == 0) return dst_input(skb); + cb = DN_SKB_CB(skb); if (decnet_debug_level & 4) { char *devname = skb->dev ? skb->dev->name : "???"; - struct dn_skb_cb *cb = DN_SKB_CB(skb); + printk(KERN_DEBUG "DECnet: dn_route_rx_packet: rt_flags=0x%02x dev=%s len=%d src=0x%04hx dst=0x%04hx err=%d type=%d\n", (int)cb->rt_flags, devname, skb->len, @@ -573,7 +580,7 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type struct dn_skb_cb *cb; unsigned char flags = 0; __u16 len = le16_to_cpu(*(__le16 *)skb->data); - struct dn_dev *dn = (struct dn_dev *)dev->dn_ptr; + struct dn_dev *dn = rcu_dereference(dev->dn_ptr); unsigned char padlen = 0; if (!net_eq(dev_net(dev), &init_net)) @@ -728,7 +735,7 @@ static int dn_forward(struct sk_buff *skb) { struct dn_skb_cb *cb = DN_SKB_CB(skb); struct dst_entry *dst = skb_dst(skb); - struct dn_dev *dn_db = dst->dev->dn_ptr; + struct dn_dev *dn_db = rcu_dereference(dst->dev->dn_ptr); struct dn_route *rt; struct neighbour *neigh = dst->neighbour; int header_len; @@ -835,13 +842,16 @@ static inline int dn_match_addr(__le16 addr1, __le16 addr2) static __le16 dnet_select_source(const struct net_device *dev, __le16 daddr, int scope) { __le16 saddr = 0; - struct dn_dev *dn_db = dev->dn_ptr; + struct dn_dev *dn_db; struct dn_ifaddr *ifa; int best_match = 0; int ret; - read_lock(&dev_base_lock); - for(ifa = dn_db->ifa_list; ifa; ifa = ifa->ifa_next) { + rcu_read_lock(); + dn_db = rcu_dereference(dev->dn_ptr); + for (ifa = rcu_dereference(dn_db->ifa_list); + ifa != NULL; + ifa = rcu_dereference(ifa->ifa_next)) { if (ifa->ifa_scope > scope) continue; if (!daddr) { @@ -854,7 +864,7 @@ static __le16 dnet_select_source(const struct net_device *dev, __le16 daddr, int if (best_match == 0) saddr = ifa->ifa_local; } - read_unlock(&dev_base_lock); + rcu_read_unlock(); return saddr; } @@ -1020,7 +1030,7 @@ source_ok: err = -ENODEV; if (dev_out == NULL) goto out; - dn_db = dev_out->dn_ptr; + dn_db = rcu_dereference_raw(dev_out->dn_ptr); /* Possible improvement - check all devices for local addr */ if (dn_dev_islocal(dev_out, fl.fld_dst)) { dev_put(dev_out); @@ -1233,7 +1243,7 @@ static int dn_route_input_slow(struct sk_buff *skb) dev_hold(in_dev); - if ((dn_db = in_dev->dn_ptr) == NULL) + if ((dn_db = rcu_dereference(in_dev->dn_ptr)) == NULL) goto out; /* Zero source addresses are not allowed */ @@ -1677,15 +1687,15 @@ static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_rou { struct dn_rt_cache_iter_state *s = seq->private; - rt = rt->dst.dn_next; - while(!rt) { + rt = rcu_dereference_bh(rt->dst.dn_next); + while (!rt) { rcu_read_unlock_bh(); if (--s->bucket < 0) break; rcu_read_lock_bh(); - rt = dn_rt_hash_table[s->bucket].chain; + rt = rcu_dereference_bh(dn_rt_hash_table[s->bucket].chain); } - return rcu_dereference_bh(rt); + return rt; } static void *dn_rt_cache_seq_start(struct seq_file *seq, loff_t *pos) -- cgit v1.2.3-70-g09d2 From 1da4b1c6a4dfb5a13d7147a27c1ac53fed09befd Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Tue, 9 Nov 2010 11:22:58 +0000 Subject: x86/mrst: Add SFI platform device parsing code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SFI provides a series of tables. These describe the platform devices present including SPI and I²C devices, as well as various sensors, keypads and other glue as well as interfaces provided via the SCU IPC mechanism (intel_scu_ipc.c) This patch is a merge of the core elements and relevant fixes from the Intel development code by Feng, Alek, myself into a single coherent patch for upstream submission. It provides the needed infrastructure to register I2C, SPI and platform devices described by the tables, as well as handlers for some of the hardware already supported in kernel. The 0.8 firmware also provides GPIO tables. Devices are created at boot time or if they are SCU dependant at the point an SCU is discovered. The existing Linux device mechanisms will then handle the device binding. At an abstract level this is an SFI to Linux device translator. Device/platform specific setup/glue is in this file. This is done so that the drivers for the generic I²C and SPI bus devices remain cross platform as they should. (Updated from RFC version to correct the emc1403 name used by the firmware and a wrongly used #define) Signed-off-by: Alek Du LKML-Reference: <20101109112158.20013.6158.stgit@localhost.localdomain> [Clean ups, removal of 0.7 support] Signed-off-by: Feng Tang [Clean ups] Signed-off-by: Alan Cox Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig | 2 + arch/x86/include/asm/mrst.h | 4 + arch/x86/platform/mrst/mrst.c | 515 ++++++++++++++++++++++++++++++++++- drivers/platform/x86/intel_scu_ipc.c | 5 + include/linux/sfi.h | 8 +- 5 files changed, 527 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e8327686d3c..b306b84fc8c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -385,6 +385,8 @@ config X86_MRST depends on X86_EXTENDED_PLATFORM depends on X86_IO_APIC select APB_TIMER + select I2C + select SPI ---help--- Moorestown is Intel's Low Power Intel Architecture (LPIA) based Moblin Internet Device(MID) platform. Moorestown consists of two chips: diff --git a/arch/x86/include/asm/mrst.h b/arch/x86/include/asm/mrst.h index 4a711a684b1..283debd29fc 100644 --- a/arch/x86/include/asm/mrst.h +++ b/arch/x86/include/asm/mrst.h @@ -50,4 +50,8 @@ extern void mrst_early_console_init(void); extern struct console early_hsu_console; extern void hsu_early_console_init(void); + +extern void intel_scu_devices_create(void); +extern void intel_scu_devices_destroy(void); + #endif /* _ASM_X86_MRST_H */ diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c index 79ae68154e8..cfa1af24edd 100644 --- a/arch/x86/platform/mrst/mrst.c +++ b/arch/x86/platform/mrst/mrst.c @@ -9,9 +9,19 @@ * as published by the Free Software Foundation; version 2 * of the License. */ + +#define pr_fmt(fmt) "mrst: " fmt + #include #include #include +#include +#include +#include +#include +#include +#include +#include #include #include @@ -23,8 +33,10 @@ #include #include #include +#include #include + /* * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock, * cmdline option x86_mrst_timer can be used to override the configuration @@ -102,10 +114,10 @@ static int __init sfi_parse_mtmr(struct sfi_table_header *table) memcpy(sfi_mtimer_array, pentry, totallen); } - printk(KERN_INFO "SFI: MTIMER info (num = %d):\n", sfi_mtimer_num); + pr_debug("SFI MTIMER info (num = %d):\n", sfi_mtimer_num); pentry = sfi_mtimer_array; for (totallen = 0; totallen < sfi_mtimer_num; totallen++, pentry++) { - printk(KERN_INFO "timer[%d]: paddr = 0x%08x, freq = %dHz," + pr_debug("timer[%d]: paddr = 0x%08x, freq = %dHz," " irq = %d\n", totallen, (u32)pentry->phys_addr, pentry->freq_hz, pentry->irq); if (!pentry->irq) @@ -176,10 +188,10 @@ int __init sfi_parse_mrtc(struct sfi_table_header *table) memcpy(sfi_mrtc_array, pentry, totallen); } - printk(KERN_INFO "SFI: RTC info (num = %d):\n", sfi_mrtc_num); + pr_debug("SFI RTC info (num = %d):\n", sfi_mrtc_num); pentry = sfi_mrtc_array; for (totallen = 0; totallen < sfi_mrtc_num; totallen++, pentry++) { - printk(KERN_INFO "RTC[%d]: paddr = 0x%08x, irq = %d\n", + pr_debug("RTC[%d]: paddr = 0x%08x, irq = %d\n", totallen, (u32)pentry->phys_addr, pentry->irq); mp_irq.type = MP_IOAPIC; mp_irq.irqtype = mp_INT; @@ -309,3 +321,498 @@ static inline int __init setup_x86_mrst_timer(char *arg) return 0; } __setup("x86_mrst_timer=", setup_x86_mrst_timer); + +/* + * Parsing GPIO table first, since the DEVS table will need this table + * to map the pin name to the actual pin. + */ +static struct sfi_gpio_table_entry *gpio_table; +static int gpio_num_entry; + +static int __init sfi_parse_gpio(struct sfi_table_header *table) +{ + struct sfi_table_simple *sb; + struct sfi_gpio_table_entry *pentry; + int num, i; + + if (gpio_table) + return 0; + sb = (struct sfi_table_simple *)table; + num = SFI_GET_NUM_ENTRIES(sb, struct sfi_gpio_table_entry); + pentry = (struct sfi_gpio_table_entry *)sb->pentry; + + gpio_table = (struct sfi_gpio_table_entry *) + kmalloc(num * sizeof(*pentry), GFP_KERNEL); + if (!gpio_table) + return -1; + memcpy(gpio_table, pentry, num * sizeof(*pentry)); + gpio_num_entry = num; + + pr_debug("GPIO pin info:\n"); + for (i = 0; i < num; i++, pentry++) + pr_debug("info[%2d]: controller = %16.16s, pin_name = %16.16s," + " pin = %d\n", i, + pentry->controller_name, + pentry->pin_name, + pentry->pin_no); + return 0; +} + +static int get_gpio_by_name(const char *name) +{ + struct sfi_gpio_table_entry *pentry = gpio_table; + int i; + + if (!pentry) + return -1; + for (i = 0; i < gpio_num_entry; i++, pentry++) { + if (!strncmp(name, pentry->pin_name, SFI_NAME_LEN)) + return pentry->pin_no; + } + return -1; +} + +/* + * Here defines the array of devices platform data that IAFW would export + * through SFI "DEVS" table, we use name and type to match the device and + * its platform data. + */ +struct devs_id { + char name[SFI_NAME_LEN + 1]; + u8 type; + u8 delay; + void *(*get_platform_data)(void *info); +}; + +/* the offset for the mapping of global gpio pin to irq */ +#define MRST_IRQ_OFFSET 0x100 + +static void __init *pmic_gpio_platform_data(void *info) +{ + static struct intel_pmic_gpio_platform_data pmic_gpio_pdata; + int gpio_base = get_gpio_by_name("pmic_gpio_base"); + + if (gpio_base == -1) + gpio_base = 64; + pmic_gpio_pdata.gpio_base = gpio_base; + pmic_gpio_pdata.irq_base = gpio_base + MRST_IRQ_OFFSET; + pmic_gpio_pdata.gpiointr = 0xffffeff8; + + return &pmic_gpio_pdata; +} + +static void __init *max3111_platform_data(void *info) +{ + struct spi_board_info *spi_info = info; + int intr = get_gpio_by_name("max3111_int"); + + if (intr == -1) + return NULL; + spi_info->irq = intr + MRST_IRQ_OFFSET; + return NULL; +} + +/* we have multiple max7315 on the board ... */ +#define MAX7315_NUM 2 +static void __init *max7315_platform_data(void *info) +{ + static struct pca953x_platform_data max7315_pdata[MAX7315_NUM]; + static int nr; + struct pca953x_platform_data *max7315 = &max7315_pdata[nr]; + struct i2c_board_info *i2c_info = info; + int gpio_base, intr; + char base_pin_name[SFI_NAME_LEN + 1]; + char intr_pin_name[SFI_NAME_LEN + 1]; + + if (nr == MAX7315_NUM) { + pr_err("too many max7315s, we only support %d\n", + MAX7315_NUM); + return NULL; + } + /* we have several max7315 on the board, we only need load several + * instances of the same pca953x driver to cover them + */ + strcpy(i2c_info->type, "max7315"); + if (nr++) { + sprintf(base_pin_name, "max7315_%d_base", nr); + sprintf(intr_pin_name, "max7315_%d_int", nr); + } else { + strcpy(base_pin_name, "max7315_base"); + strcpy(intr_pin_name, "max7315_int"); + } + + gpio_base = get_gpio_by_name(base_pin_name); + intr = get_gpio_by_name(intr_pin_name); + + if (gpio_base == -1) + return NULL; + max7315->gpio_base = gpio_base; + if (intr != -1) { + i2c_info->irq = intr + MRST_IRQ_OFFSET; + max7315->irq_base = gpio_base + MRST_IRQ_OFFSET; + } else { + i2c_info->irq = -1; + max7315->irq_base = -1; + } + return max7315; +} + +static void __init *emc1403_platform_data(void *info) +{ + static short intr2nd_pdata; + struct i2c_board_info *i2c_info = info; + int intr = get_gpio_by_name("thermal_int"); + int intr2nd = get_gpio_by_name("thermal_alert"); + + if (intr == -1 || intr2nd == -1) + return NULL; + + i2c_info->irq = intr + MRST_IRQ_OFFSET; + intr2nd_pdata = intr2nd + MRST_IRQ_OFFSET; + + return &intr2nd_pdata; +} + +static void __init *lis331dl_platform_data(void *info) +{ + static short intr2nd_pdata; + struct i2c_board_info *i2c_info = info; + int intr = get_gpio_by_name("accel_int"); + int intr2nd = get_gpio_by_name("accel_2"); + + if (intr == -1 || intr2nd == -1) + return NULL; + + i2c_info->irq = intr + MRST_IRQ_OFFSET; + intr2nd_pdata = intr2nd + MRST_IRQ_OFFSET; + + return &intr2nd_pdata; +} + +static const struct devs_id __initconst device_ids[] = { + {"pmic_gpio", SFI_DEV_TYPE_SPI, 1, &pmic_gpio_platform_data}, + {"spi_max3111", SFI_DEV_TYPE_SPI, 0, &max3111_platform_data}, + {"i2c_max7315", SFI_DEV_TYPE_I2C, 1, &max7315_platform_data}, + {"i2c_max7315_2", SFI_DEV_TYPE_I2C, 1, &max7315_platform_data}, + {"emc1403", SFI_DEV_TYPE_I2C, 1, &emc1403_platform_data}, + {"i2c_accel", SFI_DEV_TYPE_I2C, 0, &lis331dl_platform_data}, + {}, +}; + +#define MAX_IPCDEVS 24 +static struct platform_device *ipc_devs[MAX_IPCDEVS]; +static int ipc_next_dev; + +#define MAX_SCU_SPI 24 +static struct spi_board_info *spi_devs[MAX_SCU_SPI]; +static int spi_next_dev; + +#define MAX_SCU_I2C 24 +static struct i2c_board_info *i2c_devs[MAX_SCU_I2C]; +static int i2c_bus[MAX_SCU_I2C]; +static int i2c_next_dev; + +static void __init intel_scu_device_register(struct platform_device *pdev) +{ + if(ipc_next_dev == MAX_IPCDEVS) + pr_err("too many SCU IPC devices"); + else + ipc_devs[ipc_next_dev++] = pdev; +} + +static void __init intel_scu_spi_device_register(struct spi_board_info *sdev) +{ + struct spi_board_info *new_dev; + + if (spi_next_dev == MAX_SCU_SPI) { + pr_err("too many SCU SPI devices"); + return; + } + + new_dev = kzalloc(sizeof(*sdev), GFP_KERNEL); + if (!new_dev) { + pr_err("failed to alloc mem for delayed spi dev %s\n", + sdev->modalias); + return; + } + memcpy(new_dev, sdev, sizeof(*sdev)); + + spi_devs[spi_next_dev++] = new_dev; +} + +static void __init intel_scu_i2c_device_register(int bus, + struct i2c_board_info *idev) +{ + struct i2c_board_info *new_dev; + + if (i2c_next_dev == MAX_SCU_I2C) { + pr_err("too many SCU I2C devices"); + return; + } + + new_dev = kzalloc(sizeof(*idev), GFP_KERNEL); + if (!new_dev) { + pr_err("failed to alloc mem for delayed i2c dev %s\n", + idev->type); + return; + } + memcpy(new_dev, idev, sizeof(*idev)); + + i2c_bus[i2c_next_dev] = bus; + i2c_devs[i2c_next_dev++] = new_dev; +} + +/* Called by IPC driver */ +void intel_scu_devices_create(void) +{ + int i; + + for (i = 0; i < ipc_next_dev; i++) + platform_device_add(ipc_devs[i]); + + for (i = 0; i < spi_next_dev; i++) + spi_register_board_info(spi_devs[i], 1); + + for (i = 0; i < i2c_next_dev; i++) { + struct i2c_adapter *adapter; + struct i2c_client *client; + + adapter = i2c_get_adapter(i2c_bus[i]); + if (adapter) { + client = i2c_new_device(adapter, i2c_devs[i]); + if (!client) + pr_err("can't create i2c device %s\n", + i2c_devs[i]->type); + } else + i2c_register_board_info(i2c_bus[i], i2c_devs[i], 1); + } +} +EXPORT_SYMBOL_GPL(intel_scu_devices_create); + +/* Called by IPC driver */ +void intel_scu_devices_destroy(void) +{ + int i; + + for (i = 0; i < ipc_next_dev; i++) + platform_device_del(ipc_devs[i]); +} +EXPORT_SYMBOL_GPL(intel_scu_devices_destroy); + +static void __init install_irq_resource(struct platform_device *pdev, int irq) +{ + /* Single threaded */ + static struct resource __initdata res = { + .name = "IRQ", + .flags = IORESOURCE_IRQ, + }; + res.start = irq; + platform_device_add_resources(pdev, &res, 1); +} + +static void __init sfi_handle_ipc_dev(struct platform_device *pdev) +{ + const struct devs_id *dev = device_ids; + void *pdata = NULL; + + while (dev->name[0]) { + if (dev->type == SFI_DEV_TYPE_IPC && + !strncmp(dev->name, pdev->name, SFI_NAME_LEN)) { + pdata = dev->get_platform_data(pdev); + break; + } + dev++; + } + pdev->dev.platform_data = pdata; + intel_scu_device_register(pdev); +} + +static void __init sfi_handle_spi_dev(struct spi_board_info *spi_info) +{ + const struct devs_id *dev = device_ids; + void *pdata = NULL; + + while (dev->name[0]) { + if (dev->type == SFI_DEV_TYPE_SPI && + !strncmp(dev->name, spi_info->modalias, SFI_NAME_LEN)) { + pdata = dev->get_platform_data(spi_info); + break; + } + dev++; + } + spi_info->platform_data = pdata; + if (dev->delay) + intel_scu_spi_device_register(spi_info); + else + spi_register_board_info(spi_info, 1); +} + +static void __init sfi_handle_i2c_dev(int bus, struct i2c_board_info *i2c_info) +{ + const struct devs_id *dev = device_ids; + void *pdata = NULL; + + while (dev->name[0]) { + if (dev->type == SFI_DEV_TYPE_I2C && + !strncmp(dev->name, i2c_info->type, SFI_NAME_LEN)) { + pdata = dev->get_platform_data(i2c_info); + break; + } + dev++; + } + i2c_info->platform_data = pdata; + + if (dev->delay) + intel_scu_i2c_device_register(bus, i2c_info); + else + i2c_register_board_info(bus, i2c_info, 1); + } + + +static int __init sfi_parse_devs(struct sfi_table_header *table) +{ + struct sfi_table_simple *sb; + struct sfi_device_table_entry *pentry; + struct spi_board_info spi_info; + struct i2c_board_info i2c_info; + struct platform_device *pdev; + int num, i, bus; + int ioapic; + struct io_apic_irq_attr irq_attr; + + sb = (struct sfi_table_simple *)table; + num = SFI_GET_NUM_ENTRIES(sb, struct sfi_device_table_entry); + pentry = (struct sfi_device_table_entry *)sb->pentry; + + for (i = 0; i < num; i++, pentry++) { + if (pentry->irq != (u8)0xff) { /* native RTE case */ + /* these SPI2 devices are not exposed to system as PCI + * devices, but they have separate RTE entry in IOAPIC + * so we have to enable them one by one here + */ + ioapic = mp_find_ioapic(pentry->irq); + irq_attr.ioapic = ioapic; + irq_attr.ioapic_pin = pentry->irq; + irq_attr.trigger = 1; + irq_attr.polarity = 1; + io_apic_set_pci_routing(NULL, pentry->irq, &irq_attr); + } + switch (pentry->type) { + case SFI_DEV_TYPE_IPC: + /* ID as IRQ is a hack that will go away */ + pdev = platform_device_alloc(pentry->name, pentry->irq); + if (pdev == NULL) { + pr_err("out of memory for SFI platform device '%s'.\n", + pentry->name); + continue; + } + install_irq_resource(pdev, pentry->irq); + pr_debug("info[%2d]: IPC bus, name = %16.16s, " + "irq = 0x%2x\n", i, pentry->name, pentry->irq); + sfi_handle_ipc_dev(pdev); + break; + case SFI_DEV_TYPE_SPI: + memset(&spi_info, 0, sizeof(spi_info)); + strncpy(spi_info.modalias, pentry->name, SFI_NAME_LEN); + spi_info.irq = pentry->irq; + spi_info.bus_num = pentry->host_num; + spi_info.chip_select = pentry->addr; + spi_info.max_speed_hz = pentry->max_freq; + pr_debug("info[%2d]: SPI bus = %d, name = %16.16s, " + "irq = 0x%2x, max_freq = %d, cs = %d\n", i, + spi_info.bus_num, + spi_info.modalias, + spi_info.irq, + spi_info.max_speed_hz, + spi_info.chip_select); + sfi_handle_spi_dev(&spi_info); + break; + case SFI_DEV_TYPE_I2C: + memset(&i2c_info, 0, sizeof(i2c_info)); + bus = pentry->host_num; + strncpy(i2c_info.type, pentry->name, SFI_NAME_LEN); + i2c_info.irq = pentry->irq; + i2c_info.addr = pentry->addr; + pr_debug("info[%2d]: I2C bus = %d, name = %16.16s, " + "irq = 0x%2x, addr = 0x%x\n", i, bus, + i2c_info.type, + i2c_info.irq, + i2c_info.addr); + sfi_handle_i2c_dev(bus, &i2c_info); + break; + case SFI_DEV_TYPE_UART: + case SFI_DEV_TYPE_HSI: + default: + ; + } + } + return 0; +} + +static int __init mrst_platform_init(void) +{ + sfi_table_parse(SFI_SIG_GPIO, NULL, NULL, sfi_parse_gpio); + sfi_table_parse(SFI_SIG_DEVS, NULL, NULL, sfi_parse_devs); + return 0; +} +arch_initcall(mrst_platform_init); + +/* + * we will search these buttons in SFI GPIO table (by name) + * and register them dynamically. Please add all possible + * buttons here, we will shrink them if no GPIO found. + */ +static struct gpio_keys_button gpio_button[] = { + {KEY_POWER, -1, 1, "power_btn", EV_KEY, 0, 3000}, + {KEY_PROG1, -1, 1, "prog_btn1", EV_KEY, 0, 20}, + {KEY_PROG2, -1, 1, "prog_btn2", EV_KEY, 0, 20}, + {SW_LID, -1, 1, "lid_switch", EV_SW, 0, 20}, + {KEY_VOLUMEUP, -1, 1, "vol_up", EV_KEY, 0, 20}, + {KEY_VOLUMEDOWN, -1, 1, "vol_down", EV_KEY, 0, 20}, + {KEY_CAMERA, -1, 1, "camera_full", EV_KEY, 0, 20}, + {KEY_CAMERA_FOCUS, -1, 1, "camera_half", EV_KEY, 0, 20}, + {SW_KEYPAD_SLIDE, -1, 1, "MagSw1", EV_SW, 0, 20}, + {SW_KEYPAD_SLIDE, -1, 1, "MagSw2", EV_SW, 0, 20}, +}; + +static struct gpio_keys_platform_data mrst_gpio_keys = { + .buttons = gpio_button, + .rep = 1, + .nbuttons = -1, /* will fill it after search */ +}; + +static struct platform_device pb_device = { + .name = "gpio-keys", + .id = -1, + .dev = { + .platform_data = &mrst_gpio_keys, + }, +}; + +/* + * Shrink the non-existent buttons, register the gpio button + * device if there is some + */ +static int __init pb_keys_init(void) +{ + struct gpio_keys_button *gb = gpio_button; + int i, num, good = 0; + + num = sizeof(gpio_button) / sizeof(struct gpio_keys_button); + for (i = 0; i < num; i++) { + gb[i].gpio = get_gpio_by_name(gb[i].desc); + if (gb[i].gpio == -1) + continue; + + if (i != good) + gb[good] = gb[i]; + good++; + } + + if (good) { + mrst_gpio_keys.nbuttons = good; + return platform_device_register(&pb_device); + } + return 0; +} +late_initcall(pb_keys_init); diff --git a/drivers/platform/x86/intel_scu_ipc.c b/drivers/platform/x86/intel_scu_ipc.c index 41a9e34899a..ca35b0ce944 100644 --- a/drivers/platform/x86/intel_scu_ipc.c +++ b/drivers/platform/x86/intel_scu_ipc.c @@ -26,6 +26,7 @@ #include #include #include +#include /* IPC defines the following message types */ #define IPCMSG_WATCHDOG_TIMER 0xF8 /* Set Kernel Watchdog Threshold */ @@ -699,6 +700,9 @@ static int ipc_probe(struct pci_dev *dev, const struct pci_device_id *id) iounmap(ipcdev.ipc_base); return -ENOMEM; } + + intel_scu_devices_create(); + return 0; } @@ -720,6 +724,7 @@ static void ipc_remove(struct pci_dev *pdev) iounmap(ipcdev.ipc_base); iounmap(ipcdev.i2c_base); ipcdev.pdev = NULL; + intel_scu_devices_destroy(); } static const struct pci_device_id pci_ids[] = { diff --git a/include/linux/sfi.h b/include/linux/sfi.h index 7f770c638e9..fe817918b30 100644 --- a/include/linux/sfi.h +++ b/include/linux/sfi.h @@ -77,6 +77,8 @@ #define SFI_OEM_ID_SIZE 6 #define SFI_OEM_TABLE_ID_SIZE 8 +#define SFI_NAME_LEN 16 + #define SFI_SYST_SEARCH_BEGIN 0x000E0000 #define SFI_SYST_SEARCH_END 0x000FFFFF @@ -156,13 +158,13 @@ struct sfi_device_table_entry { u16 addr; u8 irq; u32 max_freq; - char name[16]; + char name[SFI_NAME_LEN]; } __packed; struct sfi_gpio_table_entry { - char controller_name[16]; + char controller_name[SFI_NAME_LEN]; u16 pin_no; - char pin_name[16]; + char pin_name[SFI_NAME_LEN]; } __packed; typedef int (*sfi_table_handler) (struct sfi_table_header *table); -- cgit v1.2.3-70-g09d2 From 751305d9b2fd3e03eaab7808e976241d85ca4353 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Thu, 28 Oct 2010 18:23:01 +0100 Subject: viafb: General power management infrastructure Multiple devices need S/R hooks (framebuffer, GPIO, camera). Add infrastructure and convert existing framebuffer code to the new model. This patch should create no functional change. Based on earlier work by Jonathan Corbet. Signed-off-by: Daniel Drake Acked-by: Jonathan Corbet Signed-off-by: Florian Tobias Schandinat --- drivers/video/via/via-core.c | 79 ++++++++++++++++++++++++++++++++++++++++++-- drivers/video/via/viafbdev.c | 34 +++++++++---------- drivers/video/via/viafbdev.h | 2 -- include/linux/via-core.h | 15 +++++++++ 4 files changed, 107 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/drivers/video/via/via-core.c b/drivers/video/via/via-core.c index 31e30338e89..42be3d95588 100644 --- a/drivers/video/via/via-core.c +++ b/drivers/video/via/via-core.c @@ -15,6 +15,8 @@ #include #include #include +#include +#include /* * The default port config. @@ -563,6 +565,78 @@ static void via_teardown_subdevs(void) } } +/* + * Power management functions + */ +#ifdef CONFIG_PM +static LIST_HEAD(viafb_pm_hooks); +static DEFINE_MUTEX(viafb_pm_hooks_lock); + +void viafb_pm_register(struct viafb_pm_hooks *hooks) +{ + INIT_LIST_HEAD(&hooks->list); + + mutex_lock(&viafb_pm_hooks_lock); + list_add_tail(&hooks->list, &viafb_pm_hooks); + mutex_unlock(&viafb_pm_hooks_lock); +} +EXPORT_SYMBOL_GPL(viafb_pm_register); + +void viafb_pm_unregister(struct viafb_pm_hooks *hooks) +{ + mutex_lock(&viafb_pm_hooks_lock); + list_del(&hooks->list); + mutex_unlock(&viafb_pm_hooks_lock); +} +EXPORT_SYMBOL_GPL(viafb_pm_unregister); + +static int via_suspend(struct pci_dev *pdev, pm_message_t state) +{ + struct viafb_pm_hooks *hooks; + + if (state.event != PM_EVENT_SUSPEND) + return 0; + /* + * "I've occasionally hit a few drivers that caused suspend + * failures, and each and every time it was a driver bug, and + * the right thing to do was to just ignore the error and suspend + * anyway - returning an error code and trying to undo the suspend + * is not what anybody ever really wants, even if our model + *_allows_ for it." + * -- Linus Torvalds, Dec. 7, 2009 + */ + mutex_lock(&viafb_pm_hooks_lock); + list_for_each_entry_reverse(hooks, &viafb_pm_hooks, list) + hooks->suspend(hooks->private); + mutex_unlock(&viafb_pm_hooks_lock); + + pci_save_state(pdev); + pci_disable_device(pdev); + pci_set_power_state(pdev, pci_choose_state(pdev, state)); + return 0; +} + +static int via_resume(struct pci_dev *pdev) +{ + struct viafb_pm_hooks *hooks; + + /* Get the bus side powered up */ + pci_set_power_state(pdev, PCI_D0); + pci_restore_state(pdev); + if (pci_enable_device(pdev)) + return 0; + + pci_set_master(pdev); + + /* Now bring back any subdevs */ + mutex_lock(&viafb_pm_hooks_lock); + list_for_each_entry(hooks, &viafb_pm_hooks, list) + hooks->resume(hooks->private); + mutex_unlock(&viafb_pm_hooks_lock); + + return 0; +} +#endif /* CONFIG_PM */ static int __devinit via_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) @@ -572,6 +646,7 @@ static int __devinit via_pci_probe(struct pci_dev *pdev, ret = pci_enable_device(pdev); if (ret) return ret; + /* * Global device initialization. */ @@ -651,8 +726,8 @@ static struct pci_driver via_driver = { .probe = via_pci_probe, .remove = __devexit_p(via_pci_remove), #ifdef CONFIG_PM - .suspend = viafb_suspend, - .resume = viafb_resume, + .suspend = via_suspend, + .resume = via_resume, #endif }; diff --git a/drivers/video/via/viafbdev.c b/drivers/video/via/viafbdev.c index d298cfccd6f..289edd51952 100644 --- a/drivers/video/via/viafbdev.c +++ b/drivers/video/via/viafbdev.c @@ -1672,31 +1672,19 @@ static int parse_mode(const char *str, u32 *xres, u32 *yres) #ifdef CONFIG_PM -int viafb_suspend(struct pci_dev *pdev, pm_message_t state) +static int viafb_suspend(void *unused) { - if (state.event == PM_EVENT_SUSPEND) { - acquire_console_sem(); - fb_set_suspend(viafbinfo, 1); - - viafb_sync(viafbinfo); - - pci_save_state(pdev); - pci_disable_device(pdev); - pci_set_power_state(pdev, pci_choose_state(pdev, state)); - release_console_sem(); - } + acquire_console_sem(); + fb_set_suspend(viafbinfo, 1); + viafb_sync(viafbinfo); + release_console_sem(); return 0; } -int viafb_resume(struct pci_dev *pdev) +static int viafb_resume(void *unused) { acquire_console_sem(); - pci_set_power_state(pdev, PCI_D0); - pci_restore_state(pdev); - if (pci_enable_device(pdev)) - goto fail; - pci_set_master(pdev); if (viaparinfo->shared->vdev->engine_mmio) viafb_reset_engine(viaparinfo); viafb_set_par(viafbinfo); @@ -1704,11 +1692,15 @@ int viafb_resume(struct pci_dev *pdev) viafb_set_par(viafbinfo1); fb_set_suspend(viafbinfo, 0); -fail: release_console_sem(); return 0; } +static struct viafb_pm_hooks viafb_fb_pm_hooks = { + .suspend = viafb_suspend, + .resume = viafb_resume +}; + #endif @@ -1899,6 +1891,10 @@ int __devinit via_fb_pci_probe(struct viafb_dev *vdev) viafb_init_proc(viaparinfo->shared); viafb_init_dac(IGA2); + +#ifdef CONFIG_PM + viafb_pm_register(&viafb_fb_pm_hooks); +#endif return 0; out_fb_unreg: diff --git a/drivers/video/via/viafbdev.h b/drivers/video/via/viafbdev.h index 4960e3da664..d66f963e930 100644 --- a/drivers/video/via/viafbdev.h +++ b/drivers/video/via/viafbdev.h @@ -108,6 +108,4 @@ void via_fb_pci_remove(struct pci_dev *pdev); /* Temporary */ int viafb_init(void); void viafb_exit(void); -int viafb_suspend(struct pci_dev *pdev, pm_message_t state); -int viafb_resume(struct pci_dev *pdev); #endif /* __VIAFBDEV_H__ */ diff --git a/include/linux/via-core.h b/include/linux/via-core.h index 7ffb521e1a7..a4327a0c8ef 100644 --- a/include/linux/via-core.h +++ b/include/linux/via-core.h @@ -59,6 +59,21 @@ struct via_port_cfg { u8 ioport_index; }; +/* + * Allow subdevs to register suspend/resume hooks. + */ +#ifdef CONFIG_PM +struct viafb_pm_hooks { + struct list_head list; + int (*suspend)(void *private); + int (*resume)(void *private); + void *private; +}; + +void viafb_pm_register(struct viafb_pm_hooks *hooks); +void viafb_pm_unregister(struct viafb_pm_hooks *hooks); +#endif /* CONFIG_PM */ + /* * This is the global viafb "device" containing stuff needed by * all subdevs. -- cgit v1.2.3-70-g09d2 From 4d17aeb1c5b2375769446d13012a98e6d265ec13 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Tue, 21 Sep 2010 19:37:15 +0530 Subject: OMAP: I2C: split device registration and convert OMAP2+ to omap_device Split the OMAP1 and OMAP2+ platform_device build and register code. Convert the OMAP2+ variant to use omap_device. This patch was developed in collaboration with Rajendra Nayak . Signed-off-by: Paul Walmsley Signed-off-by: Rajendra Nayak Cc: Kevin Hilman Signed-off-by: Kevin Hilman --- arch/arm/plat-omap/i2c.c | 124 +++++++++++++++++++---------------------------- include/linux/i2c-omap.h | 5 ++ 2 files changed, 54 insertions(+), 75 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/plat-omap/i2c.c b/arch/arm/plat-omap/i2c.c index a5ce4f0aad3..a5bff9ce7cb 100644 --- a/arch/arm/plat-omap/i2c.c +++ b/arch/arm/plat-omap/i2c.c @@ -27,18 +27,18 @@ #include #include #include +#include +#include +#include #include #include #include #include +#include #define OMAP_I2C_SIZE 0x3f #define OMAP1_I2C_BASE 0xfffb3800 -#define OMAP2_I2C_BASE1 0x48070000 -#define OMAP2_I2C_BASE2 0x48072000 -#define OMAP2_I2C_BASE3 0x48060000 -#define OMAP4_I2C_BASE4 0x48350000 static const char name[] = "i2c_omap"; @@ -55,15 +55,6 @@ static const char name[] = "i2c_omap"; static struct resource i2c_resources[][2] = { { I2C_RESOURCE_BUILDER(0, 0) }, -#if defined(CONFIG_ARCH_OMAP2PLUS) - { I2C_RESOURCE_BUILDER(OMAP2_I2C_BASE2, 0) }, -#endif -#if defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_ARCH_OMAP4) - { I2C_RESOURCE_BUILDER(OMAP2_I2C_BASE3, 0) }, -#endif -#if defined(CONFIG_ARCH_OMAP4) - { I2C_RESOURCE_BUILDER(OMAP4_I2C_BASE4, 0) }, -#endif }; #define I2C_DEV_BUILDER(bus_id, res, data) \ @@ -77,18 +68,11 @@ static struct resource i2c_resources[][2] = { }, \ } -static struct omap_i2c_bus_platform_data i2c_pdata[ARRAY_SIZE(i2c_resources)]; +#define MAX_OMAP_I2C_HWMOD_NAME_LEN 16 +#define OMAP_I2C_MAX_CONTROLLERS 4 +static struct omap_i2c_bus_platform_data i2c_pdata[OMAP_I2C_MAX_CONTROLLERS]; static struct platform_device omap_i2c_devices[] = { I2C_DEV_BUILDER(1, i2c_resources[0], &i2c_pdata[0]), -#if defined(CONFIG_ARCH_OMAP2PLUS) - I2C_DEV_BUILDER(2, i2c_resources[1], &i2c_pdata[1]), -#endif -#if defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_ARCH_OMAP4) - I2C_DEV_BUILDER(3, i2c_resources[2], &i2c_pdata[2]), -#endif -#if defined(CONFIG_ARCH_OMAP4) - I2C_DEV_BUILDER(4, i2c_resources[3], &i2c_pdata[3]), -#endif }; #define OMAP_I2C_CMDLINE_SETUP (BIT(31)) @@ -109,35 +93,20 @@ static int __init omap_i2c_nr_ports(void) return ports; } -/* Shared between omap2 and 3 */ -static resource_size_t omap2_i2c_irq[3] __initdata = { - INT_24XX_I2C1_IRQ, - INT_24XX_I2C2_IRQ, - INT_34XX_I2C3_IRQ, -}; - -static resource_size_t omap4_i2c_irq[4] __initdata = { - OMAP44XX_IRQ_I2C1, - OMAP44XX_IRQ_I2C2, - OMAP44XX_IRQ_I2C3, - OMAP44XX_IRQ_I2C4, -}; - -static inline int omap1_i2c_add_bus(struct platform_device *pdev, int bus_id) +static inline int omap1_i2c_add_bus(int bus_id) { - struct omap_i2c_bus_platform_data *pd; - struct resource *res; - - pd = pdev->dev.platform_data; - res = pdev->resource; - res[0].start = OMAP1_I2C_BASE; - res[0].end = res[0].start + OMAP_I2C_SIZE; - res[1].start = INT_I2C; + struct platform_device *pdev; + struct omap_i2c_bus_platform_data *pdata; + omap1_i2c_mux_pins(bus_id); + pdev = &omap_i2c_devices[bus_id - 1]; + pdata = &i2c_pdata[bus_id - 1]; + return platform_device_register(pdev); } + /* * XXX This function is a temporary compatibility wrapper - only * needed until the I2C driver can be converted to call @@ -148,52 +117,57 @@ static void omap_pm_set_max_mpu_wakeup_lat_compat(struct device *dev, long t) omap_pm_set_max_mpu_wakeup_lat(dev, t); } -static inline int omap2_i2c_add_bus(struct platform_device *pdev, int bus_id) -{ - struct resource *res; - resource_size_t *irq; +static struct omap_device_pm_latency omap_i2c_latency[] = { + [0] = { + .deactivate_func = omap_device_idle_hwmods, + .activate_func = omap_device_enable_hwmods, + .flags = OMAP_DEVICE_LATENCY_AUTO_ADJUST, + }, +}; - res = pdev->resource; +static inline int omap2_i2c_add_bus(int bus_id) +{ + int l; + struct omap_hwmod *oh; + struct omap_device *od; + char oh_name[MAX_OMAP_I2C_HWMOD_NAME_LEN]; + struct omap_i2c_bus_platform_data *pdata; - if (!cpu_is_omap44xx()) - irq = omap2_i2c_irq; - else - irq = omap4_i2c_irq; + omap2_i2c_mux_pins(bus_id); - if (bus_id == 1) { - res[0].start = OMAP2_I2C_BASE1; - res[0].end = res[0].start + OMAP_I2C_SIZE; + l = snprintf(oh_name, MAX_OMAP_I2C_HWMOD_NAME_LEN, "i2c%d", bus_id); + WARN(l >= MAX_OMAP_I2C_HWMOD_NAME_LEN, + "String buffer overflow in I2C%d device setup\n", bus_id); + oh = omap_hwmod_lookup(oh_name); + if (!oh) { + pr_err("Could not look up %s\n", oh_name); + return -EEXIST; } - res[1].start = irq[bus_id - 1]; - omap2_i2c_mux_pins(bus_id); - + pdata = &i2c_pdata[bus_id - 1]; /* * When waiting for completion of a i2c transfer, we need to * set a wake up latency constraint for the MPU. This is to * ensure quick enough wakeup from idle, when transfer * completes. + * Only omap3 has support for constraints */ - if (cpu_is_omap34xx()) { - struct omap_i2c_bus_platform_data *pd; - - pd = pdev->dev.platform_data; - pd->set_mpu_wkup_lat = omap_pm_set_max_mpu_wakeup_lat_compat; - } - - return platform_device_register(pdev); + if (cpu_is_omap34xx()) + pdata->set_mpu_wkup_lat = omap_pm_set_max_mpu_wakeup_lat_compat; + od = omap_device_build(name, bus_id, oh, pdata, + sizeof(struct omap_i2c_bus_platform_data), + omap_i2c_latency, ARRAY_SIZE(omap_i2c_latency), 0); + WARN(IS_ERR(od), "Could not build omap_device for %s\n", name); + + return PTR_ERR(od); } static int __init omap_i2c_add_bus(int bus_id) { - struct platform_device *pdev; - - pdev = &omap_i2c_devices[bus_id - 1]; - if (cpu_class_is_omap1()) - return omap1_i2c_add_bus(pdev, bus_id); + return omap1_i2c_add_bus(bus_id); else - return omap2_i2c_add_bus(pdev, bus_id); + return omap2_i2c_add_bus(bus_id); } /** diff --git a/include/linux/i2c-omap.h b/include/linux/i2c-omap.h index 78ebf507ce5..7472449cbb7 100644 --- a/include/linux/i2c-omap.h +++ b/include/linux/i2c-omap.h @@ -1,9 +1,14 @@ #ifndef __I2C_OMAP_H__ #define __I2C_OMAP_H__ +#include + struct omap_i2c_bus_platform_data { u32 clkrate; void (*set_mpu_wkup_lat)(struct device *dev, long set); + int (*device_enable) (struct platform_device *pdev); + int (*device_shutdown) (struct platform_device *pdev); + int (*device_idle) (struct platform_device *pdev); }; #endif -- cgit v1.2.3-70-g09d2 From f6cd24777513fcc673d432cc29ef59881d3e4df1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 4 Nov 2010 11:13:48 +0100 Subject: irq: Better struct irqaction layout We currently use kmalloc-96 slab for struct irqaction allocations on 64bit arches. This is unfortunate because of possible false sharing and two cache lines accesses. Move 'name' and 'dir' fields at the end of the structure, and force a suitable alignement. Hot path fields now use one cache line on x86_64. Signed-off-by: Eric Dumazet Reviewed-by: Andi Kleen Cc: Peter Zijlstra LKML-Reference: <1288865628.2659.69.camel@edumazet-laptop> Signed-off-by: Thomas Gleixner --- include/linux/interrupt.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 79d0c4f6d07..55e0d4253e4 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -114,15 +114,15 @@ typedef irqreturn_t (*irq_handler_t)(int, void *); struct irqaction { irq_handler_t handler; unsigned long flags; - const char *name; void *dev_id; struct irqaction *next; int irq; - struct proc_dir_entry *dir; irq_handler_t thread_fn; struct task_struct *thread; unsigned long thread_flags; -}; + const char *name; + struct proc_dir_entry *dir; +} ____cacheline_internodealigned_in_smp; extern irqreturn_t no_action(int cpl, void *dev_id); -- cgit v1.2.3-70-g09d2 From da1d39e3903bc35be2b5e8d2116fdd5d337244d4 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 9 Nov 2010 17:47:02 +0900 Subject: mmc, sh: Move constants to sh_mmcif.h This moves some constants from sh_mmcif.c to sh_mmcif.h so that they can be used in sh_mmcif_boot_init(). It also alters the definition of SOFT_RST_OFF from (0 << 31) to ~SOFT_RST_ON (= ~(1 << 31)). The former seems bogus. The latter is consistent with the code in sh_mmcif_boot_init(). Cc: Yusuke Goda Cc: Magnus Damm Signed-off-by: Simon Horman Signed-off-by: Paul Mundt --- drivers/mmc/host/sh_mmcif.c | 23 ----------------------- include/linux/mmc/sh_mmcif.h | 39 ++++++++++++++++++++++++++++++++++----- 2 files changed, 34 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c index ddd09840520..3f492730ec0 100644 --- a/drivers/mmc/host/sh_mmcif.c +++ b/drivers/mmc/host/sh_mmcif.c @@ -62,25 +62,6 @@ /* CE_BLOCK_SET */ #define BLOCK_SIZE_MASK 0x0000ffff -/* CE_CLK_CTRL */ -#define CLK_ENABLE (1 << 24) /* 1: output mmc clock */ -#define CLK_CLEAR ((1 << 19) | (1 << 18) | (1 << 17) | (1 << 16)) -#define CLK_SUP_PCLK ((1 << 19) | (1 << 18) | (1 << 17) | (1 << 16)) -#define SRSPTO_256 ((1 << 13) | (0 << 12)) /* resp timeout */ -#define SRBSYTO_29 ((1 << 11) | (1 << 10) | \ - (1 << 9) | (1 << 8)) /* resp busy timeout */ -#define SRWDTO_29 ((1 << 7) | (1 << 6) | \ - (1 << 5) | (1 << 4)) /* read/write timeout */ -#define SCCSTO_29 ((1 << 3) | (1 << 2) | \ - (1 << 1) | (1 << 0)) /* ccs timeout */ - -/* CE_BUF_ACC */ -#define BUF_ACC_DMAWEN (1 << 25) -#define BUF_ACC_DMAREN (1 << 24) -#define BUF_ACC_BUSW_32 (0 << 17) -#define BUF_ACC_BUSW_16 (1 << 17) -#define BUF_ACC_ATYP (1 << 16) - /* CE_INT */ #define INT_CCSDE (1 << 29) #define INT_CMD12DRE (1 << 26) @@ -165,10 +146,6 @@ STS2_AC12BSYTO | STS2_RSPBSYTO | \ STS2_AC12RSPTO | STS2_RSPTO) -/* CE_VERSION */ -#define SOFT_RST_ON (1 << 31) -#define SOFT_RST_OFF (0 << 31) - #define CLKDEV_EMMC_DATA 52000000 /* 52MHz */ #define CLKDEV_MMC_DATA 20000000 /* 20MHz */ #define CLKDEV_INIT 400000 /* 400 KHz */ diff --git a/include/linux/mmc/sh_mmcif.h b/include/linux/mmc/sh_mmcif.h index 5c99da1078a..a6bfa529649 100644 --- a/include/linux/mmc/sh_mmcif.h +++ b/include/linux/mmc/sh_mmcif.h @@ -59,6 +59,29 @@ struct sh_mmcif_plat_data { #define MMCIF_CE_HOST_STS2 0x0000004C #define MMCIF_CE_VERSION 0x0000007C +/* CE_BUF_ACC */ +#define BUF_ACC_DMAWEN (1 << 25) +#define BUF_ACC_DMAREN (1 << 24) +#define BUF_ACC_BUSW_32 (0 << 17) +#define BUF_ACC_BUSW_16 (1 << 17) +#define BUF_ACC_ATYP (1 << 16) + +/* CE_CLK_CTRL */ +#define CLK_ENABLE (1 << 24) /* 1: output mmc clock */ +#define CLK_CLEAR ((1 << 19) | (1 << 18) | (1 << 17) | (1 << 16)) +#define CLK_SUP_PCLK ((1 << 19) | (1 << 18) | (1 << 17) | (1 << 16)) +#define SRSPTO_256 ((1 << 13) | (0 << 12)) /* resp timeout */ +#define SRBSYTO_29 ((1 << 11) | (1 << 10) | \ + (1 << 9) | (1 << 8)) /* resp busy timeout */ +#define SRWDTO_29 ((1 << 7) | (1 << 6) | \ + (1 << 5) | (1 << 4)) /* read/write timeout */ +#define SCCSTO_29 ((1 << 3) | (1 << 2) | \ + (1 << 1) | (1 << 0)) /* ccs timeout */ + +/* CE_VERSION */ +#define SOFT_RST_ON (1 << 31) +#define SOFT_RST_OFF ~SOFT_RST_ON + static inline u32 sh_mmcif_readl(void __iomem *addr, int reg) { return readl(addr + reg); @@ -149,17 +172,23 @@ static inline void sh_mmcif_boot_init(void __iomem *base) /* reset */ tmp = sh_mmcif_readl(base, MMCIF_CE_VERSION); - sh_mmcif_writel(base, MMCIF_CE_VERSION, tmp | 0x80000000); - sh_mmcif_writel(base, MMCIF_CE_VERSION, tmp & ~0x80000000); + sh_mmcif_writel(base, MMCIF_CE_VERSION, tmp | SOFT_RST_ON); + sh_mmcif_writel(base, MMCIF_CE_VERSION, tmp & SOFT_RST_OFF); /* byte swap */ - sh_mmcif_writel(base, MMCIF_CE_BUF_ACC, 0x00010000); + sh_mmcif_writel(base, MMCIF_CE_BUF_ACC, BUF_ACC_ATYP); /* Set block size in MMCIF hardware */ sh_mmcif_writel(base, MMCIF_CE_BLOCK_SET, SH_MMCIF_BBS); - /* Enable the clock, set it to Bus clock/256 (about 325Khz)*/ - sh_mmcif_writel(base, MMCIF_CE_CLK_CTRL, 0x01072fff); + /* Enable the clock, set it to Bus clock/256 (about 325Khz). + * It is unclear where 0x70000 comes from or if it is even needed. + * It is there for byte-compatibility with code that is known to + * work. + */ + sh_mmcif_writel(base, MMCIF_CE_CLK_CTRL, + CLK_ENABLE | SRSPTO_256 | SRBSYTO_29 | SRWDTO_29 | + SCCSTO_29 | 0x70000); /* CMD0 */ sh_mmcif_boot_cmd(base, 0x00000040, 0); -- cgit v1.2.3-70-g09d2 From 5e4f083f78d03e9f8d2e327daccde16976f9bb00 Mon Sep 17 00:00:00 2001 From: Yong Zhang Date: Sun, 24 Oct 2010 11:50:53 +0800 Subject: hrtimer: Remove stale comment on curr_timer curr_timer doesn't resident in struct hrtimer_cpu_base anymore. Signed-off-by: Yong Zhang LKML-Reference: <1287892253-2587-1-git-send-email-yong.zhang0@gmail.com> Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index fd0c1b857d3..dd9954b7934 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -158,7 +158,6 @@ struct hrtimer_clock_base { * @lock: lock protecting the base and associated clock bases * and timers * @clock_base: array of clock bases for this cpu - * @curr_timer: the timer which is executing a callback right now * @expires_next: absolute time of the next event which was scheduled * via clock_set_next_event() * @hres_active: State of high resolution mode -- cgit v1.2.3-70-g09d2 From b17cd8d69a75f921d9d444cc3ac9b5b1d0b66ca0 Mon Sep 17 00:00:00 2001 From: Brandon Philips Date: Sun, 7 Nov 2010 01:28:24 -0500 Subject: driver core: prune docs about device_interface drivers/base/intf.c was removed before the beginning of (git) time but its Documentation stuck around. Remove it. Signed-off-by: Brandon Philips Signed-off-by: Greg Kroah-Hartman --- Documentation/driver-model/interface.txt | 129 ------------------------------- include/linux/cpu.h | 5 -- include/linux/node.h | 5 -- 3 files changed, 139 deletions(-) delete mode 100644 Documentation/driver-model/interface.txt (limited to 'include/linux') diff --git a/Documentation/driver-model/interface.txt b/Documentation/driver-model/interface.txt deleted file mode 100644 index c66912bfe86..00000000000 --- a/Documentation/driver-model/interface.txt +++ /dev/null @@ -1,129 +0,0 @@ - -Device Interfaces - -Introduction -~~~~~~~~~~~~ - -Device interfaces are the logical interfaces of device classes that correlate -directly to userspace interfaces, like device nodes. - -Each device class may have multiple interfaces through which you can -access the same device. An input device may support the mouse interface, -the 'evdev' interface, and the touchscreen interface. A SCSI disk would -support the disk interface, the SCSI generic interface, and possibly a raw -device interface. - -Device interfaces are registered with the class they belong to. As devices -are added to the class, they are added to each interface registered with -the class. The interface is responsible for determining whether the device -supports the interface or not. - - -Programming Interface -~~~~~~~~~~~~~~~~~~~~~ - -struct device_interface { - char * name; - rwlock_t lock; - u32 devnum; - struct device_class * devclass; - - struct list_head node; - struct driver_dir_entry dir; - - int (*add_device)(struct device *); - int (*add_device)(struct intf_data *); -}; - -int interface_register(struct device_interface *); -void interface_unregister(struct device_interface *); - - -An interface must specify the device class it belongs to. It is added -to that class's list of interfaces on registration. - - -Interfaces can be added to a device class at any time. Whenever it is -added, each device in the class is passed to the interface's -add_device callback. When an interface is removed, each device is -removed from the interface. - - -Devices -~~~~~~~ -Once a device is added to a device class, it is added to each -interface that is registered with the device class. The class -is expected to place a class-specific data structure in -struct device::class_data. The interface can use that (along with -other fields of struct device) to determine whether or not the driver -and/or device support that particular interface. - - -Data -~~~~ - -struct intf_data { - struct list_head node; - struct device_interface * intf; - struct device * dev; - u32 intf_num; -}; - -int interface_add_data(struct interface_data *); - -The interface is responsible for allocating and initializing a struct -intf_data and calling interface_add_data() to add it to the device's list -of interfaces it belongs to. This list will be iterated over when the device -is removed from the class (instead of all possible interfaces for a class). -This structure should probably be embedded in whatever per-device data -structure the interface is allocating anyway. - -Devices are enumerated within the interface. This happens in interface_add_data() -and the enumerated value is stored in the struct intf_data for that device. - -sysfs -~~~~~ -Each interface is given a directory in the directory of the device -class it belongs to: - -Interfaces get a directory in the class's directory as well: - - class/ - `-- input - |-- devices - |-- drivers - |-- mouse - `-- evdev - -When a device is added to the interface, a symlink is created that points -to the device's directory in the physical hierarchy: - - class/ - `-- input - |-- devices - | `-- 1 -> ../../../root/pci0/00:1f.0/usb_bus/00:1f.2-1:0/ - |-- drivers - | `-- usb:usb_mouse -> ../../../bus/drivers/usb_mouse/ - |-- mouse - | `-- 1 -> ../../../root/pci0/00:1f.0/usb_bus/00:1f.2-1:0/ - `-- evdev - `-- 1 -> ../../../root/pci0/00:1f.0/usb_bus/00:1f.2-1:0/ - - -Future Plans -~~~~~~~~~~~~ -A device interface is correlated directly with a userspace interface -for a device, specifically a device node. For instance, a SCSI disk -exposes at least two interfaces to userspace: the standard SCSI disk -interface and the SCSI generic interface. It might also export a raw -device interface. - -Many interfaces have a major number associated with them and each -device gets a minor number. Or, multiple interfaces might share one -major number, and each will receive a range of minor numbers (like in -the case of input devices). - -These major and minor numbers could be stored in the interface -structure. Major and minor allocations could happen when the interface -is registered with the class, or via a helper function. - diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 4823af64e9d..5f09323ee88 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -10,11 +10,6 @@ * * CPUs are exported via sysfs in the class/cpu/devices/ * directory. - * - * Per-cpu interfaces can be implemented using a struct device_interface. - * See the following for how to do this: - * - drivers/base/intf.c - * - Documentation/driver-model/interface.txt */ #ifndef _LINUX_CPU_H_ #define _LINUX_CPU_H_ diff --git a/include/linux/node.h b/include/linux/node.h index 06292dac3ea..1466945cc9e 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -10,11 +10,6 @@ * * Nodes are exported via driverfs in the class/node/devices/ * directory. - * - * Per-node interfaces can be implemented using a struct device_interface. - * See the following for how to do this: - * - drivers/base/intf.c - * - Documentation/driver-model/interface.txt */ #ifndef _LINUX_NODE_H_ #define _LINUX_NODE_H_ -- cgit v1.2.3-70-g09d2 From 318af55ddd38bdaaa2b57f5c3bd394f3ce3a2610 Mon Sep 17 00:00:00 2001 From: "Hans J. Koch" Date: Sat, 30 Oct 2010 00:36:47 +0200 Subject: uio: Change mail address of Hans J. Koch My old mail address doesn't exist anymore. This changes all occurrences to my new address. Signed-off-by: Hans J. Koch Signed-off-by: Greg Kroah-Hartman --- drivers/uio/uio.c | 2 +- drivers/uio/uio_cif.c | 2 +- drivers/uio/uio_netx.c | 2 +- include/linux/uio_driver.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index a858d2b87b9..51fe1795d5a 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -3,7 +3,7 @@ * * Copyright(C) 2005, Benedikt Spranger * Copyright(C) 2005, Thomas Gleixner - * Copyright(C) 2006, Hans J. Koch + * Copyright(C) 2006, Hans J. Koch * Copyright(C) 2006, Greg Kroah-Hartman * * Userspace IO diff --git a/drivers/uio/uio_cif.c b/drivers/uio/uio_cif.c index a8ea2f19a0c..a84a451159e 100644 --- a/drivers/uio/uio_cif.c +++ b/drivers/uio/uio_cif.c @@ -1,7 +1,7 @@ /* * UIO Hilscher CIF card driver * - * (C) 2007 Hans J. Koch + * (C) 2007 Hans J. Koch * Original code (C) 2005 Benedikt Spranger * * Licensed under GPL version 2 only. diff --git a/drivers/uio/uio_netx.c b/drivers/uio/uio_netx.c index 5a18e9f7b83..5ffdb483b01 100644 --- a/drivers/uio/uio_netx.c +++ b/drivers/uio/uio_netx.c @@ -2,7 +2,7 @@ * UIO driver for Hilscher NetX based fieldbus cards (cifX, comX). * See http://www.hilscher.com for details. * - * (C) 2007 Hans J. Koch + * (C) 2007 Hans J. Koch * (C) 2008 Manuel Traut * * Licensed under GPL version 2 only. diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h index d6188e5a52d..665517c05ea 100644 --- a/include/linux/uio_driver.h +++ b/include/linux/uio_driver.h @@ -3,7 +3,7 @@ * * Copyright(C) 2005, Benedikt Spranger * Copyright(C) 2005, Thomas Gleixner - * Copyright(C) 2006, Hans J. Koch + * Copyright(C) 2006, Hans J. Koch * Copyright(C) 2006, Greg Kroah-Hartman * * Userspace IO driver. -- cgit v1.2.3-70-g09d2 From 0eadcc09203349b11ca477ec367079b23d32ab91 Mon Sep 17 00:00:00 2001 From: Tatyana Brokhman Date: Mon, 1 Nov 2010 18:18:24 +0200 Subject: usb: USB3.0 ch11 definitions Adding hub SuperSpeed usb definitions as defined by ch10 of the USB3.0 spec. Signed-off-by: Tatyana Brokhman Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/ch11.h | 47 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/usb/hcd.h | 4 ++++ 2 files changed, 51 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/ch11.h b/include/linux/usb/ch11.h index 119194c85d1..10ec0699bea 100644 --- a/include/linux/usb/ch11.h +++ b/include/linux/usb/ch11.h @@ -27,6 +27,13 @@ #define HUB_GET_TT_STATE 10 #define HUB_STOP_TT 11 +/* + * Hub class additional requests defined by USB 3.0 spec + * See USB 3.0 spec Table 10-6 + */ +#define HUB_SET_DEPTH 12 +#define HUB_GET_PORT_ERR_COUNT 13 + /* * Hub Class feature numbers * See USB 2.0 spec Table 11-17 @@ -55,6 +62,20 @@ #define USB_PORT_FEAT_INDICATOR 22 #define USB_PORT_FEAT_C_PORT_L1 23 +/* + * Port feature selectors added by USB 3.0 spec. + * See USB 3.0 spec Table 10-7 + */ +#define USB_PORT_FEAT_LINK_STATE 5 +#define USB_PORT_FEAT_U1_TIMEOUT 23 +#define USB_PORT_FEAT_U2_TIMEOUT 24 +#define USB_PORT_FEAT_C_LINK_STATE 25 +#define USB_PORT_FEAT_C_CONFIG_ERR 26 +#define USB_PORT_FEAT_REMOTE_WAKE_MASK 27 +#define USB_PORT_FEAT_BH_PORT_RESET 28 +#define USB_PORT_FEAT_C_BH_PORT_RESET 29 +#define USB_PORT_FEAT_FORCE_LINKPM_ACCEPT 30 + /* * Hub Status and Hub Change results * See USB 2.0 spec Table 11-19 and Table 11-20 @@ -83,6 +104,32 @@ struct usb_port_status { /* bits 13 to 15 are reserved */ #define USB_PORT_STAT_SUPER_SPEED 0x8000 /* Linux-internal */ +/* + * Additions to wPortStatus bit field from USB 3.0 + * See USB 3.0 spec Table 10-10 + */ +#define USB_PORT_STAT_LINK_STATE 0x01e0 +#define USB_SS_PORT_STAT_POWER 0x0200 +#define USB_PORT_STAT_SPEED_5GBPS 0x0000 +/* Valid only if port is enabled */ + +/* + * Definitions for PORT_LINK_STATE values + * (bits 5-8) in wPortStatus + */ +#define USB_SS_PORT_LS_U0 0x0000 +#define USB_SS_PORT_LS_U1 0x0020 +#define USB_SS_PORT_LS_U2 0x0040 +#define USB_SS_PORT_LS_U3 0x0060 +#define USB_SS_PORT_LS_SS_DISABLED 0x0080 +#define USB_SS_PORT_LS_RX_DETECT 0x00a0 +#define USB_SS_PORT_LS_SS_INACTIVE 0x00c0 +#define USB_SS_PORT_LS_POLLING 0x00e0 +#define USB_SS_PORT_LS_RECOVERY 0x0100 +#define USB_SS_PORT_LS_HOT_RESET 0x0120 +#define USB_SS_PORT_LS_COMP_MOD 0x0140 +#define USB_SS_PORT_LS_LOOPBACK 0x0160 + /* * wPortChange bit field * See USB 2.0 spec Table 11-22 diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 0b6e751ea0b..dd6ee49a084 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -471,6 +471,10 @@ extern void usb_ep0_reinit(struct usb_device *); /*-------------------------------------------------------------------------*/ +/* class requests from USB 3.0 hub spec, table 10-5 */ +#define SetHubDepth (0x3000 | HUB_SET_DEPTH) +#define GetPortErrorCount (0x8000 | HUB_GET_PORT_ERR_COUNT) + /* * Generic bandwidth allocation constants/support */ -- cgit v1.2.3-70-g09d2 From af3b8881f4c9852eefe9c7f1a997b3ecf580561b Mon Sep 17 00:00:00 2001 From: Russ Gorby Date: Tue, 26 Oct 2010 14:13:52 +0100 Subject: ifx6x60: SPI protocol driver for Infineon 6x60 modem Prototype driver for the IFX6x60 series of SPI attached modems by Jim Stanley and Russ Gorby Signed-off-by: Russ Gorby [Some reworking and a major cleanup] Signed-off-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- drivers/serial/Kconfig | 6 + drivers/serial/Makefile | 1 + drivers/serial/ifx6x60.c | 1402 +++++++++++++++++++++++++++++++++++++++++ drivers/serial/ifx6x60.h | 129 ++++ include/linux/spi/ifx_modem.h | 14 + 5 files changed, 1552 insertions(+) create mode 100644 drivers/serial/ifx6x60.c create mode 100644 drivers/serial/ifx6x60.h create mode 100644 include/linux/spi/ifx_modem.h (limited to 'include/linux') diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig index aff9dcd051c..0b9cc17b380 100644 --- a/drivers/serial/Kconfig +++ b/drivers/serial/Kconfig @@ -1632,4 +1632,10 @@ config SERIAL_ALTERA_UART_CONSOLE help Enable a Altera UART port to be the system console. +config SERIAL_IFX6X60 + tristate "SPI protocol driver for Infineon 6x60 modem (EXPERIMENTAL)" + depends on GPIOLIB && EXPERIMENTAL + help + Support for the IFX6x60 modem devices on Intel MID platforms. + endmenu diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile index c5705765454..783638b1069 100644 --- a/drivers/serial/Makefile +++ b/drivers/serial/Makefile @@ -89,3 +89,4 @@ obj-$(CONFIG_SERIAL_ALTERA_UART) += altera_uart.o obj-$(CONFIG_SERIAL_MRST_MAX3110) += mrst_max3110.o obj-$(CONFIG_SERIAL_MFD_HSU) += mfd.o obj-$(CONFIG_SERIAL_OMAP) += omap-serial.o +obj-$(CONFIG_SERIAL_IFX6X60) += ifx6x60.o diff --git a/drivers/serial/ifx6x60.c b/drivers/serial/ifx6x60.c new file mode 100644 index 00000000000..b9b7e060196 --- /dev/null +++ b/drivers/serial/ifx6x60.c @@ -0,0 +1,1402 @@ +/**************************************************************************** + * + * Driver for the IFX 6x60 spi modem. + * + * Copyright (C) 2008 Option International + * Copyright (C) 2008 Filip Aben + * Denis Joseph Barrow + * Jan Dumon + * + * Copyright (C) 2009, 2010 Intel Corp + * Russ Gorby + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, + * USA + * + * Driver modified by Intel from Option gtm501l_spi.c + * + * Notes + * o The driver currently assumes a single device only. If you need to + * change this then look for saved_ifx_dev and add a device lookup + * o The driver is intended to be big-endian safe but has never been + * tested that way (no suitable hardware). There are a couple of FIXME + * notes by areas that may need addressing + * o Some of the GPIO naming/setup assumptions may need revisiting if + * you need to use this driver for another platform. + * + *****************************************************************************/ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ifx6x60.h" + +#define IFX_SPI_MORE_MASK 0x10 +#define IFX_SPI_MORE_BIT 12 /* bit position in u16 */ +#define IFX_SPI_CTS_BIT 13 /* bit position in u16 */ +#define IFX_SPI_TTY_ID 0 +#define IFX_SPI_TIMEOUT_SEC 2 +#define IFX_SPI_HEADER_0 (-1) +#define IFX_SPI_HEADER_F (-2) + +/* forward reference */ +static void ifx_spi_handle_srdy(struct ifx_spi_device *ifx_dev); + +/* local variables */ +static int spi_b16 = 1; /* 8 or 16 bit word length */ +static struct tty_driver *tty_drv; +static struct ifx_spi_device *saved_ifx_dev; +static struct lock_class_key ifx_spi_key; + +/* GPIO/GPE settings */ + +/** + * mrdy_set_high - set MRDY GPIO + * @ifx: device we are controlling + * + */ +static inline void mrdy_set_high(struct ifx_spi_device *ifx) +{ + gpio_set_value(ifx->gpio.mrdy, 1); +} + +/** + * mrdy_set_low - clear MRDY GPIO + * @ifx: device we are controlling + * + */ +static inline void mrdy_set_low(struct ifx_spi_device *ifx) +{ + gpio_set_value(ifx->gpio.mrdy, 0); +} + +/** + * ifx_spi_power_state_set + * @ifx_dev: our SPI device + * @val: bits to set + * + * Set bit in power status and signal power system if status becomes non-0 + */ +static void +ifx_spi_power_state_set(struct ifx_spi_device *ifx_dev, unsigned char val) +{ + unsigned long flags; + + spin_lock_irqsave(&ifx_dev->power_lock, flags); + + /* + * if power status is already non-0, just update, else + * tell power system + */ + if (!ifx_dev->power_status) + pm_runtime_get(&ifx_dev->spi_dev->dev); + ifx_dev->power_status |= val; + + spin_unlock_irqrestore(&ifx_dev->power_lock, flags); +} + +/** + * ifx_spi_power_state_clear - clear power bit + * @ifx_dev: our SPI device + * @val: bits to clear + * + * clear bit in power status and signal power system if status becomes 0 + */ +static void +ifx_spi_power_state_clear(struct ifx_spi_device *ifx_dev, unsigned char val) +{ + unsigned long flags; + + spin_lock_irqsave(&ifx_dev->power_lock, flags); + + if (ifx_dev->power_status) { + ifx_dev->power_status &= ~val; + if (!ifx_dev->power_status) + pm_runtime_put(&ifx_dev->spi_dev->dev); + } + + spin_unlock_irqrestore(&ifx_dev->power_lock, flags); +} + +/** + * swap_buf + * @buf: our buffer + * @len : number of bytes (not words) in the buffer + * @end: end of buffer + * + * Swap the contents of a buffer into big endian format + */ +static inline void swap_buf(u16 *buf, int len, void *end) +{ + int n; + + len = ((len + 1) >> 1); + if ((void *)&buf[len] > end) { + pr_err("swap_buf: swap exceeds boundary (%p > %p)!", + &buf[len], end); + return; + } + for (n = 0; n < len; n++) { + *buf = cpu_to_be16(*buf); + buf++; + } +} + +/** + * mrdy_assert - assert MRDY line + * @ifx_dev: our SPI device + * + * Assert mrdy and set timer to wait for SRDY interrupt, if SRDY is low + * now. + * + * FIXME: Can SRDY even go high as we are running this code ? + */ +static void mrdy_assert(struct ifx_spi_device *ifx_dev) +{ + int val = gpio_get_value(ifx_dev->gpio.srdy); + if (!val) { + if (!test_and_set_bit(IFX_SPI_STATE_TIMER_PENDING, + &ifx_dev->flags)) { + ifx_dev->spi_timer.expires = + jiffies + IFX_SPI_TIMEOUT_SEC*HZ; + add_timer(&ifx_dev->spi_timer); + + } + } + ifx_spi_power_state_set(ifx_dev, IFX_SPI_POWER_DATA_PENDING); + mrdy_set_high(ifx_dev); +} + +/** + * ifx_spi_hangup - hang up an IFX device + * @ifx_dev: our SPI device + * + * Hang up the tty attached to the IFX device if one is currently + * open. If not take no action + */ +static void ifx_spi_ttyhangup(struct ifx_spi_device *ifx_dev) +{ + struct tty_port *pport = &ifx_dev->tty_port; + struct tty_struct *tty = tty_port_tty_get(pport); + if (tty) { + tty_hangup(tty); + tty_kref_put(tty); + } +} + +/** + * ifx_spi_timeout - SPI timeout + * @arg: our SPI device + * + * The SPI has timed out: hang up the tty. Users will then see a hangup + * and error events. + */ +static void ifx_spi_timeout(unsigned long arg) +{ + struct ifx_spi_device *ifx_dev = (struct ifx_spi_device *)arg; + + dev_warn(&ifx_dev->spi_dev->dev, "*** SPI Timeout ***"); + ifx_spi_ttyhangup(ifx_dev); + mrdy_set_low(ifx_dev); + clear_bit(IFX_SPI_STATE_TIMER_PENDING, &ifx_dev->flags); +} + +/* char/tty operations */ + +/** + * ifx_spi_tiocmget - get modem lines + * @tty: our tty device + * @filp: file handle issuing the request + * + * Map the signal state into Linux modem flags and report the value + * in Linux terms + */ +static int ifx_spi_tiocmget(struct tty_struct *tty, struct file *filp) +{ + unsigned int value; + struct ifx_spi_device *ifx_dev = tty->driver_data; + + value = + (test_bit(IFX_SPI_RTS, &ifx_dev->signal_state) ? TIOCM_RTS : 0) | + (test_bit(IFX_SPI_DTR, &ifx_dev->signal_state) ? TIOCM_DTR : 0) | + (test_bit(IFX_SPI_CTS, &ifx_dev->signal_state) ? TIOCM_CTS : 0) | + (test_bit(IFX_SPI_DSR, &ifx_dev->signal_state) ? TIOCM_DSR : 0) | + (test_bit(IFX_SPI_DCD, &ifx_dev->signal_state) ? TIOCM_CAR : 0) | + (test_bit(IFX_SPI_RI, &ifx_dev->signal_state) ? TIOCM_RNG : 0); + return value; +} + +/** + * ifx_spi_tiocmset - set modem bits + * @tty: the tty structure + * @filp: file handle issuing the request + * @set: bits to set + * @clear: bits to clear + * + * The IFX6x60 only supports DTR and RTS. Set them accordingly + * and flag that an update to the modem is needed. + * + * FIXME: do we need to kick the tranfers when we do this ? + */ +static int ifx_spi_tiocmset(struct tty_struct *tty, struct file *filp, + unsigned int set, unsigned int clear) +{ + struct ifx_spi_device *ifx_dev = tty->driver_data; + + if (set & TIOCM_RTS) + set_bit(IFX_SPI_RTS, &ifx_dev->signal_state); + if (set & TIOCM_DTR) + set_bit(IFX_SPI_DTR, &ifx_dev->signal_state); + if (clear & TIOCM_RTS) + clear_bit(IFX_SPI_RTS, &ifx_dev->signal_state); + if (clear & TIOCM_DTR) + clear_bit(IFX_SPI_DTR, &ifx_dev->signal_state); + + set_bit(IFX_SPI_UPDATE, &ifx_dev->signal_state); + return 0; +} + +/** + * ifx_spi_open - called on tty open + * @tty: our tty device + * @filp: file handle being associated with the tty + * + * Open the tty interface. We let the tty_port layer do all the work + * for us. + * + * FIXME: Remove single device assumption and saved_ifx_dev + */ +static int ifx_spi_open(struct tty_struct *tty, struct file *filp) +{ + return tty_port_open(&saved_ifx_dev->tty_port, tty, filp); +} + +/** + * ifx_spi_close - called when our tty closes + * @tty: the tty being closed + * @filp: the file handle being closed + * + * Perform the close of the tty. We use the tty_port layer to do all + * our hard work. + */ +static void ifx_spi_close(struct tty_struct *tty, struct file *filp) +{ + struct ifx_spi_device *ifx_dev = tty->driver_data; + tty_port_close(&ifx_dev->tty_port, tty, filp); + /* FIXME: should we do an ifx_spi_reset here ? */ +} + +/** + * ifx_decode_spi_header - decode received header + * @buffer: the received data + * @length: decoded length + * @more: decoded more flag + * @received_cts: status of cts we received + * + * Note how received_cts is handled -- if header is all F it is left + * the same as it was, if header is all 0 it is set to 0 otherwise it is + * taken from the incoming header. + * + * FIXME: endianness + */ +static int ifx_spi_decode_spi_header(unsigned char *buffer, int *length, + unsigned char *more, unsigned char *received_cts) +{ + u16 h1; + u16 h2; + u16 *in_buffer = (u16 *)buffer; + + h1 = *in_buffer; + h2 = *(in_buffer+1); + + if (h1 == 0 && h2 == 0) { + *received_cts = 0; + return IFX_SPI_HEADER_0; + } else if (h1 == 0xffff && h2 == 0xffff) { + /* spi_slave_cts remains as it was */ + return IFX_SPI_HEADER_F; + } + + *length = h1 & 0xfff; /* upper bits of byte are flags */ + *more = (buffer[1] >> IFX_SPI_MORE_BIT) & 1; + *received_cts = (buffer[3] >> IFX_SPI_CTS_BIT) & 1; + return 0; +} + +/** + * ifx_setup_spi_header - set header fields + * @txbuffer: pointer to start of SPI buffer + * @tx_count: bytes + * @more: indicate if more to follow + * + * Format up an SPI header for a transfer + * + * FIXME: endianness? + */ +static void ifx_spi_setup_spi_header(unsigned char *txbuffer, int tx_count, + unsigned char more) +{ + *(u16 *)(txbuffer) = tx_count; + *(u16 *)(txbuffer+2) = IFX_SPI_PAYLOAD_SIZE; + txbuffer[1] |= (more << IFX_SPI_MORE_BIT) & IFX_SPI_MORE_MASK; +} + +/** + * ifx_spi_wakeup_serial - SPI space made + * @port_data: our SPI device + * + * We have emptied the FIFO enough that we want to get more data + * queued into it. Poke the line discipline via tty_wakeup so that + * it will feed us more bits + */ +static void ifx_spi_wakeup_serial(struct ifx_spi_device *ifx_dev) +{ + struct tty_struct *tty; + + tty = tty_port_tty_get(&ifx_dev->tty_port); + if (!tty) + return; + tty_wakeup(tty); + tty_kref_put(tty); +} + +/** + * ifx_spi_prepare_tx_buffer - prepare transmit frame + * @ifx_dev: our SPI device + * + * The transmit buffr needs a header and various other bits of + * information followed by as much data as we can pull from the FIFO + * and transfer. This function formats up a suitable buffer in the + * ifx_dev->tx_buffer + * + * FIXME: performance - should we wake the tty when the queue is half + * empty ? + */ +static int ifx_spi_prepare_tx_buffer(struct ifx_spi_device *ifx_dev) +{ + int temp_count; + int queue_length; + int tx_count; + unsigned char *tx_buffer; + + tx_buffer = ifx_dev->tx_buffer; + memset(tx_buffer, 0, IFX_SPI_TRANSFER_SIZE); + + /* make room for required SPI header */ + tx_buffer += IFX_SPI_HEADER_OVERHEAD; + tx_count = IFX_SPI_HEADER_OVERHEAD; + + /* clear to signal no more data if this turns out to be the + * last buffer sent in a sequence */ + ifx_dev->spi_more = 0; + + /* if modem cts is set, just send empty buffer */ + if (!ifx_dev->spi_slave_cts) { + /* see if there's tx data */ + queue_length = kfifo_len(&ifx_dev->tx_fifo); + if (queue_length != 0) { + /* data to mux -- see if there's room for it */ + temp_count = min(queue_length, IFX_SPI_PAYLOAD_SIZE); + temp_count = kfifo_out_locked(&ifx_dev->tx_fifo, + tx_buffer, temp_count, + &ifx_dev->fifo_lock); + + /* update buffer pointer and data count in message */ + tx_buffer += temp_count; + tx_count += temp_count; + if (temp_count == queue_length) + /* poke port to get more data */ + ifx_spi_wakeup_serial(ifx_dev); + else /* more data in port, use next SPI message */ + ifx_dev->spi_more = 1; + } + } + /* have data and info for header -- set up SPI header in buffer */ + /* spi header needs payload size, not entire buffer size */ + ifx_spi_setup_spi_header(ifx_dev->tx_buffer, + tx_count-IFX_SPI_HEADER_OVERHEAD, + ifx_dev->spi_more); + /* swap actual data in the buffer */ + swap_buf((u16 *)(ifx_dev->tx_buffer), tx_count, + &ifx_dev->tx_buffer[IFX_SPI_TRANSFER_SIZE]); + return tx_count; +} + +/** + * ifx_spi_write - line discipline write + * @tty: our tty device + * @buf: pointer to buffer to write (kernel space) + * @count: size of buffer + * + * Write the characters we have been given into the FIFO. If the device + * is not active then activate it, when the SRDY line is asserted back + * this will commence I/O + */ +static int ifx_spi_write(struct tty_struct *tty, const unsigned char *buf, + int count) +{ + struct ifx_spi_device *ifx_dev = tty->driver_data; + unsigned char *tmp_buf = (unsigned char *)buf; + int tx_count = kfifo_in_locked(&ifx_dev->tx_fifo, tmp_buf, count, + &ifx_dev->fifo_lock); + mrdy_assert(ifx_dev); + return tx_count; +} + +/** + * ifx_spi_chars_in_buffer - line discipline helper + * @tty: our tty device + * + * Report how much data we can accept before we drop bytes. As we use + * a simple FIFO this is nice and easy. + */ +static int ifx_spi_write_room(struct tty_struct *tty) +{ + struct ifx_spi_device *ifx_dev = tty->driver_data; + return IFX_SPI_FIFO_SIZE - kfifo_len(&ifx_dev->tx_fifo); +} + +/** + * ifx_spi_chars_in_buffer - line discipline helper + * @tty: our tty device + * + * Report how many characters we have buffered. In our case this is the + * number of bytes sitting in our transmit FIFO. + */ +static int ifx_spi_chars_in_buffer(struct tty_struct *tty) +{ + struct ifx_spi_device *ifx_dev = tty->driver_data; + return kfifo_len(&ifx_dev->tx_fifo); +} + +/** + * ifx_port_hangup + * @port: our tty port + * + * tty port hang up. Called when tty_hangup processing is invoked either + * by loss of carrier, or by software (eg vhangup). Serialized against + * activate/shutdown by the tty layer. + */ +static void ifx_spi_hangup(struct tty_struct *tty) +{ + struct ifx_spi_device *ifx_dev = tty->driver_data; + tty_port_hangup(&ifx_dev->tty_port); +} + +/** + * ifx_port_activate + * @port: our tty port + * + * tty port activate method - called for first open. Serialized + * with hangup and shutdown by the tty layer. + */ +static int ifx_port_activate(struct tty_port *port, struct tty_struct *tty) +{ + struct ifx_spi_device *ifx_dev = + container_of(port, struct ifx_spi_device, tty_port); + + /* clear any old data; can't do this in 'close' */ + kfifo_reset(&ifx_dev->tx_fifo); + + /* put port data into this tty */ + tty->driver_data = ifx_dev; + + /* allows flip string push from int context */ + tty->low_latency = 1; + + return 0; +} + +/** + * ifx_port_shutdown + * @port: our tty port + * + * tty port shutdown method - called for last port close. Serialized + * with hangup and activate by the tty layer. + */ +static void ifx_port_shutdown(struct tty_port *port) +{ + struct ifx_spi_device *ifx_dev = + container_of(port, struct ifx_spi_device, tty_port); + + mrdy_set_low(ifx_dev); + clear_bit(IFX_SPI_STATE_TIMER_PENDING, &ifx_dev->flags); + tasklet_kill(&ifx_dev->io_work_tasklet); +} + +static const struct tty_port_operations ifx_tty_port_ops = { + .activate = ifx_port_activate, + .shutdown = ifx_port_shutdown, +}; + +static const struct tty_operations ifx_spi_serial_ops = { + .open = ifx_spi_open, + .close = ifx_spi_close, + .write = ifx_spi_write, + .hangup = ifx_spi_hangup, + .write_room = ifx_spi_write_room, + .chars_in_buffer = ifx_spi_chars_in_buffer, + .tiocmget = ifx_spi_tiocmget, + .tiocmset = ifx_spi_tiocmset, +}; + +/** + * ifx_spi_insert_fip_string - queue received data + * @ifx_ser: our SPI device + * @chars: buffer we have received + * @size: number of chars reeived + * + * Queue bytes to the tty assuming the tty side is currently open. If + * not the discard the data. + */ +static void ifx_spi_insert_flip_string(struct ifx_spi_device *ifx_dev, + unsigned char *chars, size_t size) +{ + struct tty_struct *tty = tty_port_tty_get(&ifx_dev->tty_port); + if (!tty) + return; + tty_insert_flip_string(tty, chars, size); + tty_flip_buffer_push(tty); + tty_kref_put(tty); +} + +/** + * ifx_spi_complete - SPI transfer completed + * @ctx: our SPI device + * + * An SPI transfer has completed. Process any received data and kick off + * any further transmits we can commence. + */ +static void ifx_spi_complete(void *ctx) +{ + struct ifx_spi_device *ifx_dev = ctx; + struct tty_struct *tty; + struct tty_ldisc *ldisc = NULL; + int length; + int actual_length; + unsigned char more; + unsigned char cts; + int local_write_pending = 0; + int queue_length; + int srdy; + int decode_result; + + mrdy_set_low(ifx_dev); + + if (!ifx_dev->spi_msg.status) { + /* check header validity, get comm flags */ + swap_buf((u16 *)ifx_dev->rx_buffer, IFX_SPI_HEADER_OVERHEAD, + &ifx_dev->rx_buffer[IFX_SPI_HEADER_OVERHEAD]); + decode_result = ifx_spi_decode_spi_header(ifx_dev->rx_buffer, + &length, &more, &cts); + if (decode_result == IFX_SPI_HEADER_0) { + dev_dbg(&ifx_dev->spi_dev->dev, + "ignore input: invalid header 0"); + ifx_dev->spi_slave_cts = 0; + goto complete_exit; + } else if (decode_result == IFX_SPI_HEADER_F) { + dev_dbg(&ifx_dev->spi_dev->dev, + "ignore input: invalid header F"); + goto complete_exit; + } + + ifx_dev->spi_slave_cts = cts; + + actual_length = min((unsigned int)length, + ifx_dev->spi_msg.actual_length); + swap_buf((u16 *)(ifx_dev->rx_buffer + IFX_SPI_HEADER_OVERHEAD), + actual_length, + &ifx_dev->rx_buffer[IFX_SPI_TRANSFER_SIZE]); + ifx_spi_insert_flip_string( + ifx_dev, + ifx_dev->rx_buffer + IFX_SPI_HEADER_OVERHEAD, + (size_t)actual_length); + } else { + dev_dbg(&ifx_dev->spi_dev->dev, "SPI transfer error %d", + ifx_dev->spi_msg.status); + } + +complete_exit: + if (ifx_dev->write_pending) { + ifx_dev->write_pending = 0; + local_write_pending = 1; + } + + clear_bit(IFX_SPI_STATE_IO_IN_PROGRESS, &(ifx_dev->flags)); + + queue_length = kfifo_len(&ifx_dev->tx_fifo); + srdy = gpio_get_value(ifx_dev->gpio.srdy); + if (!srdy) + ifx_spi_power_state_clear(ifx_dev, IFX_SPI_POWER_SRDY); + + /* schedule output if there is more to do */ + if (test_and_clear_bit(IFX_SPI_STATE_IO_READY, &ifx_dev->flags)) + tasklet_schedule(&ifx_dev->io_work_tasklet); + else { + if (more || ifx_dev->spi_more || queue_length > 0 || + local_write_pending) { + if (ifx_dev->spi_slave_cts) { + if (more) + mrdy_assert(ifx_dev); + } else + mrdy_assert(ifx_dev); + } else { + /* + * poke line discipline driver if any for more data + * may or may not get more data to write + * for now, say not busy + */ + ifx_spi_power_state_clear(ifx_dev, + IFX_SPI_POWER_DATA_PENDING); + tty = tty_port_tty_get(&ifx_dev->tty_port); + if (tty) { + ldisc = tty_ldisc_ref(tty); + if (ldisc) { + ldisc->ops->write_wakeup(tty); + tty_ldisc_deref(ldisc); + } + tty_kref_put(tty); + } + } + } +} + +/** + * ifx_spio_io - I/O tasklet + * @data: our SPI device + * + * Queue data for transmission if possible and then kick off the + * transfer. + */ +static void ifx_spi_io(unsigned long data) +{ + int retval; + struct ifx_spi_device *ifx_dev = (struct ifx_spi_device *) data; + + if (!test_and_set_bit(IFX_SPI_STATE_IO_IN_PROGRESS, &ifx_dev->flags)) { + if (ifx_dev->gpio.unack_srdy_int_nb > 0) + ifx_dev->gpio.unack_srdy_int_nb--; + + ifx_spi_prepare_tx_buffer(ifx_dev); + + spi_message_init(&ifx_dev->spi_msg); + INIT_LIST_HEAD(&ifx_dev->spi_msg.queue); + + ifx_dev->spi_msg.context = ifx_dev; + ifx_dev->spi_msg.complete = ifx_spi_complete; + + /* set up our spi transfer */ + /* note len is BYTES, not transfers */ + ifx_dev->spi_xfer.len = IFX_SPI_TRANSFER_SIZE; + ifx_dev->spi_xfer.cs_change = 0; + ifx_dev->spi_xfer.speed_hz = 12500000; + /* ifx_dev->spi_xfer.speed_hz = 390625; */ + ifx_dev->spi_xfer.bits_per_word = spi_b16 ? 16 : 8; + + ifx_dev->spi_xfer.tx_buf = ifx_dev->tx_buffer; + ifx_dev->spi_xfer.rx_buf = ifx_dev->rx_buffer; + + /* + * setup dma pointers + */ + if (ifx_dev->is_6160) { + ifx_dev->spi_msg.is_dma_mapped = 1; + ifx_dev->tx_dma = ifx_dev->tx_bus; + ifx_dev->rx_dma = ifx_dev->rx_bus; + ifx_dev->spi_xfer.tx_dma = ifx_dev->tx_dma; + ifx_dev->spi_xfer.rx_dma = ifx_dev->rx_dma; + } else { + ifx_dev->spi_msg.is_dma_mapped = 0; + ifx_dev->tx_dma = (dma_addr_t)0; + ifx_dev->rx_dma = (dma_addr_t)0; + ifx_dev->spi_xfer.tx_dma = (dma_addr_t)0; + ifx_dev->spi_xfer.rx_dma = (dma_addr_t)0; + } + + spi_message_add_tail(&ifx_dev->spi_xfer, &ifx_dev->spi_msg); + + /* Assert MRDY. This may have already been done by the write + * routine. + */ + mrdy_assert(ifx_dev); + + retval = spi_async(ifx_dev->spi_dev, &ifx_dev->spi_msg); + if (retval) { + clear_bit(IFX_SPI_STATE_IO_IN_PROGRESS, + &ifx_dev->flags); + tasklet_schedule(&ifx_dev->io_work_tasklet); + return; + } + } else + ifx_dev->write_pending = 1; +} + +/** + * ifx_spi_free_port - free up the tty side + * @ifx_dev: IFX device going away + * + * Unregister and free up a port when the device goes away + */ +static void ifx_spi_free_port(struct ifx_spi_device *ifx_dev) +{ + if (ifx_dev->tty_dev) + tty_unregister_device(tty_drv, ifx_dev->minor); + kfifo_free(&ifx_dev->tx_fifo); +} + +/** + * ifx_spi_create_port - create a new port + * @ifx_dev: our spi device + * + * Allocate and initialise the tty port that goes with this interface + * and add it to the tty layer so that it can be opened. + */ +static int ifx_spi_create_port(struct ifx_spi_device *ifx_dev) +{ + int ret = 0; + struct tty_port *pport = &ifx_dev->tty_port; + + spin_lock_init(&ifx_dev->fifo_lock); + lockdep_set_class_and_subclass(&ifx_dev->fifo_lock, + &ifx_spi_key, 0); + + if (kfifo_alloc(&ifx_dev->tx_fifo, IFX_SPI_FIFO_SIZE, GFP_KERNEL)) { + ret = -ENOMEM; + goto error_ret; + } + + pport->ops = &ifx_tty_port_ops; + tty_port_init(pport); + ifx_dev->minor = IFX_SPI_TTY_ID; + ifx_dev->tty_dev = tty_register_device(tty_drv, ifx_dev->minor, + &ifx_dev->spi_dev->dev); + if (IS_ERR(ifx_dev->tty_dev)) { + dev_dbg(&ifx_dev->spi_dev->dev, + "%s: registering tty device failed", __func__); + ret = PTR_ERR(ifx_dev->tty_dev); + goto error_ret; + } + return 0; + +error_ret: + ifx_spi_free_port(ifx_dev); + return ret; +} + +/** + * ifx_spi_handle_srdy - handle SRDY + * @ifx_dev: device asserting SRDY + * + * Check our device state and see what we need to kick off when SRDY + * is asserted. This usually means killing the timer and firing off the + * I/O processing. + */ +static void ifx_spi_handle_srdy(struct ifx_spi_device *ifx_dev) +{ + if (test_bit(IFX_SPI_STATE_TIMER_PENDING, &ifx_dev->flags)) { + del_timer_sync(&ifx_dev->spi_timer); + clear_bit(IFX_SPI_STATE_TIMER_PENDING, &ifx_dev->flags); + } + + ifx_spi_power_state_set(ifx_dev, IFX_SPI_POWER_SRDY); + + if (!test_bit(IFX_SPI_STATE_IO_IN_PROGRESS, &ifx_dev->flags)) + tasklet_schedule(&ifx_dev->io_work_tasklet); + else + set_bit(IFX_SPI_STATE_IO_READY, &ifx_dev->flags); +} + +/** + * ifx_spi_srdy_interrupt - SRDY asserted + * @irq: our IRQ number + * @dev: our ifx device + * + * The modem asserted SRDY. Handle the srdy event + */ +static irqreturn_t ifx_spi_srdy_interrupt(int irq, void *dev) +{ + struct ifx_spi_device *ifx_dev = dev; + ifx_dev->gpio.unack_srdy_int_nb++; + ifx_spi_handle_srdy(ifx_dev); + return IRQ_HANDLED; +} + +/** + * ifx_spi_reset_interrupt - Modem has changed reset state + * @irq: interrupt number + * @dev: our device pointer + * + * The modem has either entered or left reset state. Check the GPIO + * line to see which. + * + * FIXME: review locking on MR_INPROGRESS versus + * parallel unsolicited reset/solicited reset + */ +static irqreturn_t ifx_spi_reset_interrupt(int irq, void *dev) +{ + struct ifx_spi_device *ifx_dev = dev; + int val = gpio_get_value(ifx_dev->gpio.reset_out); + int solreset = test_bit(MR_START, &ifx_dev->mdm_reset_state); + + if (val == 0) { + /* entered reset */ + set_bit(MR_INPROGRESS, &ifx_dev->mdm_reset_state); + if (!solreset) { + /* unsolicited reset */ + ifx_spi_ttyhangup(ifx_dev); + } + } else { + /* exited reset */ + clear_bit(MR_INPROGRESS, &ifx_dev->mdm_reset_state); + if (solreset) { + set_bit(MR_COMPLETE, &ifx_dev->mdm_reset_state); + wake_up(&ifx_dev->mdm_reset_wait); + } + } + return IRQ_HANDLED; +} + +/** + * ifx_spi_free_device - free device + * @ifx_dev: device to free + * + * Free the IFX device + */ +static void ifx_spi_free_device(struct ifx_spi_device *ifx_dev) +{ + ifx_spi_free_port(ifx_dev); + dma_free_coherent(&ifx_dev->spi_dev->dev, + IFX_SPI_TRANSFER_SIZE, + ifx_dev->tx_buffer, + ifx_dev->tx_bus); + dma_free_coherent(&ifx_dev->spi_dev->dev, + IFX_SPI_TRANSFER_SIZE, + ifx_dev->rx_buffer, + ifx_dev->rx_bus); +} + +/** + * ifx_spi_reset - reset modem + * @ifx_dev: modem to reset + * + * Perform a reset on the modem + */ +static int ifx_spi_reset(struct ifx_spi_device *ifx_dev) +{ + int ret; + /* + * set up modem power, reset + * + * delays are required on some platforms for the modem + * to reset properly + */ + set_bit(MR_START, &ifx_dev->mdm_reset_state); + gpio_set_value(ifx_dev->gpio.po, 0); + gpio_set_value(ifx_dev->gpio.reset, 0); + msleep(25); + gpio_set_value(ifx_dev->gpio.reset, 1); + msleep(1); + gpio_set_value(ifx_dev->gpio.po, 1); + msleep(1); + gpio_set_value(ifx_dev->gpio.po, 0); + ret = wait_event_timeout(ifx_dev->mdm_reset_wait, + test_bit(MR_COMPLETE, + &ifx_dev->mdm_reset_state), + IFX_RESET_TIMEOUT); + if (!ret) + dev_warn(&ifx_dev->spi_dev->dev, "Modem reset timeout: (state:%lx)", + ifx_dev->mdm_reset_state); + + ifx_dev->mdm_reset_state = 0; + return ret; +} + +/** + * ifx_spi_spi_probe - probe callback + * @spi: our possible matching SPI device + * + * Probe for a 6x60 modem on SPI bus. Perform any needed device and + * GPIO setup. + * + * FIXME: + * - Support for multiple devices + * - Split out MID specific GPIO handling eventually + */ + +static int ifx_spi_spi_probe(struct spi_device *spi) +{ + int ret; + int srdy; + struct ifx_modem_platform_data *pl_data = NULL; + struct ifx_spi_device *ifx_dev; + + if (saved_ifx_dev) { + dev_dbg(&spi->dev, "ignoring subsequent detection"); + return -ENODEV; + } + + /* initialize structure to hold our device variables */ + ifx_dev = kzalloc(sizeof(struct ifx_spi_device), GFP_KERNEL); + if (!ifx_dev) { + dev_err(&spi->dev, "spi device allocation failed"); + return -ENOMEM; + } + saved_ifx_dev = ifx_dev; + ifx_dev->spi_dev = spi; + clear_bit(IFX_SPI_STATE_IO_IN_PROGRESS, &ifx_dev->flags); + spin_lock_init(&ifx_dev->write_lock); + spin_lock_init(&ifx_dev->power_lock); + ifx_dev->power_status = 0; + init_timer(&ifx_dev->spi_timer); + ifx_dev->spi_timer.function = ifx_spi_timeout; + ifx_dev->spi_timer.data = (unsigned long)ifx_dev; + ifx_dev->is_6160 = pl_data->is_6160; + + /* ensure SPI protocol flags are initialized to enable transfer */ + ifx_dev->spi_more = 0; + ifx_dev->spi_slave_cts = 0; + + /*initialize transfer and dma buffers */ + ifx_dev->tx_buffer = dma_alloc_coherent(&ifx_dev->spi_dev->dev, + IFX_SPI_TRANSFER_SIZE, + &ifx_dev->tx_bus, + GFP_KERNEL); + if (!ifx_dev->tx_buffer) { + dev_err(&spi->dev, "DMA-TX buffer allocation failed"); + ret = -ENOMEM; + goto error_ret; + } + ifx_dev->rx_buffer = dma_alloc_coherent(&ifx_dev->spi_dev->dev, + IFX_SPI_TRANSFER_SIZE, + &ifx_dev->rx_bus, + GFP_KERNEL); + if (!ifx_dev->rx_buffer) { + dev_err(&spi->dev, "DMA-RX buffer allocation failed"); + ret = -ENOMEM; + goto error_ret; + } + + /* initialize waitq for modem reset */ + init_waitqueue_head(&ifx_dev->mdm_reset_wait); + + spi_set_drvdata(spi, ifx_dev); + tasklet_init(&ifx_dev->io_work_tasklet, ifx_spi_io, + (unsigned long)ifx_dev); + + set_bit(IFX_SPI_STATE_PRESENT, &ifx_dev->flags); + + /* create our tty port */ + ret = ifx_spi_create_port(ifx_dev); + if (ret != 0) { + dev_err(&spi->dev, "create default tty port failed"); + goto error_ret; + } + + pl_data = (struct ifx_modem_platform_data *)spi->dev.platform_data; + if (pl_data) { + ifx_dev->gpio.reset = pl_data->rst_pmu; + ifx_dev->gpio.po = pl_data->pwr_on; + ifx_dev->gpio.mrdy = pl_data->mrdy; + ifx_dev->gpio.srdy = pl_data->srdy; + ifx_dev->gpio.reset_out = pl_data->rst_out; + } else { + dev_err(&spi->dev, "missing platform data!"); + ret = -ENODEV; + goto error_ret; + } + + dev_info(&spi->dev, "gpios %d, %d, %d, %d, %d", + ifx_dev->gpio.reset, ifx_dev->gpio.po, ifx_dev->gpio.mrdy, + ifx_dev->gpio.srdy, ifx_dev->gpio.reset_out); + + /* Configure gpios */ + ret = gpio_request(ifx_dev->gpio.reset, "ifxModem"); + if (ret < 0) { + dev_err(&spi->dev, "Unable to allocate GPIO%d (RESET)", + ifx_dev->gpio.reset); + goto error_ret; + } + ret += gpio_direction_output(ifx_dev->gpio.reset, 0); + ret += gpio_export(ifx_dev->gpio.reset, 1); + if (ret) { + dev_err(&spi->dev, "Unable to configure GPIO%d (RESET)", + ifx_dev->gpio.reset); + ret = -EBUSY; + goto error_ret2; + } + + ret = gpio_request(ifx_dev->gpio.po, "ifxModem"); + ret += gpio_direction_output(ifx_dev->gpio.po, 0); + ret += gpio_export(ifx_dev->gpio.po, 1); + if (ret) { + dev_err(&spi->dev, "Unable to configure GPIO%d (ON)", + ifx_dev->gpio.po); + ret = -EBUSY; + goto error_ret3; + } + + ret = gpio_request(ifx_dev->gpio.mrdy, "ifxModem"); + if (ret < 0) { + dev_err(&spi->dev, "Unable to allocate GPIO%d (MRDY)", + ifx_dev->gpio.mrdy); + goto error_ret3; + } + ret += gpio_export(ifx_dev->gpio.mrdy, 1); + ret += gpio_direction_output(ifx_dev->gpio.mrdy, 0); + if (ret) { + dev_err(&spi->dev, "Unable to configure GPIO%d (MRDY)", + ifx_dev->gpio.mrdy); + ret = -EBUSY; + goto error_ret4; + } + + ret = gpio_request(ifx_dev->gpio.srdy, "ifxModem"); + if (ret < 0) { + dev_err(&spi->dev, "Unable to allocate GPIO%d (SRDY)", + ifx_dev->gpio.srdy); + ret = -EBUSY; + goto error_ret4; + } + ret += gpio_export(ifx_dev->gpio.srdy, 1); + ret += gpio_direction_input(ifx_dev->gpio.srdy); + if (ret) { + dev_err(&spi->dev, "Unable to configure GPIO%d (SRDY)", + ifx_dev->gpio.srdy); + ret = -EBUSY; + goto error_ret5; + } + + ret = gpio_request(ifx_dev->gpio.reset_out, "ifxModem"); + if (ret < 0) { + dev_err(&spi->dev, "Unable to allocate GPIO%d (RESET_OUT)", + ifx_dev->gpio.reset_out); + goto error_ret5; + } + ret += gpio_export(ifx_dev->gpio.reset_out, 1); + ret += gpio_direction_input(ifx_dev->gpio.reset_out); + if (ret) { + dev_err(&spi->dev, "Unable to configure GPIO%d (RESET_OUT)", + ifx_dev->gpio.reset_out); + ret = -EBUSY; + goto error_ret6; + } + + ret = request_irq(gpio_to_irq(ifx_dev->gpio.reset_out), + ifx_spi_reset_interrupt, + IRQF_TRIGGER_RISING|IRQF_TRIGGER_FALLING, DRVNAME, + (void *)ifx_dev); + if (ret) { + dev_err(&spi->dev, "Unable to get irq %x\n", + gpio_to_irq(ifx_dev->gpio.reset_out)); + goto error_ret6; + } + + ret = ifx_spi_reset(ifx_dev); + + ret = request_irq(gpio_to_irq(ifx_dev->gpio.srdy), + ifx_spi_srdy_interrupt, + IRQF_TRIGGER_RISING, DRVNAME, + (void *)ifx_dev); + if (ret) { + dev_err(&spi->dev, "Unable to get irq %x", + gpio_to_irq(ifx_dev->gpio.srdy)); + goto error_ret6; + } + + /* set pm runtime power state and register with power system */ + pm_runtime_set_active(&spi->dev); + pm_runtime_enable(&spi->dev); + + /* handle case that modem is already signaling SRDY */ + /* no outgoing tty open at this point, this just satisfies the + * modem's read and should reset communication properly + */ + srdy = gpio_get_value(ifx_dev->gpio.srdy); + + if (srdy) { + mrdy_assert(ifx_dev); + ifx_spi_handle_srdy(ifx_dev); + } else + mrdy_set_low(ifx_dev); + return 0; + +error_ret6: + gpio_free(ifx_dev->gpio.srdy); +error_ret5: + gpio_free(ifx_dev->gpio.mrdy); +error_ret4: + gpio_free(ifx_dev->gpio.reset); +error_ret3: + gpio_free(ifx_dev->gpio.po); +error_ret2: + gpio_free(ifx_dev->gpio.reset_out); +error_ret: + ifx_spi_free_device(ifx_dev); + saved_ifx_dev = NULL; + return ret; +} + +/** + * ifx_spi_spi_remove - SPI device was removed + * @spi: SPI device + * + * FIXME: We should be shutting the device down here not in + * the module unload path. + */ + +static int ifx_spi_spi_remove(struct spi_device *spi) +{ + struct ifx_spi_device *ifx_dev = spi_get_drvdata(spi); + /* stop activity */ + tasklet_kill(&ifx_dev->io_work_tasklet); + /* free irq */ + free_irq(gpio_to_irq(ifx_dev->gpio.reset_out), (void *)ifx_dev); + free_irq(gpio_to_irq(ifx_dev->gpio.srdy), (void *)ifx_dev); + + gpio_free(ifx_dev->gpio.srdy); + gpio_free(ifx_dev->gpio.mrdy); + gpio_free(ifx_dev->gpio.reset); + gpio_free(ifx_dev->gpio.po); + gpio_free(ifx_dev->gpio.reset_out); + + /* free allocations */ + ifx_spi_free_device(ifx_dev); + + saved_ifx_dev = NULL; + return 0; +} + +/** + * ifx_spi_spi_shutdown - called on SPI shutdown + * @spi: SPI device + * + * No action needs to be taken here + */ + +static void ifx_spi_spi_shutdown(struct spi_device *spi) +{ +} + +/* + * various suspends and resumes have nothing to do + * no hardware to save state for + */ + +/** + * ifx_spi_spi_suspend - suspend SPI on system suspend + * @dev: device being suspended + * + * Suspend the SPI side. No action needed on Intel MID platforms, may + * need extending for other systems. + */ +static int ifx_spi_spi_suspend(struct spi_device *spi, pm_message_t msg) +{ + return 0; +} + +/** + * ifx_spi_spi_resume - resume SPI side on system resume + * @dev: device being suspended + * + * Suspend the SPI side. No action needed on Intel MID platforms, may + * need extending for other systems. + */ +static int ifx_spi_spi_resume(struct spi_device *spi) +{ + return 0; +} + +/** + * ifx_spi_pm_suspend - suspend modem on system suspend + * @dev: device being suspended + * + * Suspend the modem. No action needed on Intel MID platforms, may + * need extending for other systems. + */ +static int ifx_spi_pm_suspend(struct device *dev) +{ + return 0; +} + +/** + * ifx_spi_pm_resume - resume modem on system resume + * @dev: device being suspended + * + * Allow the modem to resume. No action needed. + * + * FIXME: do we need to reset anything here ? + */ +static int ifx_spi_pm_resume(struct device *dev) +{ + return 0; +} + +/** + * ifx_spi_pm_runtime_resume - suspend modem + * @dev: device being suspended + * + * Allow the modem to resume. No action needed. + */ +static int ifx_spi_pm_runtime_resume(struct device *dev) +{ + return 0; +} + +/** + * ifx_spi_pm_runtime_suspend - suspend modem + * @dev: device being suspended + * + * Allow the modem to suspend and thus suspend to continue up the + * device tree. + */ +static int ifx_spi_pm_runtime_suspend(struct device *dev) +{ + return 0; +} + +/** + * ifx_spi_pm_runtime_idle - check if modem idle + * @dev: our device + * + * Check conditions and queue runtime suspend if idle. + */ +static int ifx_spi_pm_runtime_idle(struct device *dev) +{ + struct spi_device *spi = to_spi_device(dev); + struct ifx_spi_device *ifx_dev = spi_get_drvdata(spi); + + if (!ifx_dev->power_status) + pm_runtime_suspend(dev); + + return 0; +} + +static const struct dev_pm_ops ifx_spi_pm = { + .resume = ifx_spi_pm_resume, + .suspend = ifx_spi_pm_suspend, + .runtime_resume = ifx_spi_pm_runtime_resume, + .runtime_suspend = ifx_spi_pm_runtime_suspend, + .runtime_idle = ifx_spi_pm_runtime_idle +}; + +static const struct spi_device_id ifx_id_table[] = { + {"ifx6160", 0}, + {"ifx6260", 0}, + { } +}; +MODULE_DEVICE_TABLE(spi, ifx_id_table); + +/* spi operations */ +static const struct spi_driver ifx_spi_driver_6160 = { + .driver = { + .name = "ifx6160", + .bus = &spi_bus_type, + .pm = &ifx_spi_pm, + .owner = THIS_MODULE}, + .probe = ifx_spi_spi_probe, + .shutdown = ifx_spi_spi_shutdown, + .remove = __devexit_p(ifx_spi_spi_remove), + .suspend = ifx_spi_spi_suspend, + .resume = ifx_spi_spi_resume, + .id_table = ifx_id_table +}; + +/** + * ifx_spi_exit - module exit + * + * Unload the module. + */ + +static void __exit ifx_spi_exit(void) +{ + /* unregister */ + tty_unregister_driver(tty_drv); + spi_unregister_driver((void *)&ifx_spi_driver_6160); +} + +/** + * ifx_spi_init - module entry point + * + * Initialise the SPI and tty interfaces for the IFX SPI driver + * We need to initialize upper-edge spi driver after the tty + * driver because otherwise the spi probe will race + */ + +static int __init ifx_spi_init(void) +{ + int result; + + tty_drv = alloc_tty_driver(1); + if (!tty_drv) { + pr_err("%s: alloc_tty_driver failed", DRVNAME); + return -ENOMEM; + } + + tty_drv->magic = TTY_DRIVER_MAGIC; + tty_drv->owner = THIS_MODULE; + tty_drv->driver_name = DRVNAME; + tty_drv->name = TTYNAME; + tty_drv->minor_start = IFX_SPI_TTY_ID; + tty_drv->num = 1; + tty_drv->type = TTY_DRIVER_TYPE_SERIAL; + tty_drv->subtype = SERIAL_TYPE_NORMAL; + tty_drv->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV; + tty_drv->init_termios = tty_std_termios; + + tty_set_operations(tty_drv, &ifx_spi_serial_ops); + + result = tty_register_driver(tty_drv); + if (result) { + pr_err("%s: tty_register_driver failed(%d)", + DRVNAME, result); + return result; + } + + result = spi_register_driver((void *)&ifx_spi_driver_6160); + if (result) { + pr_err("%s: spi_register_driver failed(%d)", + DRVNAME, result); + tty_unregister_driver(tty_drv); + } + return result; +} + +module_init(ifx_spi_init); +module_exit(ifx_spi_exit); + +MODULE_AUTHOR("Intel"); +MODULE_DESCRIPTION("IFX6x60 spi driver"); +MODULE_LICENSE("GPL"); +MODULE_INFO(Version, "0.1-IFX6x60"); diff --git a/drivers/serial/ifx6x60.h b/drivers/serial/ifx6x60.h new file mode 100644 index 00000000000..deb7b8d977d --- /dev/null +++ b/drivers/serial/ifx6x60.h @@ -0,0 +1,129 @@ +/**************************************************************************** + * + * Driver for the IFX spi modem. + * + * Copyright (C) 2009, 2010 Intel Corp + * Jim Stanley + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, + * USA + * + * + * + *****************************************************************************/ +#ifndef _IFX6X60_H +#define _IFX6X60_H + +#define DRVNAME "ifx6x60" +#define TTYNAME "ttyIFX" + +/* #define IFX_THROTTLE_CODE */ + +#define IFX_SPI_MAX_MINORS 1 +#define IFX_SPI_TRANSFER_SIZE 2048 +#define IFX_SPI_FIFO_SIZE 4096 + +#define IFX_SPI_HEADER_OVERHEAD 4 +#define IFX_RESET_TIMEOUT msecs_to_jiffies(50) + +/* device flags bitfield definitions */ +#define IFX_SPI_STATE_PRESENT 0 +#define IFX_SPI_STATE_IO_IN_PROGRESS 1 +#define IFX_SPI_STATE_IO_READY 2 +#define IFX_SPI_STATE_TIMER_PENDING 3 + +/* flow control bitfields */ +#define IFX_SPI_DCD 0 +#define IFX_SPI_CTS 1 +#define IFX_SPI_DSR 2 +#define IFX_SPI_RI 3 +#define IFX_SPI_DTR 4 +#define IFX_SPI_RTS 5 +#define IFX_SPI_TX_FC 6 +#define IFX_SPI_RX_FC 7 +#define IFX_SPI_UPDATE 8 + +#define IFX_SPI_PAYLOAD_SIZE (IFX_SPI_TRANSFER_SIZE - \ + IFX_SPI_HEADER_OVERHEAD) + +#define IFX_SPI_IRQ_TYPE DETECT_EDGE_RISING +#define IFX_SPI_GPIO_TARGET 0 +#define IFX_SPI_GPIO0 0x105 + +#define IFX_SPI_STATUS_TIMEOUT (2000*HZ) + +/* values for bits in power status byte */ +#define IFX_SPI_POWER_DATA_PENDING 1 +#define IFX_SPI_POWER_SRDY 2 + +struct ifx_spi_device { + /* Our SPI device */ + struct spi_device *spi_dev; + + /* Port specific data */ + struct kfifo tx_fifo; + spinlock_t fifo_lock; + unsigned long signal_state; + + /* TTY Layer logic */ + struct tty_port tty_port; + struct device *tty_dev; + int minor; + + /* Low level I/O work */ + struct tasklet_struct io_work_tasklet; + unsigned long flags; + dma_addr_t rx_dma; + dma_addr_t tx_dma; + + int is_6160; /* Modem type */ + + spinlock_t write_lock; + int write_pending; + spinlock_t power_lock; + unsigned char power_status; + + unsigned char *rx_buffer; + unsigned char *tx_buffer; + dma_addr_t rx_bus; + dma_addr_t tx_bus; + unsigned char spi_more; + unsigned char spi_slave_cts; + + struct timer_list spi_timer; + + struct spi_message spi_msg; + struct spi_transfer spi_xfer; + + struct { + /* gpio lines */ + unsigned short srdy; /* slave-ready gpio */ + unsigned short mrdy; /* master-ready gpio */ + unsigned short reset; /* modem-reset gpio */ + unsigned short po; /* modem-on gpio */ + unsigned short reset_out; /* modem-in-reset gpio */ + /* state/stats */ + int unack_srdy_int_nb; + } gpio; + + /* modem reset */ + unsigned long mdm_reset_state; +#define MR_START 0 +#define MR_INPROGRESS 1 +#define MR_COMPLETE 2 + wait_queue_head_t mdm_reset_wait; +}; + +#endif /* _IFX6X60_H */ diff --git a/include/linux/spi/ifx_modem.h b/include/linux/spi/ifx_modem.h new file mode 100644 index 00000000000..a68f3b19d11 --- /dev/null +++ b/include/linux/spi/ifx_modem.h @@ -0,0 +1,14 @@ +#ifndef LINUX_IFX_MODEM_H +#define LINUX_IFX_MODEM_H + +struct ifx_modem_platform_data { + unsigned short rst_out; /* modem reset out */ + unsigned short pwr_on; /* power on */ + unsigned short rst_pmu; /* reset modem */ + unsigned short tx_pwr; /* modem power threshold */ + unsigned short srdy; /* SRDY */ + unsigned short mrdy; /* MRDY */ + unsigned short is_6160; /* Modem type */ +}; + +#endif -- cgit v1.2.3-70-g09d2 From 304e12665a4a7b8b25dfe8c64fa4fd56a04a67ea Mon Sep 17 00:00:00 2001 From: Alexey Charkov Date: Mon, 8 Nov 2010 20:33:20 +0300 Subject: serial: Add support for UART on VIA VT8500 and compatibles This adds a driver for the serial ports found in VIA and WonderMedia Systems-on-Chip. Interrupt-driven FIFO operation is implemented. The hardware also supports pure register-based operation (which is slower) and DMA-based FIFO operation. As the FIFOs are only 16 bytes long, DMA operation is probably not worth the hassle. Signed-off-by: Alexey Charkov Signed-off-by: Greg Kroah-Hartman --- drivers/serial/Kconfig | 10 + drivers/serial/Makefile | 1 + drivers/serial/vt8500_serial.c | 648 +++++++++++++++++++++++++++++++++++++++++ include/linux/serial_core.h | 3 + 4 files changed, 662 insertions(+) create mode 100644 drivers/serial/vt8500_serial.c (limited to 'include/linux') diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig index 0b9cc17b380..388e37132cc 100644 --- a/drivers/serial/Kconfig +++ b/drivers/serial/Kconfig @@ -1381,6 +1381,16 @@ config SERIAL_MSM_CONSOLE depends on SERIAL_MSM=y select SERIAL_CORE_CONSOLE +config SERIAL_VT8500 + bool "VIA VT8500 on-chip serial port support" + depends on ARM && ARCH_VT8500 + select SERIAL_CORE + +config SERIAL_VT8500_CONSOLE + bool "VIA VT8500 serial console support" + depends on SERIAL_VT8500=y + select SERIAL_CORE_CONSOLE + config SERIAL_NETX tristate "NetX serial port support" depends on ARM && ARCH_NETX diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile index 783638b1069..a5e2264b2a8 100644 --- a/drivers/serial/Makefile +++ b/drivers/serial/Makefile @@ -86,6 +86,7 @@ obj-$(CONFIG_SERIAL_TIMBERDALE) += timbuart.o obj-$(CONFIG_SERIAL_GRLIB_GAISLER_APBUART) += apbuart.o obj-$(CONFIG_SERIAL_ALTERA_JTAGUART) += altera_jtaguart.o obj-$(CONFIG_SERIAL_ALTERA_UART) += altera_uart.o +obj-$(CONFIG_SERIAL_VT8500) += vt8500_serial.o obj-$(CONFIG_SERIAL_MRST_MAX3110) += mrst_max3110.o obj-$(CONFIG_SERIAL_MFD_HSU) += mfd.o obj-$(CONFIG_SERIAL_OMAP) += omap-serial.o diff --git a/drivers/serial/vt8500_serial.c b/drivers/serial/vt8500_serial.c new file mode 100644 index 00000000000..322bf56c0d8 --- /dev/null +++ b/drivers/serial/vt8500_serial.c @@ -0,0 +1,648 @@ +/* + * drivers/serial/vt8500_serial.c + * + * Copyright (C) 2010 Alexey Charkov + * + * Based on msm_serial.c, which is: + * Copyright (C) 2007 Google, Inc. + * Author: Robert Love + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#if defined(CONFIG_SERIAL_VT8500_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ) +# define SUPPORT_SYSRQ +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * UART Register offsets + */ + +#define VT8500_URTDR 0x0000 /* Transmit data */ +#define VT8500_URRDR 0x0004 /* Receive data */ +#define VT8500_URDIV 0x0008 /* Clock/Baud rate divisor */ +#define VT8500_URLCR 0x000C /* Line control */ +#define VT8500_URICR 0x0010 /* IrDA control */ +#define VT8500_URIER 0x0014 /* Interrupt enable */ +#define VT8500_URISR 0x0018 /* Interrupt status */ +#define VT8500_URUSR 0x001c /* UART status */ +#define VT8500_URFCR 0x0020 /* FIFO control */ +#define VT8500_URFIDX 0x0024 /* FIFO index */ +#define VT8500_URBKR 0x0028 /* Break signal count */ +#define VT8500_URTOD 0x002c /* Time out divisor */ +#define VT8500_TXFIFO 0x1000 /* Transmit FIFO (16x8) */ +#define VT8500_RXFIFO 0x1020 /* Receive FIFO (16x10) */ + +/* + * Interrupt enable and status bits + */ + +#define TXDE (1 << 0) /* Tx Data empty */ +#define RXDF (1 << 1) /* Rx Data full */ +#define TXFAE (1 << 2) /* Tx FIFO almost empty */ +#define TXFE (1 << 3) /* Tx FIFO empty */ +#define RXFAF (1 << 4) /* Rx FIFO almost full */ +#define RXFF (1 << 5) /* Rx FIFO full */ +#define TXUDR (1 << 6) /* Tx underrun */ +#define RXOVER (1 << 7) /* Rx overrun */ +#define PER (1 << 8) /* Parity error */ +#define FER (1 << 9) /* Frame error */ +#define TCTS (1 << 10) /* Toggle of CTS */ +#define RXTOUT (1 << 11) /* Rx timeout */ +#define BKDONE (1 << 12) /* Break signal done */ +#define ERR (1 << 13) /* AHB error response */ + +#define RX_FIFO_INTS (RXFAF | RXFF | RXOVER | PER | FER | RXTOUT) +#define TX_FIFO_INTS (TXFAE | TXFE | TXUDR) + +struct vt8500_port { + struct uart_port uart; + char name[16]; + struct clk *clk; + unsigned int ier; +}; + +static inline void vt8500_write(struct uart_port *port, unsigned int val, + unsigned int off) +{ + writel(val, port->membase + off); +} + +static inline unsigned int vt8500_read(struct uart_port *port, unsigned int off) +{ + return readl(port->membase + off); +} + +static void vt8500_stop_tx(struct uart_port *port) +{ + struct vt8500_port *vt8500_port = container_of(port, + struct vt8500_port, + uart); + + vt8500_port->ier &= ~TX_FIFO_INTS; + vt8500_write(port, vt8500_port->ier, VT8500_URIER); +} + +static void vt8500_stop_rx(struct uart_port *port) +{ + struct vt8500_port *vt8500_port = container_of(port, + struct vt8500_port, + uart); + + vt8500_port->ier &= ~RX_FIFO_INTS; + vt8500_write(port, vt8500_port->ier, VT8500_URIER); +} + +static void vt8500_enable_ms(struct uart_port *port) +{ + struct vt8500_port *vt8500_port = container_of(port, + struct vt8500_port, + uart); + + vt8500_port->ier |= TCTS; + vt8500_write(port, vt8500_port->ier, VT8500_URIER); +} + +static void handle_rx(struct uart_port *port) +{ + struct tty_struct *tty = tty_port_tty_get(&port->state->port); + if (!tty) { + /* Discard data: no tty available */ + int count = (vt8500_read(port, VT8500_URFIDX) & 0x1f00) >> 8; + u16 ch; + while (count--) + ch = readw(port->membase + VT8500_RXFIFO); + return; + } + + /* + * Handle overrun + */ + if ((vt8500_read(port, VT8500_URISR) & RXOVER)) { + port->icount.overrun++; + tty_insert_flip_char(tty, 0, TTY_OVERRUN); + } + + /* and now the main RX loop */ + while (vt8500_read(port, VT8500_URFIDX) & 0x1f00) { + unsigned int c; + char flag = TTY_NORMAL; + + c = readw(port->membase + VT8500_RXFIFO) & 0x3ff; + + /* Mask conditions we're ignorning. */ + c &= ~port->read_status_mask; + + if (c & FER) { + port->icount.frame++; + flag = TTY_FRAME; + } else if (c & PER) { + port->icount.parity++; + flag = TTY_PARITY; + } + port->icount.rx++; + + if (!uart_handle_sysrq_char(port, c)) + tty_insert_flip_char(tty, c, flag); + } + + tty_flip_buffer_push(tty); + tty_kref_put(tty); +} + +static void handle_tx(struct uart_port *port) +{ + struct circ_buf *xmit = &port->state->xmit; + + if (port->x_char) { + writeb(port->x_char, port->membase + VT8500_TXFIFO); + port->icount.tx++; + port->x_char = 0; + } + if (uart_circ_empty(xmit) || uart_tx_stopped(port)) { + vt8500_stop_tx(port); + return; + } + + while ((vt8500_read(port, VT8500_URFIDX) & 0x1f) < 16) { + if (uart_circ_empty(xmit)) + break; + + writeb(xmit->buf[xmit->tail], port->membase + VT8500_TXFIFO); + + xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); + port->icount.tx++; + } + + if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) + uart_write_wakeup(port); + + if (uart_circ_empty(xmit)) + vt8500_stop_tx(port); +} + +static void vt8500_start_tx(struct uart_port *port) +{ + struct vt8500_port *vt8500_port = container_of(port, + struct vt8500_port, + uart); + + vt8500_port->ier &= ~TX_FIFO_INTS; + vt8500_write(port, vt8500_port->ier, VT8500_URIER); + handle_tx(port); + vt8500_port->ier |= TX_FIFO_INTS; + vt8500_write(port, vt8500_port->ier, VT8500_URIER); +} + +static void handle_delta_cts(struct uart_port *port) +{ + port->icount.cts++; + wake_up_interruptible(&port->state->port.delta_msr_wait); +} + +static irqreturn_t vt8500_irq(int irq, void *dev_id) +{ + struct uart_port *port = dev_id; + unsigned long isr; + + spin_lock(&port->lock); + isr = vt8500_read(port, VT8500_URISR); + + /* Acknowledge active status bits */ + vt8500_write(port, isr, VT8500_URISR); + + if (isr & RX_FIFO_INTS) + handle_rx(port); + if (isr & TX_FIFO_INTS) + handle_tx(port); + if (isr & TCTS) + handle_delta_cts(port); + + spin_unlock(&port->lock); + + return IRQ_HANDLED; +} + +static unsigned int vt8500_tx_empty(struct uart_port *port) +{ + return (vt8500_read(port, VT8500_URFIDX) & 0x1f) < 16 ? + TIOCSER_TEMT : 0; +} + +static unsigned int vt8500_get_mctrl(struct uart_port *port) +{ + unsigned int usr; + + usr = vt8500_read(port, VT8500_URUSR); + if (usr & (1 << 4)) + return TIOCM_CTS; + else + return 0; +} + +static void vt8500_set_mctrl(struct uart_port *port, unsigned int mctrl) +{ +} + +static void vt8500_break_ctl(struct uart_port *port, int break_ctl) +{ + if (break_ctl) + vt8500_write(port, vt8500_read(port, VT8500_URLCR) | (1 << 9), + VT8500_URLCR); +} + +static int vt8500_set_baud_rate(struct uart_port *port, unsigned int baud) +{ + unsigned long div; + unsigned int loops = 1000; + + div = vt8500_read(port, VT8500_URDIV) & ~(0x3ff); + + if (unlikely((baud < 900) || (baud > 921600))) + div |= 7; + else + div |= (921600 / baud) - 1; + + while ((vt8500_read(port, VT8500_URUSR) & (1 << 5)) && --loops) + cpu_relax(); + vt8500_write(port, div, VT8500_URDIV); + + return baud; +} + +static int vt8500_startup(struct uart_port *port) +{ + struct vt8500_port *vt8500_port = + container_of(port, struct vt8500_port, uart); + int ret; + + snprintf(vt8500_port->name, sizeof(vt8500_port->name), + "vt8500_serial%d", port->line); + + ret = request_irq(port->irq, vt8500_irq, IRQF_TRIGGER_HIGH, + vt8500_port->name, port); + if (unlikely(ret)) + return ret; + + vt8500_write(port, 0x03, VT8500_URLCR); /* enable TX & RX */ + + return 0; +} + +static void vt8500_shutdown(struct uart_port *port) +{ + struct vt8500_port *vt8500_port = + container_of(port, struct vt8500_port, uart); + + vt8500_port->ier = 0; + + /* disable interrupts and FIFOs */ + vt8500_write(&vt8500_port->uart, 0, VT8500_URIER); + vt8500_write(&vt8500_port->uart, 0x880, VT8500_URFCR); + free_irq(port->irq, port); +} + +static void vt8500_set_termios(struct uart_port *port, + struct ktermios *termios, + struct ktermios *old) +{ + struct vt8500_port *vt8500_port = + container_of(port, struct vt8500_port, uart); + unsigned long flags; + unsigned int baud, lcr; + unsigned int loops = 1000; + + spin_lock_irqsave(&port->lock, flags); + + /* calculate and set baud rate */ + baud = uart_get_baud_rate(port, termios, old, 900, 921600); + baud = vt8500_set_baud_rate(port, baud); + if (tty_termios_baud_rate(termios)) + tty_termios_encode_baud_rate(termios, baud, baud); + + /* calculate parity */ + lcr = vt8500_read(&vt8500_port->uart, VT8500_URLCR); + lcr &= ~((1 << 5) | (1 << 4)); + if (termios->c_cflag & PARENB) { + lcr |= (1 << 4); + termios->c_cflag &= ~CMSPAR; + if (termios->c_cflag & PARODD) + lcr |= (1 << 5); + } + + /* calculate bits per char */ + lcr &= ~(1 << 2); + switch (termios->c_cflag & CSIZE) { + case CS7: + break; + case CS8: + default: + lcr |= (1 << 2); + termios->c_cflag &= ~CSIZE; + termios->c_cflag |= CS8; + break; + } + + /* calculate stop bits */ + lcr &= ~(1 << 3); + if (termios->c_cflag & CSTOPB) + lcr |= (1 << 3); + + /* set parity, bits per char, and stop bit */ + vt8500_write(&vt8500_port->uart, lcr, VT8500_URLCR); + + /* Configure status bits to ignore based on termio flags. */ + port->read_status_mask = 0; + if (termios->c_iflag & IGNPAR) + port->read_status_mask = FER | PER; + + uart_update_timeout(port, termios->c_cflag, baud); + + /* Reset FIFOs */ + vt8500_write(&vt8500_port->uart, 0x88c, VT8500_URFCR); + while ((vt8500_read(&vt8500_port->uart, VT8500_URFCR) & 0xc) + && --loops) + cpu_relax(); + + /* Every possible FIFO-related interrupt */ + vt8500_port->ier = RX_FIFO_INTS | TX_FIFO_INTS; + + /* + * CTS flow control + */ + if (UART_ENABLE_MS(&vt8500_port->uart, termios->c_cflag)) + vt8500_port->ier |= TCTS; + + vt8500_write(&vt8500_port->uart, 0x881, VT8500_URFCR); + vt8500_write(&vt8500_port->uart, vt8500_port->ier, VT8500_URIER); + + spin_unlock_irqrestore(&port->lock, flags); +} + +static const char *vt8500_type(struct uart_port *port) +{ + struct vt8500_port *vt8500_port = + container_of(port, struct vt8500_port, uart); + return vt8500_port->name; +} + +static void vt8500_release_port(struct uart_port *port) +{ +} + +static int vt8500_request_port(struct uart_port *port) +{ + return 0; +} + +static void vt8500_config_port(struct uart_port *port, int flags) +{ + port->type = PORT_VT8500; +} + +static int vt8500_verify_port(struct uart_port *port, + struct serial_struct *ser) +{ + if (unlikely(ser->type != PORT_UNKNOWN && ser->type != PORT_VT8500)) + return -EINVAL; + if (unlikely(port->irq != ser->irq)) + return -EINVAL; + return 0; +} + +static struct vt8500_port *vt8500_uart_ports[4]; +static struct uart_driver vt8500_uart_driver; + +#ifdef CONFIG_SERIAL_VT8500_CONSOLE + +static inline void wait_for_xmitr(struct uart_port *port) +{ + unsigned int status, tmout = 10000; + + /* Wait up to 10ms for the character(s) to be sent. */ + do { + status = vt8500_read(port, VT8500_URFIDX); + + if (--tmout == 0) + break; + udelay(1); + } while (status & 0x10); +} + +static void vt8500_console_putchar(struct uart_port *port, int c) +{ + wait_for_xmitr(port); + writeb(c, port->membase + VT8500_TXFIFO); +} + +static void vt8500_console_write(struct console *co, const char *s, + unsigned int count) +{ + struct vt8500_port *vt8500_port = vt8500_uart_ports[co->index]; + unsigned long ier; + + BUG_ON(co->index < 0 || co->index >= vt8500_uart_driver.nr); + + ier = vt8500_read(&vt8500_port->uart, VT8500_URIER); + vt8500_write(&vt8500_port->uart, VT8500_URIER, 0); + + uart_console_write(&vt8500_port->uart, s, count, + vt8500_console_putchar); + + /* + * Finally, wait for transmitter to become empty + * and switch back to FIFO + */ + wait_for_xmitr(&vt8500_port->uart); + vt8500_write(&vt8500_port->uart, VT8500_URIER, ier); +} + +static int __init vt8500_console_setup(struct console *co, char *options) +{ + struct vt8500_port *vt8500_port; + int baud = 9600; + int bits = 8; + int parity = 'n'; + int flow = 'n'; + + if (unlikely(co->index >= vt8500_uart_driver.nr || co->index < 0)) + return -ENXIO; + + vt8500_port = vt8500_uart_ports[co->index]; + + if (!vt8500_port) + return -ENODEV; + + if (options) + uart_parse_options(options, &baud, &parity, &bits, &flow); + + return uart_set_options(&vt8500_port->uart, + co, baud, parity, bits, flow); +} + +static struct console vt8500_console = { + .name = "ttyWMT", + .write = vt8500_console_write, + .device = uart_console_device, + .setup = vt8500_console_setup, + .flags = CON_PRINTBUFFER, + .index = -1, + .data = &vt8500_uart_driver, +}; + +#define VT8500_CONSOLE (&vt8500_console) + +#else +#define VT8500_CONSOLE NULL +#endif + +static struct uart_ops vt8500_uart_pops = { + .tx_empty = vt8500_tx_empty, + .set_mctrl = vt8500_set_mctrl, + .get_mctrl = vt8500_get_mctrl, + .stop_tx = vt8500_stop_tx, + .start_tx = vt8500_start_tx, + .stop_rx = vt8500_stop_rx, + .enable_ms = vt8500_enable_ms, + .break_ctl = vt8500_break_ctl, + .startup = vt8500_startup, + .shutdown = vt8500_shutdown, + .set_termios = vt8500_set_termios, + .type = vt8500_type, + .release_port = vt8500_release_port, + .request_port = vt8500_request_port, + .config_port = vt8500_config_port, + .verify_port = vt8500_verify_port, +}; + +static struct uart_driver vt8500_uart_driver = { + .owner = THIS_MODULE, + .driver_name = "vt8500_serial", + .dev_name = "ttyWMT", + .nr = 6, + .cons = VT8500_CONSOLE, +}; + +static int __init vt8500_serial_probe(struct platform_device *pdev) +{ + struct vt8500_port *vt8500_port; + struct resource *mmres, *irqres; + int ret; + + mmres = platform_get_resource(pdev, IORESOURCE_MEM, 0); + irqres = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (!mmres || !irqres) + return -ENODEV; + + vt8500_port = kzalloc(sizeof(struct vt8500_port), GFP_KERNEL); + if (!vt8500_port) + return -ENOMEM; + + vt8500_port->uart.type = PORT_VT8500; + vt8500_port->uart.iotype = UPIO_MEM; + vt8500_port->uart.mapbase = mmres->start; + vt8500_port->uart.irq = irqres->start; + vt8500_port->uart.fifosize = 16; + vt8500_port->uart.ops = &vt8500_uart_pops; + vt8500_port->uart.line = pdev->id; + vt8500_port->uart.dev = &pdev->dev; + vt8500_port->uart.flags = UPF_IOREMAP | UPF_BOOT_AUTOCONF; + vt8500_port->uart.uartclk = 24000000; + + snprintf(vt8500_port->name, sizeof(vt8500_port->name), + "VT8500 UART%d", pdev->id); + + vt8500_port->uart.membase = ioremap(mmres->start, + mmres->end - mmres->start + 1); + if (!vt8500_port->uart.membase) { + ret = -ENOMEM; + goto err; + } + + vt8500_uart_ports[pdev->id] = vt8500_port; + + uart_add_one_port(&vt8500_uart_driver, &vt8500_port->uart); + + platform_set_drvdata(pdev, vt8500_port); + + return 0; + +err: + kfree(vt8500_port); + return ret; +} + +static int __devexit vt8500_serial_remove(struct platform_device *pdev) +{ + struct vt8500_port *vt8500_port = platform_get_drvdata(pdev); + + platform_set_drvdata(pdev, NULL); + uart_remove_one_port(&vt8500_uart_driver, &vt8500_port->uart); + kfree(vt8500_port); + + return 0; +} + +static struct platform_driver vt8500_platform_driver = { + .probe = vt8500_serial_probe, + .remove = vt8500_serial_remove, + .driver = { + .name = "vt8500_serial", + .owner = THIS_MODULE, + }, +}; + +static int __init vt8500_serial_init(void) +{ + int ret; + + ret = uart_register_driver(&vt8500_uart_driver); + if (unlikely(ret)) + return ret; + + ret = platform_driver_register(&vt8500_platform_driver); + + if (unlikely(ret)) + uart_unregister_driver(&vt8500_uart_driver); + + return ret; +} + +static void __exit vt8500_serial_exit(void) +{ +#ifdef CONFIG_SERIAL_VT8500_CONSOLE + unregister_console(&vt8500_console); +#endif + platform_driver_unregister(&vt8500_platform_driver); + uart_unregister_driver(&vt8500_uart_driver); +} + +module_init(vt8500_serial_init); +module_exit(vt8500_serial_exit); + +MODULE_AUTHOR("Alexey Charkov "); +MODULE_DESCRIPTION("Driver for vt8500 serial device"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 212eb4c6779..41603d69043 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -199,6 +199,9 @@ /* TI OMAP-UART */ #define PORT_OMAP 96 +/* VIA VT8500 SoC */ +#define PORT_VT8500 97 + #ifdef __KERNEL__ #include -- cgit v1.2.3-70-g09d2 From 1d7138de878d1d4210727c1200193e69596f93b3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 12 Nov 2010 05:46:50 +0000 Subject: igmp: RCU conversion of in_dev->mc_list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit in_dev->mc_list is protected by one rwlock (in_dev->mc_list_lock). This can easily be converted to a RCU protection. Writers hold RTNL, so mc_list_lock is removed, not replaced by a spinlock. Signed-off-by: Eric Dumazet Cc: Cypher Wu Cc: Américo Wang Signed-off-by: David S. Miller --- include/linux/igmp.h | 12 ++- include/linux/inetdevice.h | 5 +- include/net/inet_sock.h | 2 +- net/ipv4/igmp.c | 223 +++++++++++++++++++++------------------------ 4 files changed, 115 insertions(+), 127 deletions(-) (limited to 'include/linux') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 93fc2449af1..7d164670f26 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -167,10 +167,10 @@ struct ip_sf_socklist { */ struct ip_mc_socklist { - struct ip_mc_socklist *next; + struct ip_mc_socklist __rcu *next_rcu; struct ip_mreqn multi; unsigned int sfmode; /* MCAST_{INCLUDE,EXCLUDE} */ - struct ip_sf_socklist *sflist; + struct ip_sf_socklist __rcu *sflist; struct rcu_head rcu; }; @@ -186,11 +186,14 @@ struct ip_sf_list { struct ip_mc_list { struct in_device *interface; __be32 multiaddr; + unsigned int sfmode; struct ip_sf_list *sources; struct ip_sf_list *tomb; - unsigned int sfmode; unsigned long sfcount[2]; - struct ip_mc_list *next; + union { + struct ip_mc_list *next; + struct ip_mc_list __rcu *next_rcu; + }; struct timer_list timer; int users; atomic_t refcnt; @@ -201,6 +204,7 @@ struct ip_mc_list { char loaded; unsigned char gsquery; /* check source marks? */ unsigned char crcount; + struct rcu_head rcu; }; /* V3 exponential field decoding */ diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index ccd5b07d678..380ba6bc5db 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -52,9 +52,8 @@ struct in_device { atomic_t refcnt; int dead; struct in_ifaddr *ifa_list; /* IP ifaddr chain */ - rwlock_t mc_list_lock; - struct ip_mc_list *mc_list; /* IP multicast filter chain */ - int mc_count; /* Number of installed mcasts */ + struct ip_mc_list __rcu *mc_list; /* IP multicast filter chain */ + int mc_count; /* Number of installed mcasts */ spinlock_t mc_tomb_lock; struct ip_mc_list *mc_tomb; unsigned long mr_v1_seen; diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 1989cfd7405..8945f9fb192 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -141,7 +141,7 @@ struct inet_sock { nodefrag:1; int mc_index; __be32 mc_addr; - struct ip_mc_socklist *mc_list; + struct ip_mc_socklist __rcu *mc_list; struct { unsigned int flags; unsigned int fragsize; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 08d0d81ffc1..6f49d6c087d 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -149,11 +149,17 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc); static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, int sfcount, __be32 *psfsrc, int delta); + +static void ip_mc_list_reclaim(struct rcu_head *head) +{ + kfree(container_of(head, struct ip_mc_list, rcu)); +} + static void ip_ma_put(struct ip_mc_list *im) { if (atomic_dec_and_test(&im->refcnt)) { in_dev_put(im->interface); - kfree(im); + call_rcu(&im->rcu, ip_mc_list_reclaim); } } @@ -163,7 +169,7 @@ static void ip_ma_put(struct ip_mc_list *im) * Timer management */ -static __inline__ void igmp_stop_timer(struct ip_mc_list *im) +static void igmp_stop_timer(struct ip_mc_list *im) { spin_lock_bh(&im->lock); if (del_timer(&im->timer)) @@ -496,14 +502,24 @@ empty_source: return skb; } +#define for_each_pmc_rcu(in_dev, pmc) \ + for (pmc = rcu_dereference(in_dev->mc_list); \ + pmc != NULL; \ + pmc = rcu_dereference(pmc->next_rcu)) + +#define for_each_pmc_rtnl(in_dev, pmc) \ + for (pmc = rtnl_dereference(in_dev->mc_list); \ + pmc != NULL; \ + pmc = rtnl_dereference(pmc->next_rcu)) + static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc) { struct sk_buff *skb = NULL; int type; if (!pmc) { - read_lock(&in_dev->mc_list_lock); - for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, pmc) { if (pmc->multiaddr == IGMP_ALL_HOSTS) continue; spin_lock_bh(&pmc->lock); @@ -514,7 +530,7 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc) skb = add_grec(skb, pmc, type, 0, 0); spin_unlock_bh(&pmc->lock); } - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); } else { spin_lock_bh(&pmc->lock); if (pmc->sfcount[MCAST_EXCLUDE]) @@ -556,7 +572,7 @@ static void igmpv3_send_cr(struct in_device *in_dev) struct sk_buff *skb = NULL; int type, dtype; - read_lock(&in_dev->mc_list_lock); + rcu_read_lock(); spin_lock_bh(&in_dev->mc_tomb_lock); /* deleted MCA's */ @@ -593,7 +609,7 @@ static void igmpv3_send_cr(struct in_device *in_dev) spin_unlock_bh(&in_dev->mc_tomb_lock); /* change recs */ - for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rcu(in_dev, pmc) { spin_lock_bh(&pmc->lock); if (pmc->sfcount[MCAST_EXCLUDE]) { type = IGMPV3_BLOCK_OLD_SOURCES; @@ -616,7 +632,7 @@ static void igmpv3_send_cr(struct in_device *in_dev) } spin_unlock_bh(&pmc->lock); } - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); if (!skb) return; @@ -813,14 +829,14 @@ static void igmp_heard_report(struct in_device *in_dev, __be32 group) if (group == IGMP_ALL_HOSTS) return; - read_lock(&in_dev->mc_list_lock); - for (im=in_dev->mc_list; im!=NULL; im=im->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, im) { if (im->multiaddr == group) { igmp_stop_timer(im); break; } } - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); } static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, @@ -906,8 +922,8 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, * - Use the igmp->igmp_code field as the maximum * delay possible */ - read_lock(&in_dev->mc_list_lock); - for (im=in_dev->mc_list; im!=NULL; im=im->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, im) { int changed; if (group && group != im->multiaddr) @@ -925,7 +941,7 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, if (changed) igmp_mod_timer(im, max_delay); } - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); } /* called in rcu_read_lock() section */ @@ -1110,8 +1126,8 @@ static void igmpv3_clear_delrec(struct in_device *in_dev) kfree(pmc); } /* clear dead sources, too */ - read_lock(&in_dev->mc_list_lock); - for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, pmc) { struct ip_sf_list *psf, *psf_next; spin_lock_bh(&pmc->lock); @@ -1123,7 +1139,7 @@ static void igmpv3_clear_delrec(struct in_device *in_dev) kfree(psf); } } - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); } #endif @@ -1209,7 +1225,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) ASSERT_RTNL(); - for (im=in_dev->mc_list; im; im=im->next) { + for_each_pmc_rtnl(in_dev, im) { if (im->multiaddr == addr) { im->users++; ip_mc_add_src(in_dev, &addr, MCAST_EXCLUDE, 0, NULL, 0); @@ -1217,7 +1233,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) } } - im = kmalloc(sizeof(*im), GFP_KERNEL); + im = kzalloc(sizeof(*im), GFP_KERNEL); if (!im) goto out; @@ -1227,26 +1243,18 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) im->multiaddr = addr; /* initial mode is (EX, empty) */ im->sfmode = MCAST_EXCLUDE; - im->sfcount[MCAST_INCLUDE] = 0; im->sfcount[MCAST_EXCLUDE] = 1; - im->sources = NULL; - im->tomb = NULL; - im->crcount = 0; atomic_set(&im->refcnt, 1); spin_lock_init(&im->lock); #ifdef CONFIG_IP_MULTICAST - im->tm_running = 0; setup_timer(&im->timer, &igmp_timer_expire, (unsigned long)im); im->unsolicit_count = IGMP_Unsolicited_Report_Count; - im->reporter = 0; - im->gsquery = 0; #endif - im->loaded = 0; - write_lock_bh(&in_dev->mc_list_lock); - im->next = in_dev->mc_list; - in_dev->mc_list = im; + + im->next_rcu = in_dev->mc_list; in_dev->mc_count++; - write_unlock_bh(&in_dev->mc_list_lock); + rcu_assign_pointer(in_dev->mc_list, im); + #ifdef CONFIG_IP_MULTICAST igmpv3_del_delrec(in_dev, im->multiaddr); #endif @@ -1287,17 +1295,18 @@ EXPORT_SYMBOL(ip_mc_rejoin_group); void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) { - struct ip_mc_list *i, **ip; + struct ip_mc_list *i; + struct ip_mc_list __rcu **ip; ASSERT_RTNL(); - for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) { + for (ip = &in_dev->mc_list; + (i = rtnl_dereference(*ip)) != NULL; + ip = &i->next_rcu) { if (i->multiaddr == addr) { if (--i->users == 0) { - write_lock_bh(&in_dev->mc_list_lock); - *ip = i->next; + *ip = i->next_rcu; in_dev->mc_count--; - write_unlock_bh(&in_dev->mc_list_lock); igmp_group_dropped(i); if (!in_dev->dead) @@ -1316,34 +1325,34 @@ EXPORT_SYMBOL(ip_mc_dec_group); void ip_mc_unmap(struct in_device *in_dev) { - struct ip_mc_list *i; + struct ip_mc_list *pmc; ASSERT_RTNL(); - for (i = in_dev->mc_list; i; i = i->next) - igmp_group_dropped(i); + for_each_pmc_rtnl(in_dev, pmc) + igmp_group_dropped(pmc); } void ip_mc_remap(struct in_device *in_dev) { - struct ip_mc_list *i; + struct ip_mc_list *pmc; ASSERT_RTNL(); - for (i = in_dev->mc_list; i; i = i->next) - igmp_group_added(i); + for_each_pmc_rtnl(in_dev, pmc) + igmp_group_added(pmc); } /* Device going down */ void ip_mc_down(struct in_device *in_dev) { - struct ip_mc_list *i; + struct ip_mc_list *pmc; ASSERT_RTNL(); - for (i=in_dev->mc_list; i; i=i->next) - igmp_group_dropped(i); + for_each_pmc_rtnl(in_dev, pmc) + igmp_group_dropped(pmc); #ifdef CONFIG_IP_MULTICAST in_dev->mr_ifc_count = 0; @@ -1374,7 +1383,6 @@ void ip_mc_init_dev(struct in_device *in_dev) in_dev->mr_qrv = IGMP_Unsolicited_Report_Count; #endif - rwlock_init(&in_dev->mc_list_lock); spin_lock_init(&in_dev->mc_tomb_lock); } @@ -1382,14 +1390,14 @@ void ip_mc_init_dev(struct in_device *in_dev) void ip_mc_up(struct in_device *in_dev) { - struct ip_mc_list *i; + struct ip_mc_list *pmc; ASSERT_RTNL(); ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS); - for (i=in_dev->mc_list; i; i=i->next) - igmp_group_added(i); + for_each_pmc_rtnl(in_dev, pmc) + igmp_group_added(pmc); } /* @@ -1405,17 +1413,13 @@ void ip_mc_destroy_dev(struct in_device *in_dev) /* Deactivate timers */ ip_mc_down(in_dev); - write_lock_bh(&in_dev->mc_list_lock); - while ((i = in_dev->mc_list) != NULL) { - in_dev->mc_list = i->next; + while ((i = rtnl_dereference(in_dev->mc_list)) != NULL) { + in_dev->mc_list = i->next_rcu; in_dev->mc_count--; - write_unlock_bh(&in_dev->mc_list_lock); + igmp_group_dropped(i); ip_ma_put(i); - - write_lock_bh(&in_dev->mc_list_lock); } - write_unlock_bh(&in_dev->mc_list_lock); } /* RTNL is locked */ @@ -1513,18 +1517,18 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode, if (!in_dev) return -ENODEV; - read_lock(&in_dev->mc_list_lock); - for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, pmc) { if (*pmca == pmc->multiaddr) break; } if (!pmc) { /* MCA not found?? bug */ - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); return -ESRCH; } spin_lock_bh(&pmc->lock); - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); #ifdef CONFIG_IP_MULTICAST sf_markstate(pmc); #endif @@ -1685,18 +1689,18 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, if (!in_dev) return -ENODEV; - read_lock(&in_dev->mc_list_lock); - for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, pmc) { if (*pmca == pmc->multiaddr) break; } if (!pmc) { /* MCA not found?? bug */ - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); return -ESRCH; } spin_lock_bh(&pmc->lock); - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); #ifdef CONFIG_IP_MULTICAST sf_markstate(pmc); @@ -1793,7 +1797,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) err = -EADDRINUSE; ifindex = imr->imr_ifindex; - for (i = inet->mc_list; i; i = i->next) { + for_each_pmc_rtnl(inet, i) { if (i->multi.imr_multiaddr.s_addr == addr && i->multi.imr_ifindex == ifindex) goto done; @@ -1807,7 +1811,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) goto done; memcpy(&iml->multi, imr, sizeof(*imr)); - iml->next = inet->mc_list; + iml->next_rcu = inet->mc_list; iml->sflist = NULL; iml->sfmode = MCAST_EXCLUDE; rcu_assign_pointer(inet->mc_list, iml); @@ -1821,17 +1825,14 @@ EXPORT_SYMBOL(ip_mc_join_group); static void ip_sf_socklist_reclaim(struct rcu_head *rp) { - struct ip_sf_socklist *psf; - - psf = container_of(rp, struct ip_sf_socklist, rcu); + kfree(container_of(rp, struct ip_sf_socklist, rcu)); /* sk_omem_alloc should have been decreased by the caller*/ - kfree(psf); } static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, struct in_device *in_dev) { - struct ip_sf_socklist *psf = iml->sflist; + struct ip_sf_socklist *psf = rtnl_dereference(iml->sflist); int err; if (psf == NULL) { @@ -1851,11 +1852,8 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, static void ip_mc_socklist_reclaim(struct rcu_head *rp) { - struct ip_mc_socklist *iml; - - iml = container_of(rp, struct ip_mc_socklist, rcu); + kfree(container_of(rp, struct ip_mc_socklist, rcu)); /* sk_omem_alloc should have been decreased by the caller*/ - kfree(iml); } @@ -1866,7 +1864,8 @@ static void ip_mc_socklist_reclaim(struct rcu_head *rp) int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) { struct inet_sock *inet = inet_sk(sk); - struct ip_mc_socklist *iml, **imlp; + struct ip_mc_socklist *iml; + struct ip_mc_socklist __rcu **imlp; struct in_device *in_dev; struct net *net = sock_net(sk); __be32 group = imr->imr_multiaddr.s_addr; @@ -1876,7 +1875,9 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) rtnl_lock(); in_dev = ip_mc_find_dev(net, imr); ifindex = imr->imr_ifindex; - for (imlp = &inet->mc_list; (iml = *imlp) != NULL; imlp = &iml->next) { + for (imlp = &inet->mc_list; + (iml = rtnl_dereference(*imlp)) != NULL; + imlp = &iml->next_rcu) { if (iml->multi.imr_multiaddr.s_addr != group) continue; if (ifindex) { @@ -1888,7 +1889,7 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) (void) ip_mc_leave_src(sk, iml, in_dev); - rcu_assign_pointer(*imlp, iml->next); + *imlp = iml->next_rcu; if (in_dev) ip_mc_dec_group(in_dev, group); @@ -1934,7 +1935,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct } err = -EADDRNOTAVAIL; - for (pmc=inet->mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rtnl(inet, pmc) { if ((pmc->multi.imr_multiaddr.s_addr == imr.imr_multiaddr.s_addr) && (pmc->multi.imr_ifindex == imr.imr_ifindex)) @@ -1958,7 +1959,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct pmc->sfmode = omode; } - psl = pmc->sflist; + psl = rtnl_dereference(pmc->sflist); if (!add) { if (!psl) goto done; /* err = -EADDRNOTAVAIL */ @@ -2077,7 +2078,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) goto done; } - for (pmc=inet->mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rtnl(inet, pmc) { if (pmc->multi.imr_multiaddr.s_addr == msf->imsf_multiaddr && pmc->multi.imr_ifindex == imr.imr_ifindex) break; @@ -2107,7 +2108,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) (void) ip_mc_add_src(in_dev, &msf->imsf_multiaddr, msf->imsf_fmode, 0, NULL, 0); } - psl = pmc->sflist; + psl = rtnl_dereference(pmc->sflist); if (psl) { (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, psl->sl_count, psl->sl_addr, 0); @@ -2155,7 +2156,7 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, } err = -EADDRNOTAVAIL; - for (pmc=inet->mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rtnl(inet, pmc) { if (pmc->multi.imr_multiaddr.s_addr == msf->imsf_multiaddr && pmc->multi.imr_ifindex == imr.imr_ifindex) break; @@ -2163,7 +2164,7 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, if (!pmc) /* must have a prior join */ goto done; msf->imsf_fmode = pmc->sfmode; - psl = pmc->sflist; + psl = rtnl_dereference(pmc->sflist); rtnl_unlock(); if (!psl) { len = 0; @@ -2208,7 +2209,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, err = -EADDRNOTAVAIL; - for (pmc=inet->mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rtnl(inet, pmc) { if (pmc->multi.imr_multiaddr.s_addr == addr && pmc->multi.imr_ifindex == gsf->gf_interface) break; @@ -2216,7 +2217,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, if (!pmc) /* must have a prior join */ goto done; gsf->gf_fmode = pmc->sfmode; - psl = pmc->sflist; + psl = rtnl_dereference(pmc->sflist); rtnl_unlock(); count = psl ? psl->sl_count : 0; copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc; @@ -2257,7 +2258,7 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif) goto out; rcu_read_lock(); - for (pmc=rcu_dereference(inet->mc_list); pmc; pmc=rcu_dereference(pmc->next)) { + for_each_pmc_rcu(inet, pmc) { if (pmc->multi.imr_multiaddr.s_addr == loc_addr && pmc->multi.imr_ifindex == dif) break; @@ -2265,7 +2266,7 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif) ret = inet->mc_all; if (!pmc) goto unlock; - psl = pmc->sflist; + psl = rcu_dereference(pmc->sflist); ret = (pmc->sfmode == MCAST_EXCLUDE); if (!psl) goto unlock; @@ -2300,10 +2301,10 @@ void ip_mc_drop_socket(struct sock *sk) return; rtnl_lock(); - while ((iml = inet->mc_list) != NULL) { + while ((iml = rtnl_dereference(inet->mc_list)) != NULL) { struct in_device *in_dev; - rcu_assign_pointer(inet->mc_list, iml->next); + inet->mc_list = iml->next_rcu; in_dev = inetdev_by_index(net, iml->multi.imr_ifindex); (void) ip_mc_leave_src(sk, iml, in_dev); if (in_dev != NULL) { @@ -2323,8 +2324,8 @@ int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 p struct ip_sf_list *psf; int rv = 0; - read_lock(&in_dev->mc_list_lock); - for (im=in_dev->mc_list; im; im=im->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, im) { if (im->multiaddr == mc_addr) break; } @@ -2345,7 +2346,7 @@ int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 p } else rv = 1; /* unspecified source; tentatively allow */ } - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); return rv; } @@ -2371,13 +2372,11 @@ static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq) in_dev = __in_dev_get_rcu(state->dev); if (!in_dev) continue; - read_lock(&in_dev->mc_list_lock); - im = in_dev->mc_list; + im = rcu_dereference(in_dev->mc_list); if (im) { state->in_dev = in_dev; break; } - read_unlock(&in_dev->mc_list_lock); } return im; } @@ -2385,11 +2384,9 @@ static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq) static struct ip_mc_list *igmp_mc_get_next(struct seq_file *seq, struct ip_mc_list *im) { struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); - im = im->next; - while (!im) { - if (likely(state->in_dev != NULL)) - read_unlock(&state->in_dev->mc_list_lock); + im = rcu_dereference(im->next_rcu); + while (!im) { state->dev = next_net_device_rcu(state->dev); if (!state->dev) { state->in_dev = NULL; @@ -2398,8 +2395,7 @@ static struct ip_mc_list *igmp_mc_get_next(struct seq_file *seq, struct ip_mc_li state->in_dev = __in_dev_get_rcu(state->dev); if (!state->in_dev) continue; - read_lock(&state->in_dev->mc_list_lock); - im = state->in_dev->mc_list; + im = rcu_dereference(state->in_dev->mc_list); } return im; } @@ -2435,10 +2431,8 @@ static void igmp_mc_seq_stop(struct seq_file *seq, void *v) __releases(rcu) { struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); - if (likely(state->in_dev != NULL)) { - read_unlock(&state->in_dev->mc_list_lock); - state->in_dev = NULL; - } + + state->in_dev = NULL; state->dev = NULL; rcu_read_unlock(); } @@ -2460,7 +2454,7 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v) querier = "NONE"; #endif - if (state->in_dev->mc_list == im) { + if (rcu_dereference(state->in_dev->mc_list) == im) { seq_printf(seq, "%d\t%-10s: %5d %7s\n", state->dev->ifindex, state->dev->name, state->in_dev->mc_count, querier); } @@ -2519,8 +2513,7 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq) idev = __in_dev_get_rcu(state->dev); if (unlikely(idev == NULL)) continue; - read_lock(&idev->mc_list_lock); - im = idev->mc_list; + im = rcu_dereference(idev->mc_list); if (likely(im != NULL)) { spin_lock_bh(&im->lock); psf = im->sources; @@ -2531,7 +2524,6 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq) } spin_unlock_bh(&im->lock); } - read_unlock(&idev->mc_list_lock); } return psf; } @@ -2545,9 +2537,6 @@ static struct ip_sf_list *igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_l spin_unlock_bh(&state->im->lock); state->im = state->im->next; while (!state->im) { - if (likely(state->idev != NULL)) - read_unlock(&state->idev->mc_list_lock); - state->dev = next_net_device_rcu(state->dev); if (!state->dev) { state->idev = NULL; @@ -2556,8 +2545,7 @@ static struct ip_sf_list *igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_l state->idev = __in_dev_get_rcu(state->dev); if (!state->idev) continue; - read_lock(&state->idev->mc_list_lock); - state->im = state->idev->mc_list; + state->im = rcu_dereference(state->idev->mc_list); } if (!state->im) break; @@ -2603,10 +2591,7 @@ static void igmp_mcf_seq_stop(struct seq_file *seq, void *v) spin_unlock_bh(&state->im->lock); state->im = NULL; } - if (likely(state->idev != NULL)) { - read_unlock(&state->idev->mc_list_lock); - state->idev = NULL; - } + state->idev = NULL; state->dev = NULL; rcu_read_unlock(); } -- cgit v1.2.3-70-g09d2 From 9fbbdde93231ad7f35c217aa6bbbc7995133f483 Mon Sep 17 00:00:00 2001 From: Erik Gilling Date: Thu, 11 Nov 2010 15:44:43 +0100 Subject: video: add fb_edid_add_monspecs for parsing extended edid information Modern monitors/tvs have more extended EDID information blocks which can contain extra detailed modes. This adds a fb_edid_add_monspecs function which drivers can use to parse those additions blocks. Signed-off-by: Erik Gilling Signed-off-by: Guennadi Liakhovetski Signed-off-by: Paul Mundt --- drivers/video/fbmon.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/fb.h | 2 ++ 2 files changed, 59 insertions(+) (limited to 'include/linux') diff --git a/drivers/video/fbmon.c b/drivers/video/fbmon.c index 563a98b88e9..a0b5a93b72d 100644 --- a/drivers/video/fbmon.c +++ b/drivers/video/fbmon.c @@ -973,6 +973,63 @@ void fb_edid_to_monspecs(unsigned char *edid, struct fb_monspecs *specs) DPRINTK("========================================\n"); } +void fb_edid_add_monspecs(unsigned char *edid, struct fb_monspecs *specs) +{ + unsigned char *block; + struct fb_videomode *mode, *m; + int num = 0, i, first = 1; + + if (edid == NULL) + return; + + if (!edid_checksum(edid)) + return; + + if (edid[0] != 0x2) + return; + + mode = kzalloc(50 * sizeof(struct fb_videomode), GFP_KERNEL); + if (mode == NULL) + return; + + block = edid + edid[0x2]; + + DPRINTK(" Extended Detailed Timings\n"); + + for (i = 0; i < (128 - edid[0x2]) / DETAILED_TIMING_DESCRIPTION_SIZE; + i++, block += DETAILED_TIMING_DESCRIPTION_SIZE) { + if (!(block[0] == 0x00 && block[1] == 0x00)) { + get_detailed_timing(block, &mode[num]); + if (first) { + mode[num].flag |= FB_MODE_IS_FIRST; + first = 0; + } + num++; + } + } + + /* Yikes, EDID data is totally useless */ + if (!num) { + kfree(mode); + return; + } + + m = kzalloc((specs->modedb_len + num) * + sizeof(struct fb_videomode), GFP_KERNEL); + + if (!m) { + kfree(mode); + return; + } + + memmove(m, specs->modedb, specs->modedb_len * sizeof(struct fb_videomode)); + memmove(m + specs->modedb_len, mode, num * sizeof(struct fb_videomode)); + kfree(mode); + kfree(specs->modedb); + specs->modedb = m; + specs->modedb_len = specs->modedb_len + num; +} + /* * VESA Generalized Timing Formula (GTF) */ diff --git a/include/linux/fb.h b/include/linux/fb.h index 7fca3dc4e47..6f0274d96f0 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -1092,6 +1092,8 @@ extern int fb_parse_edid(unsigned char *edid, struct fb_var_screeninfo *var); extern const unsigned char *fb_firmware_edid(struct device *device); extern void fb_edid_to_monspecs(unsigned char *edid, struct fb_monspecs *specs); +extern void fb_edid_add_monspecs(unsigned char *edid, + struct fb_monspecs *specs); extern void fb_destroy_modedb(struct fb_videomode *modedb); extern int fb_find_mode_cvt(struct fb_videomode *mode, int margins, int rb); extern unsigned char *fb_ddc_read(struct i2c_adapter *adapter); -- cgit v1.2.3-70-g09d2 From 0ad83f6882c41df1a7fa387086029e162038c1f2 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Thu, 11 Nov 2010 15:45:04 +0100 Subject: fbdev: when parsing E-EDID blocks, also use SVD entries Add parsing of E-EDID SVD entries. In this first version only a few CEA/EIA-861E modes are implemented, more can be added as needed. Signed-off-by: Guennadi Liakhovetski Signed-off-by: Paul Mundt --- drivers/video/fbmon.c | 37 +++++++++++++++++++++++++++++++++---- drivers/video/modedb.c | 43 +++++++++++++++++++++++++++++++++++++++++++ include/linux/fb.h | 1 + 3 files changed, 77 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/video/fbmon.c b/drivers/video/fbmon.c index b25399abcf4..4f57485f8c5 100644 --- a/drivers/video/fbmon.c +++ b/drivers/video/fbmon.c @@ -983,7 +983,8 @@ void fb_edid_add_monspecs(unsigned char *edid, struct fb_monspecs *specs) unsigned char *block; struct fb_videomode *m; int num = 0, i; - u8 edt[(128 - 4) / DETAILED_TIMING_DESCRIPTION_SIZE]; + u8 svd[64], edt[(128 - 4) / DETAILED_TIMING_DESCRIPTION_SIZE]; + u8 pos = 4, svd_n = 0; if (!edid) return; @@ -995,6 +996,21 @@ void fb_edid_add_monspecs(unsigned char *edid, struct fb_monspecs *specs) edid[2] < 4 || edid[2] > 128 - DETAILED_TIMING_DESCRIPTION_SIZE) return; + DPRINTK(" Short Video Descriptors\n"); + + while (pos < edid[2]) { + u8 len = edid[pos] & 0x1f, type = (edid[pos] >> 5) & 7; + pr_debug("Data block %u of %u bytes\n", type, len); + if (type == 2) + for (i = pos; i < pos + len; i++) { + u8 idx = edid[pos + i] & 0x7f; + svd[svd_n++] = idx; + pr_debug("N%sative mode #%d\n", + edid[pos + i] & 0x80 ? "" : "on-n", idx); + } + pos += len + 1; + } + block = edid + edid[2]; DPRINTK(" Extended Detailed Timings\n"); @@ -1005,10 +1021,10 @@ void fb_edid_add_monspecs(unsigned char *edid, struct fb_monspecs *specs) edt[num++] = block - edid; /* Yikes, EDID data is totally useless */ - if (!num) + if (!(num + svd_n)) return; - m = kzalloc((specs->modedb_len + num) * + m = kzalloc((specs->modedb_len + num + svd_n) * sizeof(struct fb_videomode), GFP_KERNEL); if (!m) @@ -1023,9 +1039,22 @@ void fb_edid_add_monspecs(unsigned char *edid, struct fb_monspecs *specs) pr_debug("Adding %ux%u@%u\n", m[i].xres, m[i].yres, m[i].refresh); } + for (i = specs->modedb_len + num; i < specs->modedb_len + num + svd_n; i++) { + int idx = svd[i - specs->modedb_len - num]; + if (!idx || idx > 63) { + pr_warning("Reserved SVD code %d\n", idx); + } else if (idx > ARRAY_SIZE(cea_modes) || !cea_modes[idx].xres) { + pr_warning("Unimplemented SVD code %d\n", idx); + } else { + memcpy(&m[i], cea_modes + idx, sizeof(m[i])); + pr_debug("Adding SVD #%d: %ux%u@%u\n", idx, + m[i].xres, m[i].yres, m[i].refresh); + } + } + kfree(specs->modedb); specs->modedb = m; - specs->modedb_len = specs->modedb_len + num; + specs->modedb_len = specs->modedb_len + num + svd_n; } /* diff --git a/drivers/video/modedb.c b/drivers/video/modedb.c index 0a4dbdc1693..9a0ae6ca542 100644 --- a/drivers/video/modedb.c +++ b/drivers/video/modedb.c @@ -278,6 +278,49 @@ static const struct fb_videomode modedb[] = { }; #ifdef CONFIG_FB_MODE_HELPERS +const struct fb_videomode cea_modes[64] = { + /* #1: 640x480p@59.94/60Hz */ + [1] = { + NULL, 60, 640, 480, 39722, 48, 16, 33, 10, 96, 2, 0, FB_VMODE_NONINTERLACED, 0, + }, + /* #3: 720x480p@59.94/60Hz */ + [3] = { + NULL, 60, 720, 480, 37037, 60, 16, 30, 9, 62, 6, 0, FB_VMODE_NONINTERLACED, 0, + }, + /* #5: 1920x1080i@59.94/60Hz */ + [5] = { + NULL, 60, 1920, 1080, 13763, 148, 88, 15, 2, 44, 5, 0, FB_VMODE_INTERLACED, 0, + }, + /* #7: 720(1440)x480iH@59.94/60Hz */ + [7] = { + NULL, 60, 1440, 480, 18554/*37108*/, 114, 38, 15, 4, 124, 3, 0, FB_VMODE_INTERLACED, 0, + }, + /* #9: 720(1440)x240pH@59.94/60Hz */ + [9] = { + NULL, 60, 1440, 240, 18554, 114, 38, 16, 4, 124, 3, 0, FB_VMODE_NONINTERLACED, 0, + }, + /* #18: 720x576pH@50Hz */ + [18] = { + NULL, 50, 720, 576, 37037, 68, 12, 39, 5, 64, 5, 0, FB_VMODE_NONINTERLACED, 0, + }, + /* #19: 1280x720p@50Hz */ + [19] = { + NULL, 50, 1280, 720, 13468, 220, 440, 20, 5, 40, 5, 0, FB_VMODE_NONINTERLACED, 0, + }, + /* #20: 1920x1080i@50Hz */ + [20] = { + NULL, 50, 1920, 1080, 13480, 148, 528, 15, 5, 528, 5, 0, FB_VMODE_INTERLACED, 0, + }, + /* #32: 1920x1080p@23.98/24Hz */ + [32] = { + NULL, 24, 1920, 1080, 13468, 148, 638, 36, 4, 44, 5, 0, FB_VMODE_NONINTERLACED, 0, + }, + /* #35: (2880)x480p4x@59.94/60Hz */ + [35] = { + NULL, 50, 2880, 480, 11100, 240, 64, 30, 9, 248, 6, 0, FB_VMODE_NONINTERLACED, 0, + }, +}; + const struct fb_videomode vesa_modes[] = { /* 0 640x350-85 VESA */ { NULL, 85, 640, 350, 31746, 96, 32, 60, 32, 64, 3, diff --git a/include/linux/fb.h b/include/linux/fb.h index 6f0274d96f0..e154a79b832 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -1151,6 +1151,7 @@ struct fb_videomode { extern const char *fb_mode_option; extern const struct fb_videomode vesa_modes[]; +extern const struct fb_videomode cea_modes[64]; struct fb_modelist { struct list_head list; -- cgit v1.2.3-70-g09d2 From d83447f0944e73d690218d79c07762ffa4ceb9e4 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sun, 14 Nov 2010 17:25:46 +0100 Subject: dccp ccid-2: Schedule Sync as out-of-band mechanism The problem with Ack Vectors is that i) their length is variable and can in principle grow quite large, ii) it is hard to predict exactly how large they will be. Due to the second point it seems not a good idea to reduce the MPS; in particular when on average there is enough room for the Ack Vector and an increase in length is momentarily due to some burst loss, after which the Ack Vector returns to its normal/average length. The solution taken by this patch is to subtract a minimum-expected Ack Vector length from the MPS, and to defer any larger Ack Vectors onto a separate Sync - but only if indeed there is no space left on the skb. This patch provides the infrastructure to schedule Sync-packets for transporting (urgent) out-of-band data. Its signalling is quicker than scheduling an Ack, since it does not need to wait for new application data. Signed-off-by: Gerrit Renker --- include/linux/dccp.h | 2 ++ net/dccp/options.c | 24 ++++++++++++++++++++---- net/dccp/output.c | 15 +++++++++++++++ 3 files changed, 37 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 749f01ccd26..eed52bcd35d 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -462,6 +462,7 @@ struct dccp_ackvec; * @dccps_hc_rx_insert_options - receiver wants to add options when acking * @dccps_hc_tx_insert_options - sender wants to add options when sending * @dccps_server_timewait - server holds timewait state on close (RFC 4340, 8.3) + * @dccps_sync_scheduled - flag which signals "send out-of-band message soon" * @dccps_xmitlet - tasklet scheduled by the TX CCID to dequeue data packets * @dccps_xmit_timer - used by the TX CCID to delay sending (rate-based pacing) * @dccps_syn_rtt - RTT sample from Request/Response exchange (in usecs) @@ -503,6 +504,7 @@ struct dccp_sock { __u8 dccps_hc_rx_insert_options:1; __u8 dccps_hc_tx_insert_options:1; __u8 dccps_server_timewait:1; + __u8 dccps_sync_scheduled:1; struct tasklet_struct dccps_xmitlet; struct timer_list dccps_xmit_timer; }; diff --git a/net/dccp/options.c b/net/dccp/options.c index 7743df00f5b..dabd6ee34d4 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -427,6 +427,7 @@ static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec; + struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); const u16 buflen = dccp_ackvec_buflen(av); /* Figure out how many options do we need to represent the ackvec */ const u8 nr_opts = DIV_ROUND_UP(buflen, DCCP_SINGLE_OPT_MAXLEN); @@ -435,10 +436,25 @@ static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) const unsigned char *tail, *from; unsigned char *to; - if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) + if (dcb->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { + DCCP_WARN("Lacking space for %u bytes on %s packet\n", len, + dccp_packet_name(dcb->dccpd_type)); return -1; - - DCCP_SKB_CB(skb)->dccpd_opt_len += len; + } + /* + * Since Ack Vectors are variable-length, we can not always predict + * their size. To catch exception cases where the space is running out + * on the skb, a separate Sync is scheduled to carry the Ack Vector. + */ + if (len > DCCPAV_MIN_OPTLEN && + len + dcb->dccpd_opt_len + skb->len > dp->dccps_mss_cache) { + DCCP_WARN("No space left for Ack Vector (%u) on skb (%u+%u), " + "MPS=%u ==> reduce payload size?\n", len, skb->len, + dcb->dccpd_opt_len, dp->dccps_mss_cache); + dp->dccps_sync_scheduled = 1; + return 0; + } + dcb->dccpd_opt_len += len; to = skb_push(skb, len); len = buflen; @@ -479,7 +495,7 @@ static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) /* * Each sent Ack Vector is recorded in the list, as per A.2 of RFC 4340. */ - if (dccp_ackvec_update_records(av, DCCP_SKB_CB(skb)->dccpd_seq, nonce)) + if (dccp_ackvec_update_records(av, dcb->dccpd_seq, nonce)) return -ENOBUFS; return 0; } diff --git a/net/dccp/output.c b/net/dccp/output.c index 45b91853f5a..d96dd9d362a 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -283,6 +283,15 @@ static void dccp_xmit_packet(struct sock *sk) * any local drop will eventually be reported via receiver feedback. */ ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len); + + /* + * If the CCID needs to transfer additional header options out-of-band + * (e.g. Ack Vectors or feature-negotiation options), it activates this + * flag to schedule a Sync. The Sync will automatically incorporate all + * currently pending header options, thus clearing the backlog. + */ + if (dp->dccps_sync_scheduled) + dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC); } /** @@ -636,6 +645,12 @@ void dccp_send_sync(struct sock *sk, const u64 ackno, DCCP_SKB_CB(skb)->dccpd_type = pkt_type; DCCP_SKB_CB(skb)->dccpd_ack_seq = ackno; + /* + * Clear the flag in case the Sync was scheduled for out-of-band data, + * such as carrying a long Ack Vector. + */ + dccp_sk(sk)->dccps_sync_scheduled = 0; + dccp_transmit_skb(sk, skb); } -- cgit v1.2.3-70-g09d2 From 58e998c6d23988490162cef0784b19ea274d90bb Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 29 Oct 2010 12:14:55 +0000 Subject: offloading: Force software GSO for multiple vlan tags. We currently use vlan_features to check for TSO support if there is a vlan tag. However, it's quite likely that the NIC is not able to do TSO when there is an arbitrary number of tags. Therefore if there is more than one tag (in-band or out-of-band), fall back to software emulation. Signed-off-by: Jesse Gross CC: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/netdevice.h | 7 +++---- net/core/dev.c | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 578debb801f..6e4cfbc53d4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2239,6 +2239,8 @@ unsigned long netdev_fix_features(unsigned long features, const char *name); void netif_stacked_transfer_operstate(const struct net_device *rootdev, struct net_device *dev); +int netif_get_vlan_features(struct sk_buff *skb, struct net_device *dev); + static inline int net_gso_ok(int features, int gso_type) { int feature = gso_type << NETIF_F_GSO_SHIFT; @@ -2254,10 +2256,7 @@ static inline int skb_gso_ok(struct sk_buff *skb, int features) static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb) { if (skb_is_gso(skb)) { - int features = dev->features; - - if (skb->protocol == htons(ETH_P_8021Q) || skb->vlan_tci) - features &= dev->vlan_features; + int features = netif_get_vlan_features(skb, dev); return (!skb_gso_ok(skb, features) || unlikely(skb->ip_summed != CHECKSUM_PARTIAL)); diff --git a/net/core/dev.c b/net/core/dev.c index 368930a988e..8b500c3e029 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1968,6 +1968,22 @@ static inline void skb_orphan_try(struct sk_buff *skb) } } +int netif_get_vlan_features(struct sk_buff *skb, struct net_device *dev) +{ + __be16 protocol = skb->protocol; + + if (protocol == htons(ETH_P_8021Q)) { + struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; + protocol = veh->h_vlan_encapsulated_proto; + } else if (!skb->vlan_tci) + return dev->features; + + if (protocol != htons(ETH_P_8021Q)) + return dev->features & dev->vlan_features; + else + return 0; +} + /* * Returns true if either: * 1. skb has frag_list and the device doesn't support FRAGLIST, or -- cgit v1.2.3-70-g09d2 From 2e48928d8a0f38c1b5c81eb3f1294de8a6382c68 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 20 Oct 2010 10:16:58 -0700 Subject: rfkill: remove dead code The following code is defined but never used. Signed-off-by: Stephen Hemminger Signed-off-by: John W. Linville --- include/linux/rfkill.h | 31 ------------------------------- net/rfkill/core.c | 14 -------------- 2 files changed, 45 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index 08c32e4f261..c6c608482cb 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -354,37 +354,6 @@ static inline bool rfkill_blocked(struct rfkill *rfkill) } #endif /* RFKILL || RFKILL_MODULE */ - -#ifdef CONFIG_RFKILL_LEDS -/** - * rfkill_get_led_trigger_name - Get the LED trigger name for the button's LED. - * This function might return a NULL pointer if registering of the - * LED trigger failed. Use this as "default_trigger" for the LED. - */ -const char *rfkill_get_led_trigger_name(struct rfkill *rfkill); - -/** - * rfkill_set_led_trigger_name -- set the LED trigger name - * @rfkill: rfkill struct - * @name: LED trigger name - * - * This function sets the LED trigger name of the radio LED - * trigger that rfkill creates. It is optional, but if called - * must be called before rfkill_register() to be effective. - */ -void rfkill_set_led_trigger_name(struct rfkill *rfkill, const char *name); -#else -static inline const char *rfkill_get_led_trigger_name(struct rfkill *rfkill) -{ - return NULL; -} - -static inline void -rfkill_set_led_trigger_name(struct rfkill *rfkill, const char *name) -{ -} -#endif - #endif /* __KERNEL__ */ #endif /* RFKILL_H */ diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 04f599089e6..0198191b756 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -149,20 +149,6 @@ static void rfkill_led_trigger_activate(struct led_classdev *led) rfkill_led_trigger_event(rfkill); } -const char *rfkill_get_led_trigger_name(struct rfkill *rfkill) -{ - return rfkill->led_trigger.name; -} -EXPORT_SYMBOL(rfkill_get_led_trigger_name); - -void rfkill_set_led_trigger_name(struct rfkill *rfkill, const char *name) -{ - BUG_ON(!rfkill); - - rfkill->ledtrigname = name; -} -EXPORT_SYMBOL(rfkill_set_led_trigger_name); - static int rfkill_led_trigger_register(struct rfkill *rfkill) { rfkill->led_trigger.name = rfkill->ledtrigname -- cgit v1.2.3-70-g09d2 From ca4ffe8f2848169a8ded0ea8a60b2d81925564c9 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 20 Oct 2010 10:18:55 -0700 Subject: cfg80211: fix disabling channels based on hints After a module loads you will have loaded the world roaming regulatory domain or a custom regulatory domain. Further regulatory hints are welcomed and should be respected unless the regulatory hint is coming from a country IE as the IEEE spec allows for a country IE to be a subset of what is allowed by the local regulatory agencies. So disable all channels that do not fit a regulatory domain sent from a unless the hint is from a country IE and the country IE had no information about the band we are currently processing. This fixes a few regulatory issues, for example for drivers that depend on CRDA and had no 5 GHz freqencies allowed were not properly disabling 5 GHz at all, furthermore it also allows users to restrict devices further as was intended. If you recieve a country IE upon association we will also disable the channels that are not allowed if the country IE had at least one channel on the respective band we are procesing. This was the original intention behind this design but it was completely overlooked... Cc: David Quan Cc: Jouni Malinen cc: Easwar Krishnan Cc: stable@kernel.org Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- include/linux/nl80211.h | 6 +++++- net/wireless/reg.c | 20 +++++++++++++++++++- 2 files changed, 24 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 0edb2566c14..fb877b5621b 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1307,7 +1307,11 @@ enum nl80211_bitrate_attr { * wireless core it thinks its knows the regulatory domain we should be in. * @NL80211_REGDOM_SET_BY_COUNTRY_IE: the wireless core has received an * 802.11 country information element with regulatory information it - * thinks we should consider. + * thinks we should consider. cfg80211 only processes the country + * code from the IE, and relies on the regulatory domain information + * structure pased by userspace (CRDA) from our wireless-regdb. + * If a channel is enabled but the country code indicates it should + * be disabled we disable the channel and re-enable it upon disassociation. */ enum nl80211_reg_initiator { NL80211_REGDOM_SET_BY_CORE, diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 1bc8131a518..8ab65f2afe7 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -750,8 +750,26 @@ static void handle_channel(struct wiphy *wiphy, desired_bw_khz, ®_rule); - if (r) + if (r) { + /* + * We will disable all channels that do not match our + * recieved regulatory rule unless the hint is coming + * from a Country IE and the Country IE had no information + * about a band. The IEEE 802.11 spec allows for an AP + * to send only a subset of the regulatory rules allowed, + * so an AP in the US that only supports 2.4 GHz may only send + * a country IE with information for the 2.4 GHz band + * while 5 GHz is still supported. + */ + if (initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE && + r == -ERANGE) + return; + + REG_DBG_PRINT("cfg80211: Disabling freq %d MHz\n", + chan->center_freq); + chan->flags = IEEE80211_CHAN_DISABLED; return; + } power_rule = ®_rule->power_rule; freq_range = ®_rule->freq_range; -- cgit v1.2.3-70-g09d2 From c8aea565e8f715d9f10064b1cbfbc15bf75df501 Mon Sep 17 00:00:00 2001 From: Gery Kahn Date: Tue, 5 Oct 2010 16:09:05 +0200 Subject: wl1271: ref_clock cosmetic changes Cosmetic cleanup for ref_clock code while configured by board. Signed-off-by: Gery Kahn Signed-off-by: Luciano Coelho --- drivers/net/wireless/wl12xx/wl1271_boot.c | 10 ++++------ include/linux/wl12xx.h | 8 ++++++++ 2 files changed, 12 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wireless/wl12xx/wl1271_boot.c b/drivers/net/wireless/wl12xx/wl1271_boot.c index b9102124209..5b190728ca5 100644 --- a/drivers/net/wireless/wl12xx/wl1271_boot.c +++ b/drivers/net/wireless/wl12xx/wl1271_boot.c @@ -471,20 +471,19 @@ int wl1271_boot(struct wl1271 *wl) { int ret = 0; u32 tmp, clk, pause; - int ref_clock = wl->ref_clock; wl1271_boot_hw_version(wl); - if (ref_clock == 0 || ref_clock == 2 || ref_clock == 4) + if (wl->ref_clock == 0 || wl->ref_clock == 2 || wl->ref_clock == 4) /* ref clk: 19.2/38.4/38.4-XTAL */ clk = 0x3; - else if (ref_clock == 1 || ref_clock == 3) + else if (wl->ref_clock == 1 || wl->ref_clock == 3) /* ref clk: 26/52 */ clk = 0x5; else return -EINVAL; - if (ref_clock != 0) { + if (wl->ref_clock != 0) { u16 val; /* Set clock type (open drain) */ val = wl1271_top_reg_read(wl, OCP_REG_CLK_TYPE); @@ -529,8 +528,7 @@ int wl1271_boot(struct wl1271 *wl) wl1271_debug(DEBUG_BOOT, "clk2 0x%x", clk); - /* 2 */ - clk |= (ref_clock << 1) << 4; + clk |= (wl->ref_clock << 1) << 4; wl1271_write32(wl, DRPW_SCRATCH_START, clk); wl1271_set_partition(wl, &part_table[PART_WORK]); diff --git a/include/linux/wl12xx.h b/include/linux/wl12xx.h index 4f902e1908a..bebb8efea0a 100644 --- a/include/linux/wl12xx.h +++ b/include/linux/wl12xx.h @@ -24,6 +24,14 @@ #ifndef _LINUX_WL12XX_H #define _LINUX_WL12XX_H +/* The board reference clock values */ +enum { + WL12XX_REFCLOCK_19 = 0, /* 19.2 MHz */ + WL12XX_REFCLOCK_26 = 1, /* 26 MHz */ + WL12XX_REFCLOCK_38 = 2, /* 38.4 MHz */ + WL12XX_REFCLOCK_54 = 3, /* 54 MHz */ +}; + struct wl12xx_platform_data { void (*set_power)(bool enable); /* SDIO only: IRQ number if WLAN_IRQ line is used, 0 for SDIO IRQs */ -- cgit v1.2.3-70-g09d2 From 7919a57bc608140aa8614c19eac40c6916fb61d2 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Mon, 30 Aug 2010 19:04:01 +0000 Subject: bitops: Provide generic sign_extend32 function This patch moves code out from wireless drivers where two different functions are defined in three code locations for the same purpose and provides a common function to sign extend a 32-bit value. Signed-off-by: Andreas Herrmann Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath5k/phy.c | 8 +------- drivers/net/wireless/ath/ath9k/ar5008_phy.c | 12 ++++++------ drivers/net/wireless/ath/ath9k/ar9002_phy.c | 8 ++++---- drivers/net/wireless/ath/ath9k/ar9003_phy.c | 12 ++++++------ drivers/net/wireless/ath/ath9k/hw.h | 6 ------ drivers/net/wireless/iwlwifi/iwl-4965.c | 20 ++------------------ include/linux/bitops.h | 11 +++++++++++ 7 files changed, 30 insertions(+), 47 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wireless/ath/ath5k/phy.c b/drivers/net/wireless/ath/ath5k/phy.c index 219367884e6..6b43f535ff5 100644 --- a/drivers/net/wireless/ath/ath5k/phy.c +++ b/drivers/net/wireless/ath/ath5k/phy.c @@ -1102,18 +1102,12 @@ int ath5k_hw_channel(struct ath5k_hw *ah, struct ieee80211_channel *channel) PHY calibration \*****************/ -static int sign_extend(int val, const int nbits) -{ - int order = BIT(nbits-1); - return (val ^ order) - order; -} - static s32 ath5k_hw_read_measured_noise_floor(struct ath5k_hw *ah) { s32 val; val = ath5k_hw_reg_read(ah, AR5K_PHY_NF); - return sign_extend(AR5K_REG_MS(val, AR5K_PHY_NF_MINCCA_PWR), 9); + return sign_extend32(AR5K_REG_MS(val, AR5K_PHY_NF_MINCCA_PWR), 8); } void ath5k_hw_init_nfcal_hist(struct ath5k_hw *ah) diff --git a/drivers/net/wireless/ath/ath9k/ar5008_phy.c b/drivers/net/wireless/ath/ath9k/ar5008_phy.c index 777a602176f..c83a22cfbe1 100644 --- a/drivers/net/wireless/ath/ath9k/ar5008_phy.c +++ b/drivers/net/wireless/ath/ath9k/ar5008_phy.c @@ -1490,25 +1490,25 @@ static void ar5008_hw_do_getnf(struct ath_hw *ah, int16_t nf; nf = MS(REG_READ(ah, AR_PHY_CCA), AR_PHY_MINCCA_PWR); - nfarray[0] = sign_extend(nf, 9); + nfarray[0] = sign_extend32(nf, 8); nf = MS(REG_READ(ah, AR_PHY_CH1_CCA), AR_PHY_CH1_MINCCA_PWR); - nfarray[1] = sign_extend(nf, 9); + nfarray[1] = sign_extend32(nf, 8); nf = MS(REG_READ(ah, AR_PHY_CH2_CCA), AR_PHY_CH2_MINCCA_PWR); - nfarray[2] = sign_extend(nf, 9); + nfarray[2] = sign_extend32(nf, 8); if (!IS_CHAN_HT40(ah->curchan)) return; nf = MS(REG_READ(ah, AR_PHY_EXT_CCA), AR_PHY_EXT_MINCCA_PWR); - nfarray[3] = sign_extend(nf, 9); + nfarray[3] = sign_extend32(nf, 8); nf = MS(REG_READ(ah, AR_PHY_CH1_EXT_CCA), AR_PHY_CH1_EXT_MINCCA_PWR); - nfarray[4] = sign_extend(nf, 9); + nfarray[4] = sign_extend32(nf, 8); nf = MS(REG_READ(ah, AR_PHY_CH2_EXT_CCA), AR_PHY_CH2_EXT_MINCCA_PWR); - nfarray[5] = sign_extend(nf, 9); + nfarray[5] = sign_extend32(nf, 8); } /* diff --git a/drivers/net/wireless/ath/ath9k/ar9002_phy.c b/drivers/net/wireless/ath/ath9k/ar9002_phy.c index c00cdc67b55..3fb97fdc124 100644 --- a/drivers/net/wireless/ath/ath9k/ar9002_phy.c +++ b/drivers/net/wireless/ath/ath9k/ar9002_phy.c @@ -473,21 +473,21 @@ static void ar9002_hw_do_getnf(struct ath_hw *ah, int16_t nf; nf = MS(REG_READ(ah, AR_PHY_CCA), AR9280_PHY_MINCCA_PWR); - nfarray[0] = sign_extend(nf, 9); + nfarray[0] = sign_extend32(nf, 8); nf = MS(REG_READ(ah, AR_PHY_EXT_CCA), AR9280_PHY_EXT_MINCCA_PWR); if (IS_CHAN_HT40(ah->curchan)) - nfarray[3] = sign_extend(nf, 9); + nfarray[3] = sign_extend32(nf, 8); if (AR_SREV_9285(ah) || AR_SREV_9271(ah)) return; nf = MS(REG_READ(ah, AR_PHY_CH1_CCA), AR9280_PHY_CH1_MINCCA_PWR); - nfarray[1] = sign_extend(nf, 9); + nfarray[1] = sign_extend32(nf, 8); nf = MS(REG_READ(ah, AR_PHY_CH1_EXT_CCA), AR9280_PHY_CH1_EXT_MINCCA_PWR); if (IS_CHAN_HT40(ah->curchan)) - nfarray[4] = sign_extend(nf, 9); + nfarray[4] = sign_extend32(nf, 8); } static void ar9002_hw_set_nf_limits(struct ath_hw *ah) diff --git a/drivers/net/wireless/ath/ath9k/ar9003_phy.c b/drivers/net/wireless/ath/ath9k/ar9003_phy.c index 06a9c4cd2f4..44c5454b2ad 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_phy.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_phy.c @@ -1023,25 +1023,25 @@ static void ar9003_hw_do_getnf(struct ath_hw *ah, int16_t nf; nf = MS(REG_READ(ah, AR_PHY_CCA_0), AR_PHY_MINCCA_PWR); - nfarray[0] = sign_extend(nf, 9); + nfarray[0] = sign_extend32(nf, 8); nf = MS(REG_READ(ah, AR_PHY_CCA_1), AR_PHY_CH1_MINCCA_PWR); - nfarray[1] = sign_extend(nf, 9); + nfarray[1] = sign_extend32(nf, 8); nf = MS(REG_READ(ah, AR_PHY_CCA_2), AR_PHY_CH2_MINCCA_PWR); - nfarray[2] = sign_extend(nf, 9); + nfarray[2] = sign_extend32(nf, 8); if (!IS_CHAN_HT40(ah->curchan)) return; nf = MS(REG_READ(ah, AR_PHY_EXT_CCA), AR_PHY_EXT_MINCCA_PWR); - nfarray[3] = sign_extend(nf, 9); + nfarray[3] = sign_extend32(nf, 8); nf = MS(REG_READ(ah, AR_PHY_EXT_CCA_1), AR_PHY_CH1_EXT_MINCCA_PWR); - nfarray[4] = sign_extend(nf, 9); + nfarray[4] = sign_extend32(nf, 8); nf = MS(REG_READ(ah, AR_PHY_EXT_CCA_2), AR_PHY_CH2_EXT_MINCCA_PWR); - nfarray[5] = sign_extend(nf, 9); + nfarray[5] = sign_extend32(nf, 8); } static void ar9003_hw_set_nf_limits(struct ath_hw *ah) diff --git a/drivers/net/wireless/ath/ath9k/hw.h b/drivers/net/wireless/ath/ath9k/hw.h index e5b72262fd9..f821a28bcda 100644 --- a/drivers/net/wireless/ath/ath9k/hw.h +++ b/drivers/net/wireless/ath/ath9k/hw.h @@ -825,12 +825,6 @@ static inline struct ath_hw_ops *ath9k_hw_ops(struct ath_hw *ah) return &ah->ops; } -static inline int sign_extend(int val, const int nbits) -{ - int order = BIT(nbits-1); - return (val ^ order) - order; -} - /* Initialization, Detach, Reset */ const char *ath9k_hw_probe(u16 vendorid, u16 devid); void ath9k_hw_deinit(struct ath_hw *ah); diff --git a/drivers/net/wireless/iwlwifi/iwl-4965.c b/drivers/net/wireless/iwlwifi/iwl-4965.c index cd14843878a..4748d067eb1 100644 --- a/drivers/net/wireless/iwlwifi/iwl-4965.c +++ b/drivers/net/wireless/iwlwifi/iwl-4965.c @@ -1686,22 +1686,6 @@ static void iwl4965_txq_update_byte_cnt_tbl(struct iwl_priv *priv, tfd_offset[TFD_QUEUE_SIZE_MAX + write_ptr] = bc_ent; } -/** - * sign_extend - Sign extend a value using specified bit as sign-bit - * - * Example: sign_extend(9, 3) would return -7 as bit3 of 1001b is 1 - * and bit0..2 is 001b which when sign extended to 1111111111111001b is -7. - * - * @param oper value to sign extend - * @param index 0 based bit index (0<=index<32) to sign bit - */ -static s32 sign_extend(u32 oper, int index) -{ - u8 shift = 31 - index; - - return (s32)(oper << shift) >> shift; -} - /** * iwl4965_hw_get_temperature - return the calibrated temperature (in Kelvin) * @statistics: Provides the temperature reading from the uCode @@ -1739,9 +1723,9 @@ static int iwl4965_hw_get_temperature(struct iwl_priv *priv) * "initialize" ALIVE response. */ if (!test_bit(STATUS_TEMPERATURE, &priv->status)) - vt = sign_extend(R4, 23); + vt = sign_extend32(R4, 23); else - vt = sign_extend(le32_to_cpu(priv->_agn.statistics. + vt = sign_extend32(le32_to_cpu(priv->_agn.statistics. general.common.temperature), 23); IWL_DEBUG_TEMP(priv, "Calib values R[1-3]: %d %d %d R4: %d\n", R1, R2, R3, vt); diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 827cc95711e..2184c6b97ae 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -109,6 +109,17 @@ static inline __u8 ror8(__u8 word, unsigned int shift) return (word >> shift) | (word << (8 - shift)); } +/** + * sign_extend32 - sign extend a 32-bit value using specified bit as sign-bit + * @value: value to sign extend + * @index: 0 based bit index (0<=index<32) to sign bit + */ +static inline __s32 sign_extend32(__u32 value, int index) +{ + __u8 shift = 31 - index; + return (__s32)(value << shift) >> shift; +} + static inline unsigned fls_long(unsigned long l) { if (sizeof(l) == 4) -- cgit v1.2.3-70-g09d2 From fe8222406c8277a21172479d3a8283d31c209028 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 9 Nov 2010 10:47:38 +0000 Subject: net: Simplify RX queue allocation This patch move RX queue allocation to alloc_netdev_mq and freeing of the queues to free_netdev (symmetric to TX queue allocation). Each kobject RX queue takes a reference to the queue's device so that the device can't be freed before all the kobjects have been released-- this obviates the need for reference counts specific to RX queues. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +-- net/core/dev.c | 19 ++++++++++--------- net/core/net-sysfs.c | 7 ++----- 3 files changed, 13 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6e4cfbc53d4..fccb11f879e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -592,8 +592,7 @@ struct netdev_rx_queue { struct rps_map __rcu *rps_map; struct rps_dev_flow_table __rcu *rps_flow_table; struct kobject kobj; - struct netdev_rx_queue *first; - atomic_t count; + struct net_device *dev; } ____cacheline_aligned_in_smp; #endif /* CONFIG_RPS */ diff --git a/net/core/dev.c b/net/core/dev.c index 75490670e0a..8725d168d1f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5051,12 +5051,8 @@ static int netif_alloc_rx_queues(struct net_device *dev) } dev->_rx = rx; - /* - * Set a pointer to first element in the array which holds the - * reference count. - */ for (i = 0; i < count; i++) - rx[i].first = rx; + rx[i].dev = dev; #endif return 0; } @@ -5132,10 +5128,6 @@ int register_netdevice(struct net_device *dev) dev->iflink = -1; - ret = netif_alloc_rx_queues(dev); - if (ret) - goto out; - netdev_init_queues(dev); /* Init, if this function is available */ @@ -5601,6 +5593,8 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, #ifdef CONFIG_RPS dev->num_rx_queues = queue_count; dev->real_num_rx_queues = queue_count; + if (netif_alloc_rx_queues(dev)) + goto free_pcpu; #endif dev->gso_max_size = GSO_MAX_SIZE; @@ -5618,6 +5612,10 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, free_pcpu: free_percpu(dev->pcpu_refcnt); kfree(dev->_tx); +#ifdef CONFIG_RPS + kfree(dev->_rx); +#endif + free_p: kfree(p); return NULL; @@ -5639,6 +5637,9 @@ void free_netdev(struct net_device *dev) release_net(dev_net(dev)); kfree(dev->_tx); +#ifdef CONFIG_RPS + kfree(dev->_rx); +#endif kfree(rcu_dereference_raw(dev->ingress_queue)); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index a5ff5a89f37..3ba526b56fe 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -706,7 +706,6 @@ static struct attribute *rx_queue_default_attrs[] = { static void rx_queue_release(struct kobject *kobj) { struct netdev_rx_queue *queue = to_rx_queue(kobj); - struct netdev_rx_queue *first = queue->first; struct rps_map *map; struct rps_dev_flow_table *flow_table; @@ -719,8 +718,7 @@ static void rx_queue_release(struct kobject *kobj) if (flow_table) call_rcu(&flow_table->rcu, rps_dev_flow_table_release); - if (atomic_dec_and_test(&first->count)) - kfree(first); + dev_put(queue->dev); } static struct kobj_type rx_queue_ktype = { @@ -732,7 +730,6 @@ static struct kobj_type rx_queue_ktype = { static int rx_queue_add_kobject(struct net_device *net, int index) { struct netdev_rx_queue *queue = net->_rx + index; - struct netdev_rx_queue *first = queue->first; struct kobject *kobj = &queue->kobj; int error = 0; @@ -745,7 +742,7 @@ static int rx_queue_add_kobject(struct net_device *net, int index) } kobject_uevent(kobj, KOBJ_ADD); - atomic_inc(&first->count); + dev_hold(queue->dev); return error; } -- cgit v1.2.3-70-g09d2 From c59504ebc5baa628706d10c2d3c7e1f4bc3c2147 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 14 Nov 2010 17:04:57 +0000 Subject: include/linux/if_macvlan.h: Remove unnecessary semicolons Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/linux/if_macvlan.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index 8a2fd66a8b5..ac96a2d7629 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -69,7 +69,7 @@ static inline void macvlan_count_rx(const struct macvlan_dev *vlan, rx_stats = this_cpu_ptr(vlan->rx_stats); if (likely(success)) { u64_stats_update_begin(&rx_stats->syncp); - rx_stats->rx_packets++;; + rx_stats->rx_packets++; rx_stats->rx_bytes += len; if (multicast) rx_stats->rx_multicast++; -- cgit v1.2.3-70-g09d2 From a386f99025f13b32502fe5dedf223c20d7283826 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Nov 2010 06:38:11 +0000 Subject: bridge: add proper RCU annotation to should_route_hook Add br_should_route_hook_t typedef, this is the only way we can get a clean RCU implementation for function pointer. Move route_hook to location where it is used. Signed-off-by: Eric Dumazet Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/if_bridge.h | 4 +++- net/bridge/br.c | 4 ---- net/bridge/br_input.c | 10 +++++++--- net/bridge/netfilter/ebtable_broute.c | 3 ++- 4 files changed, 12 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 0d241a5c490..f7e73c338c4 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -102,7 +102,9 @@ struct __fdb_entry { #include extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *)); -extern int (*br_should_route_hook)(struct sk_buff *skb); + +typedef int (*br_should_route_hook_t)(struct sk_buff *skb); +extern br_should_route_hook_t __rcu *br_should_route_hook; #endif diff --git a/net/bridge/br.c b/net/bridge/br.c index c8436fa3134..84bbb82599b 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -22,8 +22,6 @@ #include "br_private.h" -int (*br_should_route_hook)(struct sk_buff *skb); - static const struct stp_proto br_stp_proto = { .rcv = br_stp_rcv, }; @@ -102,8 +100,6 @@ static void __exit br_deinit(void) br_fdb_fini(); } -EXPORT_SYMBOL(br_should_route_hook); - module_init(br_init) module_exit(br_deinit) MODULE_LICENSE("GPL"); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 25207a1f182..6f6d8e1b776 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -21,6 +21,10 @@ /* Bridge group multicast address 802.1d (pg 51). */ const u8 br_group_address[ETH_ALEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; +/* Hook for brouter */ +br_should_route_hook_t __rcu *br_should_route_hook __read_mostly; +EXPORT_SYMBOL(br_should_route_hook); + static int br_pass_frame_up(struct sk_buff *skb) { struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev; @@ -139,7 +143,7 @@ struct sk_buff *br_handle_frame(struct sk_buff *skb) { struct net_bridge_port *p; const unsigned char *dest = eth_hdr(skb)->h_dest; - int (*rhook)(struct sk_buff *skb); + br_should_route_hook_t *rhook; if (unlikely(skb->pkt_type == PACKET_LOOPBACK)) return skb; @@ -173,8 +177,8 @@ forward: switch (p->state) { case BR_STATE_FORWARDING: rhook = rcu_dereference(br_should_route_hook); - if (rhook != NULL) { - if (rhook(skb)) + if (rhook) { + if ((*rhook)(skb)) return skb; dest = eth_hdr(skb)->h_dest; } diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index ae3f106c390..1bcaf36ad61 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -87,7 +87,8 @@ static int __init ebtable_broute_init(void) if (ret < 0) return ret; /* see br_input.c */ - rcu_assign_pointer(br_should_route_hook, ebt_broute); + rcu_assign_pointer(br_should_route_hook, + (br_should_route_hook_t *)ebt_broute); return 0; } -- cgit v1.2.3-70-g09d2 From 61391cde9eefac5cfcf6d214aa80c77e58b1626b Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 15 Nov 2010 06:38:12 +0000 Subject: netdev: add rcu annotations to receive handler hook Suggested by Eric's bridge RCU changes. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fccb11f879e..b45c1b8b1d1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -994,8 +994,8 @@ struct net_device { unsigned int real_num_rx_queues; #endif - rx_handler_func_t *rx_handler; - void *rx_handler_data; + rx_handler_func_t __rcu *rx_handler; + void __rcu *rx_handler_data; struct netdev_queue __rcu *ingress_queue; -- cgit v1.2.3-70-g09d2 From 8ffab51b3dfc54876f145f15b351c41f3f703195 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 10 Nov 2010 21:14:04 +0000 Subject: macvlan: lockless tx path macvlan is a stacked device, like tunnels. We should use the lockless mechanism we are using in tunnels and loopback. This patch completely removes locking in TX path. tx stat counters are added into existing percpu stat structure, renamed from rx_stats to pcpu_stats. Note : this reverts commit 2c11455321f37 (macvlan: add multiqueue capability) Note : rx_errors converted to a 32bit counter, like tx_dropped, since they dont need 64bit range. Signed-off-by: Eric Dumazet Cc: Patrick McHardy Cc: Ben Greear Cc: Ben Hutchings Acked-by: Patrick McHardy Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 80 ++++++++++++++++++++-------------------------- include/linux/if_macvlan.h | 34 ++++++++++++-------- 2 files changed, 55 insertions(+), 59 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 0fc9dc7f20d..93f0ba25c80 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -243,18 +243,22 @@ xmit_world: netdev_tx_t macvlan_start_xmit(struct sk_buff *skb, struct net_device *dev) { - int i = skb_get_queue_mapping(skb); - struct netdev_queue *txq = netdev_get_tx_queue(dev, i); unsigned int len = skb->len; int ret; + const struct macvlan_dev *vlan = netdev_priv(dev); ret = macvlan_queue_xmit(skb, dev); if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { - txq->tx_packets++; - txq->tx_bytes += len; - } else - txq->tx_dropped++; + struct macvlan_pcpu_stats *pcpu_stats; + pcpu_stats = this_cpu_ptr(vlan->pcpu_stats); + u64_stats_update_begin(&pcpu_stats->syncp); + pcpu_stats->tx_packets++; + pcpu_stats->tx_bytes += len; + u64_stats_update_end(&pcpu_stats->syncp); + } else { + this_cpu_inc(vlan->pcpu_stats->tx_dropped); + } return ret; } EXPORT_SYMBOL_GPL(macvlan_start_xmit); @@ -414,14 +418,15 @@ static int macvlan_init(struct net_device *dev) dev->state = (dev->state & ~MACVLAN_STATE_MASK) | (lowerdev->state & MACVLAN_STATE_MASK); dev->features = lowerdev->features & MACVLAN_FEATURES; + dev->features |= NETIF_F_LLTX; dev->gso_max_size = lowerdev->gso_max_size; dev->iflink = lowerdev->ifindex; dev->hard_header_len = lowerdev->hard_header_len; macvlan_set_lockdep_class(dev); - vlan->rx_stats = alloc_percpu(struct macvlan_rx_stats); - if (!vlan->rx_stats) + vlan->pcpu_stats = alloc_percpu(struct macvlan_pcpu_stats); + if (!vlan->pcpu_stats) return -ENOMEM; return 0; @@ -431,7 +436,7 @@ static void macvlan_uninit(struct net_device *dev) { struct macvlan_dev *vlan = netdev_priv(dev); - free_percpu(vlan->rx_stats); + free_percpu(vlan->pcpu_stats); } static struct rtnl_link_stats64 *macvlan_dev_get_stats64(struct net_device *dev, @@ -439,33 +444,38 @@ static struct rtnl_link_stats64 *macvlan_dev_get_stats64(struct net_device *dev, { struct macvlan_dev *vlan = netdev_priv(dev); - dev_txq_stats_fold(dev, stats); - - if (vlan->rx_stats) { - struct macvlan_rx_stats *p, accum = {0}; - u64 rx_packets, rx_bytes, rx_multicast; + if (vlan->pcpu_stats) { + struct macvlan_pcpu_stats *p; + u64 rx_packets, rx_bytes, rx_multicast, tx_packets, tx_bytes; + u32 rx_errors = 0, tx_dropped = 0; unsigned int start; int i; for_each_possible_cpu(i) { - p = per_cpu_ptr(vlan->rx_stats, i); + p = per_cpu_ptr(vlan->pcpu_stats, i); do { start = u64_stats_fetch_begin_bh(&p->syncp); rx_packets = p->rx_packets; rx_bytes = p->rx_bytes; rx_multicast = p->rx_multicast; + tx_packets = p->tx_packets; + tx_bytes = p->tx_bytes; } while (u64_stats_fetch_retry_bh(&p->syncp, start)); - accum.rx_packets += rx_packets; - accum.rx_bytes += rx_bytes; - accum.rx_multicast += rx_multicast; - /* rx_errors is an ulong, updated without syncp protection */ - accum.rx_errors += p->rx_errors; + + stats->rx_packets += rx_packets; + stats->rx_bytes += rx_bytes; + stats->multicast += rx_multicast; + stats->tx_packets += tx_packets; + stats->tx_bytes += tx_bytes; + /* rx_errors & tx_dropped are u32, updated + * without syncp protection. + */ + rx_errors += p->rx_errors; + tx_dropped += p->tx_dropped; } - stats->rx_packets = accum.rx_packets; - stats->rx_bytes = accum.rx_bytes; - stats->rx_errors = accum.rx_errors; - stats->rx_dropped = accum.rx_errors; - stats->multicast = accum.rx_multicast; + stats->rx_errors = rx_errors; + stats->rx_dropped = rx_errors; + stats->tx_dropped = tx_dropped; } return stats; } @@ -601,25 +611,6 @@ static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[]) return 0; } -static int macvlan_get_tx_queues(struct net *net, - struct nlattr *tb[], - unsigned int *num_tx_queues, - unsigned int *real_num_tx_queues) -{ - struct net_device *real_dev; - - if (!tb[IFLA_LINK]) - return -EINVAL; - - real_dev = __dev_get_by_index(net, nla_get_u32(tb[IFLA_LINK])); - if (!real_dev) - return -ENODEV; - - *num_tx_queues = real_dev->num_tx_queues; - *real_num_tx_queues = real_dev->real_num_tx_queues; - return 0; -} - int macvlan_common_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], int (*receive)(struct sk_buff *skb), @@ -743,7 +734,6 @@ int macvlan_link_register(struct rtnl_link_ops *ops) { /* common fields */ ops->priv_size = sizeof(struct macvlan_dev); - ops->get_tx_queues = macvlan_get_tx_queues; ops->validate = macvlan_validate; ops->maxtype = IFLA_MACVLAN_MAX; ops->policy = macvlan_policy; diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index ac96a2d7629..e28b2e4959d 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -25,19 +25,25 @@ struct macvlan_port; struct macvtap_queue; /** - * struct macvlan_rx_stats - MACVLAN percpu rx stats + * struct macvlan_pcpu_stats - MACVLAN percpu stats * @rx_packets: number of received packets * @rx_bytes: number of received bytes * @rx_multicast: number of received multicast packets + * @tx_packets: number of transmitted packets + * @tx_bytes: number of transmitted bytes * @syncp: synchronization point for 64bit counters - * @rx_errors: number of errors + * @rx_errors: number of rx errors + * @tx_dropped: number of tx dropped packets */ -struct macvlan_rx_stats { +struct macvlan_pcpu_stats { u64 rx_packets; u64 rx_bytes; u64 rx_multicast; + u64 tx_packets; + u64 tx_bytes; struct u64_stats_sync syncp; - unsigned long rx_errors; + u32 rx_errors; + u32 tx_dropped; }; /* @@ -52,7 +58,7 @@ struct macvlan_dev { struct hlist_node hlist; struct macvlan_port *port; struct net_device *lowerdev; - struct macvlan_rx_stats __percpu *rx_stats; + struct macvlan_pcpu_stats __percpu *pcpu_stats; enum macvlan_mode mode; int (*receive)(struct sk_buff *skb); int (*forward)(struct net_device *dev, struct sk_buff *skb); @@ -64,18 +70,18 @@ static inline void macvlan_count_rx(const struct macvlan_dev *vlan, unsigned int len, bool success, bool multicast) { - struct macvlan_rx_stats *rx_stats; - - rx_stats = this_cpu_ptr(vlan->rx_stats); if (likely(success)) { - u64_stats_update_begin(&rx_stats->syncp); - rx_stats->rx_packets++; - rx_stats->rx_bytes += len; + struct macvlan_pcpu_stats *pcpu_stats; + + pcpu_stats = this_cpu_ptr(vlan->pcpu_stats); + u64_stats_update_begin(&pcpu_stats->syncp); + pcpu_stats->rx_packets++; + pcpu_stats->rx_bytes += len; if (multicast) - rx_stats->rx_multicast++; - u64_stats_update_end(&rx_stats->syncp); + pcpu_stats->rx_multicast++; + u64_stats_update_end(&pcpu_stats->syncp); } else { - rx_stats->rx_errors++; + this_cpu_inc(vlan->pcpu_stats->rx_errors); } } -- cgit v1.2.3-70-g09d2 From a75d946f42ae1771424a9582129fc5182ff48a1b Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 4 Nov 2010 16:20:20 +0100 Subject: console: move for_each_console to linux/console.h Move it out of printk.c so that we can use it all over the code. There are some potential users which will be converted to that macro in next patches. Signed-off-by: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- include/linux/console.h | 6 ++++++ kernel/printk.c | 6 ------ 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index 95cf6f08a59..875cfb1c813 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -126,6 +126,12 @@ struct console { struct console *next; }; +/* + * for_each_console() allows you to iterate on each console + */ +#define for_each_console(con) \ + for (con = console_drivers; con != NULL; con = con->next) + extern int console_set_on_cmdline; extern int add_preferred_console(char *name, int idx, char *options); diff --git a/kernel/printk.c b/kernel/printk.c index b2ebaee8c37..bf0420a92a1 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -42,12 +42,6 @@ #include -/* - * for_each_console() allows you to iterate on each console - */ -#define for_each_console(con) \ - for (con = console_drivers; con != NULL; con = con->next) - /* * Architectures can override it: */ -- cgit v1.2.3-70-g09d2 From 3dfbd044d0d99cad2fe50e4f6c79845703fa0558 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 4 Nov 2010 16:20:23 +0100 Subject: TTY: include termios.h in tty_driver.h We reference termios and termiox in tty_driver.h, but we do not include linux/termios.h where these are defined. Add the #include properly. Otherwise when we include tty_driver.h, we get compile errors. Signed-off-by: Jiri Slaby Cc: Alan Cox Cc: Greg KH Signed-off-by: Greg Kroah-Hartman --- include/linux/tty_driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index db2d227694d..09678ed370f 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -235,6 +235,7 @@ #include #include #include +#include struct tty_struct; struct tty_driver; -- cgit v1.2.3-70-g09d2 From e44dcb6c377529805bbaae505d5b333daab69111 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 12 Nov 2010 19:47:47 +0100 Subject: serial: mpc52xx: make printout for type more generic The printout for the type should be just "5xxx", so 512x users won't wonder why they have a mpc52xx-type UART. Signed-off-by: Wolfram Sang Cc: Grant Likely Cc: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/serial/mpc52xx_uart.c | 6 +++++- include/linux/serial_core.h | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/serial/mpc52xx_uart.c b/drivers/serial/mpc52xx_uart.c index c4399e23565..126ec7f568e 100644 --- a/drivers/serial/mpc52xx_uart.c +++ b/drivers/serial/mpc52xx_uart.c @@ -838,7 +838,11 @@ mpc52xx_uart_set_termios(struct uart_port *port, struct ktermios *new, static const char * mpc52xx_uart_type(struct uart_port *port) { - return port->type == PORT_MPC52xx ? "MPC52xx PSC" : NULL; + /* + * We keep using PORT_MPC52xx for historic reasons although it applies + * for MPC512x, too, but print "MPC5xxx" to not irritate users + */ + return port->type == PORT_MPC52xx ? "MPC5xxx PSC" : NULL; } static void diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 41603d69043..9ff9b7db293 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -95,7 +95,7 @@ /* PPC CPM type number */ #define PORT_CPM 58 -/* MPC52xx type numbers */ +/* MPC52xx (and MPC512x) type numbers */ #define PORT_MPC52xx 59 /* IBM icom */ -- cgit v1.2.3-70-g09d2 From afe0cbf87500f0585d217deb8c6fd329793a7957 Mon Sep 17 00:00:00 2001 From: Bruno Randolf Date: Wed, 10 Nov 2010 12:50:50 +0900 Subject: cfg80211: Add nl80211 antenna configuration Allow setting of TX and RX antennas configuration via nl80211. The antenna configuration is defined as a bitmap of allowed antennas to use. This API can be used to mask out antennas which are not attached or should not be used for other reasons like regulatory concerns or special setups. Separate bitmaps are used for RX and TX to allow configuring different antennas for receiving and transmitting. Each bitmap is 32 bit long, each bit representing one antenna, starting with antenna 1 at the first bit. If an antenna bit is set, this means the driver is allowed to use this antenna for RX or TX respectively; if the bit is not set the hardware is not allowed to use this antenna. Using bitmaps has the benefit of allowing for a flexible configuration interface which can support many different configurations and which can be used for 802.11n as well as non-802.11n devices. Instead of relying on some hardware specific assumptions, drivers can use this information to know which antennas are actually attached to the system and derive their capabilities based on that. 802.11n devices should enable or disable chains, based on which antennas are present (If all antennas belonging to a particular chain are disabled, the entire chain should be disabled). HT capabilities (like STBC, TX Beamforming, Antenna selection) should be calculated based on the available chains after applying the antenna masks. Should a 802.11n device have diversity antennas attached to one of their chains, diversity can be enabled or disabled based on the antenna information. Non-802.11n drivers can use the antenna masks to select RX and TX antennas and to enable or disable antenna diversity. While covering chainmasks for 802.11n and the standard "legacy" modes "fixed antenna 1", "fixed antenna 2" and "diversity" this API also allows more rare, but useful configurations as follows: 1) Send on antenna 1, receive on antenna 2 (or vice versa). This can be used to have a low gain antenna for TX in order to keep within the regulatory constraints and a high gain antenna for RX in order to receive weaker signals ("speak softly, but listen harder"). This can be useful for building long-shot outdoor links. Another usage of this setup is having a low-noise pre-amplifier on antenna 1 and a power amplifier on the other antenna. This way transmit noise is mostly kept out of the low noise receive channel. (This would be bitmaps: tx 1 rx 2). 2) Another similar setup is: Use RX diversity on both antennas, but always send on antenna 1. Again that would allow us to benefit from a higher gain RX antenna, while staying within the legal limits. (This would be: tx 0 rx 3). 3) And finally there can be special experimental setups in research and development even with pre 802.11n hardware where more than 2 antennas are available. It's good to keep the API simple, yet flexible. Signed-off-by: Bruno Randolf -- v7: Made bitmasks 32 bit wide and rebased to latest wireless-testing. Signed-off-by: John W. Linville --- include/linux/nl80211.h | 25 +++++++++++++++++++++++++ include/net/cfg80211.h | 3 +++ net/wireless/nl80211.c | 31 ++++++++++++++++++++++++++++++- 3 files changed, 58 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index fb877b5621b..17c5c884925 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -804,6 +804,28 @@ enum nl80211_commands { * @NL80211_ATTR_SUPPORT_IBSS_RSN: The device supports IBSS RSN, which mostly * means support for per-station GTKs. * + * @NL80211_ATTR_WIPHY_ANTENNA_TX: Bitmap of allowed antennas for transmitting. + * This can be used to mask out antennas which are not attached or should + * not be used for transmitting. If an antenna is not selected in this + * bitmap the hardware is not allowed to transmit on this antenna. + * + * Each bit represents one antenna, starting with antenna 1 at the first + * bit. Depending on which antennas are selected in the bitmap, 802.11n + * drivers can derive which chainmasks to use (if all antennas belonging to + * a particular chain are disabled this chain should be disabled) and if + * a chain has diversity antennas wether diversity should be used or not. + * HT capabilities (STBC, TX Beamforming, Antenna selection) can be + * derived from the available chains after applying the antenna mask. + * Non-802.11n drivers can derive wether to use diversity or not. + * Drivers may reject configurations or RX/TX mask combinations they cannot + * support by returning -EINVAL. + * + * @NL80211_ATTR_WIPHY_ANTENNA_RX: Bitmap of allowed antennas for receiving. + * This can be used to mask out antennas which are not attached or should + * not be used for receiving. If an antenna is not selected in this bitmap + * the hardware should not be configured to receive on this antenna. + * For a more detailed descripton see @NL80211_ATTR_WIPHY_ANTENNA_TX. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -973,6 +995,9 @@ enum nl80211_attrs { NL80211_ATTR_SUPPORT_IBSS_RSN, + NL80211_ATTR_WIPHY_ANTENNA_TX, + NL80211_ATTR_WIPHY_ANTENNA_RX, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index e5702f5ac57..07425e648a0 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1304,6 +1304,9 @@ struct cfg80211_ops { void (*mgmt_frame_register)(struct wiphy *wiphy, struct net_device *dev, u16 frame_type, bool reg); + + int (*set_antenna)(struct wiphy *wiphy, u32 tx_ant, u32 rx_ant); + int (*get_antenna)(struct wiphy *wiphy, u32 *tx_ant, u32 *rx_ant); }; /* diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c506241f863..5e4dda4c0fd 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -166,7 +166,11 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_WIPHY_TX_POWER_SETTING] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_TX_POWER_LEVEL] = { .type = NLA_U32 }, + [NL80211_ATTR_FRAME_TYPE] = { .type = NLA_U16 }, + + [NL80211_ATTR_WIPHY_ANTENNA_TX] = { .type = NLA_U32 }, + [NL80211_ATTR_WIPHY_ANTENNA_RX] = { .type = NLA_U32 }, }; /* policy for the key attributes */ @@ -526,7 +530,6 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, dev->wiphy.rts_threshold); NLA_PUT_U8(msg, NL80211_ATTR_WIPHY_COVERAGE_CLASS, dev->wiphy.coverage_class); - NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS, dev->wiphy.max_scan_ssids); NLA_PUT_U16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN, @@ -545,6 +548,16 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, if (dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) NLA_PUT_FLAG(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE); + if (dev->ops->get_antenna) { + u32 tx_ant = 0, rx_ant = 0; + int res; + res = dev->ops->get_antenna(&dev->wiphy, &tx_ant, &rx_ant); + if (!res) { + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_ANTENNA_TX, tx_ant); + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_ANTENNA_RX, rx_ant); + } + } + nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES); if (!nl_modes) goto nla_put_failure; @@ -1024,6 +1037,22 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) goto bad_res; } + if (info->attrs[NL80211_ATTR_WIPHY_ANTENNA_TX] && + info->attrs[NL80211_ATTR_WIPHY_ANTENNA_RX]) { + u32 tx_ant, rx_ant; + if (!rdev->ops->set_antenna) { + result = -EOPNOTSUPP; + goto bad_res; + } + + tx_ant = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_ANTENNA_TX]); + rx_ant = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_ANTENNA_RX]); + + result = rdev->ops->set_antenna(&rdev->wiphy, tx_ant, rx_ant); + if (result) + goto bad_res; + } + changed = 0; if (info->attrs[NL80211_ATTR_WIPHY_RETRY_SHORT]) { -- cgit v1.2.3-70-g09d2 From 885a46d0f7942d76c2f3860acb45f75237d3bb42 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 11 Nov 2010 15:07:22 +0100 Subject: cfg80211: add support for setting the ad-hoc multicast rate Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- include/linux/nl80211.h | 4 ++++ include/net/cfg80211.h | 2 ++ net/wireless/nl80211.c | 5 +++++ 3 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 17c5c884925..037b4e49889 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -826,6 +826,8 @@ enum nl80211_commands { * the hardware should not be configured to receive on this antenna. * For a more detailed descripton see @NL80211_ATTR_WIPHY_ANTENNA_TX. * + * @NL80211_ATTR_MCAST_RATE: Multicast tx rate (in 100 kbps) for IBSS + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -998,6 +1000,8 @@ enum nl80211_attrs { NL80211_ATTR_WIPHY_ANTENNA_TX, NL80211_ATTR_WIPHY_ANTENNA_RX, + NL80211_ATTR_MCAST_RATE, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 07425e648a0..8fd9eebd0cc 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -923,6 +923,7 @@ struct cfg80211_disassoc_request { * @privacy: this is a protected network, keys will be configured * after joining * @basic_rates: bitmap of basic rates to use when creating the IBSS + * @mcast_rate: multicast tx rate (in 100 kbps) */ struct cfg80211_ibss_params { u8 *ssid; @@ -934,6 +935,7 @@ struct cfg80211_ibss_params { u32 basic_rates; bool channel_fixed; bool privacy; + int mcast_rate; }; /** diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 5e4dda4c0fd..60555384222 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -171,6 +171,8 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_WIPHY_ANTENNA_TX] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_ANTENNA_RX] = { .type = NLA_U32 }, + + [NL80211_ATTR_MCAST_RATE] = { .type = NLA_U32 }, }; /* policy for the key attributes */ @@ -3681,6 +3683,9 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } } + if (info->attrs[NL80211_ATTR_MCAST_RATE]) + ibss.mcast_rate = + nla_get_u32(info->attrs[NL80211_ATTR_MCAST_RATE]); if (ibss.privacy && info->attrs[NL80211_ATTR_KEYS]) { connkeys = nl80211_parse_connkeys(rdev, -- cgit v1.2.3-70-g09d2 From 6ddf27cdbc218a412d7e993fdc08e30eec2042ce Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 15 Nov 2010 15:57:30 -0500 Subject: USB: make usb_mark_last_busy use pm_runtime_mark_last_busy Since the runtime-PM core already defines a .last_busy field in device.power, this patch uses it to replace the .last_busy field defined in usb_device and uses pm_runtime_mark_last_busy to implement usb_mark_last_busy. Signed-off-by: Ming Lei Reviewed-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/driver.c | 23 +++++++++++------------ drivers/usb/core/hcd-pci.c | 1 - drivers/usb/core/hcd.c | 1 - drivers/usb/core/hub.c | 1 - drivers/usb/core/message.c | 1 - include/linux/usb.h | 5 ++--- 6 files changed, 13 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index eda2d2c2545..0a63e968c68 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -27,7 +27,6 @@ #include #include #include -#include #include "usb.h" @@ -1329,7 +1328,7 @@ int usb_resume(struct device *dev, pm_message_t msg) pm_runtime_disable(dev); pm_runtime_set_active(dev); pm_runtime_enable(dev); - udev->last_busy = jiffies; + usb_mark_last_busy(udev); do_unbind_rebind(udev, DO_REBIND); } } @@ -1397,7 +1396,7 @@ void usb_autosuspend_device(struct usb_device *udev) { int status; - udev->last_busy = jiffies; + usb_mark_last_busy(udev); status = pm_runtime_put_sync(&udev->dev); dev_vdbg(&udev->dev, "%s: cnt %d -> %d\n", __func__, atomic_read(&udev->dev.power.usage_count), @@ -1482,7 +1481,7 @@ void usb_autopm_put_interface(struct usb_interface *intf) struct usb_device *udev = interface_to_usbdev(intf); int status; - udev->last_busy = jiffies; + usb_mark_last_busy(udev); atomic_dec(&intf->pm_usage_cnt); status = pm_runtime_put_sync(&intf->dev); dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n", @@ -1512,8 +1511,8 @@ void usb_autopm_put_interface_async(struct usb_interface *intf) unsigned long last_busy; int status = 0; - last_busy = udev->last_busy; - udev->last_busy = jiffies; + last_busy = udev->dev.power.last_busy; + usb_mark_last_busy(udev); atomic_dec(&intf->pm_usage_cnt); pm_runtime_put_noidle(&intf->dev); @@ -1554,7 +1553,7 @@ void usb_autopm_put_interface_no_suspend(struct usb_interface *intf) { struct usb_device *udev = interface_to_usbdev(intf); - udev->last_busy = jiffies; + usb_mark_last_busy(udev); atomic_dec(&intf->pm_usage_cnt); pm_runtime_put_noidle(&intf->dev); } @@ -1641,7 +1640,7 @@ void usb_autopm_get_interface_no_resume(struct usb_interface *intf) { struct usb_device *udev = interface_to_usbdev(intf); - udev->last_busy = jiffies; + usb_mark_last_busy(udev); atomic_inc(&intf->pm_usage_cnt); pm_runtime_get_noresume(&intf->dev); } @@ -1697,7 +1696,7 @@ static int autosuspend_check(struct usb_device *udev) * enough, queue a delayed autosuspend request. */ j = ACCESS_ONCE(jiffies); - suspend_time = udev->last_busy + udev->autosuspend_delay; + suspend_time = udev->dev.power.last_busy + udev->autosuspend_delay; if (time_before(j, suspend_time)) { pm_schedule_suspend(&udev->dev, jiffies_to_msecs( round_jiffies_up_relative(suspend_time - j))); @@ -1725,13 +1724,13 @@ static int usb_runtime_suspend(struct device *dev) * away. */ if (status) { - udev->last_busy = jiffies + + udev->dev.power.last_busy = jiffies + (udev->autosuspend_delay == 0 ? HZ/2 : 0); } /* Prevent the parent from suspending immediately after */ else if (udev->parent) - udev->parent->last_busy = jiffies; + usb_mark_last_busy(udev->parent); return status; } @@ -1745,7 +1744,7 @@ static int usb_runtime_resume(struct device *dev) * and all its interfaces. */ status = usb_resume_both(udev, PMSG_AUTO_RESUME); - udev->last_busy = jiffies; + usb_mark_last_busy(udev); return status; } diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c index 3799573bd38..b55d46070a2 100644 --- a/drivers/usb/core/hcd-pci.c +++ b/drivers/usb/core/hcd-pci.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 61800f77dac..e70aeaf3dc1 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 27115b45edc..7c2405eccc4 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index f377e49fcb3..83248742382 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include /* for usbcore internals */ diff --git a/include/linux/usb.h b/include/linux/usb.h index 35fe6ab222b..7d22b3340a7 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -20,6 +20,7 @@ #include /* for struct completion */ #include /* for current && schedule_timeout */ #include /* for struct mutex */ +#include /* for runtime PM */ struct usb_device; struct usb_driver; @@ -407,7 +408,6 @@ struct usb_tt; * @quirks: quirks of the whole device * @urbnum: number of URBs submitted for the whole device * @active_duration: total time device is not suspended - * @last_busy: time of last use * @autosuspend_delay: in jiffies * @connect_time: time device was first connected * @do_remote_wakeup: remote wakeup should be enabled @@ -481,7 +481,6 @@ struct usb_device { unsigned long active_duration; #ifdef CONFIG_PM - unsigned long last_busy; int autosuspend_delay; unsigned long connect_time; @@ -527,7 +526,7 @@ extern void usb_autopm_put_interface_no_suspend(struct usb_interface *intf); static inline void usb_mark_last_busy(struct usb_device *udev) { - udev->last_busy = jiffies; + pm_runtime_mark_last_busy(&udev->dev); } #else -- cgit v1.2.3-70-g09d2 From fcc4a01eb8661226e80632327673f67bf6a5840b Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 15 Nov 2010 15:57:51 -0500 Subject: USB: use the runtime-PM autosuspend implementation This patch (as1428) converts USB over to the new runtime-PM core autosuspend framework. One slightly awkward aspect of the conversion is that USB devices will now have two suspend-delay attributes: the old power/autosuspend file and the new power/autosuspend_delay_ms file. One expresses the delay time in seconds and the other in milliseconds, but otherwise they do the same thing. The old attribute can be deprecated and then removed eventually. Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- Documentation/usb/power-management.txt | 113 +++++++++++++++++---------------- drivers/usb/core/driver.c | 77 ++-------------------- drivers/usb/core/hub.c | 1 + drivers/usb/core/quirks.c | 6 -- drivers/usb/core/sysfs.c | 34 ++-------- drivers/usb/core/usb.c | 3 +- drivers/usb/core/usb.h | 2 - include/linux/usb.h | 2 - 8 files changed, 71 insertions(+), 167 deletions(-) (limited to 'include/linux') diff --git a/Documentation/usb/power-management.txt b/Documentation/usb/power-management.txt index b29d8e56cf2..c9ffa9ced7e 100644 --- a/Documentation/usb/power-management.txt +++ b/Documentation/usb/power-management.txt @@ -2,7 +2,7 @@ Alan Stern - December 11, 2009 + October 28, 2010 @@ -107,9 +107,14 @@ allowed to issue dynamic suspends. The user interface for controlling dynamic PM is located in the power/ subdirectory of each USB device's sysfs directory, that is, in /sys/bus/usb/devices/.../power/ where "..." is the device's ID. The -relevant attribute files are: wakeup, control, and autosuspend. -(There may also be a file named "level"; this file was deprecated -as of the 2.6.35 kernel and replaced by the "control" file.) +relevant attribute files are: wakeup, control, and +autosuspend_delay_ms. (There may also be a file named "level"; this +file was deprecated as of the 2.6.35 kernel and replaced by the +"control" file. In 2.6.38 the "autosuspend" file will be deprecated +and replaced by the "autosuspend_delay_ms" file. The only difference +is that the newer file expresses the delay in milliseconds whereas the +older file uses seconds. Confusingly, both files are present in 2.6.37 +but only "autosuspend" works.) power/wakeup @@ -140,33 +145,36 @@ as of the 2.6.35 kernel and replaced by the "control" file.) suspended and autoresume was not allowed. This setting is no longer supported.) - power/autosuspend + power/autosuspend_delay_ms This file contains an integer value, which is the - number of seconds the device should remain idle before - the kernel will autosuspend it (the idle-delay time). - The default is 2. 0 means to autosuspend as soon as - the device becomes idle, and negative values mean - never to autosuspend. You can write a number to the - file to change the autosuspend idle-delay time. - -Writing "-1" to power/autosuspend and writing "on" to power/control do -essentially the same thing -- they both prevent the device from being -autosuspended. Yes, this is a redundancy in the API. + number of milliseconds the device should remain idle + before the kernel will autosuspend it (the idle-delay + time). The default is 2000. 0 means to autosuspend + as soon as the device becomes idle, and negative + values mean never to autosuspend. You can write a + number to the file to change the autosuspend + idle-delay time. + +Writing "-1" to power/autosuspend_delay_ms and writing "on" to +power/control do essentially the same thing -- they both prevent the +device from being autosuspended. Yes, this is a redundancy in the +API. (In 2.6.21 writing "0" to power/autosuspend would prevent the device from being autosuspended; the behavior was changed in 2.6.22. The power/autosuspend attribute did not exist prior to 2.6.21, and the power/level attribute did not exist prior to 2.6.22. power/control -was added in 2.6.34.) +was added in 2.6.34, and power/autosuspend_delay_ms was added in +2.6.37 but did not become functional until 2.6.38.) Changing the default idle-delay time ------------------------------------ -The default autosuspend idle-delay time is controlled by a module -parameter in usbcore. You can specify the value when usbcore is -loaded. For example, to set it to 5 seconds instead of 2 you would +The default autosuspend idle-delay time (in seconds) is controlled by +a module parameter in usbcore. You can specify the value when usbcore +is loaded. For example, to set it to 5 seconds instead of 2 you would do: modprobe usbcore autosuspend=5 @@ -234,25 +242,23 @@ every device. If a driver knows that its device has proper suspend/resume support, it can enable autosuspend all by itself. For example, the video -driver for a laptop's webcam might do this, since these devices are -rarely used and so should normally be autosuspended. +driver for a laptop's webcam might do this (in recent kernels they +do), since these devices are rarely used and so should normally be +autosuspended. Sometimes it turns out that even when a device does work okay with -autosuspend there are still problems. For example, there are -experimental patches adding autosuspend support to the usbhid driver, -which manages keyboards and mice, among other things. Tests with a -number of keyboards showed that typing on a suspended keyboard, while -causing the keyboard to do a remote wakeup all right, would -nonetheless frequently result in lost keystrokes. Tests with mice -showed that some of them would issue a remote-wakeup request in -response to button presses but not to motion, and some in response to -neither. +autosuspend there are still problems. For example, the usbhid driver, +which manages keyboards and mice, has autosuspend support. Tests with +a number of keyboards show that typing on a suspended keyboard, while +causing the keyboard to do a remote wakeup all right, will nonetheless +frequently result in lost keystrokes. Tests with mice show that some +of them will issue a remote-wakeup request in response to button +presses but not to motion, and some in response to neither. The kernel will not prevent you from enabling autosuspend on devices that can't handle it. It is even possible in theory to damage a -device by suspending it at the wrong time -- for example, suspending a -USB hard disk might cause it to spin down without parking the heads. -(Highly unlikely, but possible.) Take care. +device by suspending it at the wrong time. (Highly unlikely, but +possible.) Take care. The driver interface for Power Management @@ -336,10 +342,6 @@ autosuspend the interface's device. When the usage counter is = 0 then the interface is considered to be idle, and the kernel may autosuspend the device. -(There is a similar usage counter field in struct usb_device, -associated with the device itself rather than any of its interfaces. -This counter is used only by the USB core.) - Drivers need not be concerned about balancing changes to the usage counter; the USB core will undo any remaining "get"s when a driver is unbound from its interface. As a corollary, drivers must not call @@ -409,11 +411,11 @@ during autosuspend. For example, there's not much point autosuspending a keyboard if the user can't cause the keyboard to do a remote wakeup by typing on it. If the driver sets intf->needs_remote_wakeup to 1, the kernel won't autosuspend the -device if remote wakeup isn't available or has been disabled through -the power/wakeup attribute. (If the device is already autosuspended, -though, setting this flag won't cause the kernel to autoresume it. -Normally a driver would set this flag in its probe method, at which -time the device is guaranteed not to be autosuspended.) +device if remote wakeup isn't available. (If the device is already +autosuspended, though, setting this flag won't cause the kernel to +autoresume it. Normally a driver would set this flag in its probe +method, at which time the device is guaranteed not to be +autosuspended.) If a driver does its I/O asynchronously in interrupt context, it should call usb_autopm_get_interface_async() before starting output and @@ -422,20 +424,19 @@ it receives an input event, it should call usb_mark_last_busy(struct usb_device *udev); -in the event handler. This sets udev->last_busy to the current time. -udev->last_busy is the field used for idle-delay calculations; -updating it will cause any pending autosuspend to be moved back. Most -of the usb_autopm_* routines will also set the last_busy field to the -current time. +in the event handler. This tells the PM core that the device was just +busy and therefore the next autosuspend idle-delay expiration should +be pushed back. Many of the usb_autopm_* routines also make this call, +so drivers need to worry only when interrupt-driven input arrives. Asynchronous operation is always subject to races. For example, a -driver may call one of the usb_autopm_*_interface_async() routines at -a time when the core has just finished deciding the device has been -idle for long enough but not yet gotten around to calling the driver's -suspend method. The suspend method must be responsible for -synchronizing with the output request routine and the URB completion -handler; it should cause autosuspends to fail with -EBUSY if the -driver needs to use the device. +driver may call the usb_autopm_get_interface_async() routine at a time +when the core has just finished deciding the device has been idle for +long enough but not yet gotten around to calling the driver's suspend +method. The suspend method must be responsible for synchronizing with +the I/O request routine and the URB completion handler; it should +cause autosuspends to fail with -EBUSY if the driver needs to use the +device. External suspend calls should never be allowed to fail in this way, only autosuspend calls. The driver can tell them apart by checking @@ -472,7 +473,9 @@ Firstly, a device may already be autosuspended when a system suspend occurs. Since system suspends are supposed to be as transparent as possible, the device should remain suspended following the system resume. But this theory may not work out well in practice; over time -the kernel's behavior in this regard has changed. +the kernel's behavior in this regard has changed. As of 2.6.37 the +policy is to resume all devices during a system resume and let them +handle their own runtime suspends afterward. Secondly, a dynamic power-management event may occur as a system suspend is underway. The window for this is short, since system diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index 0a63e968c68..43c25c29ac1 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -1397,32 +1397,7 @@ void usb_autosuspend_device(struct usb_device *udev) int status; usb_mark_last_busy(udev); - status = pm_runtime_put_sync(&udev->dev); - dev_vdbg(&udev->dev, "%s: cnt %d -> %d\n", - __func__, atomic_read(&udev->dev.power.usage_count), - status); -} - -/** - * usb_try_autosuspend_device - attempt an autosuspend of a USB device and its interfaces - * @udev: the usb_device to autosuspend - * - * This routine should be called when a core subsystem thinks @udev may - * be ready to autosuspend. - * - * @udev's usage counter left unchanged. If it is 0 and all the interfaces - * are inactive then an autosuspend will be attempted. The attempt may - * fail or be delayed. - * - * The caller must hold @udev's device lock. - * - * This routine can run only in process context. - */ -void usb_try_autosuspend_device(struct usb_device *udev) -{ - int status; - - status = pm_runtime_idle(&udev->dev); + status = pm_runtime_put_sync_autosuspend(&udev->dev); dev_vdbg(&udev->dev, "%s: cnt %d -> %d\n", __func__, atomic_read(&udev->dev.power.usage_count), status); @@ -1508,32 +1483,11 @@ EXPORT_SYMBOL_GPL(usb_autopm_put_interface); void usb_autopm_put_interface_async(struct usb_interface *intf) { struct usb_device *udev = interface_to_usbdev(intf); - unsigned long last_busy; - int status = 0; + int status; - last_busy = udev->dev.power.last_busy; usb_mark_last_busy(udev); atomic_dec(&intf->pm_usage_cnt); - pm_runtime_put_noidle(&intf->dev); - - if (udev->dev.power.runtime_auto) { - /* Optimization: Don't schedule a delayed autosuspend if - * the timer is already running and the expiration time - * wouldn't change. - * - * We have to use the interface's timer. Attempts to - * schedule a suspend for the device would fail because - * the interface is still active. - */ - if (intf->dev.power.timer_expires == 0 || - round_jiffies_up(last_busy) != - round_jiffies_up(jiffies)) { - status = pm_schedule_suspend(&intf->dev, - jiffies_to_msecs( - round_jiffies_up_relative( - udev->autosuspend_delay))); - } - } + status = pm_runtime_put(&intf->dev); dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n", __func__, atomic_read(&intf->dev.power.usage_count), status); @@ -1651,7 +1605,6 @@ static int autosuspend_check(struct usb_device *udev) { int w, i; struct usb_interface *intf; - unsigned long suspend_time, j; /* Fail if autosuspend is disabled, or any interfaces are in use, or * any interface drivers require remote wakeup but it isn't available. @@ -1691,17 +1644,6 @@ static int autosuspend_check(struct usb_device *udev) return -EOPNOTSUPP; } udev->do_remote_wakeup = w; - - /* If everything is okay but the device hasn't been idle for long - * enough, queue a delayed autosuspend request. - */ - j = ACCESS_ONCE(jiffies); - suspend_time = udev->dev.power.last_busy + udev->autosuspend_delay; - if (time_before(j, suspend_time)) { - pm_schedule_suspend(&udev->dev, jiffies_to_msecs( - round_jiffies_up_relative(suspend_time - j))); - return -EAGAIN; - } return 0; } @@ -1719,17 +1661,8 @@ static int usb_runtime_suspend(struct device *dev) status = usb_suspend_both(udev, PMSG_AUTO_SUSPEND); - /* If an interface fails the suspend, adjust the last_busy - * time so that we don't get another suspend attempt right - * away. - */ - if (status) { - udev->dev.power.last_busy = jiffies + - (udev->autosuspend_delay == 0 ? HZ/2 : 0); - } - /* Prevent the parent from suspending immediately after */ - else if (udev->parent) + if (status == 0 && udev->parent) usb_mark_last_busy(udev->parent); return status; @@ -1756,7 +1689,7 @@ static int usb_runtime_idle(struct device *dev) * autosuspend checks. */ if (autosuspend_check(udev) == 0) - pm_runtime_suspend(dev); + pm_runtime_autosuspend(dev); return 0; } diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 7c2405eccc4..fdb62ca10d8 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1803,6 +1803,7 @@ int usb_new_device(struct usb_device *udev) /* Tell the runtime-PM framework the device is active */ pm_runtime_set_active(&udev->dev); + pm_runtime_use_autosuspend(&udev->dev); pm_runtime_enable(&udev->dev); err = usb_enumerate_device(udev); /* Read descriptors */ diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 25719da45e3..e3531da1613 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -124,12 +124,6 @@ void usb_detect_quirks(struct usb_device *udev) */ usb_disable_autosuspend(udev); - /* Autosuspend can also be disabled if the initial autosuspend_delay - * is negative. - */ - if (udev->autosuspend_delay < 0) - usb_autoresume_device(udev); - #endif /* For the present, all devices default to USB-PERSIST enabled */ diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c index 9561e087907..6781c369ce2 100644 --- a/drivers/usb/core/sysfs.c +++ b/drivers/usb/core/sysfs.c @@ -334,44 +334,20 @@ static DEVICE_ATTR(active_duration, S_IRUGO, show_active_duration, NULL); static ssize_t show_autosuspend(struct device *dev, struct device_attribute *attr, char *buf) { - struct usb_device *udev = to_usb_device(dev); - - return sprintf(buf, "%d\n", udev->autosuspend_delay / HZ); + return sprintf(buf, "%d\n", dev->power.autosuspend_delay / 1000); } static ssize_t set_autosuspend(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct usb_device *udev = to_usb_device(dev); - int value, old_delay; - int rc; + int value; - if (sscanf(buf, "%d", &value) != 1 || value >= INT_MAX/HZ || - value <= - INT_MAX/HZ) + if (sscanf(buf, "%d", &value) != 1 || value >= INT_MAX/1000 || + value <= -INT_MAX/1000) return -EINVAL; - value *= HZ; - - usb_lock_device(udev); - old_delay = udev->autosuspend_delay; - udev->autosuspend_delay = value; - - if (old_delay < 0) { /* Autosuspend wasn't allowed */ - if (value >= 0) - usb_autosuspend_device(udev); - } else { /* Autosuspend was allowed */ - if (value < 0) { - rc = usb_autoresume_device(udev); - if (rc < 0) { - count = rc; - udev->autosuspend_delay = old_delay; - } - } else { - usb_try_autosuspend_device(udev); - } - } - usb_unlock_device(udev); + pm_runtime_set_autosuspend_delay(dev, value * 1000); return count; } diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index fdd4130fbb7..079cb57bab4 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -445,7 +445,8 @@ struct usb_device *usb_alloc_dev(struct usb_device *parent, INIT_LIST_HEAD(&dev->filelist); #ifdef CONFIG_PM - dev->autosuspend_delay = usb_autosuspend_delay * HZ; + pm_runtime_set_autosuspend_delay(&dev->dev, + usb_autosuspend_delay * 1000); dev->connect_time = jiffies; dev->active_duration = -jiffies; #endif diff --git a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h index cd882203ad3..b975450f403 100644 --- a/drivers/usb/core/usb.h +++ b/drivers/usb/core/usb.h @@ -75,14 +75,12 @@ static inline int usb_port_resume(struct usb_device *udev, pm_message_t msg) #ifdef CONFIG_USB_SUSPEND extern void usb_autosuspend_device(struct usb_device *udev); -extern void usb_try_autosuspend_device(struct usb_device *udev); extern int usb_autoresume_device(struct usb_device *udev); extern int usb_remote_wakeup(struct usb_device *dev); #else #define usb_autosuspend_device(udev) do {} while (0) -#define usb_try_autosuspend_device(udev) do {} while (0) static inline int usb_autoresume_device(struct usb_device *udev) { return 0; diff --git a/include/linux/usb.h b/include/linux/usb.h index 7d22b3340a7..5ee2223af08 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -408,7 +408,6 @@ struct usb_tt; * @quirks: quirks of the whole device * @urbnum: number of URBs submitted for the whole device * @active_duration: total time device is not suspended - * @autosuspend_delay: in jiffies * @connect_time: time device was first connected * @do_remote_wakeup: remote wakeup should be enabled * @reset_resume: needs reset instead of resume @@ -481,7 +480,6 @@ struct usb_device { unsigned long active_duration; #ifdef CONFIG_PM - int autosuspend_delay; unsigned long connect_time; unsigned do_remote_wakeup:1; -- cgit v1.2.3-70-g09d2 From da6836500414ae734cd9873c2d553db594f831e9 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Tue, 16 Nov 2010 11:52:38 +0000 Subject: netfilter: allow hooks to pass error code back up the stack SELinux would like to pass certain fatal errors back up the stack. This patch implements the generic netfilter support for this functionality. Based-on-patch-by: Patrick McHardy Signed-off-by: Eric Paris Signed-off-by: David S. Miller --- include/linux/netfilter.h | 2 ++ net/netfilter/core.c | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 03317c8d407..1893837b396 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -33,6 +33,8 @@ #define NF_QUEUE_NR(x) ((((x) << NF_VERDICT_BITS) & NF_VERDICT_QMASK) | NF_QUEUE) +#define NF_DROP_ERR(x) (((-x) << NF_VERDICT_BITS) | NF_DROP) + /* only for userspace compatibility */ #ifndef __KERNEL__ /* Generic cache responses from hook functions. diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 85dabb86be6..32fcbe290c0 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -173,9 +173,11 @@ next_hook: outdev, &elem, okfn, hook_thresh); if (verdict == NF_ACCEPT || verdict == NF_STOP) { ret = 1; - } else if (verdict == NF_DROP) { + } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) { kfree_skb(skb); - ret = -EPERM; + ret = -(verdict >> NF_VERDICT_BITS); + if (ret == 0) + ret = -EPERM; } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn, verdict >> NF_VERDICT_BITS)) -- cgit v1.2.3-70-g09d2 From f8ff182c716c6f11ca3061961f5722f26a14e101 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 16 Nov 2010 04:30:14 +0000 Subject: rtnetlink: Link address family API Each net_device contains address family specific data such as per device settings and statistics. We already expose this data via procfs/sysfs and partially netlink. The netlink method requires the requester to send one RTM_GETLINK request for each address family it wishes to receive data of and then merge this data itself. This patch implements a new API which combines all address family specific link data in a new netlink attribute IFLA_AF_SPEC. IFLA_AF_SPEC contains a sequence of nested attributes, one for each address family which in turn defines the structure of its own attribute. Example: [IFLA_AF_SPEC] = { [AF_INET] = { [IFLA_INET_CONF] = ..., }, [AF_INET6] = { [IFLA_INET6_FLAGS] = ..., [IFLA_INET6_CONF] = ..., } } The API also allows for address families to implement a function which parses the IFLA_AF_SPEC attribute sent by userspace to implement address family specific link options. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/if_link.h | 19 +++++++ include/net/rtnetlink.h | 31 ++++++++++ net/core/rtnetlink.c | 147 +++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 195 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_link.h b/include/linux/if_link.h index 2fc66dd783e..443d04a66a7 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -80,6 +80,24 @@ struct rtnl_link_ifmap { __u8 port; }; +/* + * IFLA_AF_SPEC + * Contains nested attributes for address family specific attributes. + * Each address family may create a attribute with the address family + * number as type and create its own attribute structure in it. + * + * Example: + * [IFLA_AF_SPEC] = { + * [AF_INET] = { + * [IFLA_INET_CONF] = ..., + * }, + * [AF_INET6] = { + * [IFLA_INET6_FLAGS] = ..., + * [IFLA_INET6_CONF] = ..., + * } + * } + */ + enum { IFLA_UNSPEC, IFLA_ADDRESS, @@ -116,6 +134,7 @@ enum { IFLA_STATS64, IFLA_VF_PORTS, IFLA_PORT_SELF, + IFLA_AF_SPEC, __IFLA_MAX }; diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index e013c68bfb0..35be0bbcd7d 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -83,6 +83,37 @@ extern void __rtnl_link_unregister(struct rtnl_link_ops *ops); extern int rtnl_link_register(struct rtnl_link_ops *ops); extern void rtnl_link_unregister(struct rtnl_link_ops *ops); +/** + * struct rtnl_af_ops - rtnetlink address family operations + * + * @list: Used internally + * @family: Address family + * @fill_link_af: Function to fill IFLA_AF_SPEC with address family + * specific netlink attributes. + * @get_link_af_size: Function to calculate size of address family specific + * netlink attributes exlusive the container attribute. + * @parse_link_af: Function to parse a IFLA_AF_SPEC attribute and modify + * net_device accordingly. + */ +struct rtnl_af_ops { + struct list_head list; + int family; + + int (*fill_link_af)(struct sk_buff *skb, + const struct net_device *dev); + size_t (*get_link_af_size)(const struct net_device *dev); + + int (*parse_link_af)(struct net_device *dev, + const struct nlattr *attr); +}; + +extern int __rtnl_af_register(struct rtnl_af_ops *ops); +extern void __rtnl_af_unregister(struct rtnl_af_ops *ops); + +extern int rtnl_af_register(struct rtnl_af_ops *ops); +extern void rtnl_af_unregister(struct rtnl_af_ops *ops); + + extern struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]); extern struct net_device *rtnl_create_link(struct net *src_net, struct net *net, char *ifname, const struct rtnl_link_ops *ops, struct nlattr *tb[]); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 841c287ef40..bf69e5871b1 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -362,6 +362,95 @@ static size_t rtnl_link_get_size(const struct net_device *dev) return size; } +static LIST_HEAD(rtnl_af_ops); + +static const struct rtnl_af_ops *rtnl_af_lookup(const int family) +{ + const struct rtnl_af_ops *ops; + + list_for_each_entry(ops, &rtnl_af_ops, list) { + if (ops->family == family) + return ops; + } + + return NULL; +} + +/** + * __rtnl_af_register - Register rtnl_af_ops with rtnetlink. + * @ops: struct rtnl_af_ops * to register + * + * The caller must hold the rtnl_mutex. + * + * Returns 0 on success or a negative error code. + */ +int __rtnl_af_register(struct rtnl_af_ops *ops) +{ + list_add_tail(&ops->list, &rtnl_af_ops); + return 0; +} +EXPORT_SYMBOL_GPL(__rtnl_af_register); + +/** + * rtnl_af_register - Register rtnl_af_ops with rtnetlink. + * @ops: struct rtnl_af_ops * to register + * + * Returns 0 on success or a negative error code. + */ +int rtnl_af_register(struct rtnl_af_ops *ops) +{ + int err; + + rtnl_lock(); + err = __rtnl_af_register(ops); + rtnl_unlock(); + return err; +} +EXPORT_SYMBOL_GPL(rtnl_af_register); + +/** + * __rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink. + * @ops: struct rtnl_af_ops * to unregister + * + * The caller must hold the rtnl_mutex. + */ +void __rtnl_af_unregister(struct rtnl_af_ops *ops) +{ + list_del(&ops->list); +} +EXPORT_SYMBOL_GPL(__rtnl_af_unregister); + +/** + * rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink. + * @ops: struct rtnl_af_ops * to unregister + */ +void rtnl_af_unregister(struct rtnl_af_ops *ops) +{ + rtnl_lock(); + __rtnl_af_unregister(ops); + rtnl_unlock(); +} +EXPORT_SYMBOL_GPL(rtnl_af_unregister); + +static size_t rtnl_link_get_af_size(const struct net_device *dev) +{ + struct rtnl_af_ops *af_ops; + size_t size; + + /* IFLA_AF_SPEC */ + size = nla_total_size(sizeof(struct nlattr)); + + list_for_each_entry(af_ops, &rtnl_af_ops, list) { + if (af_ops->get_link_af_size) { + /* AF_* + nested data */ + size += nla_total_size(sizeof(struct nlattr)) + + af_ops->get_link_af_size(dev); + } + } + + return size; +} + static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev) { const struct rtnl_link_ops *ops = dev->rtnl_link_ops; @@ -671,7 +760,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev) + nla_total_size(4) /* IFLA_NUM_VF */ + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */ + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */ - + rtnl_link_get_size(dev); /* IFLA_LINKINFO */ + + rtnl_link_get_size(dev) /* IFLA_LINKINFO */ + + rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */ } static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev) @@ -757,7 +847,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct nlmsghdr *nlh; struct rtnl_link_stats64 temp; const struct rtnl_link_stats64 *stats; - struct nlattr *attr; + struct nlattr *attr, *af_spec; + struct rtnl_af_ops *af_ops; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); if (nlh == NULL) @@ -866,6 +957,36 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, goto nla_put_failure; } + if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC))) + goto nla_put_failure; + + list_for_each_entry(af_ops, &rtnl_af_ops, list) { + if (af_ops->fill_link_af) { + struct nlattr *af; + int err; + + if (!(af = nla_nest_start(skb, af_ops->family))) + goto nla_put_failure; + + err = af_ops->fill_link_af(skb, dev); + + /* + * Caller may return ENODATA to indicate that there + * was no data to be dumped. This is not an error, it + * means we should trim the attribute header and + * continue. + */ + if (err == -ENODATA) + nla_nest_cancel(skb, af); + else if (err < 0) + goto nla_put_failure; + + nla_nest_end(skb, af); + } + } + + nla_nest_end(skb, af_spec); + return nlmsg_end(skb, nlh); nla_put_failure: @@ -924,6 +1045,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_VFINFO_LIST] = {. type = NLA_NESTED }, [IFLA_VF_PORTS] = { .type = NLA_NESTED }, [IFLA_PORT_SELF] = { .type = NLA_NESTED }, + [IFLA_AF_SPEC] = { .type = NLA_NESTED }, }; EXPORT_SYMBOL(ifla_policy); @@ -1225,6 +1347,27 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, goto errout; modified = 1; } + + if (tb[IFLA_AF_SPEC]) { + struct nlattr *af; + int rem; + + nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) { + const struct rtnl_af_ops *af_ops; + + if (!(af_ops = rtnl_af_lookup(nla_type(af)))) + continue; + + if (!af_ops->parse_link_af) + continue; + + err = af_ops->parse_link_af(dev, af); + if (err < 0) + goto errout; + + modified = 1; + } + } err = 0; errout: -- cgit v1.2.3-70-g09d2 From ca7479ebbd9f7621646bf2792cb7143647f035bb Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 16 Nov 2010 04:31:20 +0000 Subject: inet: Define IPV4_DEVCONF_MAX Define IPV4_DEVCONF_MAX to get rid of MAX - 1 notation. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 380ba6bc5db..2b86eaf1177 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -41,10 +41,12 @@ enum __IPV4_DEVCONF_MAX }; +#define IPV4_DEVCONF_MAX (__IPV4_DEVCONF_MAX - 1) + struct ipv4_devconf { void *sysctl; - int data[__IPV4_DEVCONF_MAX - 1]; - DECLARE_BITMAP(state, __IPV4_DEVCONF_MAX - 1); + int data[IPV4_DEVCONF_MAX]; + DECLARE_BITMAP(state, IPV4_DEVCONF_MAX); }; struct in_device { @@ -90,7 +92,7 @@ static inline void ipv4_devconf_set(struct in_device *in_dev, int index, static inline void ipv4_devconf_setall(struct in_device *in_dev) { - bitmap_fill(in_dev->cnf.state, __IPV4_DEVCONF_MAX - 1); + bitmap_fill(in_dev->cnf.state, IPV4_DEVCONF_MAX); } #define IN_DEV_CONF_GET(in_dev, attr) \ -- cgit v1.2.3-70-g09d2 From 9f0f7272ac9506f4c8c05cc597b7e376b0b9f3e4 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 16 Nov 2010 04:32:48 +0000 Subject: ipv4: AF_INET link address family Implements the AF_INET link address family exposing the per device configuration settings via netlink using the attribute IFLA_INET_CONF. The format of IFLA_INET_CONF differs depending on the direction the attribute is sent. The attribute sent by the kernel consists of a u32 array, basically a 1:1 copy of in_device->cnf.data[]. The attribute expected by the kernel must consist of a sequence of nested u32 attributes, each representing a change request, e.g. [IFLA_INET_CONF] = { [IPV4_DEVCONF_FORWARDING] = 1, [IPV4_DEVCONF_NOXFRM] = 0, } libnl userspace API documentation and example available from: http://www.infradead.org/~tgr/libnl/doc-git/group__link__inet.html Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/if_link.h | 8 ++++++ net/ipv4/devinet.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_link.h b/include/linux/if_link.h index 443d04a66a7..2e02e4d7b11 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -147,6 +147,14 @@ enum { #define IFLA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifinfomsg)) #endif +enum { + IFLA_INET_UNSPEC, + IFLA_INET_CONF, + __IFLA_INET_MAX, +}; + +#define IFLA_INET_MAX (__IFLA_INET_MAX - 1) + /* ifi_flags. IFF_* flags. diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index dc94b0316b7..71afc26c2df 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1256,6 +1256,72 @@ errout: rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err); } +static size_t inet_get_link_af_size(const struct net_device *dev) +{ + struct in_device *in_dev = __in_dev_get_rcu(dev); + + if (!in_dev) + return 0; + + return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */ +} + +static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev) +{ + struct in_device *in_dev = __in_dev_get_rcu(dev); + struct nlattr *nla; + int i; + + if (!in_dev) + return -ENODATA; + + nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4); + if (nla == NULL) + return -EMSGSIZE; + + for (i = 0; i < IPV4_DEVCONF_MAX; i++) + ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i]; + + return 0; +} + +static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = { + [IFLA_INET_CONF] = { .type = NLA_NESTED }, +}; + +static int inet_parse_link_af(struct net_device *dev, const struct nlattr *nla) +{ + struct in_device *in_dev = __in_dev_get_rcu(dev); + struct nlattr *a, *tb[IFLA_INET_MAX+1]; + int err, rem; + + if (!in_dev) + return -EOPNOTSUPP; + + err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy); + if (err < 0) + return err; + + if (tb[IFLA_INET_CONF]) { + nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) { + int cfgid = nla_type(a); + + if (nla_len(a) < 4) + return -EINVAL; + + if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX) + return -EINVAL; + } + } + + if (tb[IFLA_INET_CONF]) { + nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) + ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a)); + } + + return 0; +} + #ifdef CONFIG_SYSCTL static void devinet_copy_dflt_conf(struct net *net, int i) @@ -1619,6 +1685,13 @@ static __net_initdata struct pernet_operations devinet_ops = { .exit = devinet_exit_net, }; +static struct rtnl_af_ops inet_af_ops = { + .family = AF_INET, + .fill_link_af = inet_fill_link_af, + .get_link_af_size = inet_get_link_af_size, + .parse_link_af = inet_parse_link_af, +}; + void __init devinet_init(void) { register_pernet_subsys(&devinet_ops); @@ -1626,6 +1699,8 @@ void __init devinet_init(void) register_gifconf(PF_INET, inet_gifconf); register_netdevice_notifier(&ip_netdev_notifier); + rtnl_af_register(&inet_af_ops); + rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL); rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL); rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr); -- cgit v1.2.3-70-g09d2 From b2c0710c464ede15e1fc52fb1e7ee9ba54cea186 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 9 Sep 2010 13:40:39 -0700 Subject: rcu: move TINY_RCU from softirq to kthread If RCU priority boosting is to be meaningful, callback invocation must be boosted in addition to preempted RCU readers. Otherwise, in presence of CPU real-time threads, the grace period ends, but the callbacks don't get invoked. If the callbacks don't get invoked, the associated memory doesn't get freed, so the system is still subject to OOM. But it is not reasonable to priority-boost RCU_SOFTIRQ, so this commit moves the callback invocations to a kthread, which can be boosted easily. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 1 - include/linux/rcutiny.h | 8 ++---- include/linux/rcutree.h | 1 + kernel/rcutiny.c | 71 +++++++++++++++++++++++++++++++++++++++--------- kernel/rcutiny_plugin.h | 15 +++++----- 5 files changed, 70 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 03cda7bed98..7142ee3304a 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -118,7 +118,6 @@ static inline int rcu_preempt_depth(void) #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ /* Internal to kernel */ -extern void rcu_init(void); extern void rcu_sched_qs(int cpu); extern void rcu_bh_qs(int cpu); extern void rcu_check_callbacks(int cpu, int user); diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 13877cb93a6..ea025a611fc 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -27,7 +27,9 @@ #include -#define rcu_init_sched() do { } while (0) +static inline void rcu_init(void) +{ +} #ifdef CONFIG_TINY_RCU @@ -125,16 +127,12 @@ static inline void rcu_cpu_stall_reset(void) } #ifdef CONFIG_DEBUG_LOCK_ALLOC - extern int rcu_scheduler_active __read_mostly; extern void rcu_scheduler_starting(void); - #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ - static inline void rcu_scheduler_starting(void) { } - #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ #endif /* __LINUX_RCUTINY_H */ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 95518e62879..c0e96833aa7 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -30,6 +30,7 @@ #ifndef __LINUX_RCUTREE_H #define __LINUX_RCUTREE_H +extern void rcu_init(void); extern void rcu_note_context_switch(int cpu); extern int rcu_needs_cpu(int cpu); extern void rcu_cpu_stall_reset(void); diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index d806735342a..86eef29cdfb 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -59,8 +59,15 @@ int rcu_scheduler_active __read_mostly; EXPORT_SYMBOL_GPL(rcu_scheduler_active); #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +/* Controls for rcu_cbs() kthread, replacing RCU_SOFTIRQ used previously. */ +static struct task_struct *rcu_cbs_task; +static DECLARE_WAIT_QUEUE_HEAD(rcu_cbs_wq); +static unsigned long have_rcu_cbs; +static void invoke_rcu_cbs(void); + /* Forward declarations for rcutiny_plugin.h. */ -static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp); +static void rcu_process_callbacks(struct rcu_ctrlblk *rcp); +static int rcu_cbs(void *arg); static void __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), struct rcu_ctrlblk *rcp); @@ -123,7 +130,7 @@ void rcu_sched_qs(int cpu) { if (rcu_qsctr_help(&rcu_sched_ctrlblk) + rcu_qsctr_help(&rcu_bh_ctrlblk)) - raise_softirq(RCU_SOFTIRQ); + invoke_rcu_cbs(); } /* @@ -132,7 +139,7 @@ void rcu_sched_qs(int cpu) void rcu_bh_qs(int cpu) { if (rcu_qsctr_help(&rcu_bh_ctrlblk)) - raise_softirq(RCU_SOFTIRQ); + invoke_rcu_cbs(); } /* @@ -152,10 +159,10 @@ void rcu_check_callbacks(int cpu, int user) } /* - * Helper function for rcu_process_callbacks() that operates on the - * specified rcu_ctrlkblk structure. + * Invoke the RCU callbacks on the specified rcu_ctrlkblk structure + * whose grace period has elapsed. */ -static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) +static void rcu_process_callbacks(struct rcu_ctrlblk *rcp) { struct rcu_head *next, *list; unsigned long flags; @@ -180,19 +187,52 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) next = list->next; prefetch(next); debug_rcu_head_unqueue(list); + local_bh_disable(); list->func(list); + local_bh_enable(); list = next; } } /* - * Invoke any callbacks whose grace period has completed. + * This kthread invokes RCU callbacks whose grace periods have + * elapsed. It is awakened as needed, and takes the place of the + * RCU_SOFTIRQ that was used previously for this purpose. + * This is a kthread, but it is never stopped, at least not until + * the system goes down. + */ +static int rcu_cbs(void *arg) +{ + unsigned long work; + unsigned long flags; + + for (;;) { + wait_event(rcu_cbs_wq, have_rcu_cbs != 0); + local_irq_save(flags); + work = have_rcu_cbs; + have_rcu_cbs = 0; + local_irq_restore(flags); + if (work) { + rcu_process_callbacks(&rcu_sched_ctrlblk); + rcu_process_callbacks(&rcu_bh_ctrlblk); + rcu_preempt_process_callbacks(); + } + } + + return 0; /* Not reached, but needed to shut gcc up. */ +} + +/* + * Wake up rcu_cbs() to process callbacks now eligible for invocation. */ -static void rcu_process_callbacks(struct softirq_action *unused) +static void invoke_rcu_cbs(void) { - __rcu_process_callbacks(&rcu_sched_ctrlblk); - __rcu_process_callbacks(&rcu_bh_ctrlblk); - rcu_preempt_process_callbacks(); + unsigned long flags; + + local_irq_save(flags); + have_rcu_cbs = 1; + wake_up(&rcu_cbs_wq); + local_irq_restore(flags); } /* @@ -282,7 +322,12 @@ void rcu_barrier_sched(void) } EXPORT_SYMBOL_GPL(rcu_barrier_sched); -void __init rcu_init(void) +/* + * Spawn the kthread that invokes RCU callbacks. + */ +static int __init rcu_spawn_kthreads(void) { - open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); + rcu_cbs_task = kthread_run(rcu_cbs, NULL, "rcu_cbs"); + return 0; } +early_initcall(rcu_spawn_kthreads); diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index 6ceca4f745f..95f9239df51 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h @@ -22,6 +22,8 @@ * Author: Paul E. McKenney */ +#include + #ifdef CONFIG_TINY_PREEMPT_RCU #include @@ -164,9 +166,9 @@ static void rcu_preempt_cpu_qs(void) if (!rcu_preempt_blocked_readers_any()) rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.nexttail; - /* If there are done callbacks, make RCU_SOFTIRQ process them. */ + /* If there are done callbacks, cause them to be invoked. */ if (*rcu_preempt_ctrlblk.rcb.donetail != NULL) - raise_softirq(RCU_SOFTIRQ); + invoke_rcu_cbs(); } /* @@ -374,7 +376,7 @@ static void rcu_preempt_check_callbacks(void) rcu_preempt_cpu_qs(); if (&rcu_preempt_ctrlblk.rcb.rcucblist != rcu_preempt_ctrlblk.rcb.donetail) - raise_softirq(RCU_SOFTIRQ); + invoke_rcu_cbs(); if (rcu_preempt_gp_in_progress() && rcu_cpu_blocking_cur_gp() && rcu_preempt_running_reader()) @@ -383,7 +385,7 @@ static void rcu_preempt_check_callbacks(void) /* * TINY_PREEMPT_RCU has an extra callback-list tail pointer to - * update, so this is invoked from __rcu_process_callbacks() to + * update, so this is invoked from rcu_process_callbacks() to * handle that case. Of course, it is invoked for all flavors of * RCU, but RCU callbacks can appear only on one of the lists, and * neither ->nexttail nor ->donetail can possibly be NULL, so there @@ -400,7 +402,7 @@ static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp) */ static void rcu_preempt_process_callbacks(void) { - __rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb); + rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb); } /* @@ -599,14 +601,13 @@ static void rcu_preempt_process_callbacks(void) #endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */ #ifdef CONFIG_DEBUG_LOCK_ALLOC - #include /* * During boot, we forgive RCU lockdep issues. After this function is * invoked, we start taking RCU lockdep issues seriously. */ -void rcu_scheduler_starting(void) +void __init rcu_scheduler_starting(void) { WARN_ON(nr_context_switches() > 0); rcu_scheduler_active = 1; -- cgit v1.2.3-70-g09d2 From 5f2b0ba4d94b3ac23cbc4b7f675d98eb677a760a Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Fri, 12 Nov 2010 11:22:23 -0500 Subject: x86, nmi_watchdog: Remove the old nmi_watchdog Now that we have a new nmi_watchdog that is more generic and sits on top of the perf subsystem, we really do not need the old nmi_watchdog any more. In addition, the old nmi_watchdog doesn't really work if you are using the default clocksource, hpet. The old nmi_watchdog code relied on local apic interrupts to determine if the cpu is still alive. With hpet as the clocksource, these interrupts don't increment any more and the old nmi_watchdog triggers false postives. This piece removes the old nmi_watchdog code and stubs out any variables and functions calls. The stubs are the same ones used by the new nmi_watchdog code, so it should be well tested. Signed-off-by: Don Zickus Cc: fweisbec@gmail.com Cc: gorcunov@openvz.org LKML-Reference: <1289578944-28564-2-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/nmi.h | 4 - arch/x86/kernel/apic/Makefile | 5 +- arch/x86/kernel/apic/hw_nmi.c | 6 +- arch/x86/kernel/apic/nmi.c | 567 ------------------------------------------ arch/x86/kernel/traps.c | 9 - include/linux/nmi.h | 6 +- kernel/sysctl.c | 16 -- 7 files changed, 5 insertions(+), 608 deletions(-) delete mode 100644 arch/x86/kernel/apic/nmi.c (limited to 'include/linux') diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 932f0f86b4b..33292ec848c 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -17,9 +17,6 @@ int do_nmi_callback(struct pt_regs *regs, int cpu); extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); extern int check_nmi_watchdog(void); -#if !defined(CONFIG_LOCKUP_DETECTOR) -extern int nmi_watchdog_enabled; -#endif extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); extern int reserve_perfctr_nmi(unsigned int); extern void release_perfctr_nmi(unsigned int); @@ -30,7 +27,6 @@ extern void setup_apic_nmi_watchdog(void *); extern void stop_apic_nmi_watchdog(void *); extern void disable_timer_nmi_watchdog(void); extern void enable_timer_nmi_watchdog(void); -extern int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason); extern void cpu_nmi_set_wd_enabled(void); extern atomic_t nmi_active; diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index 910f20b457c..3966b564ea4 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile @@ -3,10 +3,7 @@ # obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o -ifneq ($(CONFIG_HARDLOCKUP_DETECTOR),y) -obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o -endif -obj-$(CONFIG_HARDLOCKUP_DETECTOR) += hw_nmi.o +obj-y += hw_nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o obj-$(CONFIG_SMP) += ipi.o diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index cefd6942f0e..b68b1746001 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -20,12 +20,14 @@ /* For reliability, we're prepared to waste bits here. */ static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; +#ifdef CONFIG_HARDLOCKUP_DETECTOR u64 hw_nmi_get_sample_period(void) { return (u64)(cpu_khz) * 1000 * 60; } +#endif -#ifdef ARCH_HAS_NMI_WATCHDOG +#ifdef arch_trigger_all_cpu_backtrace void arch_trigger_all_cpu_backtrace(void) { int i; @@ -95,8 +97,6 @@ early_initcall(register_trigger_all_cpu_backtrace); #if defined(CONFIG_X86_LOCAL_APIC) unsigned int nmi_watchdog = NMI_NONE; EXPORT_SYMBOL(nmi_watchdog); -void acpi_nmi_enable(void) { return; } -void acpi_nmi_disable(void) { return; } #endif atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ EXPORT_SYMBOL(nmi_active); diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c deleted file mode 100644 index c90041ccb74..00000000000 --- a/arch/x86/kernel/apic/nmi.c +++ /dev/null @@ -1,567 +0,0 @@ -/* - * NMI watchdog support on APIC systems - * - * Started by Ingo Molnar - * - * Fixes: - * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog. - * Mikael Pettersson : Power Management for local APIC NMI watchdog. - * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog. - * Pavel Machek and - * Mikael Pettersson : PM converted to driver model. Disable/enable API. - */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include - -#include - -int unknown_nmi_panic; -int nmi_watchdog_enabled; - -/* For reliability, we're prepared to waste bits here. */ -static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; - -/* nmi_active: - * >0: the lapic NMI watchdog is active, but can be disabled - * <0: the lapic NMI watchdog has not been set up, and cannot - * be enabled - * 0: the lapic NMI watchdog is disabled, but can be enabled - */ -atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ -EXPORT_SYMBOL(nmi_active); - -unsigned int nmi_watchdog = NMI_NONE; -EXPORT_SYMBOL(nmi_watchdog); - -static int panic_on_timeout; - -static unsigned int nmi_hz = HZ; -static DEFINE_PER_CPU(short, wd_enabled); -static int endflag __initdata; - -static inline unsigned int get_nmi_count(int cpu) -{ - return per_cpu(irq_stat, cpu).__nmi_count; -} - -static inline int mce_in_progress(void) -{ -#if defined(CONFIG_X86_MCE) - return atomic_read(&mce_entry) > 0; -#endif - return 0; -} - -/* - * Take the local apic timer and PIT/HPET into account. We don't - * know which one is active, when we have highres/dyntick on - */ -static inline unsigned int get_timer_irqs(int cpu) -{ - return per_cpu(irq_stat, cpu).apic_timer_irqs + - per_cpu(irq_stat, cpu).irq0_irqs; -} - -#ifdef CONFIG_SMP -/* - * The performance counters used by NMI_LOCAL_APIC don't trigger when - * the CPU is idle. To make sure the NMI watchdog really ticks on all - * CPUs during the test make them busy. - */ -static __init void nmi_cpu_busy(void *data) -{ - local_irq_enable_in_hardirq(); - /* - * Intentionally don't use cpu_relax here. This is - * to make sure that the performance counter really ticks, - * even if there is a simulator or similar that catches the - * pause instruction. On a real HT machine this is fine because - * all other CPUs are busy with "useless" delay loops and don't - * care if they get somewhat less cycles. - */ - while (endflag == 0) - mb(); -} -#endif - -static void report_broken_nmi(int cpu, unsigned int *prev_nmi_count) -{ - printk(KERN_CONT "\n"); - - printk(KERN_WARNING - "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n", - cpu, prev_nmi_count[cpu], get_nmi_count(cpu)); - - printk(KERN_WARNING - "Please report this to bugzilla.kernel.org,\n"); - printk(KERN_WARNING - "and attach the output of the 'dmesg' command.\n"); - - per_cpu(wd_enabled, cpu) = 0; - atomic_dec(&nmi_active); -} - -static void __acpi_nmi_disable(void *__unused) -{ - apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); -} - -int __init check_nmi_watchdog(void) -{ - unsigned int *prev_nmi_count; - int cpu; - - if (!nmi_watchdog_active() || !atomic_read(&nmi_active)) - return 0; - - prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL); - if (!prev_nmi_count) - goto error; - - printk(KERN_INFO "Testing NMI watchdog ... "); - -#ifdef CONFIG_SMP - if (nmi_watchdog == NMI_LOCAL_APIC) - smp_call_function(nmi_cpu_busy, (void *)&endflag, 0); -#endif - - for_each_possible_cpu(cpu) - prev_nmi_count[cpu] = get_nmi_count(cpu); - local_irq_enable(); - mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */ - - for_each_online_cpu(cpu) { - if (!per_cpu(wd_enabled, cpu)) - continue; - if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) - report_broken_nmi(cpu, prev_nmi_count); - } - endflag = 1; - if (!atomic_read(&nmi_active)) { - kfree(prev_nmi_count); - atomic_set(&nmi_active, -1); - goto error; - } - printk("OK.\n"); - - /* - * now that we know it works we can reduce NMI frequency to - * something more reasonable; makes a difference in some configs - */ - if (nmi_watchdog == NMI_LOCAL_APIC) - nmi_hz = lapic_adjust_nmi_hz(1); - - kfree(prev_nmi_count); - return 0; -error: - if (nmi_watchdog == NMI_IO_APIC) { - if (!timer_through_8259) - legacy_pic->mask(0); - on_each_cpu(__acpi_nmi_disable, NULL, 1); - } - -#ifdef CONFIG_X86_32 - timer_ack = 0; -#endif - return -1; -} - -static int __init setup_nmi_watchdog(char *str) -{ - unsigned int nmi; - - if (!strncmp(str, "panic", 5)) { - panic_on_timeout = 1; - str = strchr(str, ','); - if (!str) - return 1; - ++str; - } - - if (!strncmp(str, "lapic", 5)) - nmi_watchdog = NMI_LOCAL_APIC; - else if (!strncmp(str, "ioapic", 6)) - nmi_watchdog = NMI_IO_APIC; - else { - get_option(&str, &nmi); - if (nmi >= NMI_INVALID) - return 0; - nmi_watchdog = nmi; - } - - return 1; -} -__setup("nmi_watchdog=", setup_nmi_watchdog); - -/* - * Suspend/resume support - */ -#ifdef CONFIG_PM - -static int nmi_pm_active; /* nmi_active before suspend */ - -static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) -{ - /* only CPU0 goes here, other CPUs should be offline */ - nmi_pm_active = atomic_read(&nmi_active); - stop_apic_nmi_watchdog(NULL); - BUG_ON(atomic_read(&nmi_active) != 0); - return 0; -} - -static int lapic_nmi_resume(struct sys_device *dev) -{ - /* only CPU0 goes here, other CPUs should be offline */ - if (nmi_pm_active > 0) { - setup_apic_nmi_watchdog(NULL); - touch_nmi_watchdog(); - } - return 0; -} - -static struct sysdev_class nmi_sysclass = { - .name = "lapic_nmi", - .resume = lapic_nmi_resume, - .suspend = lapic_nmi_suspend, -}; - -static struct sys_device device_lapic_nmi = { - .id = 0, - .cls = &nmi_sysclass, -}; - -static int __init init_lapic_nmi_sysfs(void) -{ - int error; - - /* - * should really be a BUG_ON but b/c this is an - * init call, it just doesn't work. -dcz - */ - if (nmi_watchdog != NMI_LOCAL_APIC) - return 0; - - if (atomic_read(&nmi_active) < 0) - return 0; - - error = sysdev_class_register(&nmi_sysclass); - if (!error) - error = sysdev_register(&device_lapic_nmi); - return error; -} - -/* must come after the local APIC's device_initcall() */ -late_initcall(init_lapic_nmi_sysfs); - -#endif /* CONFIG_PM */ - -static void __acpi_nmi_enable(void *__unused) -{ - apic_write(APIC_LVT0, APIC_DM_NMI); -} - -/* - * Enable timer based NMIs on all CPUs: - */ -void acpi_nmi_enable(void) -{ - if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) - on_each_cpu(__acpi_nmi_enable, NULL, 1); -} - -/* - * Disable timer based NMIs on all CPUs: - */ -void acpi_nmi_disable(void) -{ - if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) - on_each_cpu(__acpi_nmi_disable, NULL, 1); -} - -/* - * This function is called as soon the LAPIC NMI watchdog driver has everything - * in place and it's ready to check if the NMIs belong to the NMI watchdog - */ -void cpu_nmi_set_wd_enabled(void) -{ - __get_cpu_var(wd_enabled) = 1; -} - -void setup_apic_nmi_watchdog(void *unused) -{ - if (__get_cpu_var(wd_enabled)) - return; - - /* cheap hack to support suspend/resume */ - /* if cpu0 is not active neither should the other cpus */ - if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0) - return; - - switch (nmi_watchdog) { - case NMI_LOCAL_APIC: - if (lapic_watchdog_init(nmi_hz) < 0) { - __get_cpu_var(wd_enabled) = 0; - return; - } - /* FALL THROUGH */ - case NMI_IO_APIC: - __get_cpu_var(wd_enabled) = 1; - atomic_inc(&nmi_active); - } -} - -void stop_apic_nmi_watchdog(void *unused) -{ - /* only support LOCAL and IO APICs for now */ - if (!nmi_watchdog_active()) - return; - if (__get_cpu_var(wd_enabled) == 0) - return; - if (nmi_watchdog == NMI_LOCAL_APIC) - lapic_watchdog_stop(); - else - __acpi_nmi_disable(NULL); - __get_cpu_var(wd_enabled) = 0; - atomic_dec(&nmi_active); -} - -/* - * the best way to detect whether a CPU has a 'hard lockup' problem - * is to check it's local APIC timer IRQ counts. If they are not - * changing then that CPU has some problem. - * - * as these watchdog NMI IRQs are generated on every CPU, we only - * have to check the current processor. - * - * since NMIs don't listen to _any_ locks, we have to be extremely - * careful not to rely on unsafe variables. The printk might lock - * up though, so we have to break up any console locks first ... - * [when there will be more tty-related locks, break them up here too!] - */ - -static DEFINE_PER_CPU(unsigned, last_irq_sum); -static DEFINE_PER_CPU(long, alert_counter); -static DEFINE_PER_CPU(int, nmi_touch); - -void touch_nmi_watchdog(void) -{ - if (nmi_watchdog_active()) { - unsigned cpu; - - /* - * Tell other CPUs to reset their alert counters. We cannot - * do it ourselves because the alert count increase is not - * atomic. - */ - for_each_present_cpu(cpu) { - if (per_cpu(nmi_touch, cpu) != 1) - per_cpu(nmi_touch, cpu) = 1; - } - } - - /* - * Tickle the softlockup detector too: - */ - touch_softlockup_watchdog(); -} -EXPORT_SYMBOL(touch_nmi_watchdog); - -notrace __kprobes int -nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) -{ - /* - * Since current_thread_info()-> is always on the stack, and we - * always switch the stack NMI-atomically, it's safe to use - * smp_processor_id(). - */ - unsigned int sum; - int touched = 0; - int cpu = smp_processor_id(); - int rc = 0; - - sum = get_timer_irqs(cpu); - - if (__get_cpu_var(nmi_touch)) { - __get_cpu_var(nmi_touch) = 0; - touched = 1; - } - - /* We can be called before check_nmi_watchdog, hence NULL check. */ - if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { - static DEFINE_RAW_SPINLOCK(lock); /* Serialise the printks */ - - raw_spin_lock(&lock); - printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); - show_regs(regs); - dump_stack(); - raw_spin_unlock(&lock); - cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); - - rc = 1; - } - - /* Could check oops_in_progress here too, but it's safer not to */ - if (mce_in_progress()) - touched = 1; - - /* if the none of the timers isn't firing, this cpu isn't doing much */ - if (!touched && __get_cpu_var(last_irq_sum) == sum) { - /* - * Ayiee, looks like this CPU is stuck ... - * wait a few IRQs (5 seconds) before doing the oops ... - */ - __this_cpu_inc(alert_counter); - if (__this_cpu_read(alert_counter) == 5 * nmi_hz) - /* - * die_nmi will return ONLY if NOTIFY_STOP happens.. - */ - die_nmi("BUG: NMI Watchdog detected LOCKUP", - regs, panic_on_timeout); - } else { - __get_cpu_var(last_irq_sum) = sum; - __this_cpu_write(alert_counter, 0); - } - - /* see if the nmi watchdog went off */ - if (!__get_cpu_var(wd_enabled)) - return rc; - switch (nmi_watchdog) { - case NMI_LOCAL_APIC: - rc |= lapic_wd_event(nmi_hz); - break; - case NMI_IO_APIC: - /* - * don't know how to accurately check for this. - * just assume it was a watchdog timer interrupt - * This matches the old behaviour. - */ - rc = 1; - break; - } - return rc; -} - -#ifdef CONFIG_SYSCTL - -static void enable_ioapic_nmi_watchdog_single(void *unused) -{ - __get_cpu_var(wd_enabled) = 1; - atomic_inc(&nmi_active); - __acpi_nmi_enable(NULL); -} - -static void enable_ioapic_nmi_watchdog(void) -{ - on_each_cpu(enable_ioapic_nmi_watchdog_single, NULL, 1); - touch_nmi_watchdog(); -} - -static void disable_ioapic_nmi_watchdog(void) -{ - on_each_cpu(stop_apic_nmi_watchdog, NULL, 1); -} - -static int __init setup_unknown_nmi_panic(char *str) -{ - unknown_nmi_panic = 1; - return 1; -} -__setup("unknown_nmi_panic", setup_unknown_nmi_panic); - -static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) -{ - unsigned char reason = get_nmi_reason(); - char buf[64]; - - sprintf(buf, "NMI received for unknown reason %02x\n", reason); - die_nmi(buf, regs, 1); /* Always panic here */ - return 0; -} - -/* - * proc handler for /proc/sys/kernel/nmi - */ -int proc_nmi_enabled(struct ctl_table *table, int write, - void __user *buffer, size_t *length, loff_t *ppos) -{ - int old_state; - - nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0; - old_state = nmi_watchdog_enabled; - proc_dointvec(table, write, buffer, length, ppos); - if (!!old_state == !!nmi_watchdog_enabled) - return 0; - - if (atomic_read(&nmi_active) < 0 || !nmi_watchdog_active()) { - printk(KERN_WARNING - "NMI watchdog is permanently disabled\n"); - return -EIO; - } - - if (nmi_watchdog == NMI_LOCAL_APIC) { - if (nmi_watchdog_enabled) - enable_lapic_nmi_watchdog(); - else - disable_lapic_nmi_watchdog(); - } else if (nmi_watchdog == NMI_IO_APIC) { - if (nmi_watchdog_enabled) - enable_ioapic_nmi_watchdog(); - else - disable_ioapic_nmi_watchdog(); - } else { - printk(KERN_WARNING - "NMI watchdog doesn't know what hardware to touch\n"); - return -EIO; - } - return 0; -} - -#endif /* CONFIG_SYSCTL */ - -int do_nmi_callback(struct pt_regs *regs, int cpu) -{ -#ifdef CONFIG_SYSCTL - if (unknown_nmi_panic) - return unknown_nmi_panic_callback(regs, cpu); -#endif - return 0; -} - -void arch_trigger_all_cpu_backtrace(void) -{ - int i; - - cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); - - printk(KERN_INFO "sending NMI to all CPUs:\n"); - apic->send_IPI_all(NMI_VECTOR); - - /* Wait for up to 10 seconds for all CPUs to do the backtrace */ - for (i = 0; i < 10 * 1000; i++) { - if (cpumask_empty(to_cpumask(backtrace_mask))) - break; - mdelay(1); - } -} diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index cb838ca42c9..db30d9cb9dd 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -398,15 +398,6 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) == NOTIFY_STOP) return; -#ifndef CONFIG_LOCKUP_DETECTOR - /* - * Ok, so this is none of the documented NMI sources, - * so it must be the NMI watchdog. - */ - if (nmi_watchdog_tick(regs, reason)) - return; - if (!do_nmi_callback(regs, cpu)) -#endif /* !CONFIG_LOCKUP_DETECTOR */ unknown_nmi_error(reason, regs); #else unknown_nmi_error(reason, regs); diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 06aab5eee13..0cb3e5c246d 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -16,10 +16,7 @@ */ #ifdef ARCH_HAS_NMI_WATCHDOG #include -extern void touch_nmi_watchdog(void); -extern void acpi_nmi_disable(void); -extern void acpi_nmi_enable(void); -#else +#endif #ifndef CONFIG_HARDLOCKUP_DETECTOR static inline void touch_nmi_watchdog(void) { @@ -30,7 +27,6 @@ extern void touch_nmi_watchdog(void); #endif static inline void acpi_nmi_disable(void) { } static inline void acpi_nmi_enable(void) { } -#endif /* * Create trigger_all_cpu_backtrace() out of the arch-provided diff --git a/kernel/sysctl.c b/kernel/sysctl.c index b65bf634035..ce33e2a2afe 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -746,22 +746,6 @@ static struct ctl_table kern_table[] = { .extra2 = &one, }, #endif -#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) && !defined(CONFIG_LOCKUP_DETECTOR) - { - .procname = "unknown_nmi_panic", - .data = &unknown_nmi_panic, - .maxlen = sizeof (int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "nmi_watchdog", - .data = &nmi_watchdog_enabled, - .maxlen = sizeof (int), - .mode = 0644, - .proc_handler = proc_nmi_enabled, - }, -#endif #if defined(CONFIG_X86) { .procname = "panic_on_unrecovered_nmi", -- cgit v1.2.3-70-g09d2 From 072b198a4ad48bd722ec6d203d65422a4698eae7 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Fri, 12 Nov 2010 11:22:24 -0500 Subject: x86, nmi_watchdog: Remove all stub function calls from old nmi_watchdog Now that the bulk of the old nmi_watchdog is gone, remove all the stub variables and hooks associated with it. This touches lots of files mainly because of how the io_apic nmi_watchdog was implemented. Now that the io_apic nmi_watchdog is forever gone, remove all its fingers. Most of this code was not being exercised by virtue of nmi_watchdog != NMI_IO_APIC, so there shouldn't be anything to risky here. Signed-off-by: Don Zickus Cc: fweisbec@gmail.com Cc: gorcunov@openvz.org LKML-Reference: <1289578944-28564-3-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/nmi.h | 47 --- arch/x86/include/asm/smpboot_hooks.h | 1 - arch/x86/include/asm/timer.h | 6 - arch/x86/kernel/apic/apic.c | 15 +- arch/x86/kernel/apic/hw_nmi.c | 10 - arch/x86/kernel/apic/io_apic.c | 46 --- arch/x86/kernel/cpu/perf_event.c | 9 - arch/x86/kernel/cpu/perfctr-watchdog.c | 642 --------------------------------- arch/x86/kernel/smpboot.c | 11 - arch/x86/kernel/time.c | 18 - arch/x86/kernel/traps.c | 2 - arch/x86/oprofile/nmi_timer_int.c | 3 - drivers/acpi/acpica/nsinit.c | 2 - drivers/watchdog/hpwdt.c | 7 +- include/linux/nmi.h | 2 - 15 files changed, 2 insertions(+), 819 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 33292ec848c..3545838cdde 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -7,35 +7,13 @@ #ifdef ARCH_HAS_NMI_WATCHDOG -/** - * do_nmi_callback - * - * Check to see if a callback exists and execute it. Return 1 - * if the handler exists and was handled successfully. - */ -int do_nmi_callback(struct pt_regs *regs, int cpu); - extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); -extern int check_nmi_watchdog(void); extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); extern int reserve_perfctr_nmi(unsigned int); extern void release_perfctr_nmi(unsigned int); extern int reserve_evntsel_nmi(unsigned int); extern void release_evntsel_nmi(unsigned int); -extern void setup_apic_nmi_watchdog(void *); -extern void stop_apic_nmi_watchdog(void *); -extern void disable_timer_nmi_watchdog(void); -extern void enable_timer_nmi_watchdog(void); -extern void cpu_nmi_set_wd_enabled(void); - -extern atomic_t nmi_active; -extern unsigned int nmi_watchdog; -#define NMI_NONE 0 -#define NMI_IO_APIC 1 -#define NMI_LOCAL_APIC 2 -#define NMI_INVALID 3 - struct ctl_table; extern int proc_nmi_enabled(struct ctl_table *, int , void __user *, size_t *, loff_t *); @@ -43,33 +21,8 @@ extern int unknown_nmi_panic; void arch_trigger_all_cpu_backtrace(void); #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace - -static inline void localise_nmi_watchdog(void) -{ - if (nmi_watchdog == NMI_IO_APIC) - nmi_watchdog = NMI_LOCAL_APIC; -} - -/* check if nmi_watchdog is active (ie was specified at boot) */ -static inline int nmi_watchdog_active(void) -{ - /* - * actually it should be: - * return (nmi_watchdog == NMI_LOCAL_APIC || - * nmi_watchdog == NMI_IO_APIC) - * but since they are power of two we could use a - * cheaper way --cvg - */ - return nmi_watchdog & (NMI_LOCAL_APIC | NMI_IO_APIC); -} #endif -void lapic_watchdog_stop(void); -int lapic_watchdog_init(unsigned nmi_hz); -int lapic_wd_event(unsigned nmi_hz); -unsigned lapic_adjust_nmi_hz(unsigned hz); -void disable_lapic_nmi_watchdog(void); -void enable_lapic_nmi_watchdog(void); void stop_nmi(void); void restart_nmi(void); diff --git a/arch/x86/include/asm/smpboot_hooks.h b/arch/x86/include/asm/smpboot_hooks.h index 1def6011490..6c22bf353f2 100644 --- a/arch/x86/include/asm/smpboot_hooks.h +++ b/arch/x86/include/asm/smpboot_hooks.h @@ -48,7 +48,6 @@ static inline void __init smpboot_setup_io_apic(void) setup_IO_APIC(); else { nr_ioapics = 0; - localise_nmi_watchdog(); } #endif } diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index 5469630b27f..fa7b9176b76 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h @@ -10,12 +10,6 @@ unsigned long long native_sched_clock(void); extern int recalibrate_cpu_khz(void); -#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC) -extern int timer_ack; -#else -# define timer_ack (0) -#endif - extern int no_timer_check; /* Accelerators for sched_clock() diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 3f838d53739..e9e2a93783f 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include @@ -799,11 +798,7 @@ void __init setup_boot_APIC_clock(void) * PIT/HPET going. Otherwise register lapic as a dummy * device. */ - if (nmi_watchdog != NMI_IO_APIC) - lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; - else - pr_warning("APIC timer registered as dummy," - " due to nmi_watchdog=%d!\n", nmi_watchdog); + lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; /* Setup the lapic or request the broadcast */ setup_APIC_timer(); @@ -1387,7 +1382,6 @@ void __cpuinit end_local_APIC_setup(void) } #endif - setup_apic_nmi_watchdog(NULL); apic_pm_activate(); } @@ -1750,17 +1744,10 @@ int __init APIC_init_uniprocessor(void) setup_IO_APIC(); else { nr_ioapics = 0; - localise_nmi_watchdog(); } -#else - localise_nmi_watchdog(); #endif x86_init.timers.setup_percpu_clockev(); -#ifdef CONFIG_X86_64 - check_nmi_watchdog(); -#endif - return 0; } diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index b68b1746001..3e25afe9a62 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -94,14 +94,4 @@ early_initcall(register_trigger_all_cpu_backtrace); #endif /* STUB calls to mimic old nmi_watchdog behaviour */ -#if defined(CONFIG_X86_LOCAL_APIC) -unsigned int nmi_watchdog = NMI_NONE; -EXPORT_SYMBOL(nmi_watchdog); -#endif -atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ -EXPORT_SYMBOL(nmi_active); int unknown_nmi_panic; -void cpu_nmi_set_wd_enabled(void) { return; } -void stop_apic_nmi_watchdog(void *unused) { return; } -void setup_apic_nmi_watchdog(void *unused) { return; } -int __init check_nmi_watchdog(void) { return 0; } diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 7cc0a721f62..e4a040c28de 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -54,7 +54,6 @@ #include #include #include -#include #include #include #include @@ -2643,24 +2642,6 @@ static void lapic_register_intr(int irq) "edge"); } -static void __init setup_nmi(void) -{ - /* - * Dirty trick to enable the NMI watchdog ... - * We put the 8259A master into AEOI mode and - * unmask on all local APICs LVT0 as NMI. - * - * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') - * is from Maciej W. Rozycki - so we do not have to EOI from - * the NMI handler or the timer interrupt. - */ - apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); - - enable_NMI_through_LVT0(); - - apic_printk(APIC_VERBOSE, " done.\n"); -} - /* * This looks a bit hackish but it's about the only one way of sending * a few INTA cycles to 8259As and any associated glue logic. ICR does @@ -2766,15 +2747,6 @@ static inline void __init check_timer(void) */ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); legacy_pic->init(1); -#ifdef CONFIG_X86_32 - { - unsigned int ver; - - ver = apic_read(APIC_LVR); - ver = GET_APIC_VERSION(ver); - timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); - } -#endif pin1 = find_isa_irq_pin(0, mp_INT); apic1 = find_isa_irq_apic(0, mp_INT); @@ -2822,10 +2794,6 @@ static inline void __init check_timer(void) unmask_ioapic(cfg); } if (timer_irq_works()) { - if (nmi_watchdog == NMI_IO_APIC) { - setup_nmi(); - legacy_pic->unmask(0); - } if (disable_timer_pin_1 > 0) clear_IO_APIC_pin(0, pin1); goto out; @@ -2851,11 +2819,6 @@ static inline void __init check_timer(void) if (timer_irq_works()) { apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); timer_through_8259 = 1; - if (nmi_watchdog == NMI_IO_APIC) { - legacy_pic->mask(0); - setup_nmi(); - legacy_pic->unmask(0); - } goto out; } /* @@ -2867,15 +2830,6 @@ static inline void __init check_timer(void) apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); } - if (nmi_watchdog == NMI_IO_APIC) { - apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work " - "through the IO-APIC - disabling NMI Watchdog!\n"); - nmi_watchdog = NMI_NONE; - } -#ifdef CONFIG_X86_32 - timer_ack = 0; -#endif - apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer as Virtual Wire IRQ...\n"); diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index ed6310183ef..1f129a14e3a 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -330,9 +330,6 @@ static bool reserve_pmc_hardware(void) { int i; - if (nmi_watchdog == NMI_LOCAL_APIC) - disable_lapic_nmi_watchdog(); - for (i = 0; i < x86_pmu.num_counters; i++) { if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) goto perfctr_fail; @@ -355,9 +352,6 @@ perfctr_fail: for (i--; i >= 0; i--) release_perfctr_nmi(x86_pmu.perfctr + i); - if (nmi_watchdog == NMI_LOCAL_APIC) - enable_lapic_nmi_watchdog(); - return false; } @@ -369,9 +363,6 @@ static void release_pmc_hardware(void) release_perfctr_nmi(x86_pmu.perfctr + i); release_evntsel_nmi(x86_pmu.eventsel + i); } - - if (nmi_watchdog == NMI_LOCAL_APIC) - enable_lapic_nmi_watchdog(); } #else diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index d9f4ff8fcd6..14d45928c28 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -22,26 +22,6 @@ #include #include -struct nmi_watchdog_ctlblk { - unsigned int cccr_msr; - unsigned int perfctr_msr; /* the MSR to reset in NMI handler */ - unsigned int evntsel_msr; /* the MSR to select the events to handle */ -}; - -/* Interface defining a CPU specific perfctr watchdog */ -struct wd_ops { - int (*reserve)(void); - void (*unreserve)(void); - int (*setup)(unsigned nmi_hz); - void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz); - void (*stop)(void); - unsigned perfctr; - unsigned evntsel; - u64 checkbit; -}; - -static const struct wd_ops *wd_ops; - /* * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's * offset from MSR_P4_BSU_ESCR0. @@ -60,8 +40,6 @@ static const struct wd_ops *wd_ops; static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS); static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS); -static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk); - /* converts an msr to an appropriate reservation bit */ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) { @@ -172,623 +150,3 @@ void release_evntsel_nmi(unsigned int msr) clear_bit(counter, evntsel_nmi_owner); } EXPORT_SYMBOL(release_evntsel_nmi); - -void disable_lapic_nmi_watchdog(void) -{ - BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); - - if (atomic_read(&nmi_active) <= 0) - return; - - on_each_cpu(stop_apic_nmi_watchdog, NULL, 1); - - if (wd_ops) - wd_ops->unreserve(); - - BUG_ON(atomic_read(&nmi_active) != 0); -} - -void enable_lapic_nmi_watchdog(void) -{ - BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); - - /* are we already enabled */ - if (atomic_read(&nmi_active) != 0) - return; - - /* are we lapic aware */ - if (!wd_ops) - return; - if (!wd_ops->reserve()) { - printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n"); - return; - } - - on_each_cpu(setup_apic_nmi_watchdog, NULL, 1); - touch_nmi_watchdog(); -} - -/* - * Activate the NMI watchdog via the local APIC. - */ - -static unsigned int adjust_for_32bit_ctr(unsigned int hz) -{ - u64 counter_val; - unsigned int retval = hz; - - /* - * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter - * are writable, with higher bits sign extending from bit 31. - * So, we can only program the counter with 31 bit values and - * 32nd bit should be 1, for 33.. to be 1. - * Find the appropriate nmi_hz - */ - counter_val = (u64)cpu_khz * 1000; - do_div(counter_val, retval); - if (counter_val > 0x7fffffffULL) { - u64 count = (u64)cpu_khz * 1000; - do_div(count, 0x7fffffffUL); - retval = count + 1; - } - return retval; -} - -static void write_watchdog_counter(unsigned int perfctr_msr, - const char *descr, unsigned nmi_hz) -{ - u64 count = (u64)cpu_khz * 1000; - - do_div(count, nmi_hz); - if (descr) - pr_debug("setting %s to -0x%08Lx\n", descr, count); - wrmsrl(perfctr_msr, 0 - count); -} - -static void write_watchdog_counter32(unsigned int perfctr_msr, - const char *descr, unsigned nmi_hz) -{ - u64 count = (u64)cpu_khz * 1000; - - do_div(count, nmi_hz); - if (descr) - pr_debug("setting %s to -0x%08Lx\n", descr, count); - wrmsr(perfctr_msr, (u32)(-count), 0); -} - -/* - * AMD K7/K8/Family10h/Family11h support. - * AMD keeps this interface nicely stable so there is not much variety - */ -#define K7_EVNTSEL_ENABLE (1 << 22) -#define K7_EVNTSEL_INT (1 << 20) -#define K7_EVNTSEL_OS (1 << 17) -#define K7_EVNTSEL_USR (1 << 16) -#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 -#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING - -static int setup_k7_watchdog(unsigned nmi_hz) -{ - unsigned int perfctr_msr, evntsel_msr; - unsigned int evntsel; - struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - - perfctr_msr = wd_ops->perfctr; - evntsel_msr = wd_ops->evntsel; - - wrmsrl(perfctr_msr, 0UL); - - evntsel = K7_EVNTSEL_INT - | K7_EVNTSEL_OS - | K7_EVNTSEL_USR - | K7_NMI_EVENT; - - /* setup the timer */ - wrmsr(evntsel_msr, evntsel, 0); - write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz); - - /* initialize the wd struct before enabling */ - wd->perfctr_msr = perfctr_msr; - wd->evntsel_msr = evntsel_msr; - wd->cccr_msr = 0; /* unused */ - - /* ok, everything is initialized, announce that we're set */ - cpu_nmi_set_wd_enabled(); - - apic_write(APIC_LVTPC, APIC_DM_NMI); - evntsel |= K7_EVNTSEL_ENABLE; - wrmsr(evntsel_msr, evntsel, 0); - - return 1; -} - -static void single_msr_stop_watchdog(void) -{ - struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - - wrmsr(wd->evntsel_msr, 0, 0); -} - -static int single_msr_reserve(void) -{ - if (!reserve_perfctr_nmi(wd_ops->perfctr)) - return 0; - - if (!reserve_evntsel_nmi(wd_ops->evntsel)) { - release_perfctr_nmi(wd_ops->perfctr); - return 0; - } - return 1; -} - -static void single_msr_unreserve(void) -{ - release_evntsel_nmi(wd_ops->evntsel); - release_perfctr_nmi(wd_ops->perfctr); -} - -static void __kprobes -single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) -{ - /* start the cycle over again */ - write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); -} - -static const struct wd_ops k7_wd_ops = { - .reserve = single_msr_reserve, - .unreserve = single_msr_unreserve, - .setup = setup_k7_watchdog, - .rearm = single_msr_rearm, - .stop = single_msr_stop_watchdog, - .perfctr = MSR_K7_PERFCTR0, - .evntsel = MSR_K7_EVNTSEL0, - .checkbit = 1ULL << 47, -}; - -/* - * Intel Model 6 (PPro+,P2,P3,P-M,Core1) - */ -#define P6_EVNTSEL0_ENABLE (1 << 22) -#define P6_EVNTSEL_INT (1 << 20) -#define P6_EVNTSEL_OS (1 << 17) -#define P6_EVNTSEL_USR (1 << 16) -#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 -#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED - -static int setup_p6_watchdog(unsigned nmi_hz) -{ - unsigned int perfctr_msr, evntsel_msr; - unsigned int evntsel; - struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - - perfctr_msr = wd_ops->perfctr; - evntsel_msr = wd_ops->evntsel; - - /* KVM doesn't implement this MSR */ - if (wrmsr_safe(perfctr_msr, 0, 0) < 0) - return 0; - - evntsel = P6_EVNTSEL_INT - | P6_EVNTSEL_OS - | P6_EVNTSEL_USR - | P6_NMI_EVENT; - - /* setup the timer */ - wrmsr(evntsel_msr, evntsel, 0); - nmi_hz = adjust_for_32bit_ctr(nmi_hz); - write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz); - - /* initialize the wd struct before enabling */ - wd->perfctr_msr = perfctr_msr; - wd->evntsel_msr = evntsel_msr; - wd->cccr_msr = 0; /* unused */ - - /* ok, everything is initialized, announce that we're set */ - cpu_nmi_set_wd_enabled(); - - apic_write(APIC_LVTPC, APIC_DM_NMI); - evntsel |= P6_EVNTSEL0_ENABLE; - wrmsr(evntsel_msr, evntsel, 0); - - return 1; -} - -static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) -{ - /* - * P6 based Pentium M need to re-unmask - * the apic vector but it doesn't hurt - * other P6 variant. - * ArchPerfom/Core Duo also needs this - */ - apic_write(APIC_LVTPC, APIC_DM_NMI); - - /* P6/ARCH_PERFMON has 32 bit counter write */ - write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz); -} - -static const struct wd_ops p6_wd_ops = { - .reserve = single_msr_reserve, - .unreserve = single_msr_unreserve, - .setup = setup_p6_watchdog, - .rearm = p6_rearm, - .stop = single_msr_stop_watchdog, - .perfctr = MSR_P6_PERFCTR0, - .evntsel = MSR_P6_EVNTSEL0, - .checkbit = 1ULL << 39, -}; - -/* - * Intel P4 performance counters. - * By far the most complicated of all. - */ -#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1 << 7) -#define P4_ESCR_EVENT_SELECT(N) ((N) << 25) -#define P4_ESCR_OS (1 << 3) -#define P4_ESCR_USR (1 << 2) -#define P4_CCCR_OVF_PMI0 (1 << 26) -#define P4_CCCR_OVF_PMI1 (1 << 27) -#define P4_CCCR_THRESHOLD(N) ((N) << 20) -#define P4_CCCR_COMPLEMENT (1 << 19) -#define P4_CCCR_COMPARE (1 << 18) -#define P4_CCCR_REQUIRED (3 << 16) -#define P4_CCCR_ESCR_SELECT(N) ((N) << 13) -#define P4_CCCR_ENABLE (1 << 12) -#define P4_CCCR_OVF (1 << 31) - -#define P4_CONTROLS 18 -static unsigned int p4_controls[18] = { - MSR_P4_BPU_CCCR0, - MSR_P4_BPU_CCCR1, - MSR_P4_BPU_CCCR2, - MSR_P4_BPU_CCCR3, - MSR_P4_MS_CCCR0, - MSR_P4_MS_CCCR1, - MSR_P4_MS_CCCR2, - MSR_P4_MS_CCCR3, - MSR_P4_FLAME_CCCR0, - MSR_P4_FLAME_CCCR1, - MSR_P4_FLAME_CCCR2, - MSR_P4_FLAME_CCCR3, - MSR_P4_IQ_CCCR0, - MSR_P4_IQ_CCCR1, - MSR_P4_IQ_CCCR2, - MSR_P4_IQ_CCCR3, - MSR_P4_IQ_CCCR4, - MSR_P4_IQ_CCCR5, -}; -/* - * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter - * CRU_ESCR0 (with any non-null event selector) through a complemented - * max threshold. [IA32-Vol3, Section 14.9.9] - */ -static int setup_p4_watchdog(unsigned nmi_hz) -{ - unsigned int perfctr_msr, evntsel_msr, cccr_msr; - unsigned int evntsel, cccr_val; - unsigned int misc_enable, dummy; - unsigned int ht_num; - struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - - rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy); - if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) - return 0; - -#ifdef CONFIG_SMP - /* detect which hyperthread we are on */ - if (smp_num_siblings == 2) { - unsigned int ebx, apicid; - - ebx = cpuid_ebx(1); - apicid = (ebx >> 24) & 0xff; - ht_num = apicid & 1; - } else -#endif - ht_num = 0; - - /* - * performance counters are shared resources - * assign each hyperthread its own set - * (re-use the ESCR0 register, seems safe - * and keeps the cccr_val the same) - */ - if (!ht_num) { - /* logical cpu 0 */ - perfctr_msr = MSR_P4_IQ_PERFCTR0; - evntsel_msr = MSR_P4_CRU_ESCR0; - cccr_msr = MSR_P4_IQ_CCCR0; - cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4); - - /* - * If we're on the kdump kernel or other situation, we may - * still have other performance counter registers set to - * interrupt and they'll keep interrupting forever because - * of the P4_CCCR_OVF quirk. So we need to ACK all the - * pending interrupts and disable all the registers here, - * before reenabling the NMI delivery. Refer to p4_rearm() - * about the P4_CCCR_OVF quirk. - */ - if (reset_devices) { - unsigned int low, high; - int i; - - for (i = 0; i < P4_CONTROLS; i++) { - rdmsr(p4_controls[i], low, high); - low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF); - wrmsr(p4_controls[i], low, high); - } - } - } else { - /* logical cpu 1 */ - perfctr_msr = MSR_P4_IQ_PERFCTR1; - evntsel_msr = MSR_P4_CRU_ESCR0; - cccr_msr = MSR_P4_IQ_CCCR1; - - /* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */ - if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4) - cccr_val = P4_CCCR_OVF_PMI0; - else - cccr_val = P4_CCCR_OVF_PMI1; - cccr_val |= P4_CCCR_ESCR_SELECT(4); - } - - evntsel = P4_ESCR_EVENT_SELECT(0x3F) - | P4_ESCR_OS - | P4_ESCR_USR; - - cccr_val |= P4_CCCR_THRESHOLD(15) - | P4_CCCR_COMPLEMENT - | P4_CCCR_COMPARE - | P4_CCCR_REQUIRED; - - wrmsr(evntsel_msr, evntsel, 0); - wrmsr(cccr_msr, cccr_val, 0); - write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz); - - wd->perfctr_msr = perfctr_msr; - wd->evntsel_msr = evntsel_msr; - wd->cccr_msr = cccr_msr; - - /* ok, everything is initialized, announce that we're set */ - cpu_nmi_set_wd_enabled(); - - apic_write(APIC_LVTPC, APIC_DM_NMI); - cccr_val |= P4_CCCR_ENABLE; - wrmsr(cccr_msr, cccr_val, 0); - return 1; -} - -static void stop_p4_watchdog(void) -{ - struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - wrmsr(wd->cccr_msr, 0, 0); - wrmsr(wd->evntsel_msr, 0, 0); -} - -static int p4_reserve(void) -{ - if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0)) - return 0; -#ifdef CONFIG_SMP - if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1)) - goto fail1; -#endif - if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0)) - goto fail2; - /* RED-PEN why is ESCR1 not reserved here? */ - return 1; - fail2: -#ifdef CONFIG_SMP - if (smp_num_siblings > 1) - release_perfctr_nmi(MSR_P4_IQ_PERFCTR1); - fail1: -#endif - release_perfctr_nmi(MSR_P4_IQ_PERFCTR0); - return 0; -} - -static void p4_unreserve(void) -{ -#ifdef CONFIG_SMP - if (smp_num_siblings > 1) - release_perfctr_nmi(MSR_P4_IQ_PERFCTR1); -#endif - release_evntsel_nmi(MSR_P4_CRU_ESCR0); - release_perfctr_nmi(MSR_P4_IQ_PERFCTR0); -} - -static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) -{ - unsigned dummy; - /* - * P4 quirks: - * - An overflown perfctr will assert its interrupt - * until the OVF flag in its CCCR is cleared. - * - LVTPC is masked on interrupt and must be - * unmasked by the LVTPC handler. - */ - rdmsrl(wd->cccr_msr, dummy); - dummy &= ~P4_CCCR_OVF; - wrmsrl(wd->cccr_msr, dummy); - apic_write(APIC_LVTPC, APIC_DM_NMI); - /* start the cycle over again */ - write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); -} - -static const struct wd_ops p4_wd_ops = { - .reserve = p4_reserve, - .unreserve = p4_unreserve, - .setup = setup_p4_watchdog, - .rearm = p4_rearm, - .stop = stop_p4_watchdog, - /* RED-PEN this is wrong for the other sibling */ - .perfctr = MSR_P4_BPU_PERFCTR0, - .evntsel = MSR_P4_BSU_ESCR0, - .checkbit = 1ULL << 39, -}; - -/* - * Watchdog using the Intel architected PerfMon. - * Used for Core2 and hopefully all future Intel CPUs. - */ -#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL -#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK - -static struct wd_ops intel_arch_wd_ops; - -static int setup_intel_arch_watchdog(unsigned nmi_hz) -{ - unsigned int ebx; - union cpuid10_eax eax; - unsigned int unused; - unsigned int perfctr_msr, evntsel_msr; - unsigned int evntsel; - struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - - /* - * Check whether the Architectural PerfMon supports - * Unhalted Core Cycles Event or not. - * NOTE: Corresponding bit = 0 in ebx indicates event present. - */ - cpuid(10, &(eax.full), &ebx, &unused, &unused); - if ((eax.split.mask_length < - (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || - (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) - return 0; - - perfctr_msr = wd_ops->perfctr; - evntsel_msr = wd_ops->evntsel; - - wrmsrl(perfctr_msr, 0UL); - - evntsel = ARCH_PERFMON_EVENTSEL_INT - | ARCH_PERFMON_EVENTSEL_OS - | ARCH_PERFMON_EVENTSEL_USR - | ARCH_PERFMON_NMI_EVENT_SEL - | ARCH_PERFMON_NMI_EVENT_UMASK; - - /* setup the timer */ - wrmsr(evntsel_msr, evntsel, 0); - nmi_hz = adjust_for_32bit_ctr(nmi_hz); - write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz); - - wd->perfctr_msr = perfctr_msr; - wd->evntsel_msr = evntsel_msr; - wd->cccr_msr = 0; /* unused */ - - /* ok, everything is initialized, announce that we're set */ - cpu_nmi_set_wd_enabled(); - - apic_write(APIC_LVTPC, APIC_DM_NMI); - evntsel |= ARCH_PERFMON_EVENTSEL_ENABLE; - wrmsr(evntsel_msr, evntsel, 0); - intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1); - return 1; -} - -static struct wd_ops intel_arch_wd_ops __read_mostly = { - .reserve = single_msr_reserve, - .unreserve = single_msr_unreserve, - .setup = setup_intel_arch_watchdog, - .rearm = p6_rearm, - .stop = single_msr_stop_watchdog, - .perfctr = MSR_ARCH_PERFMON_PERFCTR1, - .evntsel = MSR_ARCH_PERFMON_EVENTSEL1, -}; - -static void probe_nmi_watchdog(void) -{ - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_AMD: - if (boot_cpu_data.x86 == 6 || - (boot_cpu_data.x86 >= 0xf && boot_cpu_data.x86 <= 0x15)) - wd_ops = &k7_wd_ops; - return; - case X86_VENDOR_INTEL: - /* Work around where perfctr1 doesn't have a working enable - * bit as described in the following errata: - * AE49 Core Duo and Intel Core Solo 65 nm - * AN49 Intel Pentium Dual-Core - * AF49 Dual-Core Intel Xeon Processor LV - */ - if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) || - ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 15 && - boot_cpu_data.x86_mask == 4))) { - intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0; - intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0; - } - if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { - wd_ops = &intel_arch_wd_ops; - break; - } - switch (boot_cpu_data.x86) { - case 6: - if (boot_cpu_data.x86_model > 13) - return; - - wd_ops = &p6_wd_ops; - break; - case 15: - wd_ops = &p4_wd_ops; - break; - default: - return; - } - break; - } -} - -/* Interface to nmi.c */ - -int lapic_watchdog_init(unsigned nmi_hz) -{ - if (!wd_ops) { - probe_nmi_watchdog(); - if (!wd_ops) { - printk(KERN_INFO "NMI watchdog: CPU not supported\n"); - return -1; - } - - if (!wd_ops->reserve()) { - printk(KERN_ERR - "NMI watchdog: cannot reserve perfctrs\n"); - return -1; - } - } - - if (!(wd_ops->setup(nmi_hz))) { - printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n", - raw_smp_processor_id()); - return -1; - } - - return 0; -} - -void lapic_watchdog_stop(void) -{ - if (wd_ops) - wd_ops->stop(); -} - -unsigned lapic_adjust_nmi_hz(unsigned hz) -{ - struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - if (wd->perfctr_msr == MSR_P6_PERFCTR0 || - wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1) - hz = adjust_for_32bit_ctr(hz); - return hz; -} - -int __kprobes lapic_wd_event(unsigned nmi_hz) -{ - struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - u64 ctr; - - rdmsrl(wd->perfctr_msr, ctr); - if (ctr & wd_ops->checkbit) /* perfctr still running? */ - return 0; - - wd_ops->rearm(wd, nmi_hz); - return 1; -} diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 083e99d1b7d..f0a0624eea5 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -316,12 +316,6 @@ notrace static void __cpuinit start_secondary(void *unused) */ check_tsc_sync_target(); - if (nmi_watchdog == NMI_IO_APIC) { - legacy_pic->mask(0); - enable_NMI_through_LVT0(); - legacy_pic->unmask(0); - } - /* This must be done before setting cpu_online_mask */ set_cpu_sibling_map(raw_smp_processor_id()); wmb(); @@ -1061,8 +1055,6 @@ static int __init smp_sanity_check(unsigned max_cpus) printk(KERN_INFO "SMP mode deactivated.\n"); smpboot_clear_io_apic(); - localise_nmi_watchdog(); - connect_bsp_APIC(); setup_local_APIC(); end_local_APIC_setup(); @@ -1196,7 +1188,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus) #ifdef CONFIG_X86_IO_APIC setup_ioapic_dest(); #endif - check_nmi_watchdog(); mtrr_aps_init(); } @@ -1341,8 +1332,6 @@ int native_cpu_disable(void) if (cpu == 0) return -EBUSY; - if (nmi_watchdog == NMI_LOCAL_APIC) - stop_apic_nmi_watchdog(NULL); clear_local_APIC(); cpu_disable_common(); diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index fb5cc5e14cf..25a28a24593 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -22,10 +22,6 @@ #include #include -#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC) -int timer_ack; -#endif - #ifdef CONFIG_X86_64 volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; #endif @@ -63,20 +59,6 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id) /* Keep nmi watchdog up to date */ inc_irq_stat(irq0_irqs); - /* Optimized out for !IO_APIC and x86_64 */ - if (timer_ack) { - /* - * Subtle, when I/O APICs are used we have to ack timer IRQ - * manually to deassert NMI lines for the watchdog if run - * on an 82489DX-based system. - */ - raw_spin_lock(&i8259A_lock); - outb(0x0c, PIC_MASTER_OCW3); - /* Ack the IRQ; AEOI will end it automatically. */ - inb(PIC_MASTER_POLL); - raw_spin_unlock(&i8259A_lock); - } - global_clock_event->event_handler(global_clock_event); /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */ diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index db30d9cb9dd..f02c179c255 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -437,14 +437,12 @@ do_nmi(struct pt_regs *regs, long error_code) void stop_nmi(void) { - acpi_nmi_disable(); ignore_nmis++; } void restart_nmi(void) { ignore_nmis--; - acpi_nmi_enable(); } /* May run on IST stack. */ diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c index e3ecb71b579..0636dd93cef 100644 --- a/arch/x86/oprofile/nmi_timer_int.c +++ b/arch/x86/oprofile/nmi_timer_int.c @@ -58,9 +58,6 @@ static void timer_stop(void) int __init op_nmi_timer_init(struct oprofile_operations *ops) { - if ((nmi_watchdog != NMI_IO_APIC) || (atomic_read(&nmi_active) <= 0)) - return -ENODEV; - ops->start = timer_start; ops->stop = timer_stop; ops->cpu_type = "timer"; diff --git a/drivers/acpi/acpica/nsinit.c b/drivers/acpi/acpica/nsinit.c index 660a2728908..0cac7ec0d2e 100644 --- a/drivers/acpi/acpica/nsinit.c +++ b/drivers/acpi/acpica/nsinit.c @@ -577,9 +577,7 @@ acpi_ns_init_one_device(acpi_handle obj_handle, * as possible (without an NMI being received in the middle of * this) - so disable NMIs and initialize the device: */ - acpi_nmi_disable(); status = acpi_ns_evaluate(info); - acpi_nmi_enable(); if (ACPI_SUCCESS(status)) { walk_info->num_INI++; diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c index 3d77116e463..c19f4a20794 100644 --- a/drivers/watchdog/hpwdt.c +++ b/drivers/watchdog/hpwdt.c @@ -649,12 +649,7 @@ static void __devinit hpwdt_check_nmi_decoding(struct pci_dev *dev) * If nmi_watchdog is turned off then we can turn on * our nmi decoding capability. */ - if (!nmi_watchdog_active()) - hpwdt_nmi_decoding = 1; - else - dev_warn(&dev->dev, "NMI decoding is disabled. To enable this " - "functionality you must reboot with nmi_watchdog=0 " - "and load the hpwdt driver with priority=1.\n"); + hpwdt_nmi_decoding = 1; } #else static void __devinit hpwdt_check_nmi_decoding(struct pci_dev *dev) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 0cb3e5c246d..1c451e6ecc1 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -25,8 +25,6 @@ static inline void touch_nmi_watchdog(void) #else extern void touch_nmi_watchdog(void); #endif -static inline void acpi_nmi_disable(void) { } -static inline void acpi_nmi_enable(void) { } /* * Create trigger_all_cpu_backtrace() out of the arch-provided -- cgit v1.2.3-70-g09d2 From 48c5ccae88dcd989d9de507e8510313c6cbd352b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 13 Nov 2010 19:32:29 +0100 Subject: sched: Simplify cpu-hot-unplug task migration While discussing the need for sched_idle_next(), Oleg remarked that since try_to_wake_up() ensures sleeping tasks will end up running on a sane cpu, we can do away with migrate_live_tasks(). If we then extend the existing hack of migrating current from CPU_DYING to migrating the full rq worth of tasks from CPU_DYING, the need for the sched_idle_next() abomination disappears as well, since idle will be the only possible thread left after the migration thread stops. This greatly simplifies the hot-unplug task migration path, as can be seen from the resulting code reduction (and about half the new lines are comments). Suggested-by: Oleg Nesterov Signed-off-by: Peter Zijlstra LKML-Reference: <1289851597.2109.547.camel@laptop> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 - kernel/cpu.c | 16 ++-- kernel/sched.c | 206 +++++++++++++++----------------------------------- 3 files changed, 67 insertions(+), 158 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 3cd70cf91fd..29d953abb5a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1871,14 +1871,11 @@ extern void sched_clock_idle_sleep_event(void); extern void sched_clock_idle_wakeup_event(u64 delta_ns); #ifdef CONFIG_HOTPLUG_CPU -extern void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p); extern void idle_task_exit(void); #else static inline void idle_task_exit(void) {} #endif -extern void sched_idle_next(void); - #if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) extern void wake_up_idle_cpu(int cpu); #else diff --git a/kernel/cpu.c b/kernel/cpu.c index f6e726f1849..8615aa65d92 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -189,7 +189,6 @@ static inline void check_for_tasks(int cpu) } struct take_cpu_down_param { - struct task_struct *caller; unsigned long mod; void *hcpu; }; @@ -208,11 +207,6 @@ static int __ref take_cpu_down(void *_param) cpu_notify(CPU_DYING | param->mod, param->hcpu); - if (task_cpu(param->caller) == cpu) - move_task_off_dead_cpu(cpu, param->caller); - /* Force idle task to run as soon as we yield: it should - immediately notice cpu is offline and die quickly. */ - sched_idle_next(); return 0; } @@ -223,7 +217,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) void *hcpu = (void *)(long)cpu; unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0; struct take_cpu_down_param tcd_param = { - .caller = current, .mod = mod, .hcpu = hcpu, }; @@ -253,9 +246,12 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) } BUG_ON(cpu_online(cpu)); - /* Wait for it to sleep (leaving idle task). */ - while (!idle_cpu(cpu)) - yield(); + /* + * The migration_call() CPU_DYING callback will have removed all + * runnable tasks from the cpu, there's only the idle task left now + * that the migration thread is done doing the stop_machine thing. + */ + BUG_ON(!idle_cpu(cpu)); /* This actually kills the CPU. */ __cpu_die(cpu); diff --git a/kernel/sched.c b/kernel/sched.c index 41f18695b73..b0d5f1b24a3 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2366,18 +2366,15 @@ static int select_fallback_rq(int cpu, struct task_struct *p) return dest_cpu; /* No more Mr. Nice Guy. */ - if (unlikely(dest_cpu >= nr_cpu_ids)) { - dest_cpu = cpuset_cpus_allowed_fallback(p); - /* - * Don't tell them about moving exiting tasks or - * kernel threads (both mm NULL), since they never - * leave kernel. - */ - if (p->mm && printk_ratelimit()) { - printk(KERN_INFO "process %d (%s) no " - "longer affine to cpu%d\n", - task_pid_nr(p), p->comm, cpu); - } + dest_cpu = cpuset_cpus_allowed_fallback(p); + /* + * Don't tell them about moving exiting tasks or + * kernel threads (both mm NULL), since they never + * leave kernel. + */ + if (p->mm && printk_ratelimit()) { + printk(KERN_INFO "process %d (%s) no longer affine to cpu%d\n", + task_pid_nr(p), p->comm, cpu); } return dest_cpu; @@ -5712,29 +5709,20 @@ static int migration_cpu_stop(void *data) } #ifdef CONFIG_HOTPLUG_CPU + /* - * Figure out where task on dead CPU should go, use force if necessary. + * Ensures that the idle task is using init_mm right before its cpu goes + * offline. */ -void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) +void idle_task_exit(void) { - struct rq *rq = cpu_rq(dead_cpu); - int needs_cpu, uninitialized_var(dest_cpu); - unsigned long flags; + struct mm_struct *mm = current->active_mm; - local_irq_save(flags); + BUG_ON(cpu_online(smp_processor_id())); - raw_spin_lock(&rq->lock); - needs_cpu = (task_cpu(p) == dead_cpu) && (p->state != TASK_WAKING); - if (needs_cpu) - dest_cpu = select_fallback_rq(dead_cpu, p); - raw_spin_unlock(&rq->lock); - /* - * It can only fail if we race with set_cpus_allowed(), - * in the racer should migrate the task anyway. - */ - if (needs_cpu) - __migrate_task(p, dead_cpu, dest_cpu); - local_irq_restore(flags); + if (mm != &init_mm) + switch_mm(mm, &init_mm, current); + mmdrop(mm); } /* @@ -5747,128 +5735,69 @@ void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) static void migrate_nr_uninterruptible(struct rq *rq_src) { struct rq *rq_dest = cpu_rq(cpumask_any(cpu_active_mask)); - unsigned long flags; - local_irq_save(flags); - double_rq_lock(rq_src, rq_dest); rq_dest->nr_uninterruptible += rq_src->nr_uninterruptible; rq_src->nr_uninterruptible = 0; - double_rq_unlock(rq_src, rq_dest); - local_irq_restore(flags); -} - -/* Run through task list and migrate tasks from the dead cpu. */ -static void migrate_live_tasks(int src_cpu) -{ - struct task_struct *p, *t; - - read_lock(&tasklist_lock); - - do_each_thread(t, p) { - if (p == current) - continue; - - if (task_cpu(p) == src_cpu) - move_task_off_dead_cpu(src_cpu, p); - } while_each_thread(t, p); - - read_unlock(&tasklist_lock); } /* - * Schedules idle task to be the next runnable task on current CPU. - * It does so by boosting its priority to highest possible. - * Used by CPU offline code. + * remove the tasks which were accounted by rq from calc_load_tasks. */ -void sched_idle_next(void) +static void calc_global_load_remove(struct rq *rq) { - int this_cpu = smp_processor_id(); - struct rq *rq = cpu_rq(this_cpu); - struct task_struct *p = rq->idle; - unsigned long flags; - - /* cpu has to be offline */ - BUG_ON(cpu_online(this_cpu)); - - /* - * Strictly not necessary since rest of the CPUs are stopped by now - * and interrupts disabled on the current cpu. - */ - raw_spin_lock_irqsave(&rq->lock, flags); - - __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1); - - activate_task(rq, p, 0); - - raw_spin_unlock_irqrestore(&rq->lock, flags); + atomic_long_sub(rq->calc_load_active, &calc_load_tasks); + rq->calc_load_active = 0; } /* - * Ensures that the idle task is using init_mm right before its cpu goes - * offline. + * Migrate all tasks from the rq, sleeping tasks will be migrated by + * try_to_wake_up()->select_task_rq(). + * + * Called with rq->lock held even though we'er in stop_machine() and + * there's no concurrency possible, we hold the required locks anyway + * because of lock validation efforts. */ -void idle_task_exit(void) -{ - struct mm_struct *mm = current->active_mm; - - BUG_ON(cpu_online(smp_processor_id())); - - if (mm != &init_mm) - switch_mm(mm, &init_mm, current); - mmdrop(mm); -} - -/* called under rq->lock with disabled interrupts */ -static void migrate_dead(unsigned int dead_cpu, struct task_struct *p) +static void migrate_tasks(unsigned int dead_cpu) { struct rq *rq = cpu_rq(dead_cpu); - - /* Must be exiting, otherwise would be on tasklist. */ - BUG_ON(!p->exit_state); - - /* Cannot have done final schedule yet: would have vanished. */ - BUG_ON(p->state == TASK_DEAD); - - get_task_struct(p); + struct task_struct *next, *stop = rq->stop; + int dest_cpu; /* - * Drop lock around migration; if someone else moves it, - * that's OK. No task can be added to this CPU, so iteration is - * fine. + * Fudge the rq selection such that the below task selection loop + * doesn't get stuck on the currently eligible stop task. + * + * We're currently inside stop_machine() and the rq is either stuck + * in the stop_machine_cpu_stop() loop, or we're executing this code, + * either way we should never end up calling schedule() until we're + * done here. */ - raw_spin_unlock_irq(&rq->lock); - move_task_off_dead_cpu(dead_cpu, p); - raw_spin_lock_irq(&rq->lock); - - put_task_struct(p); -} - -/* release_task() removes task from tasklist, so we won't find dead tasks. */ -static void migrate_dead_tasks(unsigned int dead_cpu) -{ - struct rq *rq = cpu_rq(dead_cpu); - struct task_struct *next; + rq->stop = NULL; for ( ; ; ) { - if (!rq->nr_running) + /* + * There's this thread running, bail when that's the only + * remaining thread. + */ + if (rq->nr_running == 1) break; + next = pick_next_task(rq); - if (!next) - break; + BUG_ON(!next); next->sched_class->put_prev_task(rq, next); - migrate_dead(dead_cpu, next); + /* Find suitable destination for @next, with force if needed. */ + dest_cpu = select_fallback_rq(dead_cpu, next); + raw_spin_unlock(&rq->lock); + + __migrate_task(next, dead_cpu, dest_cpu); + + raw_spin_lock(&rq->lock); } -} -/* - * remove the tasks which were accounted by rq from calc_load_tasks. - */ -static void calc_global_load_remove(struct rq *rq) -{ - atomic_long_sub(rq->calc_load_active, &calc_load_tasks); - rq->calc_load_active = 0; + rq->stop = stop; } + #endif /* CONFIG_HOTPLUG_CPU */ #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) @@ -6078,15 +6007,13 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) unsigned long flags; struct rq *rq = cpu_rq(cpu); - switch (action) { + switch (action & ~CPU_TASKS_FROZEN) { case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: rq->calc_load_update = calc_load_update; break; case CPU_ONLINE: - case CPU_ONLINE_FROZEN: /* Update our root-domain */ raw_spin_lock_irqsave(&rq->lock, flags); if (rq->rd) { @@ -6098,30 +6025,19 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) break; #ifdef CONFIG_HOTPLUG_CPU - case CPU_DEAD: - case CPU_DEAD_FROZEN: - migrate_live_tasks(cpu); - /* Idle task back to normal (off runqueue, low prio) */ - raw_spin_lock_irq(&rq->lock); - deactivate_task(rq, rq->idle, 0); - __setscheduler(rq, rq->idle, SCHED_NORMAL, 0); - rq->idle->sched_class = &idle_sched_class; - migrate_dead_tasks(cpu); - raw_spin_unlock_irq(&rq->lock); - migrate_nr_uninterruptible(rq); - BUG_ON(rq->nr_running != 0); - calc_global_load_remove(rq); - break; - case CPU_DYING: - case CPU_DYING_FROZEN: /* Update our root-domain */ raw_spin_lock_irqsave(&rq->lock, flags); if (rq->rd) { BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); set_rq_offline(rq); } + migrate_tasks(cpu); + BUG_ON(rq->nr_running != 1); /* the migration thread */ raw_spin_unlock_irqrestore(&rq->lock, flags); + + migrate_nr_uninterruptible(rq); + calc_global_load_remove(rq); break; #endif } -- cgit v1.2.3-70-g09d2 From 2069dd75c7d0f49355939e5586daf5a9ab216db7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 15 Nov 2010 15:47:00 -0800 Subject: sched: Rewrite tg_shares_up) By tracking a per-cpu load-avg for each cfs_rq and folding it into a global task_group load on each tick we can rework tg_shares_up to be strictly per-cpu. This should improve cpu-cgroup performance for smp systems significantly. [ Paul: changed to use queueing cfs_rq + bug fixes ] Signed-off-by: Paul Turner Signed-off-by: Peter Zijlstra LKML-Reference: <20101115234937.580480400@google.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 - kernel/sched.c | 173 ++++++++++++------------------------------------ kernel/sched_debug.c | 15 +++-- kernel/sched_fair.c | 164 +++++++++++++++++++++++++++++---------------- kernel/sched_features.h | 2 - kernel/sysctl.c | 19 ------ 6 files changed, 162 insertions(+), 213 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 29d953abb5a..8abb8aa5966 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1885,8 +1885,6 @@ static inline void wake_up_idle_cpu(int cpu) { } extern unsigned int sysctl_sched_latency; extern unsigned int sysctl_sched_min_granularity; extern unsigned int sysctl_sched_wakeup_granularity; -extern unsigned int sysctl_sched_shares_ratelimit; -extern unsigned int sysctl_sched_shares_thresh; extern unsigned int sysctl_sched_child_runs_first; enum sched_tunable_scaling { diff --git a/kernel/sched.c b/kernel/sched.c index b0d5f1b24a3..e2f1a3024a9 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -253,6 +253,8 @@ struct task_group { /* runqueue "owned" by this group on each cpu */ struct cfs_rq **cfs_rq; unsigned long shares; + + atomic_t load_weight; #endif #ifdef CONFIG_RT_GROUP_SCHED @@ -359,15 +361,11 @@ struct cfs_rq { */ unsigned long h_load; - /* - * this cpu's part of tg->shares - */ - unsigned long shares; + u64 load_avg; + u64 load_period; + u64 load_stamp; - /* - * load.weight at the time we set shares - */ - unsigned long rq_weight; + unsigned long load_contribution; #endif #endif }; @@ -806,20 +804,6 @@ late_initcall(sched_init_debug); */ const_debug unsigned int sysctl_sched_nr_migrate = 32; -/* - * ratelimit for updating the group shares. - * default: 0.25ms - */ -unsigned int sysctl_sched_shares_ratelimit = 250000; -unsigned int normalized_sysctl_sched_shares_ratelimit = 250000; - -/* - * Inject some fuzzyness into changing the per-cpu group shares - * this avoids remote rq-locks at the expense of fairness. - * default: 4 - */ -unsigned int sysctl_sched_shares_thresh = 4; - /* * period over which we average the RT time consumption, measured * in ms. @@ -1369,6 +1353,12 @@ static inline void update_load_sub(struct load_weight *lw, unsigned long dec) lw->inv_weight = 0; } +static inline void update_load_set(struct load_weight *lw, unsigned long w) +{ + lw->weight = w; + lw->inv_weight = 0; +} + /* * To aid in avoiding the subversion of "niceness" due to uneven distribution * of tasks with abnormal "nice" values across CPUs the contribution that @@ -1557,97 +1547,44 @@ static unsigned long cpu_avg_load_per_task(int cpu) #ifdef CONFIG_FAIR_GROUP_SCHED -static __read_mostly unsigned long __percpu *update_shares_data; - -static void __set_se_shares(struct sched_entity *se, unsigned long shares); - -/* - * Calculate and set the cpu's group shares. - */ -static void update_group_shares_cpu(struct task_group *tg, int cpu, - unsigned long sd_shares, - unsigned long sd_rq_weight, - unsigned long *usd_rq_weight) -{ - unsigned long shares, rq_weight; - int boost = 0; - - rq_weight = usd_rq_weight[cpu]; - if (!rq_weight) { - boost = 1; - rq_weight = NICE_0_LOAD; - } - - /* - * \Sum_j shares_j * rq_weight_i - * shares_i = ----------------------------- - * \Sum_j rq_weight_j - */ - shares = (sd_shares * rq_weight) / sd_rq_weight; - shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES); - - if (abs(shares - tg->se[cpu]->load.weight) > - sysctl_sched_shares_thresh) { - struct rq *rq = cpu_rq(cpu); - unsigned long flags; - - raw_spin_lock_irqsave(&rq->lock, flags); - tg->cfs_rq[cpu]->rq_weight = boost ? 0 : rq_weight; - tg->cfs_rq[cpu]->shares = boost ? 0 : shares; - __set_se_shares(tg->se[cpu], shares); - raw_spin_unlock_irqrestore(&rq->lock, flags); - } -} +static void update_cfs_load(struct cfs_rq *cfs_rq); +static void update_cfs_shares(struct cfs_rq *cfs_rq); /* - * Re-compute the task group their per cpu shares over the given domain. - * This needs to be done in a bottom-up fashion because the rq weight of a - * parent group depends on the shares of its child groups. + * update tg->load_weight by folding this cpu's load_avg */ static int tg_shares_up(struct task_group *tg, void *data) { - unsigned long weight, rq_weight = 0, sum_weight = 0, shares = 0; - unsigned long *usd_rq_weight; - struct sched_domain *sd = data; + long load_avg; + struct cfs_rq *cfs_rq; unsigned long flags; - int i; + int cpu = (long)data; + struct rq *rq; - if (!tg->se[0]) + if (!tg->se[cpu]) return 0; - local_irq_save(flags); - usd_rq_weight = per_cpu_ptr(update_shares_data, smp_processor_id()); - - for_each_cpu(i, sched_domain_span(sd)) { - weight = tg->cfs_rq[i]->load.weight; - usd_rq_weight[i] = weight; - - rq_weight += weight; - /* - * If there are currently no tasks on the cpu pretend there - * is one of average load so that when a new task gets to - * run here it will not get delayed by group starvation. - */ - if (!weight) - weight = NICE_0_LOAD; + rq = cpu_rq(cpu); + cfs_rq = tg->cfs_rq[cpu]; - sum_weight += weight; - shares += tg->cfs_rq[i]->shares; - } + raw_spin_lock_irqsave(&rq->lock, flags); - if (!rq_weight) - rq_weight = sum_weight; + update_rq_clock(rq); + update_cfs_load(cfs_rq); - if ((!shares && rq_weight) || shares > tg->shares) - shares = tg->shares; + load_avg = div64_u64(cfs_rq->load_avg, cfs_rq->load_period+1); + load_avg -= cfs_rq->load_contribution; - if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE)) - shares = tg->shares; + atomic_add(load_avg, &tg->load_weight); + cfs_rq->load_contribution += load_avg; - for_each_cpu(i, sched_domain_span(sd)) - update_group_shares_cpu(tg, i, shares, rq_weight, usd_rq_weight); + /* + * We need to update shares after updating tg->load_weight in + * order to adjust the weight of groups with long running tasks. + */ + update_cfs_shares(cfs_rq); - local_irq_restore(flags); + raw_spin_unlock_irqrestore(&rq->lock, flags); return 0; } @@ -1666,7 +1603,7 @@ static int tg_load_down(struct task_group *tg, void *data) load = cpu_rq(cpu)->load.weight; } else { load = tg->parent->cfs_rq[cpu]->h_load; - load *= tg->cfs_rq[cpu]->shares; + load *= tg->se[cpu]->load.weight; load /= tg->parent->cfs_rq[cpu]->load.weight + 1; } @@ -1675,21 +1612,16 @@ static int tg_load_down(struct task_group *tg, void *data) return 0; } -static void update_shares(struct sched_domain *sd) +static void update_shares(long cpu) { - s64 elapsed; - u64 now; - if (root_task_group_empty()) return; - now = local_clock(); - elapsed = now - sd->last_update; + /* + * XXX: replace with an on-demand list + */ - if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) { - sd->last_update = now; - walk_tg_tree(tg_nop, tg_shares_up, sd); - } + walk_tg_tree(tg_nop, tg_shares_up, (void *)cpu); } static void update_h_load(long cpu) @@ -1699,7 +1631,7 @@ static void update_h_load(long cpu) #else -static inline void update_shares(struct sched_domain *sd) +static inline void update_shares(int cpu) { } @@ -1824,15 +1756,6 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2) #endif -#ifdef CONFIG_FAIR_GROUP_SCHED -static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares) -{ -#ifdef CONFIG_SMP - cfs_rq->shares = shares; -#endif -} -#endif - static void calc_load_account_idle(struct rq *this_rq); static void update_sysctl(void); static int get_update_sysctl_factor(void); @@ -5551,7 +5474,6 @@ static void update_sysctl(void) SET_SYSCTL(sched_min_granularity); SET_SYSCTL(sched_latency); SET_SYSCTL(sched_wakeup_granularity); - SET_SYSCTL(sched_shares_ratelimit); #undef SET_SYSCTL } @@ -7787,8 +7709,7 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, se->cfs_rq = parent->my_q; se->my_q = cfs_rq; - se->load.weight = tg->shares; - se->load.inv_weight = 0; + update_load_set(&se->load, tg->shares); se->parent = parent; } #endif @@ -7881,10 +7802,6 @@ void __init sched_init(void) #endif /* CONFIG_CGROUP_SCHED */ -#if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP - update_shares_data = __alloc_percpu(nr_cpu_ids * sizeof(unsigned long), - __alignof__(unsigned long)); -#endif for_each_possible_cpu(i) { struct rq *rq; @@ -8452,8 +8369,7 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares) if (on_rq) dequeue_entity(cfs_rq, se, 0); - se->load.weight = shares; - se->load.inv_weight = 0; + update_load_set(&se->load, shares); if (on_rq) enqueue_entity(cfs_rq, se, 0); @@ -8510,7 +8426,6 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) /* * force a rebalance */ - cfs_rq_set_shares(tg->cfs_rq[i], 0); set_se_shares(tg->se[i], shares); } diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 2e1b0d17dd9..e6590e7312e 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -202,15 +202,22 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) spread0 = min_vruntime - rq0_min_vruntime; SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread0", SPLIT_NS(spread0)); - SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running); - SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight); - SEQ_printf(m, " .%-30s: %d\n", "nr_spread_over", cfs_rq->nr_spread_over); + SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running); + SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight); #ifdef CONFIG_FAIR_GROUP_SCHED #ifdef CONFIG_SMP - SEQ_printf(m, " .%-30s: %lu\n", "shares", cfs_rq->shares); + SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "load_avg", + SPLIT_NS(cfs_rq->load_avg)); + SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "load_period", + SPLIT_NS(cfs_rq->load_period)); + SEQ_printf(m, " .%-30s: %ld\n", "load_contrib", + cfs_rq->load_contribution); + SEQ_printf(m, " .%-30s: %d\n", "load_tg", + atomic_read(&tg->load_weight)); #endif + print_cfs_group_stats(m, cpu, cfs_rq->tg); #endif } diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index f4f6a8326dd..d86544b4151 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -417,7 +417,6 @@ int sched_proc_update_handler(struct ctl_table *table, int write, WRT_SYSCTL(sched_min_granularity); WRT_SYSCTL(sched_latency); WRT_SYSCTL(sched_wakeup_granularity); - WRT_SYSCTL(sched_shares_ratelimit); #undef WRT_SYSCTL return 0; @@ -633,7 +632,6 @@ account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se) list_add(&se->group_node, &cfs_rq->tasks); } cfs_rq->nr_running++; - se->on_rq = 1; } static void @@ -647,9 +645,89 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) list_del_init(&se->group_node); } cfs_rq->nr_running--; - se->on_rq = 0; } +#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED +static void update_cfs_load(struct cfs_rq *cfs_rq) +{ + u64 period = sched_avg_period(); + u64 now, delta; + + if (!cfs_rq) + return; + + now = rq_of(cfs_rq)->clock; + delta = now - cfs_rq->load_stamp; + + cfs_rq->load_stamp = now; + cfs_rq->load_period += delta; + cfs_rq->load_avg += delta * cfs_rq->load.weight; + + while (cfs_rq->load_period > period) { + /* + * Inline assembly required to prevent the compiler + * optimising this loop into a divmod call. + * See __iter_div_u64_rem() for another example of this. + */ + asm("" : "+rm" (cfs_rq->load_period)); + cfs_rq->load_period /= 2; + cfs_rq->load_avg /= 2; + } +} + +static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, + unsigned long weight) +{ + if (se->on_rq) + account_entity_dequeue(cfs_rq, se); + + update_load_set(&se->load, weight); + + if (se->on_rq) + account_entity_enqueue(cfs_rq, se); +} + +static void update_cfs_shares(struct cfs_rq *cfs_rq) +{ + struct task_group *tg; + struct sched_entity *se; + long load_weight, load, shares; + + if (!cfs_rq) + return; + + tg = cfs_rq->tg; + se = tg->se[cpu_of(rq_of(cfs_rq))]; + if (!se) + return; + + load = cfs_rq->load.weight; + + load_weight = atomic_read(&tg->load_weight); + load_weight -= cfs_rq->load_contribution; + load_weight += load; + + shares = (tg->shares * load); + if (load_weight) + shares /= load_weight; + + if (shares < MIN_SHARES) + shares = MIN_SHARES; + if (shares > tg->shares) + shares = tg->shares; + + reweight_entity(cfs_rq_of(se), se, shares); +} +#else /* CONFIG_FAIR_GROUP_SCHED */ +static inline void update_cfs_load(struct cfs_rq *cfs_rq) +{ +} + +static inline void update_cfs_shares(struct cfs_rq *cfs_rq) +{ +} +#endif /* CONFIG_FAIR_GROUP_SCHED */ + static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) { #ifdef CONFIG_SCHEDSTATS @@ -771,7 +849,9 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) * Update run-time statistics of the 'current'. */ update_curr(cfs_rq); + update_cfs_load(cfs_rq); account_entity_enqueue(cfs_rq, se); + update_cfs_shares(cfs_rq); if (flags & ENQUEUE_WAKEUP) { place_entity(cfs_rq, se, 0); @@ -782,6 +862,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) check_spread(cfs_rq, se); if (se != cfs_rq->curr) __enqueue_entity(cfs_rq, se); + se->on_rq = 1; } static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) @@ -825,8 +906,11 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) if (se != cfs_rq->curr) __dequeue_entity(cfs_rq, se); + se->on_rq = 0; + update_cfs_load(cfs_rq); account_entity_dequeue(cfs_rq, se); update_min_vruntime(cfs_rq); + update_cfs_shares(cfs_rq); /* * Normalize the entity after updating the min_vruntime because the @@ -1055,6 +1139,13 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) flags = ENQUEUE_WAKEUP; } + for_each_sched_entity(se) { + struct cfs_rq *cfs_rq = cfs_rq_of(se); + + update_cfs_load(cfs_rq); + update_cfs_shares(cfs_rq); + } + hrtick_update(rq); } @@ -1071,12 +1162,20 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) for_each_sched_entity(se) { cfs_rq = cfs_rq_of(se); dequeue_entity(cfs_rq, se, flags); + /* Don't dequeue parent if it has other entities besides us */ if (cfs_rq->load.weight) break; flags |= DEQUEUE_SLEEP; } + for_each_sched_entity(se) { + struct cfs_rq *cfs_rq = cfs_rq_of(se); + + update_cfs_load(cfs_rq); + update_cfs_shares(cfs_rq); + } + hrtick_update(rq); } @@ -1143,51 +1242,20 @@ static void task_waking_fair(struct rq *rq, struct task_struct *p) * Adding load to a group doesn't make a group heavier, but can cause movement * of group shares between cpus. Assuming the shares were perfectly aligned one * can calculate the shift in shares. - * - * The problem is that perfectly aligning the shares is rather expensive, hence - * we try to avoid doing that too often - see update_shares(), which ratelimits - * this change. - * - * We compensate this by not only taking the current delta into account, but - * also considering the delta between when the shares were last adjusted and - * now. - * - * We still saw a performance dip, some tracing learned us that between - * cgroup:/ and cgroup:/foo balancing the number of affine wakeups increased - * significantly. Therefore try to bias the error in direction of failing - * the affine wakeup. - * */ -static long effective_load(struct task_group *tg, int cpu, - long wl, long wg) +static long effective_load(struct task_group *tg, int cpu, long wl, long wg) { struct sched_entity *se = tg->se[cpu]; if (!tg->parent) return wl; - /* - * By not taking the decrease of shares on the other cpu into - * account our error leans towards reducing the affine wakeups. - */ - if (!wl && sched_feat(ASYM_EFF_LOAD)) - return wl; - for_each_sched_entity(se) { long S, rw, s, a, b; - long more_w; - - /* - * Instead of using this increment, also add the difference - * between when the shares were last updated and now. - */ - more_w = se->my_q->load.weight - se->my_q->rq_weight; - wl += more_w; - wg += more_w; S = se->my_q->tg->shares; - s = se->my_q->shares; - rw = se->my_q->rq_weight; + s = se->load.weight; + rw = se->my_q->load.weight; a = S*(rw + wl); b = S*rw + s*wg; @@ -1508,23 +1576,6 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_ sd = tmp; } -#ifdef CONFIG_FAIR_GROUP_SCHED - if (sched_feat(LB_SHARES_UPDATE)) { - /* - * Pick the largest domain to update shares over - */ - tmp = sd; - if (affine_sd && (!tmp || affine_sd->span_weight > sd->span_weight)) - tmp = affine_sd; - - if (tmp) { - raw_spin_unlock(&rq->lock); - update_shares(tmp); - raw_spin_lock(&rq->lock); - } - } -#endif - if (affine_sd) { if (cpu == prev_cpu || wake_affine(affine_sd, p, sync)) return select_idle_sibling(p, cpu); @@ -3014,7 +3065,6 @@ static int load_balance(int this_cpu, struct rq *this_rq, schedstat_inc(sd, lb_count[idle]); redo: - update_shares(sd); group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle, cpus, balance); @@ -3156,8 +3206,6 @@ out_one_pinned: else ld_moved = 0; out: - if (ld_moved) - update_shares(sd); return ld_moved; } @@ -3549,6 +3597,8 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) int update_next_balance = 0; int need_serialize; + update_shares(cpu); + for_each_domain(cpu, sd) { if (!(sd->flags & SD_LOAD_BALANCE)) continue; diff --git a/kernel/sched_features.h b/kernel/sched_features.h index 185f920ec1a..68e69acc29b 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h @@ -52,8 +52,6 @@ SCHED_FEAT(ARCH_POWER, 0) SCHED_FEAT(HRTICK, 0) SCHED_FEAT(DOUBLE_TICK, 0) SCHED_FEAT(LB_BIAS, 1) -SCHED_FEAT(LB_SHARES_UPDATE, 1) -SCHED_FEAT(ASYM_EFF_LOAD, 1) /* * Spin-wait on mutex acquisition when the mutex owner is running on diff --git a/kernel/sysctl.c b/kernel/sysctl.c index b65bf634035..3132b25193d 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -259,8 +259,6 @@ static int min_wakeup_granularity_ns; /* 0 usecs */ static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */ static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE; static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1; -static int min_sched_shares_ratelimit = 100000; /* 100 usec */ -static int max_sched_shares_ratelimit = NSEC_PER_SEC; /* 1 second */ #endif #ifdef CONFIG_COMPACTION @@ -304,15 +302,6 @@ static struct ctl_table kern_table[] = { .extra1 = &min_wakeup_granularity_ns, .extra2 = &max_wakeup_granularity_ns, }, - { - .procname = "sched_shares_ratelimit", - .data = &sysctl_sched_shares_ratelimit, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = sched_proc_update_handler, - .extra1 = &min_sched_shares_ratelimit, - .extra2 = &max_sched_shares_ratelimit, - }, { .procname = "sched_tunable_scaling", .data = &sysctl_sched_tunable_scaling, @@ -322,14 +311,6 @@ static struct ctl_table kern_table[] = { .extra1 = &min_sched_tunable_scaling, .extra2 = &max_sched_tunable_scaling, }, - { - .procname = "sched_shares_thresh", - .data = &sysctl_sched_shares_thresh, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - }, { .procname = "sched_migration_cost", .data = &sysctl_sched_migration_cost, -- cgit v1.2.3-70-g09d2 From a7a4f8a752ec734b2eab904fc863d5dc873de338 Mon Sep 17 00:00:00 2001 From: Paul Turner Date: Mon, 15 Nov 2010 15:47:06 -0800 Subject: sched: Add sysctl_sched_shares_window Introduce a new sysctl for the shares window and disambiguate it from sched_time_avg. A 10ms window appears to be a good compromise between accuracy and performance. Signed-off-by: Paul Turner Signed-off-by: Peter Zijlstra LKML-Reference: <20101115234938.112173964@google.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + kernel/sched_fair.c | 9 ++++++++- kernel/sysctl.c | 7 +++++++ 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 8abb8aa5966..840f1277492 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1900,6 +1900,7 @@ extern unsigned int sysctl_sched_migration_cost; extern unsigned int sysctl_sched_nr_migrate; extern unsigned int sysctl_sched_time_avg; extern unsigned int sysctl_timer_migration; +extern unsigned int sysctl_sched_shares_window; int sched_proc_update_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index b320753aa6c..6c84439ce98 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -89,6 +89,13 @@ unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL; const_debug unsigned int sysctl_sched_migration_cost = 500000UL; +/* + * The exponential sliding window over which load is averaged for shares + * distribution. + * (default: 10msec) + */ +unsigned int __read_mostly sysctl_sched_shares_window = 10000000UL; + static const struct sched_class fair_sched_class; /************************************************************** @@ -688,7 +695,7 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) #if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED static void update_cfs_load(struct cfs_rq *cfs_rq) { - u64 period = sched_avg_period(); + u64 period = sysctl_sched_shares_window; u64 now, delta; unsigned long load = cfs_rq->load.weight; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 3132b25193d..9b520d74f05 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -332,6 +332,13 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "sched_shares_window", + .data = &sysctl_sched_shares_window, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { .procname = "timer_migration", .data = &sysctl_timer_migration, -- cgit v1.2.3-70-g09d2 From 84e1c6bb38eb318e456558b610396d9f1afaabf0 Mon Sep 17 00:00:00 2001 From: matthieu castet Date: Tue, 16 Nov 2010 22:35:16 +0100 Subject: x86: Add RO/NX protection for loadable kernel modules This patch is a logical extension of the protection provided by CONFIG_DEBUG_RODATA to LKMs. The protection is provided by splitting module_core and module_init into three logical parts each and setting appropriate page access permissions for each individual section: 1. Code: RO+X 2. RO data: RO+NX 3. RW data: RW+NX In order to achieve proper protection, layout_sections() have been modified to align each of the three parts mentioned above onto page boundary. Next, the corresponding page access permissions are set right before successful exit from load_module(). Further, free_module() and sys_init_module have been modified to set module_core and module_init as RW+NX right before calling module_free(). By default, the original section layout and access flags are preserved. When compiled with CONFIG_DEBUG_SET_MODULE_RONX=y, the patch will page-align each group of sections to ensure that each page contains only one type of content and will enforce RO/NX for each group of pages. -v1: Initial proof-of-concept patch. -v2: The patch have been re-written to reduce the number of #ifdefs and to make it architecture-agnostic. Code formatting has also been corrected. -v3: Opportunistic RO/NX protection is now unconditional. Section page-alignment is enabled when CONFIG_DEBUG_RODATA=y. -v4: Removed most macros and improved coding style. -v5: Changed page-alignment and RO/NX section size calculation -v6: Fixed comments. Restricted RO/NX enforcement to x86 only -v7: Introduced CONFIG_DEBUG_SET_MODULE_RONX, added calls to set_all_modules_text_rw() and set_all_modules_text_ro() in ftrace -v8: updated for compatibility with linux 2.6.33-rc5 -v9: coding style fixes -v10: more coding style fixes -v11: minor adjustments for -tip -v12: minor adjustments for v2.6.35-rc2-tip -v13: minor adjustments for v2.6.37-rc1-tip Signed-off-by: Siarhei Liakh Signed-off-by: Xuxian Jiang Acked-by: Arjan van de Ven Reviewed-by: James Morris Signed-off-by: H. Peter Anvin Cc: Andi Kleen Cc: Rusty Russell Cc: Stephen Rothwell Cc: Dave Jones Cc: Kees Cook Cc: Linus Torvalds LKML-Reference: <4CE2F914.9070106@free.fr> [ minor cleanliness edits, -v14: build failure fix ] Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.debug | 11 +++ arch/x86/kernel/ftrace.c | 3 + include/linux/module.h | 11 ++- kernel/module.c | 171 ++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 193 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index b59ee765414..45143bbcfe5 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -117,6 +117,17 @@ config DEBUG_RODATA_TEST feature as well as for the change_page_attr() infrastructure. If in doubt, say "N" +config DEBUG_SET_MODULE_RONX + bool "Set loadable kernel module data as NX and text as RO" + depends on MODULES + ---help--- + This option helps catch unintended modifications to loadable + kernel module's text and read-only data. It also prevents execution + of module data. Such protection may interfere with run-time code + patching and dynamic kernel tracing - and they might also protect + against certain classes of kernel exploits. + If in doubt, say "N". + config DEBUG_NX_TEST tristate "Testcase for the NX non-executable stack feature" depends on DEBUG_KERNEL && m diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 3afb33f14d2..298448656b6 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -49,6 +50,7 @@ static DEFINE_PER_CPU(int, save_modifying_code); int ftrace_arch_code_modify_prepare(void) { set_kernel_text_rw(); + set_all_modules_text_rw(); modifying_code = 1; return 0; } @@ -56,6 +58,7 @@ int ftrace_arch_code_modify_prepare(void) int ftrace_arch_code_modify_post_process(void) { modifying_code = 0; + set_all_modules_text_ro(); set_kernel_text_ro(); return 0; } diff --git a/include/linux/module.h b/include/linux/module.h index b29e7458b96..ddaa689d71b 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -308,6 +308,9 @@ struct module /* The size of the executable code in each section. */ unsigned int init_text_size, core_text_size; + /* Size of RO sections of the module (text+rodata) */ + unsigned int init_ro_size, core_ro_size; + /* Arch-specific module values */ struct mod_arch_specific arch; @@ -672,7 +675,6 @@ static inline int module_get_iter_tracepoints(struct tracepoint_iter *iter) { return 0; } - #endif /* CONFIG_MODULES */ #ifdef CONFIG_SYSFS @@ -687,6 +689,13 @@ extern int module_sysfs_initialized; #define __MODULE_STRING(x) __stringify(x) +#ifdef CONFIG_DEBUG_SET_MODULE_RONX +extern void set_all_modules_text_rw(void); +extern void set_all_modules_text_ro(void); +#else +static inline void set_all_modules_text_rw(void) { } +static inline void set_all_modules_text_ro(void) { } +#endif #ifdef CONFIG_GENERIC_BUG void module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *, diff --git a/kernel/module.c b/kernel/module.c index 437a74a7524..ba421e6b4ad 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -56,6 +56,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -70,6 +71,26 @@ #define ARCH_SHF_SMALL 0 #endif +/* + * Modules' sections will be aligned on page boundaries + * to ensure complete separation of code and data, but + * only when CONFIG_DEBUG_SET_MODULE_RONX=y + */ +#ifdef CONFIG_DEBUG_SET_MODULE_RONX +# define debug_align(X) ALIGN(X, PAGE_SIZE) +#else +# define debug_align(X) (X) +#endif + +/* + * Given BASE and SIZE this macro calculates the number of pages the + * memory regions occupies + */ +#define MOD_NUMBER_OF_PAGES(BASE, SIZE) (((SIZE) > 0) ? \ + (PFN_DOWN((unsigned long)(BASE) + (SIZE) - 1) - \ + PFN_DOWN((unsigned long)BASE) + 1) \ + : (0UL)) + /* If this is set, the section belongs in the init part of the module */ #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) @@ -1542,6 +1563,115 @@ static int __unlink_module(void *_mod) return 0; } +#ifdef CONFIG_DEBUG_SET_MODULE_RONX +/* + * LKM RO/NX protection: protect module's text/ro-data + * from modification and any data from execution. + */ +void set_page_attributes(void *start, void *end, int (*set)(unsigned long start, int num_pages)) +{ + unsigned long begin_pfn = PFN_DOWN((unsigned long)start); + unsigned long end_pfn = PFN_DOWN((unsigned long)end); + + if (end_pfn > begin_pfn) + set(begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn); +} + +static void set_section_ro_nx(void *base, + unsigned long text_size, + unsigned long ro_size, + unsigned long total_size) +{ + /* begin and end PFNs of the current subsection */ + unsigned long begin_pfn; + unsigned long end_pfn; + + /* + * Set RO for module text and RO-data: + * - Always protect first page. + * - Do not protect last partial page. + */ + if (ro_size > 0) + set_page_attributes(base, base + ro_size, set_memory_ro); + + /* + * Set NX permissions for module data: + * - Do not protect first partial page. + * - Always protect last page. + */ + if (total_size > text_size) { + begin_pfn = PFN_UP((unsigned long)base + text_size); + end_pfn = PFN_UP((unsigned long)base + total_size); + if (end_pfn > begin_pfn) + set_memory_nx(begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn); + } +} + +/* Setting memory back to RW+NX before releasing it */ +void unset_section_ro_nx(struct module *mod, void *module_region) +{ + unsigned long total_pages; + + if (mod->module_core == module_region) { + /* Set core as NX+RW */ + total_pages = MOD_NUMBER_OF_PAGES(mod->module_core, mod->core_size); + set_memory_nx((unsigned long)mod->module_core, total_pages); + set_memory_rw((unsigned long)mod->module_core, total_pages); + + } else if (mod->module_init == module_region) { + /* Set init as NX+RW */ + total_pages = MOD_NUMBER_OF_PAGES(mod->module_init, mod->init_size); + set_memory_nx((unsigned long)mod->module_init, total_pages); + set_memory_rw((unsigned long)mod->module_init, total_pages); + } +} + +/* Iterate through all modules and set each module's text as RW */ +void set_all_modules_text_rw() +{ + struct module *mod; + + mutex_lock(&module_mutex); + list_for_each_entry_rcu(mod, &modules, list) { + if ((mod->module_core) && (mod->core_text_size)) { + set_page_attributes(mod->module_core, + mod->module_core + mod->core_text_size, + set_memory_rw); + } + if ((mod->module_init) && (mod->init_text_size)) { + set_page_attributes(mod->module_init, + mod->module_init + mod->init_text_size, + set_memory_rw); + } + } + mutex_unlock(&module_mutex); +} + +/* Iterate through all modules and set each module's text as RO */ +void set_all_modules_text_ro() +{ + struct module *mod; + + mutex_lock(&module_mutex); + list_for_each_entry_rcu(mod, &modules, list) { + if ((mod->module_core) && (mod->core_text_size)) { + set_page_attributes(mod->module_core, + mod->module_core + mod->core_text_size, + set_memory_ro); + } + if ((mod->module_init) && (mod->init_text_size)) { + set_page_attributes(mod->module_init, + mod->module_init + mod->init_text_size, + set_memory_ro); + } + } + mutex_unlock(&module_mutex); +} +#else +static inline void set_section_ro_nx(void *base, unsigned long text_size, unsigned long ro_size, unsigned long total_size) { } +static inline void unset_section_ro_nx(struct module *mod, void *module_region) { } +#endif + /* Free a module, remove from lists, etc. */ static void free_module(struct module *mod) { @@ -1566,6 +1696,7 @@ static void free_module(struct module *mod) destroy_params(mod->kp, mod->num_kp); /* This may be NULL, but that's OK */ + unset_section_ro_nx(mod, mod->module_init); module_free(mod, mod->module_init); kfree(mod->args); percpu_modfree(mod); @@ -1574,6 +1705,7 @@ static void free_module(struct module *mod) lockdep_free_key_range(mod->module_core, mod->core_size); /* Finally, free the core (containing the module structure) */ + unset_section_ro_nx(mod, mod->module_core); module_free(mod, mod->module_core); #ifdef CONFIG_MPU @@ -1777,8 +1909,19 @@ static void layout_sections(struct module *mod, struct load_info *info) s->sh_entsize = get_offset(mod, &mod->core_size, s, i); DEBUGP("\t%s\n", name); } - if (m == 0) + switch (m) { + case 0: /* executable */ + mod->core_size = debug_align(mod->core_size); mod->core_text_size = mod->core_size; + break; + case 1: /* RO: text and ro-data */ + mod->core_size = debug_align(mod->core_size); + mod->core_ro_size = mod->core_size; + break; + case 3: /* whole core */ + mod->core_size = debug_align(mod->core_size); + break; + } } DEBUGP("Init section allocation order:\n"); @@ -1796,8 +1939,19 @@ static void layout_sections(struct module *mod, struct load_info *info) | INIT_OFFSET_MASK); DEBUGP("\t%s\n", sname); } - if (m == 0) + switch (m) { + case 0: /* executable */ + mod->init_size = debug_align(mod->init_size); mod->init_text_size = mod->init_size; + break; + case 1: /* RO: text and ro-data */ + mod->init_size = debug_align(mod->init_size); + mod->init_ro_size = mod->init_size; + break; + case 3: /* whole init */ + mod->init_size = debug_align(mod->init_size); + break; + } } } @@ -2650,6 +2804,18 @@ static struct module *load_module(void __user *umod, kfree(info.strmap); free_copy(&info); + /* Set RO and NX regions for core */ + set_section_ro_nx(mod->module_core, + mod->core_text_size, + mod->core_ro_size, + mod->core_size); + + /* Set RO and NX regions for init */ + set_section_ro_nx(mod->module_init, + mod->init_text_size, + mod->init_ro_size, + mod->init_size); + /* Done! */ trace_module_load(mod); return mod; @@ -2753,6 +2919,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, mod->symtab = mod->core_symtab; mod->strtab = mod->core_strtab; #endif + unset_section_ro_nx(mod, mod->module_init); module_free(mod, mod->module_init); mod->module_init = NULL; mod->init_size = 0; -- cgit v1.2.3-70-g09d2 From 9c0729dc8062bed96189bd14ac6d4920f3958743 Mon Sep 17 00:00:00 2001 From: Soeren Sandmann Pedersen Date: Fri, 5 Nov 2010 05:59:39 -0400 Subject: x86: Eliminate bp argument from the stack tracing routines The various stack tracing routines take a 'bp' argument in which the caller is supposed to provide the base pointer to use, or 0 if doesn't have one. Since bp is garbage whenever CONFIG_FRAME_POINTER is not defined, this means all callers in principle should either always pass 0, or be conditional on CONFIG_FRAME_POINTER. However, there are only really three use cases for stack tracing: (a) Trace the current task, including IRQ stack if any (b) Trace the current task, but skip IRQ stack (c) Trace some other task In all cases, if CONFIG_FRAME_POINTER is not defined, bp should just be 0. If it _is_ defined, then - in case (a) bp should be gotten directly from the CPU's register, so the caller should pass NULL for regs, - in case (b) the caller should should pass the IRQ registers to dump_trace(), - in case (c) bp should be gotten from the top of the task's stack, so the caller should pass NULL for regs. Hence, the bp argument is not necessary because the combination of task and regs is sufficient to determine an appropriate value for bp. This patch introduces a new inline function stack_frame(task, regs) that computes the desired bp. This function is then called from the two versions of dump_stack(). Signed-off-by: Soren Sandmann Acked-by: Steven Rostedt Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. Peter Anvin Cc: Peter Zijlstra Cc: Arjan van de Ven , Cc: Frederic Weisbecker , Cc: Arnaldo Carvalho de Melo , LKML-Reference: > Signed-off-by: Frederic Weisbecker --- arch/x86/include/asm/kdebug.h | 2 +- arch/x86/include/asm/stacktrace.h | 33 ++++++++++++++++++++++++++++++--- arch/x86/kernel/cpu/perf_event.c | 2 +- arch/x86/kernel/dumpstack.c | 12 ++++++------ arch/x86/kernel/dumpstack_32.c | 25 +++++++------------------ arch/x86/kernel/dumpstack_64.c | 24 +++++++----------------- arch/x86/kernel/process.c | 3 +-- arch/x86/kernel/stacktrace.c | 8 ++++---- arch/x86/mm/kmemcheck/error.c | 2 +- arch/x86/oprofile/backtrace.c | 2 +- include/linux/stacktrace.h | 4 +++- 11 files changed, 62 insertions(+), 55 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h index 5bdfca86581..f23eb252846 100644 --- a/arch/x86/include/asm/kdebug.h +++ b/arch/x86/include/asm/kdebug.h @@ -28,7 +28,7 @@ extern void die(const char *, struct pt_regs *,long); extern int __must_check __die(const char *, struct pt_regs *, long); extern void show_registers(struct pt_regs *regs); extern void show_trace(struct task_struct *t, struct pt_regs *regs, - unsigned long *sp, unsigned long bp); + unsigned long *sp); extern void __show_regs(struct pt_regs *regs, int all); extern void show_regs(struct pt_regs *regs); extern unsigned long oops_begin(void); diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 2b16a2ad23d..52b5c7ed360 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -7,6 +7,7 @@ #define _ASM_X86_STACKTRACE_H #include +#include extern int kstack_depth_to_print; @@ -46,7 +47,7 @@ struct stacktrace_ops { }; void dump_trace(struct task_struct *tsk, struct pt_regs *regs, - unsigned long *stack, unsigned long bp, + unsigned long *stack, const struct stacktrace_ops *ops, void *data); #ifdef CONFIG_X86_32 @@ -57,13 +58,39 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) #endif +#ifdef CONFIG_FRAME_POINTER +static inline unsigned long +stack_frame(struct task_struct *task, struct pt_regs *regs) +{ + unsigned long bp; + + if (regs) + return regs->bp; + + if (task == current) { + /* Grab bp right from our regs */ + get_bp(bp); + return bp; + } + + /* bp is the last reg pushed by switch_to */ + return *(unsigned long *)task->thread.sp; +} +#else +static inline unsigned long +stack_frame(struct task_struct *task, struct pt_regs *regs) +{ + return 0; +} +#endif + extern void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp, char *log_lvl); + unsigned long *stack, char *log_lvl); extern void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *sp, unsigned long bp, char *log_lvl); + unsigned long *sp, char *log_lvl); extern unsigned int code_bytes; diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index ed6310183ef..461a85dcaba 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1666,7 +1666,7 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) perf_callchain_store(entry, regs->ip); - dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); + dump_trace(NULL, regs, NULL, &backtrace_ops, entry); } #ifdef CONFIG_COMPAT diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 6e8752c1bd5..8474c998cbd 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -175,21 +175,21 @@ static const struct stacktrace_ops print_trace_ops = { void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp, char *log_lvl) + unsigned long *stack, char *log_lvl) { printk("%sCall Trace:\n", log_lvl); - dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); + dump_trace(task, regs, stack, &print_trace_ops, log_lvl); } void show_trace(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp) + unsigned long *stack) { - show_trace_log_lvl(task, regs, stack, bp, ""); + show_trace_log_lvl(task, regs, stack, ""); } void show_stack(struct task_struct *task, unsigned long *sp) { - show_stack_log_lvl(task, NULL, sp, 0, ""); + show_stack_log_lvl(task, NULL, sp, ""); } /* @@ -210,7 +210,7 @@ void dump_stack(void) init_utsname()->release, (int)strcspn(init_utsname()->version, " "), init_utsname()->version); - show_trace(NULL, NULL, &stack, bp); + show_trace(NULL, NULL, &stack); } EXPORT_SYMBOL(dump_stack); diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 1bc7f75a5bd..74cc1eda384 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -17,11 +17,12 @@ #include -void dump_trace(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp, +void dump_trace(struct task_struct *task, + struct pt_regs *regs, unsigned long *stack, const struct stacktrace_ops *ops, void *data) { int graph = 0; + unsigned long bp; if (!task) task = current; @@ -34,18 +35,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, stack = (unsigned long *)task->thread.sp; } -#ifdef CONFIG_FRAME_POINTER - if (!bp) { - if (task == current) { - /* Grab bp right from our regs */ - get_bp(bp); - } else { - /* bp is the last reg pushed by switch_to */ - bp = *(unsigned long *) task->thread.sp; - } - } -#endif - + bp = stack_frame(task, regs); for (;;) { struct thread_info *context; @@ -65,7 +55,7 @@ EXPORT_SYMBOL(dump_trace); void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *sp, unsigned long bp, char *log_lvl) + unsigned long *sp, char *log_lvl) { unsigned long *stack; int i; @@ -87,7 +77,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, touch_nmi_watchdog(); } printk(KERN_CONT "\n"); - show_trace_log_lvl(task, regs, sp, bp, log_lvl); + show_trace_log_lvl(task, regs, sp, log_lvl); } @@ -112,8 +102,7 @@ void show_registers(struct pt_regs *regs) u8 *ip; printk(KERN_EMERG "Stack:\n"); - show_stack_log_lvl(NULL, regs, ®s->sp, - 0, KERN_EMERG); + show_stack_log_lvl(NULL, regs, ®s->sp, KERN_EMERG); printk(KERN_EMERG "Code: "); diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 6a340485249..64101335de1 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -139,8 +139,8 @@ fixup_bp_irq_link(unsigned long bp, unsigned long *stack, * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack */ -void dump_trace(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp, +void dump_trace(struct task_struct *task, + struct pt_regs *regs, unsigned long *stack, const struct stacktrace_ops *ops, void *data) { const unsigned cpu = get_cpu(); @@ -149,6 +149,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, unsigned used = 0; struct thread_info *tinfo; int graph = 0; + unsigned long bp; if (!task) task = current; @@ -160,18 +161,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, stack = (unsigned long *)task->thread.sp; } -#ifdef CONFIG_FRAME_POINTER - if (!bp) { - if (task == current) { - /* Grab bp right from our regs */ - get_bp(bp); - } else { - /* bp is the last reg pushed by switch_to */ - bp = *(unsigned long *) task->thread.sp; - } - } -#endif - + bp = stack_frame(task, regs); /* * Print function call entries in all stacks, starting at the * current stack address. If the stacks consist of nested @@ -235,7 +225,7 @@ EXPORT_SYMBOL(dump_trace); void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *sp, unsigned long bp, char *log_lvl) + unsigned long *sp, char *log_lvl) { unsigned long *irq_stack_end; unsigned long *irq_stack; @@ -279,7 +269,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, preempt_enable(); printk(KERN_CONT "\n"); - show_trace_log_lvl(task, regs, sp, bp, log_lvl); + show_trace_log_lvl(task, regs, sp, log_lvl); } void show_registers(struct pt_regs *regs) @@ -308,7 +298,7 @@ void show_registers(struct pt_regs *regs) printk(KERN_EMERG "Stack:\n"); show_stack_log_lvl(NULL, regs, (unsigned long *)sp, - regs->bp, KERN_EMERG); + KERN_EMERG); printk(KERN_EMERG "Code: "); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 57d1868a86a..96ed1aac543 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -91,8 +91,7 @@ void exit_thread(void) void show_regs(struct pt_regs *regs) { show_registers(regs); - show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs), - regs->bp); + show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs)); } void show_regs_common(void) diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index b53c525368a..938c8e10a19 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -73,22 +73,22 @@ static const struct stacktrace_ops save_stack_ops_nosched = { */ void save_stack_trace(struct stack_trace *trace) { - dump_trace(current, NULL, NULL, 0, &save_stack_ops, trace); + dump_trace(current, NULL, NULL, &save_stack_ops, trace); if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; } EXPORT_SYMBOL_GPL(save_stack_trace); -void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp) +void save_stack_trace_regs(struct stack_trace *trace, struct pt_regs *regs) { - dump_trace(current, NULL, NULL, bp, &save_stack_ops, trace); + dump_trace(current, regs, NULL, &save_stack_ops, trace); if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; } void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) { - dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace); + dump_trace(tsk, NULL, NULL, &save_stack_ops_nosched, trace); if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; } diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c index af3b6c8a436..704a37ceddd 100644 --- a/arch/x86/mm/kmemcheck/error.c +++ b/arch/x86/mm/kmemcheck/error.c @@ -185,7 +185,7 @@ void kmemcheck_error_save(enum kmemcheck_shadow state, e->trace.entries = e->trace_entries; e->trace.max_entries = ARRAY_SIZE(e->trace_entries); e->trace.skip = 0; - save_stack_trace_bp(&e->trace, regs->bp); + save_stack_trace_regs(&e->trace, regs); /* Round address down to nearest 16 bytes */ shadow_copy = kmemcheck_shadow_lookup(address diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index 2d49d4e19a3..72cbec14d78 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c @@ -126,7 +126,7 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth) if (!user_mode_vm(regs)) { unsigned long stack = kernel_stack_pointer(regs); if (depth) - dump_trace(NULL, regs, (unsigned long *)stack, 0, + dump_trace(NULL, regs, (unsigned long *)stack, &backtrace_ops, &depth); return; } diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index 51efbef38fb..25310f1d7f3 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -2,6 +2,7 @@ #define __LINUX_STACKTRACE_H struct task_struct; +struct pt_regs; #ifdef CONFIG_STACKTRACE struct task_struct; @@ -13,7 +14,8 @@ struct stack_trace { }; extern void save_stack_trace(struct stack_trace *trace); -extern void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp); +extern void save_stack_trace_regs(struct stack_trace *trace, + struct pt_regs *regs); extern void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace); -- cgit v1.2.3-70-g09d2 From 61c32659b12c44e62de32fbf99f7e4ca783dc38b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 18 Nov 2010 01:39:17 +0100 Subject: tracing: New flag to allow non privileged users to use a trace event This adds a new trace event internal flag that allows them to be used in perf by non privileged users in case of task bound tracing. This is desired for syscalls tracepoint because they don't leak global system informations, like some other tracepoints. Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: Steven Rostedt Cc: Li Zefan Cc: Jason Baron --- include/linux/ftrace_event.h | 2 ++ kernel/perf_event.c | 9 --------- kernel/trace/trace_event_perf.c | 31 ++++++++++++++++++++++++++++++- 3 files changed, 32 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 8beabb958f6..312dce7e0d5 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -154,12 +154,14 @@ enum { TRACE_EVENT_FL_ENABLED_BIT, TRACE_EVENT_FL_FILTERED_BIT, TRACE_EVENT_FL_RECORDED_CMD_BIT, + TRACE_EVENT_FL_CAP_ANY_BIT, }; enum { TRACE_EVENT_FL_ENABLED = (1 << TRACE_EVENT_FL_ENABLED_BIT), TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT), TRACE_EVENT_FL_RECORDED_CMD = (1 << TRACE_EVENT_FL_RECORDED_CMD_BIT), + TRACE_EVENT_FL_CAP_ANY = (1 << TRACE_EVENT_FL_CAP_ANY_BIT), }; struct ftrace_event_call { diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 517d827f498..ee1e903f983 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4747,15 +4747,6 @@ static int perf_tp_event_init(struct perf_event *event) if (event->attr.type != PERF_TYPE_TRACEPOINT) return -ENOENT; - /* - * Raw tracepoint data is a severe data leak, only allow root to - * have these. - */ - if ((event->attr.sample_type & PERF_SAMPLE_RAW) && - perf_paranoid_tracepoint_raw() && - !capable(CAP_SYS_ADMIN)) - return -EPERM; - err = perf_trace_init(event); if (err) return err; diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 39c059ca670..19a359d5e6d 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -21,17 +21,46 @@ typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)]) /* Count the events in use (per event id, not per instance) */ static int total_ref_count; +static int perf_trace_event_perm(struct ftrace_event_call *tp_event, + struct perf_event *p_event) +{ + /* No tracing, just counting, so no obvious leak */ + if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW)) + return 0; + + /* Some events are ok to be traced by non-root users... */ + if (p_event->attach_state == PERF_ATTACH_TASK) { + if (tp_event->flags & TRACE_EVENT_FL_CAP_ANY) + return 0; + } + + /* + * ...otherwise raw tracepoint data can be a severe data leak, + * only allow root to have these. + */ + if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + return 0; +} + static int perf_trace_event_init(struct ftrace_event_call *tp_event, struct perf_event *p_event) { struct hlist_head __percpu *list; - int ret = -ENOMEM; + int ret; int cpu; + ret = perf_trace_event_perm(tp_event, p_event); + if (ret) + return ret; + p_event->tp_event = tp_event; if (tp_event->perf_refcount++ > 0) return 0; + ret = -ENOMEM; + list = alloc_percpu(struct hlist_head); if (!list) goto fail; -- cgit v1.2.3-70-g09d2 From 1ed0c5971159974185653170543a764cc061c857 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 18 Nov 2010 01:46:57 +0100 Subject: tracing: New macro to set up initial event flags value This introduces the new TRACE_EVENT_FLAGS() macro in order to set up initial event flags value. This macro must simply follow the definition of a trace event and take the event name and the flag value as parameters: TRACE_EVENT(my_event, ..... .... ); TRACE_EVENT_FLAGS(my_event, 1) This will set up 1 as the initial my_event->flags value. Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: Steven Rostedt Cc: Li Zefan Cc: Jason Baron --- include/linux/tracepoint.h | 4 ++++ include/trace/ftrace.h | 12 ++++++++++++ 2 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index a4a90b6726c..5a6074fcd81 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -234,6 +234,8 @@ do_trace: \ PARAMS(void *__data, proto), \ PARAMS(__data, args)) +#define TRACE_EVENT_FLAGS(event, flag) + #endif /* DECLARE_TRACE */ #ifndef TRACE_EVENT @@ -354,4 +356,6 @@ do_trace: \ assign, print, reg, unreg) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) +#define TRACE_EVENT_FLAGS(event, flag) + #endif /* ifdef TRACE_EVENT (see note above) */ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index a9377c0083a..6f540123d43 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -82,6 +82,15 @@ TRACE_EVENT(name, PARAMS(proto), PARAMS(args), \ PARAMS(tstruct), PARAMS(assign), PARAMS(print)) \ +#undef TRACE_EVENT_FLAGS +#define TRACE_EVENT_FLAGS(name, value) \ + static int __init trace_init_flags_##name(void) \ + { \ + event_##name.flags = value; \ + return 0; \ + } \ + early_initcall(trace_init_flags_##name); + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) @@ -129,6 +138,9 @@ #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) +#undef TRACE_EVENT_FLAGS +#define TRACE_EVENT_FLAGS(event, flag) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* -- cgit v1.2.3-70-g09d2 From 53cf810b1934f08a68e131aeeb16267a778f43df Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 18 Nov 2010 02:11:42 +0100 Subject: tracing: Allow syscall trace events for non privileged users As for the raw syscalls events, individual syscall events won't leak system wide information on task bound tracing. Allow non privileged users to use them in such workflow. Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: Steven Rostedt Cc: Li Zefan Cc: Jason Baron --- include/linux/ftrace_event.h | 8 ++++++++ include/linux/syscalls.h | 6 ++++-- include/trace/ftrace.h | 7 +------ 3 files changed, 13 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 312dce7e0d5..725bf6bd39f 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -198,6 +198,14 @@ struct ftrace_event_call { #endif }; +#define __TRACE_EVENT_FLAGS(name, value) \ + static int __init trace_init_flags_##name(void) \ + { \ + event_##name.flags = value; \ + return 0; \ + } \ + early_initcall(trace_init_flags_##name); + #define PERF_MAX_TRACE_SIZE 2048 #define MAX_FILTER_PRED 32 diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index cacc27a0e28..13b9731d30c 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -137,7 +137,8 @@ extern struct trace_event_functions exit_syscall_print_funcs; .class = &event_class_syscall_enter, \ .event.funcs = &enter_syscall_print_funcs, \ .data = (void *)&__syscall_meta_##sname,\ - } + }; \ + __TRACE_EVENT_FLAGS(enter_##sname, TRACE_EVENT_FL_CAP_ANY) #define SYSCALL_TRACE_EXIT_EVENT(sname) \ static struct syscall_metadata \ @@ -152,7 +153,8 @@ extern struct trace_event_functions exit_syscall_print_funcs; .class = &event_class_syscall_exit, \ .event.funcs = &exit_syscall_print_funcs, \ .data = (void *)&__syscall_meta_##sname,\ - } + }; \ + __TRACE_EVENT_FLAGS(exit_##sname, TRACE_EVENT_FL_CAP_ANY) #define SYSCALL_METADATA(sname, nb) \ SYSCALL_TRACE_ENTER_EVENT(sname); \ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 6f540123d43..e718a917d89 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -84,12 +84,7 @@ #undef TRACE_EVENT_FLAGS #define TRACE_EVENT_FLAGS(name, value) \ - static int __init trace_init_flags_##name(void) \ - { \ - event_##name.flags = value; \ - return 0; \ - } \ - early_initcall(trace_init_flags_##name); + __TRACE_EVENT_FLAGS(name, value) #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -- cgit v1.2.3-70-g09d2 From 423478cde453eebdfcfebf4b8d378d8f5d49b853 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 18 Nov 2010 02:21:26 +0100 Subject: tracing: Remove useless syscall ftrace_event_call declaration It is defined right after, which makes the declaration completely useless. Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: Steven Rostedt Cc: Li Zefan Cc: Jason Baron --- include/linux/syscalls.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 13b9731d30c..18cd0684fc4 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -127,8 +127,6 @@ extern struct trace_event_functions exit_syscall_print_funcs; #define SYSCALL_TRACE_ENTER_EVENT(sname) \ static struct syscall_metadata \ __attribute__((__aligned__(4))) __syscall_meta_##sname; \ - static struct ftrace_event_call \ - __attribute__((__aligned__(4))) event_enter_##sname; \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) \ @@ -143,8 +141,6 @@ extern struct trace_event_functions exit_syscall_print_funcs; #define SYSCALL_TRACE_EXIT_EVENT(sname) \ static struct syscall_metadata \ __attribute__((__aligned__(4))) __syscall_meta_##sname; \ - static struct ftrace_event_call \ - __attribute__((__aligned__(4))) event_exit_##sname; \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) \ -- cgit v1.2.3-70-g09d2 From 866f3b25a2eb60d7529c227a0ecd80c3aba443fd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Nov 2010 09:33:19 -0800 Subject: bonding: IGMP handling cleanup Instead of iterating in_dev->mc_list from bonding driver, its better to call a helper function provided by igmp.c Details of implementation (locking) are private to igmp code. ip_mc_rejoin_group(struct ip_mc_list *im) becomes ip_mc_rejoin_groups(struct in_device *in_dev); Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 8 ++------ include/linux/igmp.h | 2 +- net/ipv4/igmp.c | 32 +++++++++++++++++++------------- 3 files changed, 22 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 518844852f0..e588b2e1c3b 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -873,15 +873,11 @@ static void bond_mc_del(struct bonding *bond, void *addr) static void __bond_resend_igmp_join_requests(struct net_device *dev) { struct in_device *in_dev; - struct ip_mc_list *im; rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); - if (in_dev) { - for (im = in_dev->mc_list; im; im = im->next) - ip_mc_rejoin_group(im); - } - + if (in_dev) + ip_mc_rejoin_groups(in_dev); rcu_read_unlock(); } diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 7d164670f26..c4987f26510 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -238,7 +238,7 @@ extern void ip_mc_unmap(struct in_device *); extern void ip_mc_remap(struct in_device *); extern void ip_mc_dec_group(struct in_device *in_dev, __be32 addr); extern void ip_mc_inc_group(struct in_device *in_dev, __be32 addr); -extern void ip_mc_rejoin_group(struct ip_mc_list *im); +extern void ip_mc_rejoin_groups(struct in_device *in_dev); #endif #endif diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index afb1e82a59f..50f6bc1a002 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1267,26 +1267,32 @@ EXPORT_SYMBOL(ip_mc_inc_group); /* * Resend IGMP JOIN report; used for bonding. + * Called with rcu_read_lock() */ -void ip_mc_rejoin_group(struct ip_mc_list *im) +void ip_mc_rejoin_groups(struct in_device *in_dev) { #ifdef CONFIG_IP_MULTICAST - struct in_device *in_dev = im->interface; + struct ip_mc_list *im; + int type; - if (im->multiaddr == IGMP_ALL_HOSTS) - return; + for_each_pmc_rcu(in_dev, im) { + if (im->multiaddr == IGMP_ALL_HOSTS) + continue; - /* a failover is happening and switches - * must be notified immediately */ - if (IGMP_V1_SEEN(in_dev)) - igmp_send_report(in_dev, im, IGMP_HOST_MEMBERSHIP_REPORT); - else if (IGMP_V2_SEEN(in_dev)) - igmp_send_report(in_dev, im, IGMPV2_HOST_MEMBERSHIP_REPORT); - else - igmp_send_report(in_dev, im, IGMPV3_HOST_MEMBERSHIP_REPORT); + /* a failover is happening and switches + * must be notified immediately + */ + if (IGMP_V1_SEEN(in_dev)) + type = IGMP_HOST_MEMBERSHIP_REPORT; + else if (IGMP_V2_SEEN(in_dev)) + type = IGMPV2_HOST_MEMBERSHIP_REPORT; + else + type = IGMPV3_HOST_MEMBERSHIP_REPORT; + igmp_send_report(in_dev, im, type); + } #endif } -EXPORT_SYMBOL(ip_mc_rejoin_group); +EXPORT_SYMBOL(ip_mc_rejoin_groups); /* * A socket has left a multicast group on device dev -- cgit v1.2.3-70-g09d2 From 4c3710afbc333c33100739dec10662b4ee64e219 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Tue, 16 Nov 2010 20:28:24 +0000 Subject: net: move definitions of BPF_S_* to net/core/filter.c BPF_S_* are used internally, should not be exposed to the others. Signed-off-by: Changli Gao Acked-by: Eric Dumazet Acked-by: Hagen Paul Pfeifer Signed-off-by: David S. Miller --- include/linux/filter.h | 48 ------------------------------------------------ net/core/filter.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 48 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 69b43dbea6c..151f5d703b7 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -91,54 +91,6 @@ struct sock_fprog { /* Required for SO_ATTACH_FILTER. */ #define BPF_TAX 0x00 #define BPF_TXA 0x80 -enum { - BPF_S_RET_K = 0, - BPF_S_RET_A, - BPF_S_ALU_ADD_K, - BPF_S_ALU_ADD_X, - BPF_S_ALU_SUB_K, - BPF_S_ALU_SUB_X, - BPF_S_ALU_MUL_K, - BPF_S_ALU_MUL_X, - BPF_S_ALU_DIV_X, - BPF_S_ALU_AND_K, - BPF_S_ALU_AND_X, - BPF_S_ALU_OR_K, - BPF_S_ALU_OR_X, - BPF_S_ALU_LSH_K, - BPF_S_ALU_LSH_X, - BPF_S_ALU_RSH_K, - BPF_S_ALU_RSH_X, - BPF_S_ALU_NEG, - BPF_S_LD_W_ABS, - BPF_S_LD_H_ABS, - BPF_S_LD_B_ABS, - BPF_S_LD_W_LEN, - BPF_S_LD_W_IND, - BPF_S_LD_H_IND, - BPF_S_LD_B_IND, - BPF_S_LD_IMM, - BPF_S_LDX_W_LEN, - BPF_S_LDX_B_MSH, - BPF_S_LDX_IMM, - BPF_S_MISC_TAX, - BPF_S_MISC_TXA, - BPF_S_ALU_DIV_K, - BPF_S_LD_MEM, - BPF_S_LDX_MEM, - BPF_S_ST, - BPF_S_STX, - BPF_S_JMP_JA, - BPF_S_JMP_JEQ_K, - BPF_S_JMP_JEQ_X, - BPF_S_JMP_JGE_K, - BPF_S_JMP_JGE_X, - BPF_S_JMP_JGT_K, - BPF_S_JMP_JGT_X, - BPF_S_JMP_JSET_K, - BPF_S_JMP_JSET_X, -}; - #ifndef BPF_MAXINSNS #define BPF_MAXINSNS 4096 #endif diff --git a/net/core/filter.c b/net/core/filter.c index 03dc0710194..15a545d39cd 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -38,6 +38,54 @@ #include #include +enum { + BPF_S_RET_K = 0, + BPF_S_RET_A, + BPF_S_ALU_ADD_K, + BPF_S_ALU_ADD_X, + BPF_S_ALU_SUB_K, + BPF_S_ALU_SUB_X, + BPF_S_ALU_MUL_K, + BPF_S_ALU_MUL_X, + BPF_S_ALU_DIV_X, + BPF_S_ALU_AND_K, + BPF_S_ALU_AND_X, + BPF_S_ALU_OR_K, + BPF_S_ALU_OR_X, + BPF_S_ALU_LSH_K, + BPF_S_ALU_LSH_X, + BPF_S_ALU_RSH_K, + BPF_S_ALU_RSH_X, + BPF_S_ALU_NEG, + BPF_S_LD_W_ABS, + BPF_S_LD_H_ABS, + BPF_S_LD_B_ABS, + BPF_S_LD_W_LEN, + BPF_S_LD_W_IND, + BPF_S_LD_H_IND, + BPF_S_LD_B_IND, + BPF_S_LD_IMM, + BPF_S_LDX_W_LEN, + BPF_S_LDX_B_MSH, + BPF_S_LDX_IMM, + BPF_S_MISC_TAX, + BPF_S_MISC_TXA, + BPF_S_ALU_DIV_K, + BPF_S_LD_MEM, + BPF_S_LDX_MEM, + BPF_S_ST, + BPF_S_STX, + BPF_S_JMP_JA, + BPF_S_JMP_JEQ_K, + BPF_S_JMP_JEQ_X, + BPF_S_JMP_JGE_K, + BPF_S_JMP_JGE_X, + BPF_S_JMP_JGT_K, + BPF_S_JMP_JGT_X, + BPF_S_JMP_JSET_K, + BPF_S_JMP_JSET_X, +}; + /* No hurry in this branch */ static void *__load_pointer(struct sk_buff *skb, int k) { -- cgit v1.2.3-70-g09d2 From c5485a7e7569ab32eea240c850198519e2a765ef Mon Sep 17 00:00:00 2001 From: Bruno Randolf Date: Tue, 16 Nov 2010 10:58:37 +0900 Subject: lib: Add generic exponentially weighted moving average (EWMA) function This adds generic functions for calculating Exponentially Weighted Moving Averages (EWMA). This implementation makes use of a structure which keeps the EWMA parameters and a scaled up internal representation to reduce rounding errors. The original idea for this implementation came from the rt2x00 driver (rt2x00link.c). I would like to use it in several places in the mac80211 and ath5k code and I hope it can be useful in many other places in the kernel code. Signed-off-by: Bruno Randolf Reviewed-by: KOSAKI Motohiro Signed-off-by: John W. Linville --- include/linux/average.h | 32 +++++++++++++++++++++++++++ lib/Kconfig | 3 +++ lib/Makefile | 2 ++ lib/average.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 94 insertions(+) create mode 100644 include/linux/average.h create mode 100644 lib/average.c (limited to 'include/linux') diff --git a/include/linux/average.h b/include/linux/average.h new file mode 100644 index 00000000000..7706e40f95f --- /dev/null +++ b/include/linux/average.h @@ -0,0 +1,32 @@ +#ifndef _LINUX_AVERAGE_H +#define _LINUX_AVERAGE_H + +#include + +/* Exponentially weighted moving average (EWMA) */ + +/* For more documentation see lib/average.c */ + +struct ewma { + unsigned long internal; + unsigned long factor; + unsigned long weight; +}; + +extern void ewma_init(struct ewma *avg, unsigned long factor, + unsigned long weight); + +extern struct ewma *ewma_add(struct ewma *avg, unsigned long val); + +/** + * ewma_read() - Get average value + * @avg: Average structure + * + * Returns the average value held in @avg. + */ +static inline unsigned long ewma_read(const struct ewma *avg) +{ + return DIV_ROUND_CLOSEST(avg->internal, avg->factor); +} + +#endif /* _LINUX_AVERAGE_H */ diff --git a/lib/Kconfig b/lib/Kconfig index fa9bf2c0619..3116aa631af 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -210,4 +210,7 @@ config GENERIC_ATOMIC64 config LRU_CACHE tristate +config AVERAGE + bool + endmenu diff --git a/lib/Makefile b/lib/Makefile index e6a3763b821..76d3b851490 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -106,6 +106,8 @@ obj-$(CONFIG_GENERIC_ATOMIC64) += atomic64.o obj-$(CONFIG_ATOMIC64_SELFTEST) += atomic64_test.o +obj-$(CONFIG_AVERAGE) += average.o + hostprogs-y := gen_crc32table clean-files := crc32table.h diff --git a/lib/average.c b/lib/average.c new file mode 100644 index 00000000000..f1d1b4660c4 --- /dev/null +++ b/lib/average.c @@ -0,0 +1,57 @@ +/* + * lib/average.c + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include +#include +#include + +/** + * DOC: Exponentially Weighted Moving Average (EWMA) + * + * These are generic functions for calculating Exponentially Weighted Moving + * Averages (EWMA). We keep a structure with the EWMA parameters and a scaled + * up internal representation of the average value to prevent rounding errors. + * The factor for scaling up and the exponential weight (or decay rate) have to + * be specified thru the init fuction. The structure should not be accessed + * directly but only thru the helper functions. + */ + +/** + * ewma_init() - Initialize EWMA parameters + * @avg: Average structure + * @factor: Factor to use for the scaled up internal value. The maximum value + * of averages can be ULONG_MAX/(factor*weight). + * @weight: Exponential weight, or decay rate. This defines how fast the + * influence of older values decreases. Has to be bigger than 1. + * + * Initialize the EWMA parameters for a given struct ewma @avg. + */ +void ewma_init(struct ewma *avg, unsigned long factor, unsigned long weight) +{ + WARN_ON(weight <= 1 || factor == 0); + avg->internal = 0; + avg->weight = weight; + avg->factor = factor; +} +EXPORT_SYMBOL(ewma_init); + +/** + * ewma_add() - Exponentially weighted moving average (EWMA) + * @avg: Average structure + * @val: Current value + * + * Add a sample to the average. + */ +struct ewma *ewma_add(struct ewma *avg, unsigned long val) +{ + avg->internal = avg->internal ? + (((avg->internal * (avg->weight - 1)) + + (val * avg->factor)) / avg->weight) : + (val * avg->factor); + return avg; +} +EXPORT_SYMBOL(ewma_add); -- cgit v1.2.3-70-g09d2 From 86107fd170bc379869250eb7e1bd393a3a70e8ae Mon Sep 17 00:00:00 2001 From: Bruno Randolf Date: Tue, 16 Nov 2010 10:58:48 +0900 Subject: nl80211/mac80211: Report signal average Extend nl80211 to report an exponential weighted moving average (EWMA) of the signal value. Since the signal value usually fluctuates between different packets, an average can be more useful than the value of the last packet. This uses the recently added generic EWMA library function. Signed-off-by: Bruno Randolf Signed-off-by: John W. Linville --- include/linux/nl80211.h | 2 ++ include/net/cfg80211.h | 4 ++++ net/mac80211/Kconfig | 1 + net/mac80211/cfg.c | 3 ++- net/mac80211/rx.c | 1 + net/mac80211/sta_info.c | 2 ++ net/mac80211/sta_info.h | 3 +++ net/wireless/nl80211.c | 3 +++ 8 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 037b4e49889..1ce3775e9e2 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1161,6 +1161,7 @@ enum nl80211_rate_info { * @__NL80211_STA_INFO_AFTER_LAST: internal * @NL80211_STA_INFO_MAX: highest possible station info attribute * @NL80211_STA_INFO_SIGNAL: signal strength of last received PPDU (u8, dBm) + * @NL80211_STA_INFO_SIGNAL_AVG: signal strength average (u8, dBm) * @NL80211_STA_INFO_TX_BITRATE: current unicast tx rate, nested attribute * containing info as possible, see &enum nl80211_sta_info_txrate. * @NL80211_STA_INFO_RX_PACKETS: total received packet (u32, from this station) @@ -1178,6 +1179,7 @@ enum nl80211_sta_info { NL80211_STA_INFO_PLID, NL80211_STA_INFO_PLINK_STATE, NL80211_STA_INFO_SIGNAL, + NL80211_STA_INFO_SIGNAL_AVG, NL80211_STA_INFO_TX_BITRATE, NL80211_STA_INFO_RX_PACKETS, NL80211_STA_INFO_TX_PACKETS, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 8fd9eebd0cc..69e2364889f 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -424,6 +424,7 @@ struct station_parameters { * @STATION_INFO_TX_RETRIES: @tx_retries filled * @STATION_INFO_TX_FAILED: @tx_failed filled * @STATION_INFO_RX_DROP_MISC: @rx_dropped_misc filled + * @STATION_INFO_SIGNAL_AVG: @signal_avg filled */ enum station_info_flags { STATION_INFO_INACTIVE_TIME = 1<<0, @@ -439,6 +440,7 @@ enum station_info_flags { STATION_INFO_TX_RETRIES = 1<<10, STATION_INFO_TX_FAILED = 1<<11, STATION_INFO_RX_DROP_MISC = 1<<12, + STATION_INFO_SIGNAL_AVG = 1<<13, }; /** @@ -485,6 +487,7 @@ struct rate_info { * @plid: mesh peer link id * @plink_state: mesh peer link state * @signal: signal strength of last received packet in dBm + * @signal_avg: signal strength average in dBm * @txrate: current unicast bitrate to this station * @rx_packets: packets received from this station * @tx_packets: packets transmitted to this station @@ -505,6 +508,7 @@ struct station_info { u16 plid; u8 plink_state; s8 signal; + s8 signal_avg; struct rate_info txrate; u32 rx_packets; u32 tx_packets; diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 4d6f8653ec8..798d9b9462e 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -6,6 +6,7 @@ config MAC80211 select CRYPTO_ARC4 select CRYPTO_AES select CRC32 + select AVERAGE ---help--- This option enables the hardware independent IEEE 802.11 networking stack. diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 0c544074479..92c9cf6a7d1 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -343,8 +343,9 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) || (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) { - sinfo->filled |= STATION_INFO_SIGNAL; + sinfo->filled |= STATION_INFO_SIGNAL | STATION_INFO_SIGNAL_AVG; sinfo->signal = (s8)sta->last_signal; + sinfo->signal_avg = (s8) -ewma_read(&sta->avg_signal); } sinfo->txrate.flags = 0; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index d2fcd22ab06..9dd60a74181 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1156,6 +1156,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) sta->rx_fragments++; sta->rx_bytes += rx->skb->len; sta->last_signal = status->signal; + ewma_add(&sta->avg_signal, -status->signal); /* * Change STA power saving mode only at the end of a frame diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index eff58571fd7..f43fca8907f 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -244,6 +244,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, sta->local = local; sta->sdata = sdata; + ewma_init(&sta->avg_signal, 1000, 8); + if (sta_prepare_rate_control(local, sta, gfp)) { kfree(sta); return NULL; diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 9265acadef3..84062e2c782 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -13,6 +13,7 @@ #include #include #include +#include #include "key.h" /** @@ -224,6 +225,7 @@ enum plink_state { * @rx_fragments: number of received MPDUs * @rx_dropped: number of dropped MPDUs from this STA * @last_signal: signal of last received frame from this STA + * @avg_signal: moving average of signal of received frames from this STA * @last_seq_ctrl: last received seq/frag number from this STA (per RX queue) * @tx_filtered_count: number of frames the hardware filtered for this STA * @tx_retry_failed: number of frames that failed retry @@ -291,6 +293,7 @@ struct sta_info { unsigned long rx_fragments; unsigned long rx_dropped; int last_signal; + struct ewma avg_signal; __le16 last_seq_ctrl[NUM_RX_DATA_QUEUES]; /* Updated from TX status path only, no locking requirements */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 60555384222..d06a40d1700 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1872,6 +1872,9 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, if (sinfo->filled & STATION_INFO_SIGNAL) NLA_PUT_U8(msg, NL80211_STA_INFO_SIGNAL, sinfo->signal); + if (sinfo->filled & STATION_INFO_SIGNAL_AVG) + NLA_PUT_U8(msg, NL80211_STA_INFO_SIGNAL_AVG, + sinfo->signal_avg); if (sinfo->filled & STATION_INFO_TX_BITRATE) { txrate = nla_nest_start(msg, NL80211_STA_INFO_TX_BITRATE); if (!txrate) -- cgit v1.2.3-70-g09d2 From 042957801626465492b9428860de39a3cb2a8219 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 12 Nov 2010 22:32:11 -0500 Subject: tracing/events: Show real number in array fields Currently we have in something like the sched_switch event: field:char prev_comm[TASK_COMM_LEN]; offset:12; size:16; signed:1; When a userspace tool such as perf tries to parse this, the TASK_COMM_LEN is meaningless. This is done because the TRACE_EVENT() macro simply uses a #len to show the string of the length. When the length is an enum, we get a string that means nothing for tools. By adding a static buffer and a mutex to protect it, we can store the string into that buffer with snprintf and show the actual number. Now we get: field:char prev_comm[16]; offset:12; size:16; signed:1; Something much more useful. Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 4 ++++ include/trace/ftrace.h | 14 ++++++++++---- kernel/trace/trace_events.c | 6 ++++++ kernel/trace/trace_export.c | 14 ++++++++++---- 4 files changed, 30 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 725bf6bd39f..47e3997f7b5 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -225,6 +225,10 @@ enum { FILTER_PTR_STRING, }; +#define EVENT_STORAGE_SIZE 128 +extern struct mutex event_storage_mutex; +extern char event_storage[EVENT_STORAGE_SIZE]; + extern int trace_event_raw_init(struct ftrace_event_call *call); extern int trace_define_field(struct ftrace_event_call *call, const char *type, const char *name, int offset, int size, diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index e718a917d89..e16610c208c 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -296,13 +296,19 @@ static struct trace_event_functions ftrace_event_type_funcs_##call = { \ #undef __array #define __array(type, item, len) \ - BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ - ret = trace_define_field(event_call, #type "[" #len "]", #item, \ + do { \ + mutex_lock(&event_storage_mutex); \ + BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ + snprintf(event_storage, sizeof(event_storage), \ + "%s[%d]", #type, len); \ + ret = trace_define_field(event_call, event_storage, #item, \ offsetof(typeof(field), item), \ sizeof(field.item), \ is_signed_type(type), FILTER_OTHER); \ - if (ret) \ - return ret; + mutex_unlock(&event_storage_mutex); \ + if (ret) \ + return ret; \ + } while (0); #undef __dynamic_array #define __dynamic_array(type, item, len) \ diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 0725eeab193..35fde09b81d 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -27,6 +27,12 @@ DEFINE_MUTEX(event_mutex); +DEFINE_MUTEX(event_storage_mutex); +EXPORT_SYMBOL_GPL(event_storage_mutex); + +char event_storage[EVENT_STORAGE_SIZE]; +EXPORT_SYMBOL_GPL(event_storage); + LIST_HEAD(ftrace_events); LIST_HEAD(ftrace_common_fields); diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 4ba44deaac2..4b74d71705c 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -83,13 +83,19 @@ static void __always_unused ____ftrace_check_##name(void) \ #undef __array #define __array(type, item, len) \ - BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ - ret = trace_define_field(event_call, #type "[" #len "]", #item, \ + do { \ + BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ + mutex_lock(&event_storage_mutex); \ + snprintf(event_storage, sizeof(event_storage), \ + "%s[%d]", #type, len); \ + ret = trace_define_field(event_call, event_storage, #item, \ offsetof(typeof(field), item), \ sizeof(field.item), \ is_signed_type(type), FILTER_OTHER); \ - if (ret) \ - return ret; + mutex_unlock(&event_storage_mutex); \ + if (ret) \ + return ret; \ + } while (0); #undef __array_desc #define __array_desc(type, container, item, len) \ -- cgit v1.2.3-70-g09d2 From 93aaae2e01e57483256b7da05c9a7ebd65ad4686 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 19 Nov 2010 09:49:59 -0800 Subject: filter: optimize sk_run_filter Remove pc variable to avoid arithmetic to compute fentry at each filter instruction. Jumps directly manipulate fentry pointer. As the last instruction of filter[] is guaranteed to be a RETURN, and all jumps are before the last instruction, we dont need to check filter bounds (number of instructions in filter array) at each iteration, so we remove it from sk_run_filter() params. On x86_32 remove f_k var introduced in commit 57fe93b374a6b871 (filter: make sure filters dont read uninitialized memory) Note : We could use a CONFIG_ARCH_HAS_{FEW|MANY}_REGISTERS in order to avoid too many ifdefs in this code. This helps compiler to use cpu registers to hold fentry and A accumulator. On x86_32, this saves 401 bytes, and more important, sk_run_filter() runs much faster because less register pressure (One less conditional branch per BPF instruction) # size net/core/filter.o net/core/filter_pre.o text data bss dec hex filename 2948 0 0 2948 b84 net/core/filter.o 3349 0 0 3349 d15 net/core/filter_pre.o on x86_64 : # size net/core/filter.o net/core/filter_pre.o text data bss dec hex filename 5173 0 0 5173 1435 net/core/filter.o 5224 0 0 5224 1468 net/core/filter_pre.o Signed-off-by: Eric Dumazet Acked-by: Changli Gao Signed-off-by: David S. Miller --- drivers/isdn/i4l/isdn_ppp.c | 14 +++---- drivers/net/ppp_generic.c | 12 ++---- include/linux/filter.h | 2 +- net/core/filter.c | 93 +++++++++++++++++++++++---------------------- net/core/timestamping.c | 2 +- net/packet/af_packet.c | 2 +- 6 files changed, 61 insertions(+), 64 deletions(-) (limited to 'include/linux') diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c index 97c5cc2997f..9e8162c80bb 100644 --- a/drivers/isdn/i4l/isdn_ppp.c +++ b/drivers/isdn/i4l/isdn_ppp.c @@ -1147,15 +1147,14 @@ isdn_ppp_push_higher(isdn_net_dev * net_dev, isdn_net_local * lp, struct sk_buff } if (is->pass_filter - && sk_run_filter(skb, is->pass_filter, is->pass_len) == 0) { + && sk_run_filter(skb, is->pass_filter) == 0) { if (is->debug & 0x2) printk(KERN_DEBUG "IPPP: inbound frame filtered.\n"); kfree_skb(skb); return; } if (!(is->active_filter - && sk_run_filter(skb, is->active_filter, - is->active_len) == 0)) { + && sk_run_filter(skb, is->active_filter) == 0)) { if (is->debug & 0x2) printk(KERN_DEBUG "IPPP: link-active filter: reseting huptimer.\n"); lp->huptimer = 0; @@ -1294,15 +1293,14 @@ isdn_ppp_xmit(struct sk_buff *skb, struct net_device *netdev) } if (ipt->pass_filter - && sk_run_filter(skb, ipt->pass_filter, ipt->pass_len) == 0) { + && sk_run_filter(skb, ipt->pass_filter) == 0) { if (ipt->debug & 0x4) printk(KERN_DEBUG "IPPP: outbound frame filtered.\n"); kfree_skb(skb); goto unlock; } if (!(ipt->active_filter - && sk_run_filter(skb, ipt->active_filter, - ipt->active_len) == 0)) { + && sk_run_filter(skb, ipt->active_filter) == 0)) { if (ipt->debug & 0x4) printk(KERN_DEBUG "IPPP: link-active filter: reseting huptimer.\n"); lp->huptimer = 0; @@ -1492,9 +1490,9 @@ int isdn_ppp_autodial_filter(struct sk_buff *skb, isdn_net_local *lp) } drop |= is->pass_filter - && sk_run_filter(skb, is->pass_filter, is->pass_len) == 0; + && sk_run_filter(skb, is->pass_filter) == 0; drop |= is->active_filter - && sk_run_filter(skb, is->active_filter, is->active_len) == 0; + && sk_run_filter(skb, is->active_filter) == 0; skb_push(skb, IPPP_MAX_HEADER - 4); return drop; diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c index 09cf56d0416..0c91598ae28 100644 --- a/drivers/net/ppp_generic.c +++ b/drivers/net/ppp_generic.c @@ -1136,8 +1136,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb) a four-byte PPP header on each packet */ *skb_push(skb, 2) = 1; if (ppp->pass_filter && - sk_run_filter(skb, ppp->pass_filter, - ppp->pass_len) == 0) { + sk_run_filter(skb, ppp->pass_filter) == 0) { if (ppp->debug & 1) printk(KERN_DEBUG "PPP: outbound frame not passed\n"); kfree_skb(skb); @@ -1145,8 +1144,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb) } /* if this packet passes the active filter, record the time */ if (!(ppp->active_filter && - sk_run_filter(skb, ppp->active_filter, - ppp->active_len) == 0)) + sk_run_filter(skb, ppp->active_filter) == 0)) ppp->last_xmit = jiffies; skb_pull(skb, 2); #else @@ -1758,8 +1756,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb) *skb_push(skb, 2) = 0; if (ppp->pass_filter && - sk_run_filter(skb, ppp->pass_filter, - ppp->pass_len) == 0) { + sk_run_filter(skb, ppp->pass_filter) == 0) { if (ppp->debug & 1) printk(KERN_DEBUG "PPP: inbound frame " "not passed\n"); @@ -1767,8 +1764,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb) return; } if (!(ppp->active_filter && - sk_run_filter(skb, ppp->active_filter, - ppp->active_len) == 0)) + sk_run_filter(skb, ppp->active_filter) == 0)) ppp->last_recv = jiffies; __skb_pull(skb, 2); } else diff --git a/include/linux/filter.h b/include/linux/filter.h index 151f5d703b7..447a775878f 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -147,7 +147,7 @@ struct sock; extern int sk_filter(struct sock *sk, struct sk_buff *skb); extern unsigned int sk_run_filter(struct sk_buff *skb, - struct sock_filter *filter, int flen); + const struct sock_filter *filter); extern int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); extern int sk_detach_filter(struct sock *sk); extern int sk_chk_filter(struct sock_filter *filter, int flen); diff --git a/net/core/filter.c b/net/core/filter.c index 15a545d39cd..9e77b3c816c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -137,7 +137,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb) rcu_read_lock_bh(); filter = rcu_dereference_bh(sk->sk_filter); if (filter) { - unsigned int pkt_len = sk_run_filter(skb, filter->insns, filter->len); + unsigned int pkt_len = sk_run_filter(skb, filter->insns); err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; } @@ -151,14 +151,15 @@ EXPORT_SYMBOL(sk_filter); * sk_run_filter - run a filter on a socket * @skb: buffer to run the filter on * @filter: filter to apply - * @flen: length of filter * * Decode and apply filter instructions to the skb->data. - * Return length to keep, 0 for none. skb is the data we are - * filtering, filter is the array of filter instructions, and - * len is the number of filter blocks in the array. + * Return length to keep, 0 for none. @skb is the data we are + * filtering, @filter is the array of filter instructions. + * Because all jumps are guaranteed to be before last instruction, + * and last instruction guaranteed to be a RET, we dont need to check + * flen. (We used to pass to this function the length of filter) */ -unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen) +unsigned int sk_run_filter(struct sk_buff *skb, const struct sock_filter *fentry) { void *ptr; u32 A = 0; /* Accumulator */ @@ -167,34 +168,36 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int unsigned long memvalid = 0; u32 tmp; int k; - int pc; BUILD_BUG_ON(BPF_MEMWORDS > BITS_PER_LONG); /* * Process array of filter instructions. */ - for (pc = 0; pc < flen; pc++) { - const struct sock_filter *fentry = &filter[pc]; - u32 f_k = fentry->k; + for (;; fentry++) { +#if defined(CONFIG_X86_32) +#define K (fentry->k) +#else + const u32 K = fentry->k; +#endif switch (fentry->code) { case BPF_S_ALU_ADD_X: A += X; continue; case BPF_S_ALU_ADD_K: - A += f_k; + A += K; continue; case BPF_S_ALU_SUB_X: A -= X; continue; case BPF_S_ALU_SUB_K: - A -= f_k; + A -= K; continue; case BPF_S_ALU_MUL_X: A *= X; continue; case BPF_S_ALU_MUL_K: - A *= f_k; + A *= K; continue; case BPF_S_ALU_DIV_X: if (X == 0) @@ -202,64 +205,64 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int A /= X; continue; case BPF_S_ALU_DIV_K: - A /= f_k; + A /= K; continue; case BPF_S_ALU_AND_X: A &= X; continue; case BPF_S_ALU_AND_K: - A &= f_k; + A &= K; continue; case BPF_S_ALU_OR_X: A |= X; continue; case BPF_S_ALU_OR_K: - A |= f_k; + A |= K; continue; case BPF_S_ALU_LSH_X: A <<= X; continue; case BPF_S_ALU_LSH_K: - A <<= f_k; + A <<= K; continue; case BPF_S_ALU_RSH_X: A >>= X; continue; case BPF_S_ALU_RSH_K: - A >>= f_k; + A >>= K; continue; case BPF_S_ALU_NEG: A = -A; continue; case BPF_S_JMP_JA: - pc += f_k; + fentry += K; continue; case BPF_S_JMP_JGT_K: - pc += (A > f_k) ? fentry->jt : fentry->jf; + fentry += (A > K) ? fentry->jt : fentry->jf; continue; case BPF_S_JMP_JGE_K: - pc += (A >= f_k) ? fentry->jt : fentry->jf; + fentry += (A >= K) ? fentry->jt : fentry->jf; continue; case BPF_S_JMP_JEQ_K: - pc += (A == f_k) ? fentry->jt : fentry->jf; + fentry += (A == K) ? fentry->jt : fentry->jf; continue; case BPF_S_JMP_JSET_K: - pc += (A & f_k) ? fentry->jt : fentry->jf; + fentry += (A & K) ? fentry->jt : fentry->jf; continue; case BPF_S_JMP_JGT_X: - pc += (A > X) ? fentry->jt : fentry->jf; + fentry += (A > X) ? fentry->jt : fentry->jf; continue; case BPF_S_JMP_JGE_X: - pc += (A >= X) ? fentry->jt : fentry->jf; + fentry += (A >= X) ? fentry->jt : fentry->jf; continue; case BPF_S_JMP_JEQ_X: - pc += (A == X) ? fentry->jt : fentry->jf; + fentry += (A == X) ? fentry->jt : fentry->jf; continue; case BPF_S_JMP_JSET_X: - pc += (A & X) ? fentry->jt : fentry->jf; + fentry += (A & X) ? fentry->jt : fentry->jf; continue; case BPF_S_LD_W_ABS: - k = f_k; + k = K; load_w: ptr = load_pointer(skb, k, 4, &tmp); if (ptr != NULL) { @@ -268,7 +271,7 @@ load_w: } break; case BPF_S_LD_H_ABS: - k = f_k; + k = K; load_h: ptr = load_pointer(skb, k, 2, &tmp); if (ptr != NULL) { @@ -277,7 +280,7 @@ load_h: } break; case BPF_S_LD_B_ABS: - k = f_k; + k = K; load_b: ptr = load_pointer(skb, k, 1, &tmp); if (ptr != NULL) { @@ -292,34 +295,34 @@ load_b: X = skb->len; continue; case BPF_S_LD_W_IND: - k = X + f_k; + k = X + K; goto load_w; case BPF_S_LD_H_IND: - k = X + f_k; + k = X + K; goto load_h; case BPF_S_LD_B_IND: - k = X + f_k; + k = X + K; goto load_b; case BPF_S_LDX_B_MSH: - ptr = load_pointer(skb, f_k, 1, &tmp); + ptr = load_pointer(skb, K, 1, &tmp); if (ptr != NULL) { X = (*(u8 *)ptr & 0xf) << 2; continue; } return 0; case BPF_S_LD_IMM: - A = f_k; + A = K; continue; case BPF_S_LDX_IMM: - X = f_k; + X = K; continue; case BPF_S_LD_MEM: - A = (memvalid & (1UL << f_k)) ? - mem[f_k] : 0; + A = (memvalid & (1UL << K)) ? + mem[K] : 0; continue; case BPF_S_LDX_MEM: - X = (memvalid & (1UL << f_k)) ? - mem[f_k] : 0; + X = (memvalid & (1UL << K)) ? + mem[K] : 0; continue; case BPF_S_MISC_TAX: X = A; @@ -328,16 +331,16 @@ load_b: A = X; continue; case BPF_S_RET_K: - return f_k; + return K; case BPF_S_RET_A: return A; case BPF_S_ST: - memvalid |= 1UL << f_k; - mem[f_k] = A; + memvalid |= 1UL << K; + mem[K] = A; continue; case BPF_S_STX: - memvalid |= 1UL << f_k; - mem[f_k] = X; + memvalid |= 1UL << K; + mem[K] = X; continue; default: WARN_ON(1); diff --git a/net/core/timestamping.c b/net/core/timestamping.c index 0ae6c22da85..dac7ed687f6 100644 --- a/net/core/timestamping.c +++ b/net/core/timestamping.c @@ -31,7 +31,7 @@ static unsigned int classify(struct sk_buff *skb) if (likely(skb->dev && skb->dev->phydev && skb->dev->phydev->drv)) - return sk_run_filter(skb, ptp_filter, ARRAY_SIZE(ptp_filter)); + return sk_run_filter(skb, ptp_filter); else return PTP_CLASS_NONE; } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 20964560a0e..b6372dd128d 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -519,7 +519,7 @@ static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk, rcu_read_lock_bh(); filter = rcu_dereference_bh(sk->sk_filter); if (filter != NULL) - res = sk_run_filter(skb, filter->insns, filter->len); + res = sk_run_filter(skb, filter->insns); rcu_read_unlock_bh(); return res; -- cgit v1.2.3-70-g09d2 From 07a8cdd2bb17a4da68136d963b8bc71959bd31a5 Mon Sep 17 00:00:00 2001 From: Anand Gadiyar Date: Thu, 18 Nov 2010 18:54:17 +0530 Subject: usb: musb: do not use dma for control transfers The Inventra DMA engine used with the MUSB controller in many SoCs cannot use DMA for control transfers on EP0, but can use DMA for all other transfers. The USB core maps urbs for DMA if hcd->self.uses_dma is true. (hcd->self.uses_dma is true for MUSB as well). Split the uses_dma flag into two - one that says if the controller needs to use PIO for control transfers, and another which says if the controller uses DMA (for all other transfers). Also, populate this flag for all MUSB by default. (Tested on OMAP3 and OMAP4 boards, with EHCI and MUSB HCDs simultaneously in use). Signed-off-by: Maulik Mankad Signed-off-by: Santosh Shilimkar Signed-off-by: Anand Gadiyar Cc: Oliver Neukum Cc: Alan Stern Cc: Praveena NADAHALLY Cc: Ajay Kumar Gupta Signed-off-by: Felipe Balbi --- drivers/usb/core/hcd.c | 2 ++ drivers/usb/musb/musb_core.c | 3 +++ drivers/usb/musb/musb_gadget.c | 4 ++++ include/linux/usb.h | 4 ++++ 4 files changed, 13 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 61800f77dac..ced846ac414 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1330,6 +1330,8 @@ static int map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb, */ if (usb_endpoint_xfer_control(&urb->ep->desc)) { + if (hcd->self.uses_pio_for_control) + return ret; if (hcd->self.uses_dma) { urb->setup_dma = dma_map_single( hcd->self.controller, diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index e6669fc3b80..99beebce855 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -2116,12 +2116,15 @@ bad_config: * Otherwise, wait till the gadget driver hooks up. */ if (!is_otg_enabled(musb) && is_host_enabled(musb)) { + struct usb_hcd *hcd = musb_to_hcd(musb); + MUSB_HST_MODE(musb); musb->xceiv->default_a = 1; musb->xceiv->state = OTG_STATE_A_IDLE; status = usb_add_hcd(musb_to_hcd(musb), -1, 0); + hcd->self.uses_pio_for_control = 1; DBG(1, "%s mode, status %d, devctl %02x %c\n", "HOST", status, musb_readb(musb->mregs, MUSB_DEVCTL), diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index c8de0e123f2..9d6ade82b9f 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -1833,6 +1833,8 @@ int usb_gadget_probe_driver(struct usb_gadget_driver *driver, spin_unlock_irqrestore(&musb->lock, flags); if (is_otg_enabled(musb)) { + struct usb_hcd *hcd = musb_to_hcd(musb); + DBG(3, "OTG startup...\n"); /* REVISIT: funcall to other code, which also @@ -1847,6 +1849,8 @@ int usb_gadget_probe_driver(struct usb_gadget_driver *driver, musb->gadget_driver = NULL; musb->g.dev.driver = NULL; spin_unlock_irqrestore(&musb->lock, flags); + } else { + hcd->self.uses_pio_for_control = 1; } } } diff --git a/include/linux/usb.h b/include/linux/usb.h index 24300d8a1bc..a28eb259257 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -313,6 +313,10 @@ struct usb_bus { int busnum; /* Bus number (in order of reg) */ const char *bus_name; /* stable id (PCI slot_name etc) */ u8 uses_dma; /* Does the host controller use DMA? */ + u8 uses_pio_for_control; /* + * Does the host controller use PIO + * for control transfers? + */ u8 otg_port; /* 0, or number of OTG/HNP port */ unsigned is_b_host:1; /* true during some HNP roleswitches */ unsigned b_hnp_enable:1; /* OTG: did A-Host enable HNP? */ -- cgit v1.2.3-70-g09d2 From eb06acdc85585f28864261f28659157848762ee4 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Thu, 28 Oct 2010 13:10:50 +0000 Subject: macvlan: Introduce 'passthru' mode to takeover the underlying device With the current default 'vepa' mode, a KVM guest using virtio with macvtap backend has the following limitations. - cannot change/add a mac address on the guest virtio-net - cannot create a vlan device on the guest virtio-net - cannot enable promiscuous mode on guest virtio-net To address these limitations, this patch introduces a new mode called 'passthru' when creating a macvlan device which allows takeover of the underlying device and passing it to a guest using virtio with macvtap backend. Only one macvlan device is allowed in passthru mode and it inherits the mac address from the underlying device and sets it in promiscuous mode to receive and forward all the packets. Signed-off-by: Sridhar Samudrala ------------------------------------------------------------------------- Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 33 ++++++++++++++++++++++++++++++++- include/linux/if_link.h | 1 + 2 files changed, 33 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 93f0ba25c80..6ed577b065d 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -38,6 +38,7 @@ struct macvlan_port { struct hlist_head vlan_hash[MACVLAN_HASH_SIZE]; struct list_head vlans; struct rcu_head rcu; + bool passthru; }; #define macvlan_port_get_rcu(dev) \ @@ -169,6 +170,7 @@ static struct sk_buff *macvlan_handle_frame(struct sk_buff *skb) macvlan_broadcast(skb, port, NULL, MACVLAN_MODE_PRIVATE | MACVLAN_MODE_VEPA | + MACVLAN_MODE_PASSTHRU| MACVLAN_MODE_BRIDGE); else if (src->mode == MACVLAN_MODE_VEPA) /* flood to everyone except source */ @@ -185,7 +187,10 @@ static struct sk_buff *macvlan_handle_frame(struct sk_buff *skb) return skb; } - vlan = macvlan_hash_lookup(port, eth->h_dest); + if (port->passthru) + vlan = list_first_entry(&port->vlans, struct macvlan_dev, list); + else + vlan = macvlan_hash_lookup(port, eth->h_dest); if (vlan == NULL) return skb; @@ -288,6 +293,11 @@ static int macvlan_open(struct net_device *dev) struct net_device *lowerdev = vlan->lowerdev; int err; + if (vlan->port->passthru) { + dev_set_promiscuity(lowerdev, 1); + goto hash_add; + } + err = -EBUSY; if (macvlan_addr_busy(vlan->port, dev->dev_addr)) goto out; @@ -300,6 +310,8 @@ static int macvlan_open(struct net_device *dev) if (err < 0) goto del_unicast; } + +hash_add: macvlan_hash_add(vlan); return 0; @@ -314,12 +326,18 @@ static int macvlan_stop(struct net_device *dev) struct macvlan_dev *vlan = netdev_priv(dev); struct net_device *lowerdev = vlan->lowerdev; + if (vlan->port->passthru) { + dev_set_promiscuity(lowerdev, -1); + goto hash_del; + } + dev_mc_unsync(lowerdev, dev); if (dev->flags & IFF_ALLMULTI) dev_set_allmulti(lowerdev, -1); dev_uc_del(lowerdev, dev->dev_addr); +hash_del: macvlan_hash_del(vlan); return 0; } @@ -559,6 +577,7 @@ static int macvlan_port_create(struct net_device *dev) if (port == NULL) return -ENOMEM; + port->passthru = false; port->dev = dev; INIT_LIST_HEAD(&port->vlans); for (i = 0; i < MACVLAN_HASH_SIZE; i++) @@ -603,6 +622,7 @@ static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[]) case MACVLAN_MODE_PRIVATE: case MACVLAN_MODE_VEPA: case MACVLAN_MODE_BRIDGE: + case MACVLAN_MODE_PASSTHRU: break; default: return -EINVAL; @@ -652,6 +672,10 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, } port = macvlan_port_get(lowerdev); + /* Only 1 macvlan device can be created in passthru mode */ + if (port->passthru) + return -EINVAL; + vlan->lowerdev = lowerdev; vlan->dev = dev; vlan->port = port; @@ -662,6 +686,13 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, if (data && data[IFLA_MACVLAN_MODE]) vlan->mode = nla_get_u32(data[IFLA_MACVLAN_MODE]); + if (vlan->mode == MACVLAN_MODE_PASSTHRU) { + if (!list_empty(&port->vlans)) + return -EINVAL; + port->passthru = true; + memcpy(dev->dev_addr, lowerdev->dev_addr, ETH_ALEN); + } + err = register_netdevice(dev); if (err < 0) goto destroy_port; diff --git a/include/linux/if_link.h b/include/linux/if_link.h index 2e02e4d7b11..6485d2a89be 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -259,6 +259,7 @@ enum macvlan_mode { MACVLAN_MODE_PRIVATE = 1, /* don't talk to other macvlans */ MACVLAN_MODE_VEPA = 2, /* talk to other ports through ext bridge */ MACVLAN_MODE_BRIDGE = 4, /* talk to bridge ports directly */ + MACVLAN_MODE_PASSTHRU = 8,/* take over the underlying device */ }; /* SR-IOV virtual function management section */ -- cgit v1.2.3-70-g09d2 From 1d8638d4038eb8709edc80e37a0bbb77253d86e9 Mon Sep 17 00:00:00 2001 From: Daniel Klaffenbach Date: Fri, 19 Nov 2010 21:25:21 -0600 Subject: ssb: b43-pci-bridge: Add new vendor for BCM4318 Add new vendor for Broadcom 4318. Signed-off-by: Daniel Klaffenbach Signed-off-by: Larry Finger Cc: Stable Signed-off-by: John W. Linville --- drivers/ssb/b43_pci_bridge.c | 1 + include/linux/pci_ids.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/drivers/ssb/b43_pci_bridge.c b/drivers/ssb/b43_pci_bridge.c index ef9c6a04ad8..744d3f6e470 100644 --- a/drivers/ssb/b43_pci_bridge.c +++ b/drivers/ssb/b43_pci_bridge.c @@ -24,6 +24,7 @@ static const struct pci_device_id b43_pci_bridge_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4312) }, { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4315) }, { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4318) }, + { PCI_DEVICE(PCI_VENDOR_ID_BCM_GVC, 0x4318) }, { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4319) }, { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4320) }, { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4321) }, diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index d278dd9cb76..f29c25ede70 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2043,6 +2043,7 @@ #define PCI_DEVICE_ID_AFAVLAB_P030 0x2182 #define PCI_SUBDEVICE_ID_AFAVLAB_P061 0x2150 +#define PCI_VENDOR_ID_BCM_GVC 0x14a4 #define PCI_VENDOR_ID_BROADCOM 0x14e4 #define PCI_DEVICE_ID_TIGON3_5752 0x1600 #define PCI_DEVICE_ID_TIGON3_5752M 0x1601 -- cgit v1.2.3-70-g09d2 From 293bb1c41b728d4aa248fe8a0acd2b9066ff5c34 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Wed, 24 Nov 2010 02:38:05 +0000 Subject: stmmac: add init/exit callback in plat_stmmacenet_data struct This patch adds in the plat_stmmacenet_data the init and exit callbacks that can be used for invoking specific platform functions. For example, on ST targets, these call the PAD manager functions to set PIO lines and syscfg registers. The patch removes the stmmac_claim_resource only used on STM Kernels as well. Signed-off-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- drivers/net/stmmac/stmmac.h | 22 ---------------------- drivers/net/stmmac/stmmac_main.c | 18 +++++++++++++----- include/linux/stmmac.h | 6 +++--- 3 files changed, 16 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/stmmac/stmmac.h b/drivers/net/stmmac/stmmac.h index 31575670d86..8ae76501eb7 100644 --- a/drivers/net/stmmac/stmmac.h +++ b/drivers/net/stmmac/stmmac.h @@ -87,28 +87,6 @@ struct stmmac_priv { struct plat_stmmacenet_data *plat; }; -#ifdef CONFIG_STM_DRIVERS -#include -static inline int stmmac_claim_resource(struct platform_device *pdev) -{ - int ret = 0; - struct plat_stmmacenet_data *plat_dat = pdev->dev.platform_data; - - /* Pad routing setup */ - if (IS_ERR(devm_stm_pad_claim(&pdev->dev, plat_dat->pad_config, - dev_name(&pdev->dev)))) { - printk(KERN_ERR "%s: Failed to request pads!\n", __func__); - ret = -ENODEV; - } - return ret; -} -#else -static inline int stmmac_claim_resource(struct platform_device *pdev) -{ - return 0; -} -#endif - extern int stmmac_mdio_unregister(struct net_device *ndev); extern int stmmac_mdio_register(struct net_device *ndev); extern void stmmac_set_ethtool_ops(struct net_device *netdev); diff --git a/drivers/net/stmmac/stmmac_main.c b/drivers/net/stmmac/stmmac_main.c index 29ba28660fa..b806cd3515b 100644 --- a/drivers/net/stmmac/stmmac_main.c +++ b/drivers/net/stmmac/stmmac_main.c @@ -1643,7 +1643,7 @@ static int stmmac_dvr_probe(struct platform_device *pdev) struct resource *res; void __iomem *addr = NULL; struct net_device *ndev = NULL; - struct stmmac_priv *priv; + struct stmmac_priv *priv = NULL; struct plat_stmmacenet_data *plat_dat; pr_info("STMMAC driver:\n\tplatform registration... "); @@ -1708,10 +1708,12 @@ static int stmmac_dvr_probe(struct platform_device *pdev) /* Set the I/O base addr */ ndev->base_addr = (unsigned long)addr; - /* Verify embedded resource for the platform */ - ret = stmmac_claim_resource(pdev); - if (ret < 0) - goto out; + /* Custom initialisation */ + if (priv->plat->init) { + ret = priv->plat->init(pdev); + if (unlikely(ret)) + goto out; + } /* MAC HW revice detection */ ret = stmmac_mac_device_setup(ndev); @@ -1745,6 +1747,9 @@ static int stmmac_dvr_probe(struct platform_device *pdev) out: if (ret < 0) { + if (priv->plat->exit) + priv->plat->exit(pdev); + platform_set_drvdata(pdev, NULL); release_mem_region(res->start, resource_size(res)); if (addr != NULL) @@ -1778,6 +1783,9 @@ static int stmmac_dvr_remove(struct platform_device *pdev) stmmac_mdio_unregister(ndev); + if (priv->plat->exit) + priv->plat->exit(pdev); + platform_set_drvdata(pdev, NULL); unregister_netdev(ndev); diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index d66c61774d9..e1035291569 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -40,9 +40,9 @@ struct plat_stmmacenet_data { int pmt; void (*fix_mac_speed)(void *priv, unsigned int speed); void (*bus_setup)(void __iomem *ioaddr); -#ifdef CONFIG_STM_DRIVERS - struct stm_pad_config *pad_config; -#endif + int (*init)(struct platform_device *pdev); + void (*exit)(struct platform_device *pdev); + void *custom_cfg; void *bsp_priv; }; -- cgit v1.2.3-70-g09d2 From 456b61bca8ee324ab6c18b065e632c9a8c88aa39 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 23 Nov 2010 13:12:15 +0000 Subject: ipv6: mcast: RCU conversion ipv6_sk_mc_lock rwlock becomes a spinlock. readers (inet6_mc_check()) now takes rcu_read_lock() instead of read lock. Writers dont need to disable BH anymore. struct ipv6_mc_socklist objects are reclaimed after one RCU grace period. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/ipv6.h | 2 +- include/net/if_inet6.h | 3 +- net/ipv6/mcast.c | 75 +++++++++++++++++++++++++++++--------------------- 3 files changed, 47 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 8e429d0e040..0c997767429 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -364,7 +364,7 @@ struct ipv6_pinfo { __u32 dst_cookie; - struct ipv6_mc_socklist *ipv6_mc_list; + struct ipv6_mc_socklist __rcu *ipv6_mc_list; struct ipv6_ac_socklist *ipv6_ac_list; struct ipv6_fl_socklist *ipv6_fl_list; diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index f95ff8d9aa4..04977eefb0e 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -89,10 +89,11 @@ struct ip6_sf_socklist { struct ipv6_mc_socklist { struct in6_addr addr; int ifindex; - struct ipv6_mc_socklist *next; + struct ipv6_mc_socklist __rcu *next; rwlock_t sflock; unsigned int sfmode; /* MCAST_{INCLUDE,EXCLUDE} */ struct ip6_sf_socklist *sflist; + struct rcu_head rcu; }; struct ip6_sf_list { diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 9c5074528a7..49f986d626a 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -82,7 +82,7 @@ static void *__mld2_query_bugs[] __attribute__((__unused__)) = { static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT; /* Big mc list lock for all the sockets */ -static DEFINE_RWLOCK(ipv6_sk_mc_lock); +static DEFINE_SPINLOCK(ipv6_sk_mc_lock); static void igmp6_join_group(struct ifmcaddr6 *ma); static void igmp6_leave_group(struct ifmcaddr6 *ma); @@ -123,6 +123,11 @@ int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF; * socket join on multicast group */ +#define for_each_pmc_rcu(np, pmc) \ + for (pmc = rcu_dereference(np->ipv6_mc_list); \ + pmc != NULL; \ + pmc = rcu_dereference(pmc->next)) + int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) { struct net_device *dev = NULL; @@ -134,15 +139,15 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) if (!ipv6_addr_is_multicast(addr)) return -EINVAL; - read_lock_bh(&ipv6_sk_mc_lock); - for (mc_lst=np->ipv6_mc_list; mc_lst; mc_lst=mc_lst->next) { + rcu_read_lock(); + for_each_pmc_rcu(np, mc_lst) { if ((ifindex == 0 || mc_lst->ifindex == ifindex) && ipv6_addr_equal(&mc_lst->addr, addr)) { - read_unlock_bh(&ipv6_sk_mc_lock); + rcu_read_unlock(); return -EADDRINUSE; } } - read_unlock_bh(&ipv6_sk_mc_lock); + rcu_read_unlock(); mc_lst = sock_kmalloc(sk, sizeof(struct ipv6_mc_socklist), GFP_KERNEL); @@ -186,33 +191,41 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) return err; } - write_lock_bh(&ipv6_sk_mc_lock); + spin_lock(&ipv6_sk_mc_lock); mc_lst->next = np->ipv6_mc_list; - np->ipv6_mc_list = mc_lst; - write_unlock_bh(&ipv6_sk_mc_lock); + rcu_assign_pointer(np->ipv6_mc_list, mc_lst); + spin_unlock(&ipv6_sk_mc_lock); rcu_read_unlock(); return 0; } +static void ipv6_mc_socklist_reclaim(struct rcu_head *head) +{ + kfree(container_of(head, struct ipv6_mc_socklist, rcu)); +} /* * socket leave on multicast group */ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) { struct ipv6_pinfo *np = inet6_sk(sk); - struct ipv6_mc_socklist *mc_lst, **lnk; + struct ipv6_mc_socklist *mc_lst; + struct ipv6_mc_socklist __rcu **lnk; struct net *net = sock_net(sk); - write_lock_bh(&ipv6_sk_mc_lock); - for (lnk = &np->ipv6_mc_list; (mc_lst = *lnk) !=NULL ; lnk = &mc_lst->next) { + spin_lock(&ipv6_sk_mc_lock); + for (lnk = &np->ipv6_mc_list; + (mc_lst = rcu_dereference_protected(*lnk, + lockdep_is_held(&ipv6_sk_mc_lock))) !=NULL ; + lnk = &mc_lst->next) { if ((ifindex == 0 || mc_lst->ifindex == ifindex) && ipv6_addr_equal(&mc_lst->addr, addr)) { struct net_device *dev; *lnk = mc_lst->next; - write_unlock_bh(&ipv6_sk_mc_lock); + spin_unlock(&ipv6_sk_mc_lock); rcu_read_lock(); dev = dev_get_by_index_rcu(net, mc_lst->ifindex); @@ -225,11 +238,12 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) } else (void) ip6_mc_leave_src(sk, mc_lst, NULL); rcu_read_unlock(); - sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); + atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); + call_rcu(&mc_lst->rcu, ipv6_mc_socklist_reclaim); return 0; } } - write_unlock_bh(&ipv6_sk_mc_lock); + spin_unlock(&ipv6_sk_mc_lock); return -EADDRNOTAVAIL; } @@ -272,12 +286,13 @@ void ipv6_sock_mc_close(struct sock *sk) struct ipv6_mc_socklist *mc_lst; struct net *net = sock_net(sk); - write_lock_bh(&ipv6_sk_mc_lock); - while ((mc_lst = np->ipv6_mc_list) != NULL) { + spin_lock(&ipv6_sk_mc_lock); + while ((mc_lst = rcu_dereference_protected(np->ipv6_mc_list, + lockdep_is_held(&ipv6_sk_mc_lock))) != NULL) { struct net_device *dev; np->ipv6_mc_list = mc_lst->next; - write_unlock_bh(&ipv6_sk_mc_lock); + spin_unlock(&ipv6_sk_mc_lock); rcu_read_lock(); dev = dev_get_by_index_rcu(net, mc_lst->ifindex); @@ -290,11 +305,13 @@ void ipv6_sock_mc_close(struct sock *sk) } else (void) ip6_mc_leave_src(sk, mc_lst, NULL); rcu_read_unlock(); - sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); - write_lock_bh(&ipv6_sk_mc_lock); + atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); + call_rcu(&mc_lst->rcu, ipv6_mc_socklist_reclaim); + + spin_lock(&ipv6_sk_mc_lock); } - write_unlock_bh(&ipv6_sk_mc_lock); + spin_unlock(&ipv6_sk_mc_lock); } int ip6_mc_source(int add, int omode, struct sock *sk, @@ -328,8 +345,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk, err = -EADDRNOTAVAIL; - read_lock(&ipv6_sk_mc_lock); - for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rcu(inet6, pmc) { if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface) continue; if (ipv6_addr_equal(&pmc->addr, group)) @@ -428,7 +444,6 @@ int ip6_mc_source(int add, int omode, struct sock *sk, done: if (pmclocked) write_unlock(&pmc->sflock); - read_unlock(&ipv6_sk_mc_lock); read_unlock_bh(&idev->lock); rcu_read_unlock(); if (leavegroup) @@ -466,14 +481,13 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) dev = idev->dev; err = 0; - read_lock(&ipv6_sk_mc_lock); if (gsf->gf_fmode == MCAST_INCLUDE && gsf->gf_numsrc == 0) { leavegroup = 1; goto done; } - for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rcu(inet6, pmc) { if (pmc->ifindex != gsf->gf_interface) continue; if (ipv6_addr_equal(&pmc->addr, group)) @@ -521,7 +535,6 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) write_unlock(&pmc->sflock); err = 0; done: - read_unlock(&ipv6_sk_mc_lock); read_unlock_bh(&idev->lock); rcu_read_unlock(); if (leavegroup) @@ -562,7 +575,7 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, * so reading the list is safe. */ - for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rcu(inet6, pmc) { if (pmc->ifindex != gsf->gf_interface) continue; if (ipv6_addr_equal(group, &pmc->addr)) @@ -612,13 +625,13 @@ int inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr, struct ip6_sf_socklist *psl; int rv = 1; - read_lock(&ipv6_sk_mc_lock); - for (mc = np->ipv6_mc_list; mc; mc = mc->next) { + rcu_read_lock(); + for_each_pmc_rcu(np, mc) { if (ipv6_addr_equal(&mc->addr, mc_addr)) break; } if (!mc) { - read_unlock(&ipv6_sk_mc_lock); + rcu_read_unlock(); return 1; } read_lock(&mc->sflock); @@ -638,7 +651,7 @@ int inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr, rv = 0; } read_unlock(&mc->sflock); - read_unlock(&ipv6_sk_mc_lock); + rcu_read_unlock(); return rv; } -- cgit v1.2.3-70-g09d2 From 3853b5841c01a3f492fe137afaad9c209e5162c6 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 21 Nov 2010 13:17:29 +0000 Subject: xps: Improvements in TX queue selection In dev_pick_tx, don't do work in calculating queue index or setting the index in the sock unless the device has more than one queue. This allows the sock to be set only with a queue index of a multi-queue device which is desirable if device are stacked like in a tunnel. We also allow the mapping of a socket to queue to be changed. To maintain in order packet transmission a flag (ooo_okay) has been added to the sk_buff structure. If a transport layer sets this flag on a packet, the transmit queue can be changed for the socket. Presumably, the transport would set this if there was no possbility of creating OOO packets (for instance, there are no packets in flight for the socket). This patch includes the modification in TCP output for setting this flag. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 3 ++- net/core/dev.c | 18 +++++++++++------- net/ipv4/tcp_output.c | 5 ++++- 3 files changed, 17 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e6ba898de61..19f37a6ee6c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -386,9 +386,10 @@ struct sk_buff { #else __u8 deliver_no_wcard:1; #endif + __u8 ooo_okay:1; kmemcheck_bitfield_end(flags2); - /* 0/14 bit hole */ + /* 0/13 bit hole */ #ifdef CONFIG_NET_DMA dma_cookie_t dma_cookie; diff --git a/net/core/dev.c b/net/core/dev.c index 381b8e28016..7b17674a29e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2148,20 +2148,24 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev, int queue_index; const struct net_device_ops *ops = dev->netdev_ops; - if (ops->ndo_select_queue) { + if (dev->real_num_tx_queues == 1) + queue_index = 0; + else if (ops->ndo_select_queue) { queue_index = ops->ndo_select_queue(dev, skb); queue_index = dev_cap_txqueue(dev, queue_index); } else { struct sock *sk = skb->sk; queue_index = sk_tx_queue_get(sk); - if (queue_index < 0 || queue_index >= dev->real_num_tx_queues) { - queue_index = 0; - if (dev->real_num_tx_queues > 1) - queue_index = skb_tx_hash(dev, skb); + if (queue_index < 0 || skb->ooo_okay || + queue_index >= dev->real_num_tx_queues) { + int old_index = queue_index; - if (sk) { - struct dst_entry *dst = rcu_dereference_check(sk->sk_dst_cache, 1); + queue_index = skb_tx_hash(dev, skb); + + if (queue_index != old_index && sk) { + struct dst_entry *dst = + rcu_dereference_check(sk->sk_dst_cache, 1); if (dst && skb_dst(skb) == dst) sk_tx_queue_set(sk, queue_index); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index bb8f547fc7d..5f29b2e20e2 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -822,8 +822,11 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, &md5); tcp_header_size = tcp_options_size + sizeof(struct tcphdr); - if (tcp_packets_in_flight(tp) == 0) + if (tcp_packets_in_flight(tp) == 0) { tcp_ca_event(sk, CA_EVENT_TX_START); + skb->ooo_okay = 1; + } else + skb->ooo_okay = 0; skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); -- cgit v1.2.3-70-g09d2 From 1d24eb4815d1e0e8b451ecc546645f8ef1176d4f Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 21 Nov 2010 13:17:27 +0000 Subject: xps: Transmit Packet Steering This patch implements transmit packet steering (XPS) for multiqueue devices. XPS selects a transmit queue during packet transmission based on configuration. This is done by mapping the CPU transmitting the packet to a queue. This is the transmit side analogue to RPS-- where RPS is selecting a CPU based on receive queue, XPS selects a queue based on the CPU (previously there was an XPS patch from Eric Dumazet, but that might more appropriately be called transmit completion steering). Each transmit queue can be associated with a number of CPUs which will use the queue to send packets. This is configured as a CPU mask on a per queue basis in: /sys/class/net/eth/queues/tx-/xps_cpus The mappings are stored per device in an inverted data structure that maps CPUs to queues. In the netdevice structure this is an array of num_possible_cpu structures where each structure holds and array of queue_indexes for queues which that CPU can use. The benefits of XPS are improved locality in the per queue data structures. Also, transmit completions are more likely to be done nearer to the sending thread, so this should promote locality back to the socket on free (e.g. UDP). The benefits of XPS are dependent on cache hierarchy, application load, and other factors. XPS would nominally be configured so that a queue would only be shared by CPUs which are sharing a cache, the degenerative configuration woud be that each CPU has it's own queue. Below are some benchmark results which show the potential benfit of this patch. The netperf test has 500 instances of netperf TCP_RR test with 1 byte req. and resp. bnx2x on 16 core AMD XPS (16 queues, 1 TX queue per CPU) 1234K at 100% CPU No XPS (16 queues) 996K at 100% CPU Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/netdevice.h | 30 ++++ net/core/dev.c | 53 ++++++- net/core/net-sysfs.c | 369 +++++++++++++++++++++++++++++++++++++++++++++- net/core/net-sysfs.h | 3 + 4 files changed, 447 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b45c1b8b1d1..badf9285fe0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -503,6 +503,10 @@ struct netdev_queue { struct Qdisc *qdisc; unsigned long state; struct Qdisc *qdisc_sleeping; +#ifdef CONFIG_RPS + struct kobject kobj; +#endif + /* * write mostly part */ @@ -529,6 +533,30 @@ struct rps_map { }; #define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + (_num * sizeof(u16))) +/* + * This structure holds an XPS map which can be of variable length. The + * map is an array of queues. + */ +struct xps_map { + unsigned int len; + unsigned int alloc_len; + struct rcu_head rcu; + u16 queues[0]; +}; +#define XPS_MAP_SIZE(_num) (sizeof(struct xps_map) + (_num * sizeof(u16))) +#define XPS_MIN_MAP_ALLOC ((L1_CACHE_BYTES - sizeof(struct xps_map)) \ + / sizeof(u16)) + +/* + * This structure holds all XPS maps for device. Maps are indexed by CPU. + */ +struct xps_dev_maps { + struct rcu_head rcu; + struct xps_map *cpu_map[0]; +}; +#define XPS_DEV_MAPS_SIZE (sizeof(struct xps_dev_maps) + \ + (nr_cpu_ids * sizeof(struct xps_map *))) + /* * The rps_dev_flow structure contains the mapping of a flow to a CPU and the * tail pointer for that CPU's input queue at the time of last enqueue. @@ -1016,6 +1044,8 @@ struct net_device { unsigned long tx_queue_len; /* Max frames per queue allowed */ spinlock_t tx_global_lock; + struct xps_dev_maps *xps_maps; + /* These may be needed for future network-power-down code. */ /* diff --git a/net/core/dev.c b/net/core/dev.c index 7b17674a29e..c852f0038a0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1557,12 +1557,16 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) */ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) { + int rc; + if (txq < 1 || txq > dev->num_tx_queues) return -EINVAL; if (dev->reg_state == NETREG_REGISTERED) { ASSERT_RTNL(); + rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues, + txq); if (txq < dev->real_num_tx_queues) qdisc_reset_all_tx_gt(dev, txq); } @@ -2142,6 +2146,44 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) return queue_index; } +static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) +{ +#ifdef CONFIG_RPS + struct xps_dev_maps *dev_maps; + struct xps_map *map; + int queue_index = -1; + + rcu_read_lock(); + dev_maps = rcu_dereference(dev->xps_maps); + if (dev_maps) { + map = rcu_dereference( + dev_maps->cpu_map[raw_smp_processor_id()]); + if (map) { + if (map->len == 1) + queue_index = map->queues[0]; + else { + u32 hash; + if (skb->sk && skb->sk->sk_hash) + hash = skb->sk->sk_hash; + else + hash = (__force u16) skb->protocol ^ + skb->rxhash; + hash = jhash_1word(hash, hashrnd); + queue_index = map->queues[ + ((u64)hash * map->len) >> 32]; + } + if (unlikely(queue_index >= dev->real_num_tx_queues)) + queue_index = -1; + } + } + rcu_read_unlock(); + + return queue_index; +#else + return -1; +#endif +} + static struct netdev_queue *dev_pick_tx(struct net_device *dev, struct sk_buff *skb) { @@ -2161,7 +2203,9 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev, queue_index >= dev->real_num_tx_queues) { int old_index = queue_index; - queue_index = skb_tx_hash(dev, skb); + queue_index = get_xps_queue(dev, skb); + if (queue_index < 0) + queue_index = skb_tx_hash(dev, skb); if (queue_index != old_index && sk) { struct dst_entry *dst = @@ -5066,6 +5110,7 @@ static int netif_alloc_netdev_queues(struct net_device *dev) { unsigned int count = dev->num_tx_queues; struct netdev_queue *tx; + int i; BUG_ON(count < 1); @@ -5076,6 +5121,10 @@ static int netif_alloc_netdev_queues(struct net_device *dev) return -ENOMEM; } dev->_tx = tx; + + for (i = 0; i < count; i++) + tx[i].dev = dev; + return 0; } @@ -5083,8 +5132,6 @@ static void netdev_init_one_queue(struct net_device *dev, struct netdev_queue *queue, void *_unused) { - queue->dev = dev; - /* Initialize queue lock */ spin_lock_init(&queue->_xmit_lock); netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 7abeb7ceaa4..68dbbfdee27 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -772,18 +772,377 @@ net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num) return error; } -static int rx_queue_register_kobjects(struct net_device *net) +/* + * netdev_queue sysfs structures and functions. + */ +struct netdev_queue_attribute { + struct attribute attr; + ssize_t (*show)(struct netdev_queue *queue, + struct netdev_queue_attribute *attr, char *buf); + ssize_t (*store)(struct netdev_queue *queue, + struct netdev_queue_attribute *attr, const char *buf, size_t len); +}; +#define to_netdev_queue_attr(_attr) container_of(_attr, \ + struct netdev_queue_attribute, attr) + +#define to_netdev_queue(obj) container_of(obj, struct netdev_queue, kobj) + +static ssize_t netdev_queue_attr_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr); + struct netdev_queue *queue = to_netdev_queue(kobj); + + if (!attribute->show) + return -EIO; + + return attribute->show(queue, attribute, buf); +} + +static ssize_t netdev_queue_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, size_t count) +{ + struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr); + struct netdev_queue *queue = to_netdev_queue(kobj); + + if (!attribute->store) + return -EIO; + + return attribute->store(queue, attribute, buf, count); +} + +static const struct sysfs_ops netdev_queue_sysfs_ops = { + .show = netdev_queue_attr_show, + .store = netdev_queue_attr_store, +}; + +static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue) { + struct net_device *dev = queue->dev; + int i; + + for (i = 0; i < dev->num_tx_queues; i++) + if (queue == &dev->_tx[i]) + break; + + BUG_ON(i >= dev->num_tx_queues); + + return i; +} + + +static ssize_t show_xps_map(struct netdev_queue *queue, + struct netdev_queue_attribute *attribute, char *buf) +{ + struct net_device *dev = queue->dev; + struct xps_dev_maps *dev_maps; + cpumask_var_t mask; + unsigned long index; + size_t len = 0; + int i; + + if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + + index = get_netdev_queue_index(queue); + + rcu_read_lock(); + dev_maps = rcu_dereference(dev->xps_maps); + if (dev_maps) { + for_each_possible_cpu(i) { + struct xps_map *map = + rcu_dereference(dev_maps->cpu_map[i]); + if (map) { + int j; + for (j = 0; j < map->len; j++) { + if (map->queues[j] == index) { + cpumask_set_cpu(i, mask); + break; + } + } + } + } + } + rcu_read_unlock(); + + len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask); + if (PAGE_SIZE - len < 3) { + free_cpumask_var(mask); + return -EINVAL; + } + + free_cpumask_var(mask); + len += sprintf(buf + len, "\n"); + return len; +} + +static void xps_map_release(struct rcu_head *rcu) +{ + struct xps_map *map = container_of(rcu, struct xps_map, rcu); + + kfree(map); +} + +static void xps_dev_maps_release(struct rcu_head *rcu) +{ + struct xps_dev_maps *dev_maps = + container_of(rcu, struct xps_dev_maps, rcu); + + kfree(dev_maps); +} + +static DEFINE_MUTEX(xps_map_mutex); + +static ssize_t store_xps_map(struct netdev_queue *queue, + struct netdev_queue_attribute *attribute, + const char *buf, size_t len) +{ + struct net_device *dev = queue->dev; + cpumask_var_t mask; + int err, i, cpu, pos, map_len, alloc_len, need_set; + unsigned long index; + struct xps_map *map, *new_map; + struct xps_dev_maps *dev_maps, *new_dev_maps; + int nonempty = 0; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + + index = get_netdev_queue_index(queue); + + err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits); + if (err) { + free_cpumask_var(mask); + return err; + } + + new_dev_maps = kzalloc(max_t(unsigned, + XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES), GFP_KERNEL); + if (!new_dev_maps) { + free_cpumask_var(mask); + return -ENOMEM; + } + + mutex_lock(&xps_map_mutex); + + dev_maps = dev->xps_maps; + + for_each_possible_cpu(cpu) { + new_map = map = dev_maps ? dev_maps->cpu_map[cpu] : NULL; + + if (map) { + for (pos = 0; pos < map->len; pos++) + if (map->queues[pos] == index) + break; + map_len = map->len; + alloc_len = map->alloc_len; + } else + pos = map_len = alloc_len = 0; + + need_set = cpu_isset(cpu, *mask) && cpu_online(cpu); + + if (need_set && pos >= map_len) { + /* Need to add queue to this CPU's map */ + if (map_len >= alloc_len) { + alloc_len = alloc_len ? + 2 * alloc_len : XPS_MIN_MAP_ALLOC; + new_map = kzalloc(XPS_MAP_SIZE(alloc_len), + GFP_KERNEL); + if (!new_map) + goto error; + new_map->alloc_len = alloc_len; + for (i = 0; i < map_len; i++) + new_map->queues[i] = map->queues[i]; + new_map->len = map_len; + } + new_map->queues[new_map->len++] = index; + } else if (!need_set && pos < map_len) { + /* Need to remove queue from this CPU's map */ + if (map_len > 1) + new_map->queues[pos] = + new_map->queues[--new_map->len]; + else + new_map = NULL; + } + new_dev_maps->cpu_map[cpu] = new_map; + } + + /* Cleanup old maps */ + for_each_possible_cpu(cpu) { + map = dev_maps ? dev_maps->cpu_map[cpu] : NULL; + if (map && new_dev_maps->cpu_map[cpu] != map) + call_rcu(&map->rcu, xps_map_release); + if (new_dev_maps->cpu_map[cpu]) + nonempty = 1; + } + + if (nonempty) + rcu_assign_pointer(dev->xps_maps, new_dev_maps); + else { + kfree(new_dev_maps); + rcu_assign_pointer(dev->xps_maps, NULL); + } + + if (dev_maps) + call_rcu(&dev_maps->rcu, xps_dev_maps_release); + + mutex_unlock(&xps_map_mutex); + + free_cpumask_var(mask); + return len; + +error: + mutex_unlock(&xps_map_mutex); + + if (new_dev_maps) + for_each_possible_cpu(i) + kfree(new_dev_maps->cpu_map[i]); + kfree(new_dev_maps); + free_cpumask_var(mask); + return -ENOMEM; +} + +static struct netdev_queue_attribute xps_cpus_attribute = + __ATTR(xps_cpus, S_IRUGO | S_IWUSR, show_xps_map, store_xps_map); + +static struct attribute *netdev_queue_default_attrs[] = { + &xps_cpus_attribute.attr, + NULL +}; + +static void netdev_queue_release(struct kobject *kobj) +{ + struct netdev_queue *queue = to_netdev_queue(kobj); + struct net_device *dev = queue->dev; + struct xps_dev_maps *dev_maps; + struct xps_map *map; + unsigned long index; + int i, pos, nonempty = 0; + + index = get_netdev_queue_index(queue); + + mutex_lock(&xps_map_mutex); + dev_maps = dev->xps_maps; + + if (dev_maps) { + for_each_possible_cpu(i) { + map = dev_maps->cpu_map[i]; + if (!map) + continue; + + for (pos = 0; pos < map->len; pos++) + if (map->queues[pos] == index) + break; + + if (pos < map->len) { + if (map->len > 1) + map->queues[pos] = + map->queues[--map->len]; + else { + RCU_INIT_POINTER(dev_maps->cpu_map[i], + NULL); + call_rcu(&map->rcu, xps_map_release); + map = NULL; + } + } + if (map) + nonempty = 1; + } + + if (!nonempty) { + RCU_INIT_POINTER(dev->xps_maps, NULL); + call_rcu(&dev_maps->rcu, xps_dev_maps_release); + } + } + + mutex_unlock(&xps_map_mutex); + + memset(kobj, 0, sizeof(*kobj)); + dev_put(queue->dev); +} + +static struct kobj_type netdev_queue_ktype = { + .sysfs_ops = &netdev_queue_sysfs_ops, + .release = netdev_queue_release, + .default_attrs = netdev_queue_default_attrs, +}; + +static int netdev_queue_add_kobject(struct net_device *net, int index) +{ + struct netdev_queue *queue = net->_tx + index; + struct kobject *kobj = &queue->kobj; + int error = 0; + + kobj->kset = net->queues_kset; + error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL, + "tx-%u", index); + if (error) { + kobject_put(kobj); + return error; + } + + kobject_uevent(kobj, KOBJ_ADD); + dev_hold(queue->dev); + + return error; +} + +int +netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num) +{ + int i; + int error = 0; + + for (i = old_num; i < new_num; i++) { + error = netdev_queue_add_kobject(net, i); + if (error) { + new_num = old_num; + break; + } + } + + while (--i >= new_num) + kobject_put(&net->_tx[i].kobj); + + return error; +} + +static int register_queue_kobjects(struct net_device *net) +{ + int error = 0, txq = 0, rxq = 0; + net->queues_kset = kset_create_and_add("queues", NULL, &net->dev.kobj); if (!net->queues_kset) return -ENOMEM; - return net_rx_queue_update_kobjects(net, 0, net->real_num_rx_queues); + + error = net_rx_queue_update_kobjects(net, 0, net->real_num_rx_queues); + if (error) + goto error; + rxq = net->real_num_rx_queues; + + error = netdev_queue_update_kobjects(net, 0, + net->real_num_tx_queues); + if (error) + goto error; + txq = net->real_num_tx_queues; + + return 0; + +error: + netdev_queue_update_kobjects(net, txq, 0); + net_rx_queue_update_kobjects(net, rxq, 0); + return error; } -static void rx_queue_remove_kobjects(struct net_device *net) +static void remove_queue_kobjects(struct net_device *net) { net_rx_queue_update_kobjects(net, net->real_num_rx_queues, 0); + netdev_queue_update_kobjects(net, net->real_num_tx_queues, 0); kset_unregister(net->queues_kset); } #endif /* CONFIG_RPS */ @@ -886,7 +1245,7 @@ void netdev_unregister_kobject(struct net_device * net) kobject_get(&dev->kobj); #ifdef CONFIG_RPS - rx_queue_remove_kobjects(net); + remove_queue_kobjects(net); #endif device_del(dev); @@ -927,7 +1286,7 @@ int netdev_register_kobject(struct net_device *net) return error; #ifdef CONFIG_RPS - error = rx_queue_register_kobjects(net); + error = register_queue_kobjects(net); if (error) { device_del(dev); return error; diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h index 778e1571548..25ec2ee57df 100644 --- a/net/core/net-sysfs.h +++ b/net/core/net-sysfs.h @@ -6,6 +6,9 @@ int netdev_register_kobject(struct net_device *); void netdev_unregister_kobject(struct net_device *); #ifdef CONFIG_RPS int net_rx_queue_update_kobjects(struct net_device *, int old_num, int new_num); +int netdev_queue_update_kobjects(struct net_device *net, + int old_num, int new_num); + #endif #endif -- cgit v1.2.3-70-g09d2 From ccb14354017272ddac002e859a2711610b6af174 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Wed, 24 Nov 2010 16:18:36 -0500 Subject: Revert "nl80211/mac80211: Report signal average" This reverts commit 86107fd170bc379869250eb7e1bd393a3a70e8ae. This patch inadvertantly changed the userland ABI. Signed-off-by: John W. Linville --- include/linux/nl80211.h | 2 -- include/net/cfg80211.h | 4 ---- net/mac80211/Kconfig | 1 - net/mac80211/cfg.c | 3 +-- net/mac80211/rx.c | 1 - net/mac80211/sta_info.c | 2 -- net/mac80211/sta_info.h | 3 --- net/wireless/nl80211.c | 3 --- 8 files changed, 1 insertion(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 1ce3775e9e2..037b4e49889 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1161,7 +1161,6 @@ enum nl80211_rate_info { * @__NL80211_STA_INFO_AFTER_LAST: internal * @NL80211_STA_INFO_MAX: highest possible station info attribute * @NL80211_STA_INFO_SIGNAL: signal strength of last received PPDU (u8, dBm) - * @NL80211_STA_INFO_SIGNAL_AVG: signal strength average (u8, dBm) * @NL80211_STA_INFO_TX_BITRATE: current unicast tx rate, nested attribute * containing info as possible, see &enum nl80211_sta_info_txrate. * @NL80211_STA_INFO_RX_PACKETS: total received packet (u32, from this station) @@ -1179,7 +1178,6 @@ enum nl80211_sta_info { NL80211_STA_INFO_PLID, NL80211_STA_INFO_PLINK_STATE, NL80211_STA_INFO_SIGNAL, - NL80211_STA_INFO_SIGNAL_AVG, NL80211_STA_INFO_TX_BITRATE, NL80211_STA_INFO_RX_PACKETS, NL80211_STA_INFO_TX_PACKETS, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 69e2364889f..8fd9eebd0cc 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -424,7 +424,6 @@ struct station_parameters { * @STATION_INFO_TX_RETRIES: @tx_retries filled * @STATION_INFO_TX_FAILED: @tx_failed filled * @STATION_INFO_RX_DROP_MISC: @rx_dropped_misc filled - * @STATION_INFO_SIGNAL_AVG: @signal_avg filled */ enum station_info_flags { STATION_INFO_INACTIVE_TIME = 1<<0, @@ -440,7 +439,6 @@ enum station_info_flags { STATION_INFO_TX_RETRIES = 1<<10, STATION_INFO_TX_FAILED = 1<<11, STATION_INFO_RX_DROP_MISC = 1<<12, - STATION_INFO_SIGNAL_AVG = 1<<13, }; /** @@ -487,7 +485,6 @@ struct rate_info { * @plid: mesh peer link id * @plink_state: mesh peer link state * @signal: signal strength of last received packet in dBm - * @signal_avg: signal strength average in dBm * @txrate: current unicast bitrate to this station * @rx_packets: packets received from this station * @tx_packets: packets transmitted to this station @@ -508,7 +505,6 @@ struct station_info { u16 plid; u8 plink_state; s8 signal; - s8 signal_avg; struct rate_info txrate; u32 rx_packets; u32 tx_packets; diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 798d9b9462e..4d6f8653ec8 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -6,7 +6,6 @@ config MAC80211 select CRYPTO_ARC4 select CRYPTO_AES select CRC32 - select AVERAGE ---help--- This option enables the hardware independent IEEE 802.11 networking stack. diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 92c9cf6a7d1..0c544074479 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -343,9 +343,8 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) || (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) { - sinfo->filled |= STATION_INFO_SIGNAL | STATION_INFO_SIGNAL_AVG; + sinfo->filled |= STATION_INFO_SIGNAL; sinfo->signal = (s8)sta->last_signal; - sinfo->signal_avg = (s8) -ewma_read(&sta->avg_signal); } sinfo->txrate.flags = 0; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 9dd60a74181..d2fcd22ab06 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1156,7 +1156,6 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) sta->rx_fragments++; sta->rx_bytes += rx->skb->len; sta->last_signal = status->signal; - ewma_add(&sta->avg_signal, -status->signal); /* * Change STA power saving mode only at the end of a frame diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index f43fca8907f..eff58571fd7 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -244,8 +244,6 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, sta->local = local; sta->sdata = sdata; - ewma_init(&sta->avg_signal, 1000, 8); - if (sta_prepare_rate_control(local, sta, gfp)) { kfree(sta); return NULL; diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 84062e2c782..9265acadef3 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -13,7 +13,6 @@ #include #include #include -#include #include "key.h" /** @@ -225,7 +224,6 @@ enum plink_state { * @rx_fragments: number of received MPDUs * @rx_dropped: number of dropped MPDUs from this STA * @last_signal: signal of last received frame from this STA - * @avg_signal: moving average of signal of received frames from this STA * @last_seq_ctrl: last received seq/frag number from this STA (per RX queue) * @tx_filtered_count: number of frames the hardware filtered for this STA * @tx_retry_failed: number of frames that failed retry @@ -293,7 +291,6 @@ struct sta_info { unsigned long rx_fragments; unsigned long rx_dropped; int last_signal; - struct ewma avg_signal; __le16 last_seq_ctrl[NUM_RX_DATA_QUEUES]; /* Updated from TX status path only, no locking requirements */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d06a40d1700..60555384222 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1872,9 +1872,6 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, if (sinfo->filled & STATION_INFO_SIGNAL) NLA_PUT_U8(msg, NL80211_STA_INFO_SIGNAL, sinfo->signal); - if (sinfo->filled & STATION_INFO_SIGNAL_AVG) - NLA_PUT_U8(msg, NL80211_STA_INFO_SIGNAL_AVG, - sinfo->signal_avg); if (sinfo->filled & STATION_INFO_TX_BITRATE) { txrate = nla_nest_start(msg, NL80211_STA_INFO_TX_BITRATE); if (!txrate) -- cgit v1.2.3-70-g09d2 From c063dbf52b998b852122dff07a8b8dd430b38437 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 24 Nov 2010 08:10:05 +0100 Subject: cfg80211: allow using CQM event to notify packet loss This adds the ability for drivers to use CQM events to notify about packet loss for specific stations (which could be the AP for the managed mode case). Since the threshold might be determined by the driver (it isn't passed in right now) it will be passed out of the driver to userspace in the event. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 3 +++ include/net/cfg80211.h | 12 ++++++++++++ net/wireless/mlme.c | 12 ++++++++++++ net/wireless/nl80211.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ net/wireless/nl80211.h | 4 ++++ 5 files changed, 76 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 037b4e49889..d706bf3badc 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1819,6 +1819,8 @@ enum nl80211_ps_state { * the minimum amount the RSSI level must change after an event before a * new event may be issued (to reduce effects of RSSI oscillation). * @NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT: RSSI threshold event + * @NL80211_ATTR_CQM_PKT_LOSS_EVENT: a u32 value indicating that this many + * consecutive packets were not acknowledged by the peer * @__NL80211_ATTR_CQM_AFTER_LAST: internal * @NL80211_ATTR_CQM_MAX: highest key attribute */ @@ -1827,6 +1829,7 @@ enum nl80211_attr_cqm { NL80211_ATTR_CQM_RSSI_THOLD, NL80211_ATTR_CQM_RSSI_HYST, NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT, + NL80211_ATTR_CQM_PKT_LOSS_EVENT, /* keep last */ __NL80211_ATTR_CQM_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index dd4c43f512e..0663945cfa4 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2601,6 +2601,18 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev, enum nl80211_cqm_rssi_threshold_event rssi_event, gfp_t gfp); +/** + * cfg80211_cqm_pktloss_notify - notify userspace about packetloss to peer + * @dev: network device + * @peer: peer's MAC address + * @num_packets: how many packets were lost -- should be a fixed threshold + * but probably no less than maybe 50, or maybe a throughput dependent + * threshold (to account for temporary interference) + * @gfp: context flags + */ +void cfg80211_cqm_pktloss_notify(struct net_device *dev, + const u8 *peer, u32 num_packets, gfp_t gfp); + /* Logging, debugging and troubleshooting/diagnostic helpers. */ /* wiphy_printk helpers, similar to dev_printk */ diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 26838d903b9..6980a0c315b 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -1028,3 +1028,15 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev, nl80211_send_cqm_rssi_notify(rdev, dev, rssi_event, gfp); } EXPORT_SYMBOL(cfg80211_cqm_rssi_notify); + +void cfg80211_cqm_pktloss_notify(struct net_device *dev, + const u8 *peer, u32 num_packets, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + /* Indicate roaming trigger event to user space */ + nl80211_send_cqm_pktloss_notify(rdev, dev, peer, num_packets, gfp); +} +EXPORT_SYMBOL(cfg80211_cqm_pktloss_notify); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 8734efa663d..67ff7e92cb9 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5715,6 +5715,51 @@ nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev, nlmsg_free(msg); } +void +nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *peer, + u32 num_packets, gfp_t gfp) +{ + struct sk_buff *msg; + struct nlattr *pinfoattr; + void *hdr; + + msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_NOTIFY_CQM); + if (!hdr) { + nlmsg_free(msg); + return; + } + + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); + NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, peer); + + pinfoattr = nla_nest_start(msg, NL80211_ATTR_CQM); + if (!pinfoattr) + goto nla_put_failure; + + NLA_PUT_U32(msg, NL80211_ATTR_CQM_PKT_LOSS_EVENT, num_packets); + + nla_nest_end(msg, pinfoattr); + + if (genlmsg_end(msg, hdr) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); +} + static int nl80211_netlink_notify(struct notifier_block * nb, unsigned long state, void *_notify) diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 30d2f939150..16c2f719076 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -87,5 +87,9 @@ nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev, struct net_device *netdev, enum nl80211_cqm_rssi_threshold_event rssi_event, gfp_t gfp); +void +nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *peer, + u32 num_packets, gfp_t gfp); #endif /* __NET_WIRELESS_NL80211_H */ -- cgit v1.2.3-70-g09d2 From a782d688e9c6f9ca9a7a9a28e8e2876969ddef53 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Wed, 24 Nov 2010 10:05:22 +0000 Subject: mmc: sh_mmcif: add DMA support The MMCIF controller on sh-mobile platforms can use the DMA controller for data transfers. Interface to the SH dmaengine driver to enable DMA. We also have to lower the maximum number of segments to match with the number od DMA descriptors on SuperH, this doesn't significantly affect driver's PIO performance. Signed-off-by: Guennadi Liakhovetski Signed-off-by: Paul Mundt --- drivers/mmc/host/Kconfig | 6 ++ drivers/mmc/host/sh_mmcif.c | 246 ++++++++++++++++++++++++++++++++++++++++++- include/linux/mmc/sh_mmcif.h | 15 ++- 3 files changed, 258 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index d618e867399..859e352d0b5 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -466,6 +466,12 @@ config MMC_SH_MMCIF This driver supports MMCIF in sh7724/sh7757/sh7372. +config SH_MMCIF_DMA + bool "Use DMA for MMCIF" + depends on MMC_SH_MMCIF + help + Use SH dma-engine driver for data transfer + config MMC_JZ4740 tristate "JZ4740 SD/Multimedia Card Interface support" depends on MACH_JZ4740 diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c index b2f261cdaec..d09a2b38eee 100644 --- a/drivers/mmc/host/sh_mmcif.c +++ b/drivers/mmc/host/sh_mmcif.c @@ -20,12 +20,14 @@ #include #include #include +#include #include #include #include #include #include #include +#include #include #define DRIVER_NAME "sh_mmcif" @@ -162,8 +164,13 @@ struct sh_mmcif_host { long timeout; void __iomem *addr; struct completion intr_wait; -}; + /* DMA support */ + struct dma_chan *chan_rx; + struct dma_chan *chan_tx; + struct completion dma_complete; + unsigned int dma_sglen; +}; static inline void sh_mmcif_bitset(struct sh_mmcif_host *host, unsigned int reg, u32 val) @@ -177,6 +184,208 @@ static inline void sh_mmcif_bitclr(struct sh_mmcif_host *host, writel(~val & readl(host->addr + reg), host->addr + reg); } +#ifdef CONFIG_SH_MMCIF_DMA +static void mmcif_dma_complete(void *arg) +{ + struct sh_mmcif_host *host = arg; + dev_dbg(&host->pd->dev, "Command completed\n"); + + if (WARN(!host->data, "%s: NULL data in DMA completion!\n", + dev_name(&host->pd->dev))) + return; + + if (host->data->flags & MMC_DATA_READ) + dma_unmap_sg(&host->pd->dev, host->data->sg, host->dma_sglen, + DMA_FROM_DEVICE); + else + dma_unmap_sg(&host->pd->dev, host->data->sg, host->dma_sglen, + DMA_TO_DEVICE); + + complete(&host->dma_complete); +} + +static void sh_mmcif_start_dma_rx(struct sh_mmcif_host *host) +{ + struct scatterlist *sg = host->data->sg; + struct dma_async_tx_descriptor *desc = NULL; + struct dma_chan *chan = host->chan_rx; + dma_cookie_t cookie = -EINVAL; + int ret; + + ret = dma_map_sg(&host->pd->dev, sg, host->data->sg_len, DMA_FROM_DEVICE); + if (ret > 0) { + host->dma_sglen = ret; + desc = chan->device->device_prep_slave_sg(chan, sg, ret, + DMA_FROM_DEVICE, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + } + + if (desc) { + desc->callback = mmcif_dma_complete; + desc->callback_param = host; + cookie = desc->tx_submit(desc); + if (cookie < 0) { + desc = NULL; + ret = cookie; + } else { + sh_mmcif_bitset(host, MMCIF_CE_BUF_ACC, BUF_ACC_DMAREN); + chan->device->device_issue_pending(chan); + } + } + dev_dbg(&host->pd->dev, "%s(): mapped %d -> %d, cookie %d\n", + __func__, host->data->sg_len, ret, cookie); + + if (!desc) { + /* DMA failed, fall back to PIO */ + if (ret >= 0) + ret = -EIO; + host->chan_rx = NULL; + host->dma_sglen = 0; + dma_release_channel(chan); + /* Free the Tx channel too */ + chan = host->chan_tx; + if (chan) { + host->chan_tx = NULL; + dma_release_channel(chan); + } + dev_warn(&host->pd->dev, + "DMA failed: %d, falling back to PIO\n", ret); + sh_mmcif_bitclr(host, MMCIF_CE_BUF_ACC, BUF_ACC_DMAREN | BUF_ACC_DMAWEN); + } + + dev_dbg(&host->pd->dev, "%s(): desc %p, cookie %d, sg[%d]\n", __func__, + desc, cookie, host->data->sg_len); +} + +static void sh_mmcif_start_dma_tx(struct sh_mmcif_host *host) +{ + struct scatterlist *sg = host->data->sg; + struct dma_async_tx_descriptor *desc = NULL; + struct dma_chan *chan = host->chan_tx; + dma_cookie_t cookie = -EINVAL; + int ret; + + ret = dma_map_sg(&host->pd->dev, sg, host->data->sg_len, DMA_TO_DEVICE); + if (ret > 0) { + host->dma_sglen = ret; + desc = chan->device->device_prep_slave_sg(chan, sg, ret, + DMA_TO_DEVICE, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + } + + if (desc) { + desc->callback = mmcif_dma_complete; + desc->callback_param = host; + cookie = desc->tx_submit(desc); + if (cookie < 0) { + desc = NULL; + ret = cookie; + } else { + sh_mmcif_bitset(host, MMCIF_CE_BUF_ACC, BUF_ACC_DMAWEN); + chan->device->device_issue_pending(chan); + } + } + dev_dbg(&host->pd->dev, "%s(): mapped %d -> %d, cookie %d\n", + __func__, host->data->sg_len, ret, cookie); + + if (!desc) { + /* DMA failed, fall back to PIO */ + if (ret >= 0) + ret = -EIO; + host->chan_tx = NULL; + host->dma_sglen = 0; + dma_release_channel(chan); + /* Free the Rx channel too */ + chan = host->chan_rx; + if (chan) { + host->chan_rx = NULL; + dma_release_channel(chan); + } + dev_warn(&host->pd->dev, + "DMA failed: %d, falling back to PIO\n", ret); + sh_mmcif_bitclr(host, MMCIF_CE_BUF_ACC, BUF_ACC_DMAREN | BUF_ACC_DMAWEN); + } + + dev_dbg(&host->pd->dev, "%s(): desc %p, cookie %d\n", __func__, + desc, cookie); +} + +static bool sh_mmcif_filter(struct dma_chan *chan, void *arg) +{ + dev_dbg(chan->device->dev, "%s: slave data %p\n", __func__, arg); + chan->private = arg; + return true; +} + +static void sh_mmcif_request_dma(struct sh_mmcif_host *host, + struct sh_mmcif_plat_data *pdata) +{ + host->dma_sglen = 0; + + /* We can only either use DMA for both Tx and Rx or not use it at all */ + if (pdata->dma) { + dma_cap_mask_t mask; + + dma_cap_zero(mask); + dma_cap_set(DMA_SLAVE, mask); + + host->chan_tx = dma_request_channel(mask, sh_mmcif_filter, + &pdata->dma->chan_priv_tx); + dev_dbg(&host->pd->dev, "%s: TX: got channel %p\n", __func__, + host->chan_tx); + + if (!host->chan_tx) + return; + + host->chan_rx = dma_request_channel(mask, sh_mmcif_filter, + &pdata->dma->chan_priv_rx); + dev_dbg(&host->pd->dev, "%s: RX: got channel %p\n", __func__, + host->chan_rx); + + if (!host->chan_rx) { + dma_release_channel(host->chan_tx); + host->chan_tx = NULL; + return; + } + + init_completion(&host->dma_complete); + } +} + +static void sh_mmcif_release_dma(struct sh_mmcif_host *host) +{ + sh_mmcif_bitclr(host, MMCIF_CE_BUF_ACC, BUF_ACC_DMAREN | BUF_ACC_DMAWEN); + /* Descriptors are freed automatically */ + if (host->chan_tx) { + struct dma_chan *chan = host->chan_tx; + host->chan_tx = NULL; + dma_release_channel(chan); + } + if (host->chan_rx) { + struct dma_chan *chan = host->chan_rx; + host->chan_rx = NULL; + dma_release_channel(chan); + } + + host->dma_sglen = 0; +} +#else +static void sh_mmcif_start_dma_tx(struct sh_mmcif_host *host) +{ +} + +static void sh_mmcif_start_dma_rx(struct sh_mmcif_host *host) +{ +} + +static void sh_mmcif_request_dma(struct sh_mmcif_host *host, + struct sh_mmcif_plat_data *pdata) +{ + /* host->chan_tx, host->chan_tx and host->dma_sglen are all zero */ +} + +static void sh_mmcif_release_dma(struct sh_mmcif_host *host) +{ +} +#endif static void sh_mmcif_clock_control(struct sh_mmcif_host *host, unsigned int clk) { @@ -564,7 +773,20 @@ static void sh_mmcif_start_cmd(struct sh_mmcif_host *host, } sh_mmcif_get_response(host, cmd); if (host->data) { - ret = sh_mmcif_data_trans(host, mrq, cmd->opcode); + if (!host->dma_sglen) { + ret = sh_mmcif_data_trans(host, mrq, cmd->opcode); + } else { + long time = + wait_for_completion_interruptible_timeout(&host->dma_complete, + host->timeout); + if (!time) + ret = -ETIMEDOUT; + else if (time < 0) + ret = time; + sh_mmcif_bitclr(host, MMCIF_CE_BUF_ACC, + BUF_ACC_DMAREN | BUF_ACC_DMAWEN); + host->dma_sglen = 0; + } if (ret < 0) mrq->data->bytes_xfered = 0; else @@ -622,6 +844,15 @@ static void sh_mmcif_request(struct mmc_host *mmc, struct mmc_request *mrq) break; } host->data = mrq->data; + if (mrq->data) { + if (mrq->data->flags & MMC_DATA_READ) { + if (host->chan_rx) + sh_mmcif_start_dma_rx(host); + } else { + if (host->chan_tx) + sh_mmcif_start_dma_tx(host); + } + } sh_mmcif_start_cmd(host, mrq, mrq->cmd); host->data = NULL; @@ -806,14 +1037,18 @@ static int __devinit sh_mmcif_probe(struct platform_device *pdev) mmc->caps = MMC_CAP_MMC_HIGHSPEED; if (pd->caps) mmc->caps |= pd->caps; - mmc->max_segs = 128; + mmc->max_segs = 32; mmc->max_blk_size = 512; - mmc->max_blk_count = 65535; - mmc->max_req_size = mmc->max_blk_size * mmc->max_blk_count; + mmc->max_req_size = PAGE_CACHE_SIZE * mmc->max_segs; + mmc->max_blk_count = mmc->max_req_size / mmc->max_blk_size; mmc->max_seg_size = mmc->max_req_size; sh_mmcif_sync_reset(host); platform_set_drvdata(pdev, host); + + /* See if we also get DMA */ + sh_mmcif_request_dma(host, pd); + mmc_add_host(mmc); ret = request_irq(irq[0], sh_mmcif_intr, 0, "sh_mmc:error", host); @@ -852,6 +1087,7 @@ static int __devexit sh_mmcif_remove(struct platform_device *pdev) int irq[2]; mmc_remove_host(host->mmc); + sh_mmcif_release_dma(host); if (host->addr) iounmap(host->addr); diff --git a/include/linux/mmc/sh_mmcif.h b/include/linux/mmc/sh_mmcif.h index a6bfa529649..f216a8879b5 100644 --- a/include/linux/mmc/sh_mmcif.h +++ b/include/linux/mmc/sh_mmcif.h @@ -14,8 +14,9 @@ #ifndef __SH_MMCIF_H__ #define __SH_MMCIF_H__ -#include #include +#include +#include /* * MMCIF : CE_CLK_CTRL [19:16] @@ -31,13 +32,19 @@ * 1111 : Peripheral clock (sup_pclk set '1') */ +struct sh_mmcif_dma { + struct sh_dmae_slave chan_priv_tx; + struct sh_dmae_slave chan_priv_rx; +}; + struct sh_mmcif_plat_data { void (*set_pwr)(struct platform_device *pdev, int state); void (*down_pwr)(struct platform_device *pdev); int (*get_cd)(struct platform_device *pdef); - u8 sup_pclk; /* 1 :SH7757, 0: SH7724/SH7372 */ - unsigned long caps; - u32 ocr; + struct sh_mmcif_dma *dma; + u8 sup_pclk; /* 1 :SH7757, 0: SH7724/SH7372 */ + unsigned long caps; + u32 ocr; }; #define MMCIF_CE_CMD_SET 0x00000000 -- cgit v1.2.3-70-g09d2 From 6d803ba736abb5e122dede70a4720e4843dd6df4 Mon Sep 17 00:00:00 2001 From: Jean-Christop PLAGNIOL-VILLARD Date: Wed, 17 Nov 2010 10:04:33 +0100 Subject: ARM: 6483/1: arm & sh: factorised duplicated clkdev.c factorise some generic infrastructure to assist looking up struct clks for the ARM & SH architecture. as the code is identical at 99% put the arch specific code for allocation as example in asm/clkdev.h Signed-off-by: Jean-Christophe PLAGNIOL-VILLARD Acked-by: Paul Mundt Signed-off-by: Russell King --- arch/arm/Kconfig | 42 +++--- arch/arm/common/Kconfig | 4 - arch/arm/common/Makefile | 1 - arch/arm/common/clkdev.c | 179 -------------------------- arch/arm/include/asm/clkdev.h | 22 +--- arch/arm/mach-bcmring/clock.c | 3 +- arch/arm/mach-bcmring/core.c | 2 +- arch/arm/mach-davinci/clock.h | 2 +- arch/arm/mach-ep93xx/clock.c | 2 +- arch/arm/mach-imx/clock-imx1.c | 3 +- arch/arm/mach-imx/clock-imx21.c | 2 +- arch/arm/mach-imx/clock-imx27.c | 2 +- arch/arm/mach-integrator/core.c | 3 +- arch/arm/mach-integrator/impd1.c | 3 +- arch/arm/mach-integrator/integrator_cp.c | 3 +- arch/arm/mach-lpc32xx/clock.c | 3 +- arch/arm/mach-mmp/clock.h | 2 +- arch/arm/mach-mx25/clock.c | 3 +- arch/arm/mach-mx3/clock-imx31.c | 2 +- arch/arm/mach-mx3/clock-imx35.c | 3 +- arch/arm/mach-mx5/clock-mx51.c | 2 +- arch/arm/mach-mxc91231/clock.c | 2 +- arch/arm/mach-nomadik/clock.c | 2 +- arch/arm/mach-nuc93x/clock.h | 2 +- arch/arm/mach-omap1/clock.c | 2 +- arch/arm/mach-omap2/dpll3xxx.c | 2 +- arch/arm/mach-pnx4008/clock.c | 3 +- arch/arm/mach-pxa/clock.c | 2 +- arch/arm/mach-pxa/clock.h | 2 +- arch/arm/mach-realview/core.c | 3 +- arch/arm/mach-shmobile/Kconfig | 6 +- arch/arm/mach-shmobile/clock-sh7367.c | 2 +- arch/arm/mach-shmobile/clock-sh7372.c | 2 +- arch/arm/mach-shmobile/clock-sh7377.c | 2 +- arch/arm/mach-tcc8k/clock.c | 3 +- arch/arm/mach-tegra/clock.c | 2 +- arch/arm/mach-tegra/clock.h | 2 +- arch/arm/mach-tegra/tegra2_clocks.c | 3 +- arch/arm/mach-u300/clock.c | 2 +- arch/arm/mach-ux500/clock.c | 3 +- arch/arm/mach-versatile/core.c | 3 +- arch/arm/mach-vexpress/ct-ca9x4.c | 3 +- arch/arm/mach-vexpress/v2m.c | 3 +- arch/arm/mach-w90x900/clock.h | 2 +- arch/arm/plat-omap/Kconfig | 4 +- arch/arm/plat-omap/include/plat/clkdev_omap.h | 2 +- arch/arm/plat-spear/include/plat/clock.h | 2 +- arch/arm/plat-stmp3xxx/clock.c | 2 +- arch/sh/Kconfig | 2 +- arch/sh/boards/mach-highlander/setup.c | 2 +- arch/sh/include/asm/clkdev.h | 38 +++--- arch/sh/kernel/Makefile | 2 +- arch/sh/kernel/clkdev.c | 171 ------------------------ arch/sh/kernel/cpu/clock-cpg.c | 2 +- arch/sh/kernel/cpu/clock.c | 16 --- arch/sh/kernel/cpu/sh4/clock-sh4-202.c | 2 +- arch/sh/kernel/cpu/sh4a/clock-sh7343.c | 2 +- arch/sh/kernel/cpu/sh4a/clock-sh7366.c | 2 +- arch/sh/kernel/cpu/sh4a/clock-sh7722.c | 2 +- arch/sh/kernel/cpu/sh4a/clock-sh7723.c | 2 +- arch/sh/kernel/cpu/sh4a/clock-sh7724.c | 2 +- arch/sh/kernel/cpu/sh4a/clock-sh7757.c | 2 +- arch/sh/kernel/cpu/sh4a/clock-sh7763.c | 2 +- arch/sh/kernel/cpu/sh4a/clock-sh7780.c | 2 +- arch/sh/kernel/cpu/sh4a/clock-sh7785.c | 2 +- arch/sh/kernel/cpu/sh4a/clock-sh7786.c | 2 +- arch/sh/kernel/cpu/sh4a/clock-shx3.c | 2 +- drivers/Kconfig | 2 + drivers/Makefile | 2 + drivers/clk/Kconfig | 4 + drivers/clk/Makefile | 2 + drivers/clk/clkdev.c | 176 +++++++++++++++++++++++++ include/linux/clkdev.h | 36 ++++++ 73 files changed, 328 insertions(+), 507 deletions(-) delete mode 100644 arch/arm/common/clkdev.c delete mode 100644 arch/sh/kernel/clkdev.c create mode 100644 drivers/clk/Kconfig create mode 100644 drivers/clk/Makefile create mode 100644 drivers/clk/clkdev.c create mode 100644 include/linux/clkdev.h (limited to 'include/linux') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index a19a5266d5f..0e51342b3c0 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -221,7 +221,7 @@ config ARCH_INTEGRATOR bool "ARM Ltd. Integrator family" select ARM_AMBA select ARCH_HAS_CPUFREQ - select COMMON_CLKDEV + select CLKDEV_LOOKUP select ICST select GENERIC_CLOCKEVENTS select PLAT_VERSATILE @@ -231,7 +231,7 @@ config ARCH_INTEGRATOR config ARCH_REALVIEW bool "ARM Ltd. RealView family" select ARM_AMBA - select COMMON_CLKDEV + select CLKDEV_LOOKUP select ICST select GENERIC_CLOCKEVENTS select ARCH_WANT_OPTIONAL_GPIOLIB @@ -245,7 +245,7 @@ config ARCH_VERSATILE bool "ARM Ltd. Versatile family" select ARM_AMBA select ARM_VIC - select COMMON_CLKDEV + select CLKDEV_LOOKUP select ICST select GENERIC_CLOCKEVENTS select ARCH_WANT_OPTIONAL_GPIOLIB @@ -259,7 +259,7 @@ config ARCH_VEXPRESS select ARCH_WANT_OPTIONAL_GPIOLIB select ARM_AMBA select ARM_TIMER_SP804 - select COMMON_CLKDEV + select CLKDEV_LOOKUP select GENERIC_CLOCKEVENTS select HAVE_CLK select ICST @@ -280,7 +280,7 @@ config ARCH_BCMRING depends on MMU select CPU_V6 select ARM_AMBA - select COMMON_CLKDEV + select CLKDEV_LOOKUP select GENERIC_CLOCKEVENTS select ARCH_WANT_OPTIONAL_GPIOLIB help @@ -327,7 +327,7 @@ config ARCH_EP93XX select CPU_ARM920T select ARM_AMBA select ARM_VIC - select COMMON_CLKDEV + select CLKDEV_LOOKUP select ARCH_REQUIRE_GPIOLIB select ARCH_HAS_HOLES_MEMORYMODEL select ARCH_USES_GETTIMEOFFSET @@ -347,14 +347,14 @@ config ARCH_MXC bool "Freescale MXC/iMX-based" select GENERIC_CLOCKEVENTS select ARCH_REQUIRE_GPIOLIB - select COMMON_CLKDEV + select CLKDEV_LOOKUP help Support for Freescale MXC/iMX-based family of processors config ARCH_STMP3XXX bool "Freescale STMP3xxx" select CPU_ARM926T - select COMMON_CLKDEV + select CLKDEV_LOOKUP select ARCH_REQUIRE_GPIOLIB select GENERIC_CLOCKEVENTS select USB_ARCH_HAS_EHCI @@ -472,7 +472,7 @@ config ARCH_LPC32XX select HAVE_IDE select ARM_AMBA select USB_ARCH_HAS_OHCI - select COMMON_CLKDEV + select CLKDEV_LOOKUP select GENERIC_TIME select GENERIC_CLOCKEVENTS help @@ -506,7 +506,7 @@ config ARCH_MMP bool "Marvell PXA168/910/MMP2" depends on MMU select ARCH_REQUIRE_GPIOLIB - select COMMON_CLKDEV + select CLKDEV_LOOKUP select GENERIC_CLOCKEVENTS select TICK_ONESHOT select PLAT_PXA @@ -539,7 +539,7 @@ config ARCH_W90X900 bool "Nuvoton W90X900 CPU" select CPU_ARM926T select ARCH_REQUIRE_GPIOLIB - select COMMON_CLKDEV + select CLKDEV_LOOKUP select GENERIC_CLOCKEVENTS help Support for Nuvoton (Winbond logic dept.) ARM9 processor, @@ -553,7 +553,7 @@ config ARCH_W90X900 config ARCH_NUC93X bool "Nuvoton NUC93X CPU" select CPU_ARM926T - select COMMON_CLKDEV + select CLKDEV_LOOKUP help Support for Nuvoton (Winbond logic dept.) NUC93X MCU,The NUC93X is a low-power and high performance MPEG-4/JPEG multimedia controller chip. @@ -564,7 +564,7 @@ config ARCH_TEGRA select GENERIC_CLOCKEVENTS select GENERIC_GPIO select HAVE_CLK - select COMMON_CLKDEV + select CLKDEV_LOOKUP select ARCH_HAS_BARRIERS if CACHE_L2X0 select ARCH_HAS_CPUFREQ help @@ -574,7 +574,7 @@ config ARCH_TEGRA config ARCH_PNX4008 bool "Philips Nexperia PNX4008 Mobile" select CPU_ARM926T - select COMMON_CLKDEV + select CLKDEV_LOOKUP select ARCH_USES_GETTIMEOFFSET help This enables support for Philips PNX4008 mobile platform. @@ -584,7 +584,7 @@ config ARCH_PXA depends on MMU select ARCH_MTD_XIP select ARCH_HAS_CPUFREQ - select COMMON_CLKDEV + select CLKDEV_LOOKUP select ARCH_REQUIRE_GPIOLIB select GENERIC_CLOCKEVENTS select TICK_ONESHOT @@ -761,7 +761,7 @@ config ARCH_TCC_926 bool "Telechips TCC ARM926-based systems" select CPU_ARM926T select HAVE_CLK - select COMMON_CLKDEV + select CLKDEV_LOOKUP select GENERIC_CLOCKEVENTS help Support for Telechips TCC ARM926-based systems. @@ -785,7 +785,7 @@ config ARCH_U300 select ARM_AMBA select ARM_VIC select GENERIC_CLOCKEVENTS - select COMMON_CLKDEV + select CLKDEV_LOOKUP select GENERIC_GPIO help Support for ST-Ericsson U300 series mobile platforms. @@ -795,7 +795,7 @@ config ARCH_U8500 select CPU_V7 select ARM_AMBA select GENERIC_CLOCKEVENTS - select COMMON_CLKDEV + select CLKDEV_LOOKUP select ARCH_REQUIRE_GPIOLIB help Support for ST-Ericsson's Ux500 architecture @@ -805,7 +805,7 @@ config ARCH_NOMADIK select ARM_AMBA select ARM_VIC select CPU_ARM926T - select COMMON_CLKDEV + select CLKDEV_LOOKUP select GENERIC_CLOCKEVENTS select ARCH_REQUIRE_GPIOLIB help @@ -817,7 +817,7 @@ config ARCH_DAVINCI select ARCH_REQUIRE_GPIOLIB select ZONE_DMA select HAVE_IDE - select COMMON_CLKDEV + select CLKDEV_LOOKUP select GENERIC_ALLOCATOR select ARCH_HAS_HOLES_MEMORYMODEL help @@ -837,7 +837,7 @@ config PLAT_SPEAR bool "ST SPEAr" select ARM_AMBA select ARCH_REQUIRE_GPIOLIB - select COMMON_CLKDEV + select CLKDEV_LOOKUP select GENERIC_CLOCKEVENTS select HAVE_CLK help diff --git a/arch/arm/common/Kconfig b/arch/arm/common/Kconfig index 0a34c818692..778655f0257 100644 --- a/arch/arm/common/Kconfig +++ b/arch/arm/common/Kconfig @@ -37,7 +37,3 @@ config SHARP_PARAM config SHARP_SCOOP bool - -config COMMON_CLKDEV - bool - select HAVE_CLK diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile index e6e8664a941..799e140274f 100644 --- a/arch/arm/common/Makefile +++ b/arch/arm/common/Makefile @@ -16,4 +16,3 @@ obj-$(CONFIG_SHARP_SCOOP) += scoop.o obj-$(CONFIG_ARCH_IXP2000) += uengine.o obj-$(CONFIG_ARCH_IXP23XX) += uengine.o obj-$(CONFIG_PCI_HOST_ITE8152) += it8152.o -obj-$(CONFIG_COMMON_CLKDEV) += clkdev.o diff --git a/arch/arm/common/clkdev.c b/arch/arm/common/clkdev.c deleted file mode 100644 index e2b2bb66e09..00000000000 --- a/arch/arm/common/clkdev.c +++ /dev/null @@ -1,179 +0,0 @@ -/* - * arch/arm/common/clkdev.c - * - * Copyright (C) 2008 Russell King. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Helper for the clk API to assist looking up a struct clk. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -static LIST_HEAD(clocks); -static DEFINE_MUTEX(clocks_mutex); - -/* - * Find the correct struct clk for the device and connection ID. - * We do slightly fuzzy matching here: - * An entry with a NULL ID is assumed to be a wildcard. - * If an entry has a device ID, it must match - * If an entry has a connection ID, it must match - * Then we take the most specific entry - with the following - * order of precedence: dev+con > dev only > con only. - */ -static struct clk *clk_find(const char *dev_id, const char *con_id) -{ - struct clk_lookup *p; - struct clk *clk = NULL; - int match, best = 0; - - list_for_each_entry(p, &clocks, node) { - match = 0; - if (p->dev_id) { - if (!dev_id || strcmp(p->dev_id, dev_id)) - continue; - match += 2; - } - if (p->con_id) { - if (!con_id || strcmp(p->con_id, con_id)) - continue; - match += 1; - } - - if (match > best) { - clk = p->clk; - if (match != 3) - best = match; - else - break; - } - } - return clk; -} - -struct clk *clk_get_sys(const char *dev_id, const char *con_id) -{ - struct clk *clk; - - mutex_lock(&clocks_mutex); - clk = clk_find(dev_id, con_id); - if (clk && !__clk_get(clk)) - clk = NULL; - mutex_unlock(&clocks_mutex); - - return clk ? clk : ERR_PTR(-ENOENT); -} -EXPORT_SYMBOL(clk_get_sys); - -struct clk *clk_get(struct device *dev, const char *con_id) -{ - const char *dev_id = dev ? dev_name(dev) : NULL; - - return clk_get_sys(dev_id, con_id); -} -EXPORT_SYMBOL(clk_get); - -void clk_put(struct clk *clk) -{ - __clk_put(clk); -} -EXPORT_SYMBOL(clk_put); - -void clkdev_add(struct clk_lookup *cl) -{ - mutex_lock(&clocks_mutex); - list_add_tail(&cl->node, &clocks); - mutex_unlock(&clocks_mutex); -} -EXPORT_SYMBOL(clkdev_add); - -void __init clkdev_add_table(struct clk_lookup *cl, size_t num) -{ - mutex_lock(&clocks_mutex); - while (num--) { - list_add_tail(&cl->node, &clocks); - cl++; - } - mutex_unlock(&clocks_mutex); -} - -#define MAX_DEV_ID 20 -#define MAX_CON_ID 16 - -struct clk_lookup_alloc { - struct clk_lookup cl; - char dev_id[MAX_DEV_ID]; - char con_id[MAX_CON_ID]; -}; - -struct clk_lookup *clkdev_alloc(struct clk *clk, const char *con_id, - const char *dev_fmt, ...) -{ - struct clk_lookup_alloc *cla; - - cla = kzalloc(sizeof(*cla), GFP_KERNEL); - if (!cla) - return NULL; - - cla->cl.clk = clk; - if (con_id) { - strlcpy(cla->con_id, con_id, sizeof(cla->con_id)); - cla->cl.con_id = cla->con_id; - } - - if (dev_fmt) { - va_list ap; - - va_start(ap, dev_fmt); - vscnprintf(cla->dev_id, sizeof(cla->dev_id), dev_fmt, ap); - cla->cl.dev_id = cla->dev_id; - va_end(ap); - } - - return &cla->cl; -} -EXPORT_SYMBOL(clkdev_alloc); - -int clk_add_alias(const char *alias, const char *alias_dev_name, char *id, - struct device *dev) -{ - struct clk *r = clk_get(dev, id); - struct clk_lookup *l; - - if (IS_ERR(r)) - return PTR_ERR(r); - - l = clkdev_alloc(r, alias, alias_dev_name); - clk_put(r); - if (!l) - return -ENODEV; - clkdev_add(l); - return 0; -} -EXPORT_SYMBOL(clk_add_alias); - -/* - * clkdev_drop - remove a clock dynamically allocated - */ -void clkdev_drop(struct clk_lookup *cl) -{ - mutex_lock(&clocks_mutex); - list_del(&cl->node); - mutex_unlock(&clocks_mutex); - kfree(cl); -} -EXPORT_SYMBOL(clkdev_drop); diff --git a/arch/arm/include/asm/clkdev.h b/arch/arm/include/asm/clkdev.h index b56c1389b6f..765d3322236 100644 --- a/arch/arm/include/asm/clkdev.h +++ b/arch/arm/include/asm/clkdev.h @@ -12,23 +12,13 @@ #ifndef __ASM_CLKDEV_H #define __ASM_CLKDEV_H -struct clk; -struct device; +#include -struct clk_lookup { - struct list_head node; - const char *dev_id; - const char *con_id; - struct clk *clk; -}; +#include -struct clk_lookup *clkdev_alloc(struct clk *clk, const char *con_id, - const char *dev_fmt, ...); - -void clkdev_add(struct clk_lookup *cl); -void clkdev_drop(struct clk_lookup *cl); - -void clkdev_add_table(struct clk_lookup *, size_t); -int clk_add_alias(const char *, const char *, char *, struct device *); +static inline struct clk_lookup_alloc *__clkdev_alloc(size_t size) +{ + return kzalloc(size, GFP_KERNEL); +} #endif diff --git a/arch/arm/mach-bcmring/clock.c b/arch/arm/mach-bcmring/clock.c index 14bafc38f2d..ad237a42d26 100644 --- a/arch/arm/mach-bcmring/clock.c +++ b/arch/arm/mach-bcmring/clock.c @@ -21,13 +21,12 @@ #include #include #include +#include #include #include #include #include -#include - #include "clock.h" #define clk_is_primary(x) ((x)->type & CLK_TYPE_PRIMARY) diff --git a/arch/arm/mach-bcmring/core.c b/arch/arm/mach-bcmring/core.c index d3f959e92b2..ed96ef40047 100644 --- a/arch/arm/mach-bcmring/core.c +++ b/arch/arm/mach-bcmring/core.c @@ -30,10 +30,10 @@ #include #include #include +#include #include #include -#include #include #include #include diff --git a/arch/arm/mach-davinci/clock.h b/arch/arm/mach-davinci/clock.h index 11099980b58..0dd22031ec6 100644 --- a/arch/arm/mach-davinci/clock.h +++ b/arch/arm/mach-davinci/clock.h @@ -68,7 +68,7 @@ #ifndef __ASSEMBLER__ #include -#include +#include #define PLLSTAT_GOSTAT BIT(0) #define PLLCMD_GOSET BIT(0) diff --git a/arch/arm/mach-ep93xx/clock.c b/arch/arm/mach-ep93xx/clock.c index ef06c66a6f1..ca4de710509 100644 --- a/arch/arm/mach-ep93xx/clock.c +++ b/arch/arm/mach-ep93xx/clock.c @@ -19,10 +19,10 @@ #include #include #include +#include #include -#include #include diff --git a/arch/arm/mach-imx/clock-imx1.c b/arch/arm/mach-imx/clock-imx1.c index daca30b2d5b..3938a563b28 100644 --- a/arch/arm/mach-imx/clock-imx1.c +++ b/arch/arm/mach-imx/clock-imx1.c @@ -22,8 +22,7 @@ #include #include #include - -#include +#include #include #include diff --git a/arch/arm/mach-imx/clock-imx21.c b/arch/arm/mach-imx/clock-imx21.c index cf15ea516a7..d7056559715 100644 --- a/arch/arm/mach-imx/clock-imx21.c +++ b/arch/arm/mach-imx/clock-imx21.c @@ -21,11 +21,11 @@ #include #include #include +#include #include #include #include -#include #include #define IO_ADDR_CCM(off) (MX21_IO_ADDRESS(MX21_CCM_BASE_ADDR + (off))) diff --git a/arch/arm/mach-imx/clock-imx27.c b/arch/arm/mach-imx/clock-imx27.c index 98a25bada78..ca1017b9028 100644 --- a/arch/arm/mach-imx/clock-imx27.c +++ b/arch/arm/mach-imx/clock-imx27.c @@ -21,8 +21,8 @@ #include #include #include +#include -#include #include #include diff --git a/arch/arm/mach-integrator/core.c b/arch/arm/mach-integrator/core.c index 8f4fb6d638f..b8e884b450d 100644 --- a/arch/arm/mach-integrator/core.c +++ b/arch/arm/mach-integrator/core.c @@ -21,9 +21,8 @@ #include #include #include +#include -#include -#include #include #include #include diff --git a/arch/arm/mach-integrator/impd1.c b/arch/arm/mach-integrator/impd1.c index fd684bf205e..5db574f8ae3 100644 --- a/arch/arm/mach-integrator/impd1.c +++ b/arch/arm/mach-integrator/impd1.c @@ -22,9 +22,8 @@ #include #include #include +#include -#include -#include #include #include #include diff --git a/arch/arm/mach-integrator/integrator_cp.c b/arch/arm/mach-integrator/integrator_cp.c index 6258c90d020..9403d2fa13a 100644 --- a/arch/arm/mach-integrator/integrator_cp.c +++ b/arch/arm/mach-integrator/integrator_cp.c @@ -21,9 +21,8 @@ #include #include #include +#include -#include -#include #include #include #include diff --git a/arch/arm/mach-lpc32xx/clock.c b/arch/arm/mach-lpc32xx/clock.c index 32d63796430..da0e6498110 100644 --- a/arch/arm/mach-lpc32xx/clock.c +++ b/arch/arm/mach-lpc32xx/clock.c @@ -90,10 +90,9 @@ #include #include #include +#include #include -#include -#include #include #include "clock.h" #include "common.h" diff --git a/arch/arm/mach-mmp/clock.h b/arch/arm/mach-mmp/clock.h index 016ae94691c..9b027d7491f 100644 --- a/arch/arm/mach-mmp/clock.h +++ b/arch/arm/mach-mmp/clock.h @@ -6,7 +6,7 @@ * published by the Free Software Foundation. */ -#include +#include struct clkops { void (*enable)(struct clk *); diff --git a/arch/arm/mach-mx25/clock.c b/arch/arm/mach-mx25/clock.c index 9e4a5578c2f..00dcb08019e 100644 --- a/arch/arm/mach-mx25/clock.c +++ b/arch/arm/mach-mx25/clock.c @@ -21,8 +21,7 @@ #include #include #include - -#include +#include #include #include diff --git a/arch/arm/mach-mx3/clock-imx31.c b/arch/arm/mach-mx3/clock-imx31.c index 109e98f323e..1cd8b40b767 100644 --- a/arch/arm/mach-mx3/clock-imx31.c +++ b/arch/arm/mach-mx3/clock-imx31.c @@ -23,8 +23,8 @@ #include #include #include +#include -#include #include #include diff --git a/arch/arm/mach-mx3/clock-imx35.c b/arch/arm/mach-mx3/clock-imx35.c index 61e4a318980..819dd809615 100644 --- a/arch/arm/mach-mx3/clock-imx35.c +++ b/arch/arm/mach-mx3/clock-imx35.c @@ -21,8 +21,7 @@ #include #include #include - -#include +#include #include #include diff --git a/arch/arm/mach-mx5/clock-mx51.c b/arch/arm/mach-mx5/clock-mx51.c index 8ac36d88292..5975edb47de 100644 --- a/arch/arm/mach-mx5/clock-mx51.c +++ b/arch/arm/mach-mx5/clock-mx51.c @@ -14,8 +14,8 @@ #include #include #include +#include -#include #include #include diff --git a/arch/arm/mach-mxc91231/clock.c b/arch/arm/mach-mxc91231/clock.c index 5c85075d8a5..9fab505f1eb 100644 --- a/arch/arm/mach-mxc91231/clock.c +++ b/arch/arm/mach-mxc91231/clock.c @@ -2,12 +2,12 @@ #include #include #include +#include #include #include #include -#include #include #include diff --git a/arch/arm/mach-nomadik/clock.c b/arch/arm/mach-nomadik/clock.c index 89f793adf77..48a59f24e10 100644 --- a/arch/arm/mach-nomadik/clock.c +++ b/arch/arm/mach-nomadik/clock.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include "clock.h" /* diff --git a/arch/arm/mach-nuc93x/clock.h b/arch/arm/mach-nuc93x/clock.h index 18e51be4816..4de1f1da9dc 100644 --- a/arch/arm/mach-nuc93x/clock.h +++ b/arch/arm/mach-nuc93x/clock.h @@ -10,7 +10,7 @@ * the Free Software Foundation; either version 2 of the License. */ -#include +#include void nuc93x_clk_enable(struct clk *clk, int enable); void clks_register(struct clk_lookup *clks, size_t num); diff --git a/arch/arm/mach-omap1/clock.c b/arch/arm/mach-omap1/clock.c index b8c7fb9d792..84ef70476b5 100644 --- a/arch/arm/mach-omap1/clock.c +++ b/arch/arm/mach-omap1/clock.c @@ -17,9 +17,9 @@ #include #include #include +#include #include -#include #include #include diff --git a/arch/arm/mach-omap2/dpll3xxx.c b/arch/arm/mach-omap2/dpll3xxx.c index ed8d330522f..ebb888f5936 100644 --- a/arch/arm/mach-omap2/dpll3xxx.c +++ b/arch/arm/mach-omap2/dpll3xxx.c @@ -26,10 +26,10 @@ #include #include #include +#include #include #include -#include #include "clock.h" #include "prm.h" diff --git a/arch/arm/mach-pnx4008/clock.c b/arch/arm/mach-pnx4008/clock.c index 9d1975fa4d9..a4a3819c96c 100644 --- a/arch/arm/mach-pnx4008/clock.c +++ b/arch/arm/mach-pnx4008/clock.c @@ -21,8 +21,7 @@ #include #include #include - -#include +#include #include #include diff --git a/arch/arm/mach-pxa/clock.c b/arch/arm/mach-pxa/clock.c index abba0089a2a..4e4a84be96b 100644 --- a/arch/arm/mach-pxa/clock.c +++ b/arch/arm/mach-pxa/clock.c @@ -11,8 +11,8 @@ #include #include #include +#include -#include #include #include diff --git a/arch/arm/mach-pxa/clock.h b/arch/arm/mach-pxa/clock.h index d8488742b80..12cc0e87e6c 100644 --- a/arch/arm/mach-pxa/clock.h +++ b/arch/arm/mach-pxa/clock.h @@ -1,4 +1,4 @@ -#include +#include struct clkops { void (*enable)(struct clk *); diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c index 07c08151dfe..3d915b1ccdb 100644 --- a/arch/arm/mach-realview/core.c +++ b/arch/arm/mach-realview/core.c @@ -30,8 +30,8 @@ #include #include #include +#include -#include #include #include #include @@ -47,7 +47,6 @@ #include -#include #include #include #include diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig index 54b479c35ee..f8f06e9fec3 100644 --- a/arch/arm/mach-shmobile/Kconfig +++ b/arch/arm/mach-shmobile/Kconfig @@ -6,7 +6,7 @@ config ARCH_SH7367 bool "SH-Mobile G3 (SH7367)" select CPU_V6 select HAVE_CLK - select COMMON_CLKDEV + select CLKDEV_LOOKUP select SH_CLK_CPG select GENERIC_CLOCKEVENTS @@ -14,7 +14,7 @@ config ARCH_SH7377 bool "SH-Mobile G4 (SH7377)" select CPU_V7 select HAVE_CLK - select COMMON_CLKDEV + select CLKDEV_LOOKUP select SH_CLK_CPG select GENERIC_CLOCKEVENTS @@ -22,7 +22,7 @@ config ARCH_SH7372 bool "SH-Mobile AP4 (SH7372)" select CPU_V7 select HAVE_CLK - select COMMON_CLKDEV + select CLKDEV_LOOKUP select SH_CLK_CPG select GENERIC_CLOCKEVENTS diff --git a/arch/arm/mach-shmobile/clock-sh7367.c b/arch/arm/mach-shmobile/clock-sh7367.c index 9f78729098f..6b186aefcbd 100644 --- a/arch/arm/mach-shmobile/clock-sh7367.c +++ b/arch/arm/mach-shmobile/clock-sh7367.c @@ -20,8 +20,8 @@ #include #include #include +#include #include -#include /* SH7367 registers */ #define RTFRQCR 0xe6150000 diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c index 8565aefa21f..445112adba4 100644 --- a/arch/arm/mach-shmobile/clock-sh7372.c +++ b/arch/arm/mach-shmobile/clock-sh7372.c @@ -20,8 +20,8 @@ #include #include #include +#include #include -#include /* SH7372 registers */ #define FRQCRA 0xe6150000 diff --git a/arch/arm/mach-shmobile/clock-sh7377.c b/arch/arm/mach-shmobile/clock-sh7377.c index f91395aeb9a..95942466e63 100644 --- a/arch/arm/mach-shmobile/clock-sh7377.c +++ b/arch/arm/mach-shmobile/clock-sh7377.c @@ -20,8 +20,8 @@ #include #include #include +#include #include -#include /* SH7377 registers */ #define RTFRQCR 0xe6150000 diff --git a/arch/arm/mach-tcc8k/clock.c b/arch/arm/mach-tcc8k/clock.c index ba32a15127a..3970a9cdce2 100644 --- a/arch/arm/mach-tcc8k/clock.c +++ b/arch/arm/mach-tcc8k/clock.c @@ -12,8 +12,7 @@ #include #include #include - -#include +#include #include #include diff --git a/arch/arm/mach-tegra/clock.c b/arch/arm/mach-tegra/clock.c index ae19f95585b..77948e0f490 100644 --- a/arch/arm/mach-tegra/clock.c +++ b/arch/arm/mach-tegra/clock.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include "clock.h" #include "board.h" diff --git a/arch/arm/mach-tegra/clock.h b/arch/arm/mach-tegra/clock.h index 94fd859770f..083a4cfc6cf 100644 --- a/arch/arm/mach-tegra/clock.h +++ b/arch/arm/mach-tegra/clock.h @@ -21,7 +21,7 @@ #define __MACH_TEGRA_CLOCK_H #include -#include +#include #define DIV_BUS (1 << 0) #define DIV_U71 (1 << 1) diff --git a/arch/arm/mach-tegra/tegra2_clocks.c b/arch/arm/mach-tegra/tegra2_clocks.c index ae3b308e22a..f0dae6d8ba5 100644 --- a/arch/arm/mach-tegra/tegra2_clocks.c +++ b/arch/arm/mach-tegra/tegra2_clocks.c @@ -24,8 +24,7 @@ #include #include #include - -#include +#include #include diff --git a/arch/arm/mach-u300/clock.c b/arch/arm/mach-u300/clock.c index 7458fc6df5c..fabcc49abe8 100644 --- a/arch/arm/mach-u300/clock.c +++ b/arch/arm/mach-u300/clock.c @@ -25,8 +25,8 @@ #include #include #include +#include -#include #include #include diff --git a/arch/arm/mach-ux500/clock.c b/arch/arm/mach-ux500/clock.c index 1675047daf2..531de5c6364 100644 --- a/arch/arm/mach-ux500/clock.c +++ b/arch/arm/mach-ux500/clock.c @@ -13,8 +13,7 @@ #include #include #include - -#include +#include #include #include diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c index e38acb0f89c..8c1ca1d6353 100644 --- a/arch/arm/mach-versatile/core.c +++ b/arch/arm/mach-versatile/core.c @@ -31,8 +31,8 @@ #include #include #include +#include -#include #include #include #include @@ -46,7 +46,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm/mach-vexpress/ct-ca9x4.c b/arch/arm/mach-vexpress/ct-ca9x4.c index c2e405a9e02..26a02eb5757 100644 --- a/arch/arm/mach-vexpress/ct-ca9x4.c +++ b/arch/arm/mach-vexpress/ct-ca9x4.c @@ -8,8 +8,8 @@ #include #include #include +#include -#include #include #include #include @@ -18,7 +18,6 @@ #include #include -#include #include #include diff --git a/arch/arm/mach-vexpress/v2m.c b/arch/arm/mach-vexpress/v2m.c index 7eaa232180a..d374a78986e 100644 --- a/arch/arm/mach-vexpress/v2m.c +++ b/arch/arm/mach-vexpress/v2m.c @@ -11,15 +11,14 @@ #include #include #include +#include -#include #include #include #include #include #include -#include #include #include diff --git a/arch/arm/mach-w90x900/clock.h b/arch/arm/mach-w90x900/clock.h index c56ddab3d91..b88a1b16b2e 100644 --- a/arch/arm/mach-w90x900/clock.h +++ b/arch/arm/mach-w90x900/clock.h @@ -10,7 +10,7 @@ * the Free Software Foundation; either version 2 of the License. */ -#include +#include void nuc900_clk_enable(struct clk *clk, int enable); void nuc900_subclk_enable(struct clk *clk, int enable); diff --git a/arch/arm/plat-omap/Kconfig b/arch/arm/plat-omap/Kconfig index 92c5bb7909f..c9408434a85 100644 --- a/arch/arm/plat-omap/Kconfig +++ b/arch/arm/plat-omap/Kconfig @@ -11,13 +11,13 @@ choice config ARCH_OMAP1 bool "TI OMAP1" - select COMMON_CLKDEV + select CLKDEV_LOOKUP help "Systems based on omap7xx, omap15xx or omap16xx" config ARCH_OMAP2PLUS bool "TI OMAP2/3/4" - select COMMON_CLKDEV + select CLKDEV_LOOKUP help "Systems based on OMAP2, OMAP3 or OMAP4" diff --git a/arch/arm/plat-omap/include/plat/clkdev_omap.h b/arch/arm/plat-omap/include/plat/clkdev_omap.h index bb937f3fabe..4b2028ab4d2 100644 --- a/arch/arm/plat-omap/include/plat/clkdev_omap.h +++ b/arch/arm/plat-omap/include/plat/clkdev_omap.h @@ -8,7 +8,7 @@ #ifndef __ARCH_ARM_PLAT_OMAP_INCLUDE_PLAT_CLKDEV_OMAP_H #define __ARCH_ARM_PLAT_OMAP_INCLUDE_PLAT_CLKDEV_OMAP_H -#include +#include struct omap_clk { u16 cpu; diff --git a/arch/arm/plat-spear/include/plat/clock.h b/arch/arm/plat-spear/include/plat/clock.h index 298bafc0a52..2572260f990 100644 --- a/arch/arm/plat-spear/include/plat/clock.h +++ b/arch/arm/plat-spear/include/plat/clock.h @@ -15,7 +15,7 @@ #define __PLAT_CLOCK_H #include -#include +#include #include /* clk structure flags */ diff --git a/arch/arm/plat-stmp3xxx/clock.c b/arch/arm/plat-stmp3xxx/clock.c index e593a2a801c..2e712e17ce7 100644 --- a/arch/arm/plat-stmp3xxx/clock.c +++ b/arch/arm/plat-stmp3xxx/clock.c @@ -25,9 +25,9 @@ #include #include #include +#include #include -#include #include #include diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 5c075f562eb..cfc51060803 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -1,7 +1,7 @@ config SUPERH def_bool y select EMBEDDED - select HAVE_CLK + select CLKDEV_LOOKUP select HAVE_IDE if HAS_IOPORT select HAVE_MEMBLOCK select HAVE_OPROFILE diff --git a/arch/sh/boards/mach-highlander/setup.c b/arch/sh/boards/mach-highlander/setup.c index a5ecfbacaf3..87618c91d17 100644 --- a/arch/sh/boards/mach-highlander/setup.c +++ b/arch/sh/boards/mach-highlander/setup.c @@ -24,10 +24,10 @@ #include #include #include +#include #include #include #include -#include #include #include #include diff --git a/arch/sh/include/asm/clkdev.h b/arch/sh/include/asm/clkdev.h index 5645f358128..6ba91868201 100644 --- a/arch/sh/include/asm/clkdev.h +++ b/arch/sh/include/asm/clkdev.h @@ -1,9 +1,5 @@ /* - * arch/sh/include/asm/clkdev.h - * - * Cloned from arch/arm/include/asm/clkdev.h: - * - * Copyright (C) 2008 Russell King. + * Copyright (C) 2010 Paul Mundt * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -11,25 +7,25 @@ * * Helper for the clk API to assist looking up a struct clk. */ -#ifndef __ASM_CLKDEV_H -#define __ASM_CLKDEV_H -struct clk; +#ifndef __CLKDEV__H_ +#define __CLKDEV__H_ -struct clk_lookup { - struct list_head node; - const char *dev_id; - const char *con_id; - struct clk *clk; -}; +#include +#include +#include -struct clk_lookup *clkdev_alloc(struct clk *clk, const char *con_id, - const char *dev_fmt, ...); +#include -void clkdev_add(struct clk_lookup *cl); -void clkdev_drop(struct clk_lookup *cl); +static inline struct clk_lookup_alloc *__clkdev_alloc(size_t size) +{ + if (!slab_is_available()) + return alloc_bootmem_low_pages(size); + else + return kzalloc(size, GFP_KERNEL); +} -void clkdev_add_table(struct clk_lookup *, size_t); -int clk_add_alias(const char *, const char *, char *, struct device *); +#define __clk_put(clk) +#define __clk_get(clk) ({ 1; }) -#endif +#endif /* __CLKDEV_H__ */ diff --git a/arch/sh/kernel/Makefile b/arch/sh/kernel/Makefile index 8eed6a48544..cf652217952 100644 --- a/arch/sh/kernel/Makefile +++ b/arch/sh/kernel/Makefile @@ -11,7 +11,7 @@ endif CFLAGS_REMOVE_return_address.o = -pg -obj-y := clkdev.o debugtraps.o dma-nommu.o dumpstack.o \ +obj-y := debugtraps.o dma-nommu.o dumpstack.o \ idle.o io.o irq.o irq_$(BITS).o kdebugfs.o \ machvec.o nmi_debug.o process.o \ process_$(BITS).o ptrace.o ptrace_$(BITS).o \ diff --git a/arch/sh/kernel/clkdev.c b/arch/sh/kernel/clkdev.c deleted file mode 100644 index 1f800ef4a73..00000000000 --- a/arch/sh/kernel/clkdev.c +++ /dev/null @@ -1,171 +0,0 @@ -/* - * arch/sh/kernel/clkdev.c - * - * Cloned from arch/arm/common/clkdev.c: - * - * Copyright (C) 2008 Russell King. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Helper for the clk API to assist looking up a struct clk. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static LIST_HEAD(clocks); -static DEFINE_MUTEX(clocks_mutex); - -/* - * Find the correct struct clk for the device and connection ID. - * We do slightly fuzzy matching here: - * An entry with a NULL ID is assumed to be a wildcard. - * If an entry has a device ID, it must match - * If an entry has a connection ID, it must match - * Then we take the most specific entry - with the following - * order of precedence: dev+con > dev only > con only. - */ -static struct clk *clk_find(const char *dev_id, const char *con_id) -{ - struct clk_lookup *p; - struct clk *clk = NULL; - int match, best = 0; - - list_for_each_entry(p, &clocks, node) { - match = 0; - if (p->dev_id) { - if (!dev_id || strcmp(p->dev_id, dev_id)) - continue; - match += 2; - } - if (p->con_id) { - if (!con_id || strcmp(p->con_id, con_id)) - continue; - match += 1; - } - if (match == 0) - continue; - - if (match > best) { - clk = p->clk; - best = match; - } - } - return clk; -} - -struct clk *clk_get_sys(const char *dev_id, const char *con_id) -{ - struct clk *clk; - - mutex_lock(&clocks_mutex); - clk = clk_find(dev_id, con_id); - mutex_unlock(&clocks_mutex); - - return clk ? clk : ERR_PTR(-ENOENT); -} -EXPORT_SYMBOL(clk_get_sys); - -void clkdev_add(struct clk_lookup *cl) -{ - mutex_lock(&clocks_mutex); - list_add_tail(&cl->node, &clocks); - mutex_unlock(&clocks_mutex); -} -EXPORT_SYMBOL(clkdev_add); - -void __init clkdev_add_table(struct clk_lookup *cl, size_t num) -{ - mutex_lock(&clocks_mutex); - while (num--) { - list_add_tail(&cl->node, &clocks); - cl++; - } - mutex_unlock(&clocks_mutex); -} - -#define MAX_DEV_ID 20 -#define MAX_CON_ID 16 - -struct clk_lookup_alloc { - struct clk_lookup cl; - char dev_id[MAX_DEV_ID]; - char con_id[MAX_CON_ID]; -}; - -struct clk_lookup * __init_refok -clkdev_alloc(struct clk *clk, const char *con_id, const char *dev_fmt, ...) -{ - struct clk_lookup_alloc *cla; - - if (!slab_is_available()) - cla = alloc_bootmem_low_pages(sizeof(*cla)); - else - cla = kzalloc(sizeof(*cla), GFP_KERNEL); - - if (!cla) - return NULL; - - cla->cl.clk = clk; - if (con_id) { - strlcpy(cla->con_id, con_id, sizeof(cla->con_id)); - cla->cl.con_id = cla->con_id; - } - - if (dev_fmt) { - va_list ap; - - va_start(ap, dev_fmt); - vscnprintf(cla->dev_id, sizeof(cla->dev_id), dev_fmt, ap); - cla->cl.dev_id = cla->dev_id; - va_end(ap); - } - - return &cla->cl; -} -EXPORT_SYMBOL(clkdev_alloc); - -int clk_add_alias(const char *alias, const char *alias_dev_name, char *id, - struct device *dev) -{ - struct clk *r = clk_get(dev, id); - struct clk_lookup *l; - - if (IS_ERR(r)) - return PTR_ERR(r); - - l = clkdev_alloc(r, alias, alias_dev_name); - clk_put(r); - if (!l) - return -ENODEV; - clkdev_add(l); - return 0; -} -EXPORT_SYMBOL(clk_add_alias); - -/* - * clkdev_drop - remove a clock dynamically allocated - */ -void clkdev_drop(struct clk_lookup *cl) -{ - struct clk_lookup_alloc *cla = container_of(cl, struct clk_lookup_alloc, cl); - - mutex_lock(&clocks_mutex); - list_del(&cl->node); - mutex_unlock(&clocks_mutex); - kfree(cla); -} -EXPORT_SYMBOL(clkdev_drop); diff --git a/arch/sh/kernel/cpu/clock-cpg.c b/arch/sh/kernel/cpu/clock-cpg.c index e2f63d68da5..dd0e0f21135 100644 --- a/arch/sh/kernel/cpu/clock-cpg.c +++ b/arch/sh/kernel/cpu/clock-cpg.c @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include static struct clk master_clk = { diff --git a/arch/sh/kernel/cpu/clock.c b/arch/sh/kernel/cpu/clock.c index 50f887dda56..4187cf4fe18 100644 --- a/arch/sh/kernel/cpu/clock.c +++ b/arch/sh/kernel/cpu/clock.c @@ -48,20 +48,4 @@ int __init clk_init(void) return ret; } -/* - * Returns a clock. Note that we first try to use device id on the bus - * and clock name. If this fails, we try to use clock name only. - */ -struct clk *clk_get(struct device *dev, const char *con_id) -{ - const char *dev_id = dev ? dev_name(dev) : NULL; - - return clk_get_sys(dev_id, con_id); -} -EXPORT_SYMBOL_GPL(clk_get); - -void clk_put(struct clk *clk) -{ -} -EXPORT_SYMBOL_GPL(clk_put); diff --git a/arch/sh/kernel/cpu/sh4/clock-sh4-202.c b/arch/sh/kernel/cpu/sh4/clock-sh4-202.c index 4eabc68cd75..6c1492b8431 100644 --- a/arch/sh/kernel/cpu/sh4/clock-sh4-202.c +++ b/arch/sh/kernel/cpu/sh4/clock-sh4-202.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7343.c b/arch/sh/kernel/cpu/sh4a/clock-sh7343.c index 71291ae201b..93c646072c1 100644 --- a/arch/sh/kernel/cpu/sh4a/clock-sh7343.c +++ b/arch/sh/kernel/cpu/sh4a/clock-sh7343.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include /* SH7343 registers */ diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7366.c b/arch/sh/kernel/cpu/sh4a/clock-sh7366.c index 7ce5bbcd408..049dc0628cc 100644 --- a/arch/sh/kernel/cpu/sh4a/clock-sh7366.c +++ b/arch/sh/kernel/cpu/sh4a/clock-sh7366.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include /* SH7366 registers */ diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c index 2030f3d9fac..9d23a36f064 100644 --- a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c +++ b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7723.c b/arch/sh/kernel/cpu/sh4a/clock-sh7723.c index d3938f0d370..55493cd5bd8 100644 --- a/arch/sh/kernel/cpu/sh4a/clock-sh7723.c +++ b/arch/sh/kernel/cpu/sh4a/clock-sh7723.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7724.c b/arch/sh/kernel/cpu/sh4a/clock-sh7724.c index 2d9700c6b53..527936bb3ce 100644 --- a/arch/sh/kernel/cpu/sh4a/clock-sh7724.c +++ b/arch/sh/kernel/cpu/sh4a/clock-sh7724.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7757.c b/arch/sh/kernel/cpu/sh4a/clock-sh7757.c index ce39a2ae8c6..e073e3eb4c3 100644 --- a/arch/sh/kernel/cpu/sh4a/clock-sh7757.c +++ b/arch/sh/kernel/cpu/sh4a/clock-sh7757.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7763.c b/arch/sh/kernel/cpu/sh4a/clock-sh7763.c index 1f1df48008c..599630fc4d3 100644 --- a/arch/sh/kernel/cpu/sh4a/clock-sh7763.c +++ b/arch/sh/kernel/cpu/sh4a/clock-sh7763.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7780.c b/arch/sh/kernel/cpu/sh4a/clock-sh7780.c index 62d70635006..8894926479a 100644 --- a/arch/sh/kernel/cpu/sh4a/clock-sh7780.c +++ b/arch/sh/kernel/cpu/sh4a/clock-sh7780.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7785.c b/arch/sh/kernel/cpu/sh4a/clock-sh7785.c index c3e458aaa2b..2d960247f3e 100644 --- a/arch/sh/kernel/cpu/sh4a/clock-sh7785.c +++ b/arch/sh/kernel/cpu/sh4a/clock-sh7785.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7786.c b/arch/sh/kernel/cpu/sh4a/clock-sh7786.c index 597c9fbe49c..42e403be907 100644 --- a/arch/sh/kernel/cpu/sh4a/clock-sh7786.c +++ b/arch/sh/kernel/cpu/sh4a/clock-sh7786.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sh/kernel/cpu/sh4a/clock-shx3.c b/arch/sh/kernel/cpu/sh4a/clock-shx3.c index 4f70df6b616..1afdb93b8cc 100644 --- a/arch/sh/kernel/cpu/sh4a/clock-shx3.c +++ b/arch/sh/kernel/cpu/sh4a/clock-shx3.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/Kconfig b/drivers/Kconfig index a2b902f4d43..3d93b3a3d63 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -111,4 +111,6 @@ source "drivers/xen/Kconfig" source "drivers/staging/Kconfig" source "drivers/platform/Kconfig" + +source "drivers/clk/Kconfig" endmenu diff --git a/drivers/Makefile b/drivers/Makefile index 14cf9077bb2..4af7d5b124c 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -114,3 +114,5 @@ obj-$(CONFIG_VLYNQ) += vlynq/ obj-$(CONFIG_STAGING) += staging/ obj-y += platform/ obj-y += ieee802154/ +#common clk code +obj-y += clk/ diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig new file mode 100644 index 00000000000..4168c8896e1 --- /dev/null +++ b/drivers/clk/Kconfig @@ -0,0 +1,4 @@ + +config CLKDEV_LOOKUP + bool + select HAVE_CLK diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile new file mode 100644 index 00000000000..07613fa172c --- /dev/null +++ b/drivers/clk/Makefile @@ -0,0 +1,2 @@ + +obj-$(CONFIG_CLKDEV_LOOKUP) += clkdev.o diff --git a/drivers/clk/clkdev.c b/drivers/clk/clkdev.c new file mode 100644 index 00000000000..0fc0a79852d --- /dev/null +++ b/drivers/clk/clkdev.c @@ -0,0 +1,176 @@ +/* + * drivers/clk/clkdev.c + * + * Copyright (C) 2008 Russell King. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Helper for the clk API to assist looking up a struct clk. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static LIST_HEAD(clocks); +static DEFINE_MUTEX(clocks_mutex); + +/* + * Find the correct struct clk for the device and connection ID. + * We do slightly fuzzy matching here: + * An entry with a NULL ID is assumed to be a wildcard. + * If an entry has a device ID, it must match + * If an entry has a connection ID, it must match + * Then we take the most specific entry - with the following + * order of precedence: dev+con > dev only > con only. + */ +static struct clk *clk_find(const char *dev_id, const char *con_id) +{ + struct clk_lookup *p; + struct clk *clk = NULL; + int match, best = 0; + + list_for_each_entry(p, &clocks, node) { + match = 0; + if (p->dev_id) { + if (!dev_id || strcmp(p->dev_id, dev_id)) + continue; + match += 2; + } + if (p->con_id) { + if (!con_id || strcmp(p->con_id, con_id)) + continue; + match += 1; + } + + if (match > best) { + clk = p->clk; + if (match != 3) + best = match; + else + break; + } + } + return clk; +} + +struct clk *clk_get_sys(const char *dev_id, const char *con_id) +{ + struct clk *clk; + + mutex_lock(&clocks_mutex); + clk = clk_find(dev_id, con_id); + if (clk && !__clk_get(clk)) + clk = NULL; + mutex_unlock(&clocks_mutex); + + return clk ? clk : ERR_PTR(-ENOENT); +} +EXPORT_SYMBOL(clk_get_sys); + +struct clk *clk_get(struct device *dev, const char *con_id) +{ + const char *dev_id = dev ? dev_name(dev) : NULL; + + return clk_get_sys(dev_id, con_id); +} +EXPORT_SYMBOL(clk_get); + +void clk_put(struct clk *clk) +{ + __clk_put(clk); +} +EXPORT_SYMBOL(clk_put); + +void clkdev_add(struct clk_lookup *cl) +{ + mutex_lock(&clocks_mutex); + list_add_tail(&cl->node, &clocks); + mutex_unlock(&clocks_mutex); +} +EXPORT_SYMBOL(clkdev_add); + +void __init clkdev_add_table(struct clk_lookup *cl, size_t num) +{ + mutex_lock(&clocks_mutex); + while (num--) { + list_add_tail(&cl->node, &clocks); + cl++; + } + mutex_unlock(&clocks_mutex); +} + +#define MAX_DEV_ID 20 +#define MAX_CON_ID 16 + +struct clk_lookup_alloc { + struct clk_lookup cl; + char dev_id[MAX_DEV_ID]; + char con_id[MAX_CON_ID]; +}; + +struct clk_lookup * __init_refok +clkdev_alloc(struct clk *clk, const char *con_id, const char *dev_fmt, ...) +{ + struct clk_lookup_alloc *cla; + + cla = __clkdev_alloc(sizeof(*cla)); + if (!cla) + return NULL; + + cla->cl.clk = clk; + if (con_id) { + strlcpy(cla->con_id, con_id, sizeof(cla->con_id)); + cla->cl.con_id = cla->con_id; + } + + if (dev_fmt) { + va_list ap; + + va_start(ap, dev_fmt); + vscnprintf(cla->dev_id, sizeof(cla->dev_id), dev_fmt, ap); + cla->cl.dev_id = cla->dev_id; + va_end(ap); + } + + return &cla->cl; +} +EXPORT_SYMBOL(clkdev_alloc); + +int clk_add_alias(const char *alias, const char *alias_dev_name, char *id, + struct device *dev) +{ + struct clk *r = clk_get(dev, id); + struct clk_lookup *l; + + if (IS_ERR(r)) + return PTR_ERR(r); + + l = clkdev_alloc(r, alias, alias_dev_name); + clk_put(r); + if (!l) + return -ENODEV; + clkdev_add(l); + return 0; +} +EXPORT_SYMBOL(clk_add_alias); + +/* + * clkdev_drop - remove a clock dynamically allocated + */ +void clkdev_drop(struct clk_lookup *cl) +{ + mutex_lock(&clocks_mutex); + list_del(&cl->node); + mutex_unlock(&clocks_mutex); + kfree(cl); +} +EXPORT_SYMBOL(clkdev_drop); diff --git a/include/linux/clkdev.h b/include/linux/clkdev.h new file mode 100644 index 00000000000..457bcb0a310 --- /dev/null +++ b/include/linux/clkdev.h @@ -0,0 +1,36 @@ +/* + * include/linux/clkdev.h + * + * Copyright (C) 2008 Russell King. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Helper for the clk API to assist looking up a struct clk. + */ +#ifndef __CLKDEV_H +#define __CLKDEV_H + +#include + +struct clk; +struct device; + +struct clk_lookup { + struct list_head node; + const char *dev_id; + const char *con_id; + struct clk *clk; +}; + +struct clk_lookup *clkdev_alloc(struct clk *clk, const char *con_id, + const char *dev_fmt, ...); + +void clkdev_add(struct clk_lookup *cl); +void clkdev_drop(struct clk_lookup *cl); + +void clkdev_add_table(struct clk_lookup *, size_t); +int clk_add_alias(const char *, const char *, char *, struct device *); + +#endif -- cgit v1.2.3-70-g09d2 From 65500fa94aaeb3475e39c0c5180f188014164ca4 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 4 Nov 2010 13:06:59 +0100 Subject: ARM: 6467/1: amba: optional PrimeCell core voltage switch On some contemporary sub-micron SoCs, peripherals on the chip have power domain switches, i.e. the voltage to the core may be turned off to conserve power. In the Ux500 we have this for out PrimeCell derivates. This patch makes it possible to specify an (optional) regulator to handle the voltage domain switch on AMBA PrimeCells, modeled very similar to how block clocks are handled. Additional amba_vcore_[enable|disable] calls are supplied to make it possible introduce optional powering off of the core voltage. Using this will require code to spool/unspool any core HW state. Cc: Rabin Vincent Cc: Bengt Jonsson Cc: Jonas Aaberg Signed-off-by: Linus Walleij Signed-off-by: Russell King --- drivers/amba/bus.c | 39 +++++++++++++++++++++++++++++++++++++++ include/linux/amba/bus.h | 8 ++++++++ 2 files changed, 47 insertions(+) (limited to 'include/linux') diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c index 2737b975220..e7df019d29d 100644 --- a/drivers/amba/bus.c +++ b/drivers/amba/bus.c @@ -147,6 +147,39 @@ static void amba_put_disable_pclk(struct amba_device *pcdev) clk_put(pclk); } +static int amba_get_enable_vcore(struct amba_device *pcdev) +{ + struct regulator *vcore = regulator_get(&pcdev->dev, "vcore"); + int ret; + + pcdev->vcore = vcore; + + if (IS_ERR(vcore)) { + /* It is OK not to supply a vcore regulator */ + if (PTR_ERR(vcore) == -ENODEV) + return 0; + return PTR_ERR(vcore); + } + + ret = regulator_enable(vcore); + if (ret) { + regulator_put(vcore); + pcdev->vcore = ERR_PTR(-ENODEV); + } + + return ret; +} + +static void amba_put_disable_vcore(struct amba_device *pcdev) +{ + struct regulator *vcore = pcdev->vcore; + + if (!IS_ERR(vcore)) { + regulator_disable(vcore); + regulator_put(vcore); + } +} + /* * These are the device model conversion veneers; they convert the * device model structures to our more specific structures. @@ -159,6 +192,10 @@ static int amba_probe(struct device *dev) int ret; do { + ret = amba_get_enable_vcore(pcdev); + if (ret) + break; + ret = amba_get_enable_pclk(pcdev); if (ret) break; @@ -168,6 +205,7 @@ static int amba_probe(struct device *dev) break; amba_put_disable_pclk(pcdev); + amba_put_disable_vcore(pcdev); } while (0); return ret; @@ -180,6 +218,7 @@ static int amba_remove(struct device *dev) int ret = drv->remove(pcdev); amba_put_disable_pclk(pcdev); + amba_put_disable_vcore(pcdev); return ret; } diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index c6454cca044..9e7f259346e 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -18,6 +18,7 @@ #include #include #include +#include #define AMBA_NR_IRQS 2 #define AMBA_CID 0xb105f00d @@ -28,6 +29,7 @@ struct amba_device { struct device dev; struct resource res; struct clk *pclk; + struct regulator *vcore; u64 dma_mask; unsigned int periphid; unsigned int irq[AMBA_NR_IRQS]; @@ -71,6 +73,12 @@ void amba_release_regions(struct amba_device *); #define amba_pclk_disable(d) \ do { if (!IS_ERR((d)->pclk)) clk_disable((d)->pclk); } while (0) +#define amba_vcore_enable(d) \ + (IS_ERR((d)->vcore) ? 0 : regulator_enable((d)->vcore)) + +#define amba_vcore_disable(d) \ + do { if (!IS_ERR((d)->vcore)) regulator_disable((d)->vcore); } while (0) + /* Some drivers don't use the struct amba_device */ #define AMBA_CONFIG_BITS(a) (((a) >> 24) & 0xff) #define AMBA_REV_BITS(a) (((a) >> 20) & 0x0f) -- cgit v1.2.3-70-g09d2 From dddd3379a619a4cb8247bfd3c94ca9ae3797aa2e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 24 Nov 2010 10:05:55 +0100 Subject: perf: Fix inherit vs. context rotation bug It was found that sometimes children of tasks with inherited events had one extra event. Eventually it turned out to be due to the list rotation no being exclusive with the list iteration in the inheritance code. Cure this by temporarily disabling the rotation while we inherit the events. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra LKML-Reference: Cc: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 + kernel/perf_event.c | 22 ++++++++++++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 40150f34598..142e3d6042c 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -850,6 +850,7 @@ struct perf_event_context { int nr_active; int is_active; int nr_stat; + int rotate_disable; atomic_t refcount; struct task_struct *task; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 671f6c8c8a3..f365dd8ef8b 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1622,8 +1622,12 @@ static void rotate_ctx(struct perf_event_context *ctx) { raw_spin_lock(&ctx->lock); - /* Rotate the first entry last of non-pinned groups */ - list_rotate_left(&ctx->flexible_groups); + /* + * Rotate the first entry last of non-pinned groups. Rotation might be + * disabled by the inheritance code. + */ + if (!ctx->rotate_disable) + list_rotate_left(&ctx->flexible_groups); raw_spin_unlock(&ctx->lock); } @@ -6162,6 +6166,7 @@ int perf_event_init_context(struct task_struct *child, int ctxn) struct perf_event *event; struct task_struct *parent = current; int inherited_all = 1; + unsigned long flags; int ret = 0; child->perf_event_ctxp[ctxn] = NULL; @@ -6202,6 +6207,15 @@ int perf_event_init_context(struct task_struct *child, int ctxn) break; } + /* + * We can't hold ctx->lock when iterating the ->flexible_group list due + * to allocations, but we need to prevent rotation because + * rotate_ctx() will change the list from interrupt context. + */ + raw_spin_lock_irqsave(&parent_ctx->lock, flags); + parent_ctx->rotate_disable = 1; + raw_spin_unlock_irqrestore(&parent_ctx->lock, flags); + list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) { ret = inherit_task_group(event, parent, parent_ctx, child, ctxn, &inherited_all); @@ -6209,6 +6223,10 @@ int perf_event_init_context(struct task_struct *child, int ctxn) break; } + raw_spin_lock_irqsave(&parent_ctx->lock, flags); + parent_ctx->rotate_disable = 0; + raw_spin_unlock_irqrestore(&parent_ctx->lock, flags); + child_ctx = child->perf_event_ctxp[ctxn]; if (child_ctx && inherited_all) { -- cgit v1.2.3-70-g09d2 From ee6dcfa40a50fe12a3ae0fb4d2653c66c3ed6556 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 26 Nov 2010 13:49:04 +0100 Subject: perf: Fix the software context switch counter Stephane noticed that because the perf_sw_event() call is inside the perf_event_task_sched_out() call it won't get called unless we have a per-task counter. Reported-by: Stephane Eranian Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 29 +++++++++++++++-------------- kernel/perf_event.c | 2 -- 2 files changed, 15 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 142e3d6042c..de2c41758e2 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -909,20 +909,6 @@ extern int perf_num_counters(void); extern const char *perf_pmu_name(void); extern void __perf_event_task_sched_in(struct task_struct *task); extern void __perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); - -extern atomic_t perf_task_events; - -static inline void perf_event_task_sched_in(struct task_struct *task) -{ - COND_STMT(&perf_task_events, __perf_event_task_sched_in(task)); -} - -static inline -void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next) -{ - COND_STMT(&perf_task_events, __perf_event_task_sched_out(task, next)); -} - extern int perf_event_init_task(struct task_struct *child); extern void perf_event_exit_task(struct task_struct *child); extern void perf_event_free_task(struct task_struct *task); @@ -1031,6 +1017,21 @@ have_event: __perf_sw_event(event_id, nr, nmi, regs, addr); } +extern atomic_t perf_task_events; + +static inline void perf_event_task_sched_in(struct task_struct *task) +{ + COND_STMT(&perf_task_events, __perf_event_task_sched_in(task)); +} + +static inline +void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next) +{ + perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); + + COND_STMT(&perf_task_events, __perf_event_task_sched_out(task, next)); +} + extern void perf_event_mmap(struct vm_area_struct *vma); extern struct perf_guest_info_callbacks *perf_guest_cbs; extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); diff --git a/kernel/perf_event.c b/kernel/perf_event.c index f365dd8ef8b..eac7e336433 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1287,8 +1287,6 @@ void __perf_event_task_sched_out(struct task_struct *task, { int ctxn; - perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); - for_each_task_context_nr(ctxn) perf_event_context_sched_out(task, ctxn, next); } -- cgit v1.2.3-70-g09d2 From 335d7afbfb71faac833734a94240c1e07cf0ead8 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Mon, 22 Nov 2010 15:47:36 +0100 Subject: mutexes, sched: Introduce arch_mutex_cpu_relax() The spinning mutex implementation uses cpu_relax() in busy loops as a compiler barrier. Depending on the architecture, cpu_relax() may do more than needed in this specific mutex spin loops. On System z we also give up the time slice of the virtual cpu in cpu_relax(), which prevents effective spinning on the mutex. This patch replaces cpu_relax() in the spinning mutex code with arch_mutex_cpu_relax(), which can be defined by each architecture that selects HAVE_ARCH_MUTEX_CPU_RELAX. The default is still cpu_relax(), so this patch should not affect other architectures than System z for now. Signed-off-by: Gerald Schaefer Signed-off-by: Peter Zijlstra LKML-Reference: <1290437256.7455.4.camel@thinkpad> Signed-off-by: Ingo Molnar --- arch/Kconfig | 3 +++ arch/s390/Kconfig | 1 + arch/s390/include/asm/mutex.h | 2 ++ include/linux/mutex.h | 4 ++++ kernel/mutex.c | 2 +- kernel/sched.c | 3 ++- 6 files changed, 13 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/Kconfig b/arch/Kconfig index 8bf0fa652eb..f78c2be4242 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -175,4 +175,7 @@ config HAVE_PERF_EVENTS_NMI config HAVE_ARCH_JUMP_LABEL bool +config HAVE_ARCH_MUTEX_CPU_RELAX + bool + source "kernel/gcov/Kconfig" diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index e0b98e71ff4..6c6d7b339aa 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -99,6 +99,7 @@ config S390 select HAVE_KERNEL_LZMA select HAVE_KERNEL_LZO select HAVE_GET_USER_PAGES_FAST + select HAVE_ARCH_MUTEX_CPU_RELAX select ARCH_INLINE_SPIN_TRYLOCK select ARCH_INLINE_SPIN_TRYLOCK_BH select ARCH_INLINE_SPIN_LOCK diff --git a/arch/s390/include/asm/mutex.h b/arch/s390/include/asm/mutex.h index 458c1f7fbc1..688271f5f2e 100644 --- a/arch/s390/include/asm/mutex.h +++ b/arch/s390/include/asm/mutex.h @@ -7,3 +7,5 @@ */ #include + +#define arch_mutex_cpu_relax() barrier() diff --git a/include/linux/mutex.h b/include/linux/mutex.h index f363bc8fdc7..94b48bd40dd 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -160,4 +160,8 @@ extern int mutex_trylock(struct mutex *lock); extern void mutex_unlock(struct mutex *lock); extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); +#ifndef CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX +#define arch_mutex_cpu_relax() cpu_relax() +#endif + #endif diff --git a/kernel/mutex.c b/kernel/mutex.c index 200407c1502..a5889fb28ec 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -199,7 +199,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, * memory barriers as we'll eventually observe the right * values at the cost of a few extra spins. */ - cpu_relax(); + arch_mutex_cpu_relax(); } #endif spin_lock_mutex(&lock->wait_lock, flags); diff --git a/kernel/sched.c b/kernel/sched.c index 3e8a7db951a..abe7aec5576 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -75,6 +75,7 @@ #include #include +#include #include "sched_cpupri.h" #include "workqueue_sched.h" @@ -3888,7 +3889,7 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner) if (task_thread_info(rq->curr) != owner || need_resched()) return 0; - cpu_relax(); + arch_mutex_cpu_relax(); } return 1; -- cgit v1.2.3-70-g09d2 From 6c7e550f13f8ad82efb6a5653ae628c2543c1768 Mon Sep 17 00:00:00 2001 From: Franck Bui-Huu Date: Tue, 23 Nov 2010 16:21:43 +0100 Subject: perf: Introduce is_sampling_event() and use it when appropriate. Signed-off-by: Franck Bui-Huu Signed-off-by: Peter Zijlstra LKML-Reference: <1290525705-6265-1-git-send-email-fbuihuu@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 2 +- include/linux/perf_event.h | 5 +++++ kernel/perf_event.c | 10 +++++----- 3 files changed, 11 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 7c1a4c35fd4..c01dfec635d 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -442,7 +442,7 @@ static int x86_setup_perfctr(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; u64 config; - if (!hwc->sample_period) { + if (!is_sampling_event(event)) { hwc->sample_period = x86_pmu.max_period; hwc->last_period = hwc->sample_period; local64_set(&hwc->period_left, hwc->sample_period); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index de2c41758e2..cbf04cc1e63 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -969,6 +969,11 @@ extern int perf_event_overflow(struct perf_event *event, int nmi, struct perf_sample_data *data, struct pt_regs *regs); +static inline bool is_sampling_event(struct perf_event *event) +{ + return event->attr.sample_period != 0; +} + /* * Return 1 for a software event, 0 for a hardware event */ diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 43f757ccf83..880698488c9 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -2514,7 +2514,7 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg) int ret = 0; u64 value; - if (!event->attr.sample_period) + if (!is_sampling_event(event)) return -EINVAL; if (copy_from_user(&value, arg, sizeof(value))) @@ -4385,7 +4385,7 @@ static void perf_swevent_event(struct perf_event *event, u64 nr, if (!regs) return; - if (!hwc->sample_period) + if (!is_sampling_event(event)) return; if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq) @@ -4548,7 +4548,7 @@ static int perf_swevent_add(struct perf_event *event, int flags) struct hw_perf_event *hwc = &event->hw; struct hlist_head *head; - if (hwc->sample_period) { + if (is_sampling_event(event)) { hwc->last_period = hwc->sample_period; perf_swevent_set_period(event); } @@ -4920,7 +4920,7 @@ static void perf_swevent_start_hrtimer(struct perf_event *event) hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hwc->hrtimer.function = perf_swevent_hrtimer; - if (hwc->sample_period) { + if (is_sampling_event(event)) { s64 period = local64_read(&hwc->period_left); if (period) { @@ -4941,7 +4941,7 @@ static void perf_swevent_cancel_hrtimer(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; - if (hwc->sample_period) { + if (is_sampling_event(event)) { ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); local64_set(&hwc->period_left, ktime_to_ns(remaining)); -- cgit v1.2.3-70-g09d2 From 004417a6d468e24399e383645c068b498eed84ad Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 25 Nov 2010 18:38:29 +0100 Subject: perf, arch: Cleanup perf-pmu init vs lockup-detector The perf hardware pmu got initialized at various points in the boot, some before early_initcall() some after (notably arch_initcall). The problem is that the NMI lockup detector is ran from early_initcall() and expects the hardware pmu to be present. Sanitize this by moving all architecture hardware pmu implementations to initialize at early_initcall() and move the lockup detector to an explicit initcall right after that. Cc: paulus Cc: davem Cc: Michael Cree Cc: Deng-Cheng Zhu Acked-by: Paul Mundt Acked-by: Will Deacon Signed-off-by: Peter Zijlstra LKML-Reference: <1290707759.2145.119.camel@laptop> Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/perf_event.h | 6 ------ arch/alpha/kernel/irq_alpha.c | 2 -- arch/alpha/kernel/perf_event.c | 9 ++++++--- arch/arm/kernel/perf_event.c | 2 +- arch/mips/kernel/perf_event_mipsxx.c | 2 +- arch/powerpc/kernel/e500-pmu.c | 2 +- arch/powerpc/kernel/mpc7450-pmu.c | 2 +- arch/powerpc/kernel/power4-pmu.c | 2 +- arch/powerpc/kernel/power5+-pmu.c | 2 +- arch/powerpc/kernel/power5-pmu.c | 2 +- arch/powerpc/kernel/power6-pmu.c | 2 +- arch/powerpc/kernel/power7-pmu.c | 2 +- arch/powerpc/kernel/ppc970-pmu.c | 2 +- arch/sh/kernel/cpu/sh4/perf_event.c | 2 +- arch/sh/kernel/cpu/sh4a/perf_event.c | 2 +- arch/sparc/include/asm/perf_event.h | 4 ---- arch/sparc/kernel/nmi.c | 2 -- arch/sparc/kernel/perf_event.c | 7 +++++-- arch/x86/include/asm/perf_event.h | 2 -- arch/x86/kernel/cpu/common.c | 1 - arch/x86/kernel/cpu/perf_event.c | 11 +++++++---- include/linux/sched.h | 4 ++++ init/main.c | 1 + kernel/watchdog.c | 7 +++---- 24 files changed, 38 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/include/asm/perf_event.h b/arch/alpha/include/asm/perf_event.h index fe792ca818f..5996e7a6757 100644 --- a/arch/alpha/include/asm/perf_event.h +++ b/arch/alpha/include/asm/perf_event.h @@ -1,10 +1,4 @@ #ifndef __ASM_ALPHA_PERF_EVENT_H #define __ASM_ALPHA_PERF_EVENT_H -#ifdef CONFIG_PERF_EVENTS -extern void init_hw_perf_events(void); -#else -static inline void init_hw_perf_events(void) { } -#endif - #endif /* __ASM_ALPHA_PERF_EVENT_H */ diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c index 5f77afb88e8..4c8bb374eb0 100644 --- a/arch/alpha/kernel/irq_alpha.c +++ b/arch/alpha/kernel/irq_alpha.c @@ -112,8 +112,6 @@ init_IRQ(void) wrent(entInt, 0); alpha_mv.init_irq(); - - init_hw_perf_events(); } /* diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index 1cc49683fb6..3283059b6e8 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -863,13 +864,13 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr, /* * Init call to initialise performance events at kernel startup. */ -void __init init_hw_perf_events(void) +int __init init_hw_perf_events(void) { pr_info("Performance events: "); if (!supported_cpu()) { pr_cont("No support for your CPU.\n"); - return; + return 0; } pr_cont("Supported CPU type!\n"); @@ -882,5 +883,7 @@ void __init init_hw_perf_events(void) alpha_pmu = &ev67_pmu; perf_pmu_register(&pmu); -} + return 0; +} +early_initcall(init_hw_perf_events); diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 07a50357492..d45f70e5f2e 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -3038,7 +3038,7 @@ init_hw_perf_events(void) return 0; } -arch_initcall(init_hw_perf_events); +early_initcall(init_hw_perf_events); /* * Callchain handling code. diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c index 5c7c6fc0756..183e0d22666 100644 --- a/arch/mips/kernel/perf_event_mipsxx.c +++ b/arch/mips/kernel/perf_event_mipsxx.c @@ -1047,6 +1047,6 @@ init_hw_perf_events(void) return 0; } -arch_initcall(init_hw_perf_events); +early_initcall(init_hw_perf_events); #endif /* defined(CONFIG_CPU_MIPS32)... */ diff --git a/arch/powerpc/kernel/e500-pmu.c b/arch/powerpc/kernel/e500-pmu.c index 7c07de0d894..b150b510510 100644 --- a/arch/powerpc/kernel/e500-pmu.c +++ b/arch/powerpc/kernel/e500-pmu.c @@ -126,4 +126,4 @@ static int init_e500_pmu(void) return register_fsl_emb_pmu(&e500_pmu); } -arch_initcall(init_e500_pmu); +early_initcall(init_e500_pmu); diff --git a/arch/powerpc/kernel/mpc7450-pmu.c b/arch/powerpc/kernel/mpc7450-pmu.c index 09d72028f31..2cc5e0301d0 100644 --- a/arch/powerpc/kernel/mpc7450-pmu.c +++ b/arch/powerpc/kernel/mpc7450-pmu.c @@ -414,4 +414,4 @@ static int init_mpc7450_pmu(void) return register_power_pmu(&mpc7450_pmu); } -arch_initcall(init_mpc7450_pmu); +early_initcall(init_mpc7450_pmu); diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c index 2a361cdda63..ead8b3c2649 100644 --- a/arch/powerpc/kernel/power4-pmu.c +++ b/arch/powerpc/kernel/power4-pmu.c @@ -613,4 +613,4 @@ static int init_power4_pmu(void) return register_power_pmu(&power4_pmu); } -arch_initcall(init_power4_pmu); +early_initcall(init_power4_pmu); diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c index 199de527d41..eca0ac595cb 100644 --- a/arch/powerpc/kernel/power5+-pmu.c +++ b/arch/powerpc/kernel/power5+-pmu.c @@ -682,4 +682,4 @@ static int init_power5p_pmu(void) return register_power_pmu(&power5p_pmu); } -arch_initcall(init_power5p_pmu); +early_initcall(init_power5p_pmu); diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c index 98b6a729a9d..d5ff0f64a5e 100644 --- a/arch/powerpc/kernel/power5-pmu.c +++ b/arch/powerpc/kernel/power5-pmu.c @@ -621,4 +621,4 @@ static int init_power5_pmu(void) return register_power_pmu(&power5_pmu); } -arch_initcall(init_power5_pmu); +early_initcall(init_power5_pmu); diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c index 84a607bda8f..31603927e37 100644 --- a/arch/powerpc/kernel/power6-pmu.c +++ b/arch/powerpc/kernel/power6-pmu.c @@ -544,4 +544,4 @@ static int init_power6_pmu(void) return register_power_pmu(&power6_pmu); } -arch_initcall(init_power6_pmu); +early_initcall(init_power6_pmu); diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c index 852f7b7f6b4..593740fcb79 100644 --- a/arch/powerpc/kernel/power7-pmu.c +++ b/arch/powerpc/kernel/power7-pmu.c @@ -369,4 +369,4 @@ static int init_power7_pmu(void) return register_power_pmu(&power7_pmu); } -arch_initcall(init_power7_pmu); +early_initcall(init_power7_pmu); diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c index 3fee685de4d..9a6e093858f 100644 --- a/arch/powerpc/kernel/ppc970-pmu.c +++ b/arch/powerpc/kernel/ppc970-pmu.c @@ -494,4 +494,4 @@ static int init_ppc970_pmu(void) return register_power_pmu(&ppc970_pmu); } -arch_initcall(init_ppc970_pmu); +early_initcall(init_ppc970_pmu); diff --git a/arch/sh/kernel/cpu/sh4/perf_event.c b/arch/sh/kernel/cpu/sh4/perf_event.c index dbf3b4bb71f..748955df018 100644 --- a/arch/sh/kernel/cpu/sh4/perf_event.c +++ b/arch/sh/kernel/cpu/sh4/perf_event.c @@ -250,4 +250,4 @@ static int __init sh7750_pmu_init(void) return register_sh_pmu(&sh7750_pmu); } -arch_initcall(sh7750_pmu_init); +early_initcall(sh7750_pmu_init); diff --git a/arch/sh/kernel/cpu/sh4a/perf_event.c b/arch/sh/kernel/cpu/sh4a/perf_event.c index 58027652573..17e6bebfede 100644 --- a/arch/sh/kernel/cpu/sh4a/perf_event.c +++ b/arch/sh/kernel/cpu/sh4a/perf_event.c @@ -284,4 +284,4 @@ static int __init sh4a_pmu_init(void) return register_sh_pmu(&sh4a_pmu); } -arch_initcall(sh4a_pmu_init); +early_initcall(sh4a_pmu_init); diff --git a/arch/sparc/include/asm/perf_event.h b/arch/sparc/include/asm/perf_event.h index 6e8bfa1786d..4d3dbe3703e 100644 --- a/arch/sparc/include/asm/perf_event.h +++ b/arch/sparc/include/asm/perf_event.h @@ -4,8 +4,6 @@ #ifdef CONFIG_PERF_EVENTS #include -extern void init_hw_perf_events(void); - #define perf_arch_fetch_caller_regs(regs, ip) \ do { \ unsigned long _pstate, _asi, _pil, _i7, _fp; \ @@ -26,8 +24,6 @@ do { \ (regs)->u_regs[UREG_I6] = _fp; \ (regs)->u_regs[UREG_I7] = _i7; \ } while (0) -#else -static inline void init_hw_perf_events(void) { } #endif #endif diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index a4bd7ba74c8..300f810142f 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -270,8 +270,6 @@ int __init nmi_init(void) atomic_set(&nmi_active, -1); } } - if (!err) - init_hw_perf_events(); return err; } diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 0d6deb55a2a..75c5b126397 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1307,20 +1307,23 @@ static bool __init supported_pmu(void) return false; } -void __init init_hw_perf_events(void) +int __init init_hw_perf_events(void) { pr_info("Performance events: "); if (!supported_pmu()) { pr_cont("No support for PMU type '%s'\n", sparc_pmu_type); - return; + return 0; } pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type); perf_pmu_register(&pmu); register_die_notifier(&perf_event_nmi_notifier); + + return 0; } +early_initcall(init_hw_perf_event); void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 550e26b1dbb..d9d4dae305f 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -125,7 +125,6 @@ union cpuid10_edx { #define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */ #ifdef CONFIG_PERF_EVENTS -extern void init_hw_perf_events(void); extern void perf_events_lapic_init(void); #define PERF_EVENT_INDEX_OFFSET 0 @@ -156,7 +155,6 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs); } #else -static inline void init_hw_perf_events(void) { } static inline void perf_events_lapic_init(void) { } #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4b68bda3093..1d59834396b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -894,7 +894,6 @@ void __init identify_boot_cpu(void) #else vgetcpu_set_mode(); #endif - init_hw_perf_events(); } void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index c01dfec635d..817d2b195e8 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1353,7 +1353,7 @@ static void __init pmu_check_apic(void) pr_info("no hardware sampling interrupt available.\n"); } -void __init init_hw_perf_events(void) +int __init init_hw_perf_events(void) { struct event_constraint *c; int err; @@ -1368,11 +1368,11 @@ void __init init_hw_perf_events(void) err = amd_pmu_init(); break; default: - return; + return 0; } if (err != 0) { pr_cont("no PMU driver, software events only.\n"); - return; + return 0; } pmu_check_apic(); @@ -1380,7 +1380,7 @@ void __init init_hw_perf_events(void) /* sanity check that the hardware exists or is emulated */ if (!check_hw_exists()) { pr_cont("Broken PMU hardware detected, software events only.\n"); - return; + return 0; } pr_cont("%s PMU driver.\n", x86_pmu.name); @@ -1431,7 +1431,10 @@ void __init init_hw_perf_events(void) perf_pmu_register(&pmu); perf_cpu_notifier(x86_pmu_notifier); + + return 0; } +early_initcall(init_hw_perf_events); static inline void x86_pmu_read(struct perf_event *event) { diff --git a/include/linux/sched.h b/include/linux/sched.h index 2c79e921a68..d2e63d1e725 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -316,6 +316,7 @@ extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, size_t *lenp, loff_t *ppos); extern unsigned int softlockup_panic; extern int softlockup_thresh; +void lockup_detector_init(void); #else static inline void touch_softlockup_watchdog(void) { @@ -326,6 +327,9 @@ static inline void touch_softlockup_watchdog_sync(void) static inline void touch_all_softlockup_watchdogs(void) { } +static inline void lockup_detector_init(void) +{ +} #endif #ifdef CONFIG_DETECT_HUNG_TASK diff --git a/init/main.c b/init/main.c index 8646401f7a0..261ad7b3fe0 100644 --- a/init/main.c +++ b/init/main.c @@ -882,6 +882,7 @@ static int __init kernel_init(void * unused) smp_prepare_cpus(setup_max_cpus); do_pre_smp_initcalls(); + lockup_detector_init(); smp_init(); sched_init_smp(); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 6e3c41a4024..cad4e42060a 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -547,13 +547,13 @@ static struct notifier_block __cpuinitdata cpu_nfb = { .notifier_call = cpu_callback }; -static int __init spawn_watchdog_task(void) +void __init lockup_detector_init(void) { void *cpu = (void *)(long)smp_processor_id(); int err; if (no_watchdog) - return 0; + return; err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); WARN_ON(notifier_to_errno(err)); @@ -561,6 +561,5 @@ static int __init spawn_watchdog_task(void) cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); register_cpu_notifier(&cpu_nfb); - return 0; + return; } -early_initcall(spawn_watchdog_task); -- cgit v1.2.3-70-g09d2 From 5a0d2268d259886f0c87131639d19eb4a67b4532 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 23 Nov 2010 10:42:02 +0000 Subject: net: add netif_tx_queue_frozen_or_stopped When testing struct netdev_queue state against FROZEN bit, we also test XOFF bit. We can test both bits at once and save some cycles. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 ++++-- net/core/netpoll.c | 3 +-- net/core/pktgen.c | 2 +- net/sched/sch_generic.c | 8 +++----- net/sched/sch_teql.c | 3 +-- 5 files changed, 10 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index badf9285fe0..7c6ae2f4b9a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -493,6 +493,8 @@ static inline void napi_synchronize(const struct napi_struct *n) enum netdev_queue_state_t { __QUEUE_STATE_XOFF, __QUEUE_STATE_FROZEN, +#define QUEUE_STATE_XOFF_OR_FROZEN ((1 << __QUEUE_STATE_XOFF) | \ + (1 << __QUEUE_STATE_FROZEN)) }; struct netdev_queue { @@ -1629,9 +1631,9 @@ static inline int netif_queue_stopped(const struct net_device *dev) return netif_tx_queue_stopped(netdev_get_tx_queue(dev, 0)); } -static inline int netif_tx_queue_frozen(const struct netdev_queue *dev_queue) +static inline int netif_tx_queue_frozen_or_stopped(const struct netdev_queue *dev_queue) { - return test_bit(__QUEUE_STATE_FROZEN, &dev_queue->state); + return dev_queue->state & QUEUE_STATE_XOFF_OR_FROZEN; } /** diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 4e98ffac3af..ee38acb6d46 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -76,8 +76,7 @@ static void queue_process(struct work_struct *work) local_irq_save(flags); __netif_tx_lock(txq, smp_processor_id()); - if (netif_tx_queue_stopped(txq) || - netif_tx_queue_frozen(txq) || + if (netif_tx_queue_frozen_or_stopped(txq) || ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) { skb_queue_head(&npinfo->txq, skb); __netif_tx_unlock(txq); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 2e57830cbeb..2953b2abc97 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3527,7 +3527,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) __netif_tx_lock_bh(txq); - if (unlikely(netif_tx_queue_stopped(txq) || netif_tx_queue_frozen(txq))) { + if (unlikely(netif_tx_queue_frozen_or_stopped(txq))) { ret = NETDEV_TX_BUSY; pkt_dev->last_ok = 0; goto unlock; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 5dbb3cd96e5..7f0bd895264 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -60,8 +60,7 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q) /* check the reason of requeuing without tx lock first */ txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); - if (!netif_tx_queue_stopped(txq) && - !netif_tx_queue_frozen(txq)) { + if (!netif_tx_queue_frozen_or_stopped(txq)) { q->gso_skb = NULL; q->q.qlen--; } else @@ -122,7 +121,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, spin_unlock(root_lock); HARD_TX_LOCK(dev, txq, smp_processor_id()); - if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq)) + if (!netif_tx_queue_frozen_or_stopped(txq)) ret = dev_hard_start_xmit(skb, dev, txq); HARD_TX_UNLOCK(dev, txq); @@ -144,8 +143,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, ret = dev_requeue_skb(skb, q); } - if (ret && (netif_tx_queue_stopped(txq) || - netif_tx_queue_frozen(txq))) + if (ret && netif_tx_queue_frozen_or_stopped(txq)) ret = 0; return ret; diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 401af959670..106479a7c94 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -309,8 +309,7 @@ restart: if (__netif_tx_trylock(slave_txq)) { unsigned int length = qdisc_pkt_len(skb); - if (!netif_tx_queue_stopped(slave_txq) && - !netif_tx_queue_frozen(slave_txq) && + if (!netif_tx_queue_frozen_or_stopped(slave_txq) && slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) { txq_trans_update(slave_txq); __netif_tx_unlock(slave_txq); -- cgit v1.2.3-70-g09d2 From c66fb347946ebdd5b10908866ecc9fa05ee2cf3d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 28 Nov 2010 14:09:57 -0800 Subject: Export 'get_pipe_info()' to other users And in particular, use it in 'pipe_fcntl()'. The other pipe functions do not need to use the 'careful' version, since they are only ever called for things that are already known to be pipes. The normal read/write/ioctl functions are called through the file operations structures, so if a file isn't a pipe, they'd never get called. But pipe_fcntl() is special, and called directly from the generic fcntl code, and needs to use the same careful function that the splice code is using. Cc: Jens Axboe Cc: Andrew Morton Cc: Al Viro Cc: Dave Jones Signed-off-by: Linus Torvalds --- fs/pipe.c | 2 +- fs/splice.c | 11 ----------- include/linux/pipe_fs_i.h | 12 ++++++++++++ 3 files changed, 13 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/fs/pipe.c b/fs/pipe.c index a8012a95572..b8997f8c632 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1204,7 +1204,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) struct pipe_inode_info *pipe; long ret; - pipe = file->f_path.dentry->d_inode->i_pipe; + pipe = get_pipe_info(file); if (!pipe) return -EBADF; diff --git a/fs/splice.c b/fs/splice.c index 0d92dabcc57..ce2f02579e3 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1311,17 +1311,6 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, struct pipe_inode_info *opipe, size_t len, unsigned int flags); -/* - * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same - * location, so checking ->i_pipe is not enough to verify that this is a - * pipe. - */ -static inline struct pipe_inode_info *get_pipe_info(struct file *file) -{ - struct inode *i = file->f_path.dentry->d_inode; - - return S_ISFIFO(i->i_mode) ? i->i_pipe : NULL; -} /* * Determine where to splice to/from. diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 445796945ac..3c5ac314742 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -161,4 +161,16 @@ void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *); /* for F_SETPIPE_SZ and F_GETPIPE_SZ */ long pipe_fcntl(struct file *, unsigned int, unsigned long arg); +/* + * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same + * location, so checking ->i_pipe is not enough to verify that this is a + * pipe. + */ +static inline struct pipe_inode_info *get_pipe_info(struct file *file) +{ + struct inode *i = file->f_path.dentry->d_inode; + + return S_ISFIFO(i->i_mode) ? i->i_pipe : NULL; +} + #endif -- cgit v1.2.3-70-g09d2 From 72083646528d4887b920deb71b37e09bc7d227bb Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 28 Nov 2010 16:27:19 -0800 Subject: Un-inline get_pipe_info() helper function This avoids some include-file hell, and the function isn't really important enough to be inlined anyway. Reported-by: Ingo Molnar Signed-off-by: Linus Torvalds --- fs/pipe.c | 12 ++++++++++++ include/linux/pipe_fs_i.h | 13 +------------ 2 files changed, 13 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/fs/pipe.c b/fs/pipe.c index b8997f8c632..04629f36e39 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1199,6 +1199,18 @@ int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf, return ret; } +/* + * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same + * location, so checking ->i_pipe is not enough to verify that this is a + * pipe. + */ +struct pipe_inode_info *get_pipe_info(struct file *file) +{ + struct inode *i = file->f_path.dentry->d_inode; + + return S_ISFIFO(i->i_mode) ? i->i_pipe : NULL; +} + long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) { struct pipe_inode_info *pipe; diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 3c5ac314742..bb27d7ec2fb 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -160,17 +160,6 @@ void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *); /* for F_SETPIPE_SZ and F_GETPIPE_SZ */ long pipe_fcntl(struct file *, unsigned int, unsigned long arg); - -/* - * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same - * location, so checking ->i_pipe is not enough to verify that this is a - * pipe. - */ -static inline struct pipe_inode_info *get_pipe_info(struct file *file) -{ - struct inode *i = file->f_path.dentry->d_inode; - - return S_ISFIFO(i->i_mode) ? i->i_pipe : NULL; -} +struct pipe_inode_info *get_pipe_info(struct file *file); #endif -- cgit v1.2.3-70-g09d2 From bf26414510103448ad3dc069c7422462f03ea3d7 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 26 Nov 2010 08:36:09 +0000 Subject: xps: Add CONFIG_XPS This patch adds XPS_CONFIG option to enable and disable XPS. This is done in the same manner as RPS_CONFIG. This is also fixes build failure in XPS code when SMP is not enabled. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/netdevice.h | 52 +++++++++++++++++++++++++---------------------- net/Kconfig | 5 +++++ net/core/dev.c | 9 +++++--- net/core/net-sysfs.c | 47 ++++++++++++++++++++++++++++++------------ net/core/net-sysfs.h | 3 --- 5 files changed, 73 insertions(+), 43 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7c6ae2f4b9a..9ae4544f0cf 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -535,30 +535,6 @@ struct rps_map { }; #define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + (_num * sizeof(u16))) -/* - * This structure holds an XPS map which can be of variable length. The - * map is an array of queues. - */ -struct xps_map { - unsigned int len; - unsigned int alloc_len; - struct rcu_head rcu; - u16 queues[0]; -}; -#define XPS_MAP_SIZE(_num) (sizeof(struct xps_map) + (_num * sizeof(u16))) -#define XPS_MIN_MAP_ALLOC ((L1_CACHE_BYTES - sizeof(struct xps_map)) \ - / sizeof(u16)) - -/* - * This structure holds all XPS maps for device. Maps are indexed by CPU. - */ -struct xps_dev_maps { - struct rcu_head rcu; - struct xps_map *cpu_map[0]; -}; -#define XPS_DEV_MAPS_SIZE (sizeof(struct xps_dev_maps) + \ - (nr_cpu_ids * sizeof(struct xps_map *))) - /* * The rps_dev_flow structure contains the mapping of a flow to a CPU and the * tail pointer for that CPU's input queue at the time of last enqueue. @@ -626,6 +602,32 @@ struct netdev_rx_queue { } ____cacheline_aligned_in_smp; #endif /* CONFIG_RPS */ +#ifdef CONFIG_XPS +/* + * This structure holds an XPS map which can be of variable length. The + * map is an array of queues. + */ +struct xps_map { + unsigned int len; + unsigned int alloc_len; + struct rcu_head rcu; + u16 queues[0]; +}; +#define XPS_MAP_SIZE(_num) (sizeof(struct xps_map) + (_num * sizeof(u16))) +#define XPS_MIN_MAP_ALLOC ((L1_CACHE_BYTES - sizeof(struct xps_map)) \ + / sizeof(u16)) + +/* + * This structure holds all XPS maps for device. Maps are indexed by CPU. + */ +struct xps_dev_maps { + struct rcu_head rcu; + struct xps_map *cpu_map[0]; +}; +#define XPS_DEV_MAPS_SIZE (sizeof(struct xps_dev_maps) + \ + (nr_cpu_ids * sizeof(struct xps_map *))) +#endif /* CONFIG_XPS */ + /* * This structure defines the management hooks for network devices. * The following hooks can be defined; unless noted otherwise, they are @@ -1046,7 +1048,9 @@ struct net_device { unsigned long tx_queue_len; /* Max frames per queue allowed */ spinlock_t tx_global_lock; +#ifdef CONFIG_XPS struct xps_dev_maps *xps_maps; +#endif /* These may be needed for future network-power-down code. */ diff --git a/net/Kconfig b/net/Kconfig index 55fd82e9ffd..126c2af0fc1 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -220,6 +220,11 @@ config RPS depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS default y +config XPS + boolean + depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS + default y + menu "Network testing" config NET_PKTGEN diff --git a/net/core/dev.c b/net/core/dev.c index c852f0038a0..3259d2c323a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1567,6 +1567,9 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues, txq); + if (rc) + return rc; + if (txq < dev->real_num_tx_queues) qdisc_reset_all_tx_gt(dev, txq); } @@ -2148,7 +2151,7 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) { -#ifdef CONFIG_RPS +#ifdef CONFIG_XPS struct xps_dev_maps *dev_maps; struct xps_map *map; int queue_index = -1; @@ -5085,9 +5088,9 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev, } EXPORT_SYMBOL(netif_stacked_transfer_operstate); +#ifdef CONFIG_RPS static int netif_alloc_rx_queues(struct net_device *dev) { -#ifdef CONFIG_RPS unsigned int i, count = dev->num_rx_queues; struct netdev_rx_queue *rx; @@ -5102,9 +5105,9 @@ static int netif_alloc_rx_queues(struct net_device *dev) for (i = 0; i < count; i++) rx[i].dev = dev; -#endif return 0; } +#endif static int netif_alloc_netdev_queues(struct net_device *dev) { diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 68dbbfdee27..99c11294623 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -751,10 +751,12 @@ static int rx_queue_add_kobject(struct net_device *net, int index) return error; } +#endif /* CONFIG_RPS */ int net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num) { +#ifdef CONFIG_RPS int i; int error = 0; @@ -770,8 +772,12 @@ net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num) kobject_put(&net->_rx[i].kobj); return error; +#else + return 0; +#endif } +#ifdef CONFIG_XPS /* * netdev_queue sysfs structures and functions. */ @@ -1090,10 +1096,12 @@ static int netdev_queue_add_kobject(struct net_device *net, int index) return error; } +#endif /* CONFIG_XPS */ int netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num) { +#ifdef CONFIG_XPS int i; int error = 0; @@ -1109,27 +1117,36 @@ netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num) kobject_put(&net->_tx[i].kobj); return error; +#else + return 0; +#endif } static int register_queue_kobjects(struct net_device *net) { - int error = 0, txq = 0, rxq = 0; + int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0; +#if defined(CONFIG_RPS) || defined(CONFIG_XPS) net->queues_kset = kset_create_and_add("queues", NULL, &net->dev.kobj); if (!net->queues_kset) return -ENOMEM; +#endif + +#ifdef CONFIG_RPS + real_rx = net->real_num_rx_queues; +#endif + real_tx = net->real_num_tx_queues; - error = net_rx_queue_update_kobjects(net, 0, net->real_num_rx_queues); + error = net_rx_queue_update_kobjects(net, 0, real_rx); if (error) goto error; - rxq = net->real_num_rx_queues; + rxq = real_rx; - error = netdev_queue_update_kobjects(net, 0, - net->real_num_tx_queues); + error = netdev_queue_update_kobjects(net, 0, real_tx); if (error) goto error; - txq = net->real_num_tx_queues; + txq = real_tx; return 0; @@ -1141,11 +1158,19 @@ error: static void remove_queue_kobjects(struct net_device *net) { - net_rx_queue_update_kobjects(net, net->real_num_rx_queues, 0); - netdev_queue_update_kobjects(net, net->real_num_tx_queues, 0); + int real_rx = 0, real_tx = 0; + +#ifdef CONFIG_RPS + real_rx = net->real_num_rx_queues; +#endif + real_tx = net->real_num_tx_queues; + + net_rx_queue_update_kobjects(net, real_rx, 0); + netdev_queue_update_kobjects(net, real_tx, 0); +#if defined(CONFIG_RPS) || defined(CONFIG_XPS) kset_unregister(net->queues_kset); +#endif } -#endif /* CONFIG_RPS */ static const void *net_current_ns(void) { @@ -1244,9 +1269,7 @@ void netdev_unregister_kobject(struct net_device * net) kobject_get(&dev->kobj); -#ifdef CONFIG_RPS remove_queue_kobjects(net); -#endif device_del(dev); } @@ -1285,13 +1308,11 @@ int netdev_register_kobject(struct net_device *net) if (error) return error; -#ifdef CONFIG_RPS error = register_queue_kobjects(net); if (error) { device_del(dev); return error; } -#endif return error; } diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h index 25ec2ee57df..bd7751ec1c4 100644 --- a/net/core/net-sysfs.h +++ b/net/core/net-sysfs.h @@ -4,11 +4,8 @@ int netdev_kobject_init(void); int netdev_register_kobject(struct net_device *); void netdev_unregister_kobject(struct net_device *); -#ifdef CONFIG_RPS int net_rx_queue_update_kobjects(struct net_device *, int old_num, int new_num); int netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num); #endif - -#endif -- cgit v1.2.3-70-g09d2 From 1ae0affedce1d3e401991fbe7f2674753f0a7641 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 26 Nov 2010 23:02:58 +0000 Subject: mmc, sh: Correct value for reset This resolves a regression that I introduced in "mmc, sh: Move constants to sh_mmcif.h". Having examined the manual and tested the code on an AP4EVB board it seems that the correct sequence is. 1) Write 1 to bit 31 and zeros to all other bits 2) Write zero to all bits Cc: Yusuke Goda Cc: Magnus Damm Signed-off-by: Simon Horman Signed-off-by: Paul Mundt --- include/linux/mmc/sh_mmcif.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/sh_mmcif.h b/include/linux/mmc/sh_mmcif.h index a6bfa529649..342ec1a3868 100644 --- a/include/linux/mmc/sh_mmcif.h +++ b/include/linux/mmc/sh_mmcif.h @@ -80,7 +80,7 @@ struct sh_mmcif_plat_data { /* CE_VERSION */ #define SOFT_RST_ON (1 << 31) -#define SOFT_RST_OFF ~SOFT_RST_ON +#define SOFT_RST_OFF 0 static inline u32 sh_mmcif_readl(void __iomem *addr, int reg) { @@ -168,12 +168,9 @@ static inline int sh_mmcif_boot_do_read(void __iomem *base, static inline void sh_mmcif_boot_init(void __iomem *base) { - unsigned long tmp; - /* reset */ - tmp = sh_mmcif_readl(base, MMCIF_CE_VERSION); - sh_mmcif_writel(base, MMCIF_CE_VERSION, tmp | SOFT_RST_ON); - sh_mmcif_writel(base, MMCIF_CE_VERSION, tmp & SOFT_RST_OFF); + sh_mmcif_writel(base, MMCIF_CE_VERSION, SOFT_RST_ON); + sh_mmcif_writel(base, MMCIF_CE_VERSION, SOFT_RST_OFF); /* byte swap */ sh_mmcif_writel(base, MMCIF_CE_BUF_ACC, BUF_ACC_ATYP); -- cgit v1.2.3-70-g09d2 From 22efa0fee32d9e7f6f6fbc396a872b5708d86048 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Sat, 27 Nov 2010 00:11:55 +0000 Subject: sh, mmc: Use defines when setting CE_CLK_CTRL The 16-19th bits of CE_CLK_CTRL set the MMC clock frequency. Cc: Yusuke Goda Cc: Magnus Damm Signed-off-by: Simon Horman Signed-off-by: Paul Mundt --- include/linux/mmc/sh_mmcif.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/sh_mmcif.h b/include/linux/mmc/sh_mmcif.h index 342ec1a3868..ffabf8c0a53 100644 --- a/include/linux/mmc/sh_mmcif.h +++ b/include/linux/mmc/sh_mmcif.h @@ -70,6 +70,9 @@ struct sh_mmcif_plat_data { #define CLK_ENABLE (1 << 24) /* 1: output mmc clock */ #define CLK_CLEAR ((1 << 19) | (1 << 18) | (1 << 17) | (1 << 16)) #define CLK_SUP_PCLK ((1 << 19) | (1 << 18) | (1 << 17) | (1 << 16)) +#define CLKDIV_4 (1<<16) /* mmc clock frequency. + * n: bus clock/(2^(n+1)) */ +#define CLKDIV_256 (7<<16) /* mmc clock frequency. (see above) */ #define SRSPTO_256 ((1 << 13) | (0 << 12)) /* resp timeout */ #define SRBSYTO_29 ((1 << 11) | (1 << 10) | \ (1 << 9) | (1 << 8)) /* resp busy timeout */ @@ -178,14 +181,10 @@ static inline void sh_mmcif_boot_init(void __iomem *base) /* Set block size in MMCIF hardware */ sh_mmcif_writel(base, MMCIF_CE_BLOCK_SET, SH_MMCIF_BBS); - /* Enable the clock, set it to Bus clock/256 (about 325Khz). - * It is unclear where 0x70000 comes from or if it is even needed. - * It is there for byte-compatibility with code that is known to - * work. - */ + /* Enable the clock, set it to Bus clock/256 (about 325Khz). */ sh_mmcif_writel(base, MMCIF_CE_CLK_CTRL, - CLK_ENABLE | SRSPTO_256 | SRBSYTO_29 | SRWDTO_29 | - SCCSTO_29 | 0x70000); + CLK_ENABLE | CLKDIV_256 | SRSPTO_256 | + SRBSYTO_29 | SRWDTO_29 | SCCSTO_29); /* CMD0 */ sh_mmcif_boot_cmd(base, 0x00000040, 0); @@ -210,7 +209,9 @@ static inline void sh_mmcif_boot_slurp(void __iomem *base, unsigned long tmp; /* In data transfer mode: Set clock to Bus clock/4 (about 20Mhz) */ - sh_mmcif_writel(base, MMCIF_CE_CLK_CTRL, 0x01012fff); + sh_mmcif_writel(base, MMCIF_CE_CLK_CTRL, + CLK_ENABLE | CLKDIV_4 | SRSPTO_256 | + SRBSYTO_29 | SRWDTO_29 | SCCSTO_29); /* CMD9 - Get CSD */ sh_mmcif_boot_cmd(base, 0x09806000, 0x00010000); -- cgit v1.2.3-70-g09d2 From a41778694806ac1ccd4b1dafed1abef8d5ba98ac Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 28 Nov 2010 21:43:02 +0000 Subject: xps: add __rcu annotations Avoid sparse warnings : add __rcu annotations and use rcu_dereference_protected() where necessary. Signed-off-by: Eric Dumazet Cc: Tom Herbert Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++-- net/core/net-sysfs.c | 24 +++++++++++++++--------- 2 files changed, 17 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9ae4544f0cf..4b0c7f3aa32 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -622,7 +622,7 @@ struct xps_map { */ struct xps_dev_maps { struct rcu_head rcu; - struct xps_map *cpu_map[0]; + struct xps_map __rcu *cpu_map[0]; }; #define XPS_DEV_MAPS_SIZE (sizeof(struct xps_dev_maps) + \ (nr_cpu_ids * sizeof(struct xps_map *))) @@ -1049,7 +1049,7 @@ struct net_device { spinlock_t tx_global_lock; #ifdef CONFIG_XPS - struct xps_dev_maps *xps_maps; + struct xps_dev_maps __rcu *xps_maps; #endif /* These may be needed for future network-power-down code. */ diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 35ef42fa0cf..f85cee3d869 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -899,6 +899,8 @@ static void xps_dev_maps_release(struct rcu_head *rcu) } static DEFINE_MUTEX(xps_map_mutex); +#define xmap_dereference(P) \ + rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex)) static ssize_t store_xps_map(struct netdev_queue *queue, struct netdev_queue_attribute *attribute, @@ -935,11 +937,12 @@ static ssize_t store_xps_map(struct netdev_queue *queue, mutex_lock(&xps_map_mutex); - dev_maps = dev->xps_maps; + dev_maps = xmap_dereference(dev->xps_maps); for_each_possible_cpu(cpu) { - new_map = map = dev_maps ? dev_maps->cpu_map[cpu] : NULL; - + map = dev_maps ? + xmap_dereference(dev_maps->cpu_map[cpu]) : NULL; + new_map = map; if (map) { for (pos = 0; pos < map->len; pos++) if (map->queues[pos] == index) @@ -975,13 +978,14 @@ static ssize_t store_xps_map(struct netdev_queue *queue, else new_map = NULL; } - new_dev_maps->cpu_map[cpu] = new_map; + RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], new_map); } /* Cleanup old maps */ for_each_possible_cpu(cpu) { - map = dev_maps ? dev_maps->cpu_map[cpu] : NULL; - if (map && new_dev_maps->cpu_map[cpu] != map) + map = dev_maps ? + xmap_dereference(dev_maps->cpu_map[cpu]) : NULL; + if (map && xmap_dereference(new_dev_maps->cpu_map[cpu]) != map) call_rcu(&map->rcu, xps_map_release); if (new_dev_maps->cpu_map[cpu]) nonempty = 1; @@ -1007,7 +1011,9 @@ error: if (new_dev_maps) for_each_possible_cpu(i) - kfree(new_dev_maps->cpu_map[i]); + kfree(rcu_dereference_protected( + new_dev_maps->cpu_map[i], + 1)); kfree(new_dev_maps); free_cpumask_var(mask); return -ENOMEM; @@ -1033,11 +1039,11 @@ static void netdev_queue_release(struct kobject *kobj) index = get_netdev_queue_index(queue); mutex_lock(&xps_map_mutex); - dev_maps = dev->xps_maps; + dev_maps = xmap_dereference(dev->xps_maps); if (dev_maps) { for_each_possible_cpu(i) { - map = dev_maps->cpu_map[i]; + map = xmap_dereference(dev_maps->cpu_map[i]); if (!map) continue; -- cgit v1.2.3-70-g09d2 From f7ca38dfe58c20cb1aa2ed9643187e8b194b5bae Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 25 Nov 2010 10:02:29 +0100 Subject: nl80211/cfg80211: extend mgmt-tx API for off-channel With p2p, it is sometimes necessary to transmit a frame (typically an action frame) on another channel than the current channel. Enable this through the CMD_FRAME API, and allow it to wait for a response. A new command allows that wait to be aborted. However, allow userspace to specify whether or not it wants to allow off-channel TX, it may actually want to use the same channel only. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 25 +++++++++++++++++----- include/net/cfg80211.h | 11 +++++++--- net/mac80211/cfg.c | 7 ++++-- net/wireless/core.h | 4 ++-- net/wireless/mlme.c | 9 ++++---- net/wireless/nl80211.c | 57 +++++++++++++++++++++++++++++++++++++++++++------ 6 files changed, 91 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index d706bf3badc..5cfa579df47 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -358,11 +358,16 @@ * user space application). %NL80211_ATTR_FRAME is used to specify the * frame contents (including header). %NL80211_ATTR_WIPHY_FREQ (and * optionally %NL80211_ATTR_WIPHY_CHANNEL_TYPE) is used to indicate on - * which channel the frame is to be transmitted or was received. This - * channel has to be the current channel (remain-on-channel or the - * operational channel). When called, this operation returns a cookie - * (%NL80211_ATTR_COOKIE) that will be included with the TX status event - * pertaining to the TX request. + * which channel the frame is to be transmitted or was received. If this + * channel is not the current channel (remain-on-channel or the + * operational channel) the device will switch to the given channel and + * transmit the frame, optionally waiting for a response for the time + * specified using %NL80211_ATTR_DURATION. When called, this operation + * returns a cookie (%NL80211_ATTR_COOKIE) that will be included with the + * TX status event pertaining to the TX request. + * @NL80211_CMD_FRAME_WAIT_CANCEL: When an off-channel TX was requested, this + * command may be used with the corresponding cookie to cancel the wait + * time if it is known that it is no longer necessary. * @NL80211_CMD_ACTION: Alias for @NL80211_CMD_FRAME for backward compatibility. * @NL80211_CMD_FRAME_TX_STATUS: Report TX status of a management frame * transmitted with %NL80211_CMD_FRAME. %NL80211_ATTR_COOKIE identifies @@ -493,6 +498,8 @@ enum nl80211_commands { NL80211_CMD_SET_CHANNEL, NL80211_CMD_SET_WDS_PEER, + NL80211_CMD_FRAME_WAIT_CANCEL, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -828,6 +835,12 @@ enum nl80211_commands { * * @NL80211_ATTR_MCAST_RATE: Multicast tx rate (in 100 kbps) for IBSS * + * @NL80211_ATTR_OFFCHANNEL_TX_OK: For management frame TX, the frame may be + * transmitted on another channel when the channel given doesn't match + * the current channel. If the current channel doesn't match and this + * flag isn't set, the frame will be rejected. This is also used as an + * nl80211 capability flag. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1002,6 +1015,8 @@ enum nl80211_attrs { NL80211_ATTR_MCAST_RATE, + NL80211_ATTR_OFFCHANNEL_TX_OK, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0663945cfa4..49a7c53a48c 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1134,7 +1134,9 @@ struct cfg80211_pmksa { * @cancel_remain_on_channel: Cancel an on-going remain-on-channel operation. * This allows the operation to be terminated prior to timeout based on * the duration value. - * @mgmt_tx: Transmit a management frame + * @mgmt_tx: Transmit a management frame. + * @mgmt_tx_cancel_wait: Cancel the wait time from transmitting a management + * frame on another channel * * @testmode_cmd: run a test mode command * @@ -1291,10 +1293,13 @@ struct cfg80211_ops { u64 cookie); int (*mgmt_tx)(struct wiphy *wiphy, struct net_device *dev, - struct ieee80211_channel *chan, + struct ieee80211_channel *chan, bool offchan, enum nl80211_channel_type channel_type, - bool channel_type_valid, + bool channel_type_valid, unsigned int wait, const u8 *buf, size_t len, u64 *cookie); + int (*mgmt_tx_cancel_wait)(struct wiphy *wiphy, + struct net_device *dev, + u64 cookie); int (*set_power_mgmt)(struct wiphy *wiphy, struct net_device *dev, bool enabled, int timeout); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 0c544074479..aac2d7de828 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1552,9 +1552,9 @@ static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy, } static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, - struct ieee80211_channel *chan, + struct ieee80211_channel *chan, bool offchan, enum nl80211_channel_type channel_type, - bool channel_type_valid, + bool channel_type_valid, unsigned int wait, const u8 *buf, size_t len, u64 *cookie) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -1565,6 +1565,9 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, u32 flags = IEEE80211_TX_INTFL_NL80211_FRAME_TX | IEEE80211_TX_CTL_REQ_TX_STATUS; + if (offchan) + return -EOPNOTSUPP; + /* Check that we are on the requested channel for transmission */ if (chan != local->tmp_channel && chan != local->oper_channel) diff --git a/net/wireless/core.h b/net/wireless/core.h index 6583cca0e2e..ee80ad8dc65 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -341,9 +341,9 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid); void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev); int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, struct net_device *dev, - struct ieee80211_channel *chan, + struct ieee80211_channel *chan, bool offchan, enum nl80211_channel_type channel_type, - bool channel_type_valid, + bool channel_type_valid, unsigned int wait, const u8 *buf, size_t len, u64 *cookie); /* SME */ diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 6980a0c315b..d7680f2a4c5 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -864,9 +864,9 @@ void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev) int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, struct net_device *dev, - struct ieee80211_channel *chan, + struct ieee80211_channel *chan, bool offchan, enum nl80211_channel_type channel_type, - bool channel_type_valid, + bool channel_type_valid, unsigned int wait, const u8 *buf, size_t len, u64 *cookie) { struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -946,8 +946,9 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, return -EINVAL; /* Transmit the Action frame as requested by user space */ - return rdev->ops->mgmt_tx(&rdev->wiphy, dev, chan, channel_type, - channel_type_valid, buf, len, cookie); + return rdev->ops->mgmt_tx(&rdev->wiphy, dev, chan, offchan, + channel_type, channel_type_valid, + wait, buf, len, cookie); } bool cfg80211_rx_mgmt(struct net_device *dev, int freq, const u8 *buf, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 67ff7e92cb9..960be4e650f 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -163,16 +163,13 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_CQM] = { .type = NLA_NESTED, }, [NL80211_ATTR_LOCAL_STATE_CHANGE] = { .type = NLA_FLAG }, [NL80211_ATTR_AP_ISOLATE] = { .type = NLA_U8 }, - [NL80211_ATTR_WIPHY_TX_POWER_SETTING] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_TX_POWER_LEVEL] = { .type = NLA_U32 }, - [NL80211_ATTR_FRAME_TYPE] = { .type = NLA_U16 }, - [NL80211_ATTR_WIPHY_ANTENNA_TX] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_ANTENNA_RX] = { .type = NLA_U32 }, - [NL80211_ATTR_MCAST_RATE] = { .type = NLA_U32 }, + [NL80211_ATTR_OFFCHANNEL_TX_OK] = { .type = NLA_FLAG }, }; /* policy for the key attributes */ @@ -677,6 +674,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, CMD(remain_on_channel, REMAIN_ON_CHANNEL); CMD(set_bitrate_mask, SET_TX_BITRATE_MASK); CMD(mgmt_tx, FRAME); + CMD(mgmt_tx_cancel_wait, FRAME_WAIT_CANCEL); if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { i++; NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS); @@ -698,6 +696,10 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, nla_nest_end(msg, nl_cmds); + /* for now at least assume all drivers have it */ + if (dev->ops->mgmt_tx) + NLA_PUT_FLAG(msg, NL80211_ATTR_OFFCHANNEL_TX_OK); + if (mgmt_stypes) { u16 stypes; struct nlattr *nl_ftypes, *nl_ifs; @@ -4244,6 +4246,8 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) void *hdr; u64 cookie; struct sk_buff *msg; + unsigned int wait = 0; + bool offchan; if (!info->attrs[NL80211_ATTR_FRAME] || !info->attrs[NL80211_ATTR_WIPHY_FREQ]) @@ -4260,6 +4264,12 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) return -EOPNOTSUPP; + if (info->attrs[NL80211_ATTR_DURATION]) { + if (!rdev->ops->mgmt_tx_cancel_wait) + return -EINVAL; + wait = nla_get_u32(info->attrs[NL80211_ATTR_DURATION]); + } + if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) { channel_type = nla_get_u32( info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]); @@ -4271,6 +4281,8 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) channel_type_valid = true; } + offchan = info->attrs[NL80211_ATTR_OFFCHANNEL_TX_OK]; + freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]); chan = rdev_freq_to_chan(rdev, freq, channel_type); if (chan == NULL) @@ -4287,8 +4299,8 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) err = PTR_ERR(hdr); goto free_msg; } - err = cfg80211_mlme_mgmt_tx(rdev, dev, chan, channel_type, - channel_type_valid, + err = cfg80211_mlme_mgmt_tx(rdev, dev, chan, offchan, channel_type, + channel_type_valid, wait, nla_data(info->attrs[NL80211_ATTR_FRAME]), nla_len(info->attrs[NL80211_ATTR_FRAME]), &cookie); @@ -4307,6 +4319,31 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) return err; } +static int nl80211_tx_mgmt_cancel_wait(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + u64 cookie; + + if (!info->attrs[NL80211_ATTR_COOKIE]) + return -EINVAL; + + if (!rdev->ops->mgmt_tx_cancel_wait) + return -EOPNOTSUPP; + + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) + return -EOPNOTSUPP; + + cookie = nla_get_u64(info->attrs[NL80211_ATTR_COOKIE]); + + return rdev->ops->mgmt_tx_cancel_wait(&rdev->wiphy, dev, cookie); +} + static int nl80211_set_power_save(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -4879,6 +4916,14 @@ static struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_FRAME_WAIT_CANCEL, + .doit = nl80211_tx_mgmt_cancel_wait, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, { .cmd = NL80211_CMD_SET_POWER_SAVE, .doit = nl80211_set_power_save, -- cgit v1.2.3-70-g09d2 From acfa747baf73922021a047f2d87a2d866f5dbab5 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 29 Nov 2010 10:16:54 +0100 Subject: TTY: open/hangup race fixup Like in the "TTY: don't allow reopen when ldisc is changing" patch, this one fixes a TTY WARNING as described in the option 1) there: 1) __tty_hangup from tty_ldisc_hangup to tty_ldisc_enable. During this section tty_lock is held. However tty_lock is temporarily dropped in the middle of the function by tty_ldisc_hangup. The fix is to introduce a new flag which we set during the unlocked window and check it in tty_reopen too. The flag is TTY_HUPPING and is cleared after TTY_HUPPED is set. While at it, remove duplicate TTY_HUPPED set_bit. The one after calling ops->hangup seems to be more correct. But anyway, we hold tty_lock, so there should be no difference. Also document the function it does that kind of crap. Nicely reproducible with two forked children: static void do_work(const char *tty) { if (signal(SIGHUP, SIG_IGN) == SIG_ERR) exit(1); setsid(); while (1) { int fd = open(tty, O_RDWR|O_NOCTTY); if (fd < 0) continue; if (ioctl(fd, TIOCSCTTY)) continue; if (vhangup()) continue; close(fd); } exit(0); } Signed-off-by: Jiri Slaby Reported-by: Reported-by: Kyle McMartin Cc: Alan Cox Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_io.c | 10 +++++++++- include/linux/tty.h | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 878f6d667b1..35480dd57a3 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -559,6 +559,9 @@ void __tty_hangup(struct tty_struct *tty) tty_lock(); + /* some functions below drop BTM, so we need this bit */ + set_bit(TTY_HUPPING, &tty->flags); + /* inuse_filps is protected by the single tty lock, this really needs to change if we want to flush the workqueue with the lock held */ @@ -578,6 +581,10 @@ void __tty_hangup(struct tty_struct *tty) } spin_unlock(&tty_files_lock); + /* + * it drops BTM and thus races with reopen + * we protect the race by TTY_HUPPING + */ tty_ldisc_hangup(tty); read_lock(&tasklist_lock); @@ -615,7 +622,6 @@ void __tty_hangup(struct tty_struct *tty) tty->session = NULL; tty->pgrp = NULL; tty->ctrl_status = 0; - set_bit(TTY_HUPPED, &tty->flags); spin_unlock_irqrestore(&tty->ctrl_lock, flags); /* Account for the p->signal references we killed */ @@ -641,6 +647,7 @@ void __tty_hangup(struct tty_struct *tty) * can't yet guarantee all that. */ set_bit(TTY_HUPPED, &tty->flags); + clear_bit(TTY_HUPPING, &tty->flags); tty_ldisc_enable(tty); tty_unlock(); @@ -1311,6 +1318,7 @@ static int tty_reopen(struct tty_struct *tty) struct tty_driver *driver = tty->driver; if (test_bit(TTY_CLOSING, &tty->flags) || + test_bit(TTY_HUPPING, &tty->flags) || test_bit(TTY_LDISC_CHANGING, &tty->flags)) return -EIO; diff --git a/include/linux/tty.h b/include/linux/tty.h index c7ea9bc8897..021bfd2f086 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -367,6 +367,7 @@ struct tty_file_private { #define TTY_HUPPED 18 /* Post driver->hangup() */ #define TTY_FLUSHING 19 /* Flushing to ldisc in progress */ #define TTY_FLUSHPENDING 20 /* Queued buffer flush pending */ +#define TTY_HUPPING 21 /* ->hangup() in progress */ #define TTY_WRITE_FLUSH(tty) tty_write_flush((tty)) -- cgit v1.2.3-70-g09d2 From 24278d148316d2180be6df40e06db013d8b232b8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 27 Sep 2010 17:25:23 -0700 Subject: rcu: priority boosting for TINY_PREEMPT_RCU Add priority boosting, but only for TINY_PREEMPT_RCU. This is enabled by the default-off RCU_BOOST kernel parameter. The priority to which to boost preempted RCU readers is controlled by the RCU_BOOST_PRIO kernel parameter (defaulting to real-time priority 1) and the time to wait before boosting the readers blocking a given grace period is controlled by the RCU_BOOST_DELAY kernel parameter (defaulting to 500 milliseconds). Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/linux/init_task.h | 9 +- include/linux/sched.h | 11 ++- init/Kconfig | 39 +++++++++ kernel/rcutiny.c | 66 ++++++--------- kernel/rcutiny_plugin.h | 208 +++++++++++++++++++++++++++++++++++++++++++--- 5 files changed, 280 insertions(+), 53 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 2fea6c8ef6b..69f91aacdee 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -81,6 +81,12 @@ extern struct group_info init_groups; */ # define CAP_INIT_BSET CAP_FULL_SET +#ifdef CONFIG_RCU_BOOST +#define INIT_TASK_RCU_BOOST() \ + .rcu_boost_mutex = NULL, +#else +#define INIT_TASK_RCU_BOOST() +#endif #ifdef CONFIG_TREE_PREEMPT_RCU #define INIT_TASK_RCU_TREE_PREEMPT() \ .rcu_blocked_node = NULL, @@ -92,7 +98,8 @@ extern struct group_info init_groups; .rcu_read_lock_nesting = 0, \ .rcu_read_unlock_special = 0, \ .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), \ - INIT_TASK_RCU_TREE_PREEMPT() + INIT_TASK_RCU_TREE_PREEMPT() \ + INIT_TASK_RCU_BOOST() #else #define INIT_TASK_RCU_PREEMPT(tsk) #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index e18473f0eb7..ed1a9bc52b2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1210,6 +1210,9 @@ struct task_struct { #ifdef CONFIG_TREE_PREEMPT_RCU struct rcu_node *rcu_blocked_node; #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ +#ifdef CONFIG_RCU_BOOST + struct rt_mutex *rcu_boost_mutex; +#endif /* #ifdef CONFIG_RCU_BOOST */ #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) struct sched_info sched_info; @@ -1745,7 +1748,8 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * #ifdef CONFIG_PREEMPT_RCU #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ -#define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */ +#define RCU_READ_UNLOCK_BOOSTED (1 << 1) /* boosted while in RCU read-side. */ +#define RCU_READ_UNLOCK_NEED_QS (1 << 2) /* RCU core needs CPU response. */ static inline void rcu_copy_process(struct task_struct *p) { @@ -1753,7 +1757,10 @@ static inline void rcu_copy_process(struct task_struct *p) p->rcu_read_unlock_special = 0; #ifdef CONFIG_TREE_PREEMPT_RCU p->rcu_blocked_node = NULL; -#endif +#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ +#ifdef CONFIG_RCU_BOOST + p->rcu_boost_mutex = NULL; +#endif /* #ifdef CONFIG_RCU_BOOST */ INIT_LIST_HEAD(&p->rcu_node_entry); } diff --git a/init/Kconfig b/init/Kconfig index a619a1ac7f4..48efefcac12 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -450,6 +450,45 @@ config TREE_RCU_TRACE TREE_PREEMPT_RCU implementations, permitting Makefile to trivially select kernel/rcutree_trace.c. +config RCU_BOOST + bool "Enable RCU priority boosting" + depends on RT_MUTEXES && TINY_PREEMPT_RCU + default n + help + This option boosts the priority of preempted RCU readers that + block the current preemptible RCU grace period for too long. + This option also prevents heavy loads from blocking RCU + callback invocation for all flavors of RCU. + + Say Y here if you are working with real-time apps or heavy loads + Say N here if you are unsure. + +config RCU_BOOST_PRIO + int "Real-time priority to boost RCU readers to" + range 1 99 + depends on RCU_BOOST + default 1 + help + This option specifies the real-time priority to which preempted + RCU readers are to be boosted. If you are working with CPU-bound + real-time applications, you should specify a priority higher then + the highest-priority CPU-bound application. + + Specify the real-time priority, or take the default if unsure. + +config RCU_BOOST_DELAY + int "Milliseconds to delay boosting after RCU grace-period start" + range 0 3000 + depends on RCU_BOOST + default 500 + help + This option specifies the time to wait after the beginning of + a given grace period before priority-boosting preempted RCU + readers blocking that grace period. Note that any RCU reader + blocking an expedited RCU grace period is boosted immediately. + + Accept the default if unsure. + endmenu # "RCU Subsystem" config IKCONFIG diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 86eef29cdfb..93d166582cb 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -36,38 +36,16 @@ #include #include -/* Global control variables for rcupdate callback mechanism. */ -struct rcu_ctrlblk { - struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */ - struct rcu_head **donetail; /* ->next pointer of last "done" CB. */ - struct rcu_head **curtail; /* ->next pointer of last CB. */ -}; - -/* Definition for rcupdate control block. */ -static struct rcu_ctrlblk rcu_sched_ctrlblk = { - .donetail = &rcu_sched_ctrlblk.rcucblist, - .curtail = &rcu_sched_ctrlblk.rcucblist, -}; - -static struct rcu_ctrlblk rcu_bh_ctrlblk = { - .donetail = &rcu_bh_ctrlblk.rcucblist, - .curtail = &rcu_bh_ctrlblk.rcucblist, -}; - -#ifdef CONFIG_DEBUG_LOCK_ALLOC -int rcu_scheduler_active __read_mostly; -EXPORT_SYMBOL_GPL(rcu_scheduler_active); -#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ - -/* Controls for rcu_cbs() kthread, replacing RCU_SOFTIRQ used previously. */ -static struct task_struct *rcu_cbs_task; -static DECLARE_WAIT_QUEUE_HEAD(rcu_cbs_wq); -static unsigned long have_rcu_cbs; -static void invoke_rcu_cbs(void); +/* Controls for rcu_kthread() kthread, replacing RCU_SOFTIRQ used previously. */ +static struct task_struct *rcu_kthread_task; +static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq); +static unsigned long have_rcu_kthread_work; +static void invoke_rcu_kthread(void); /* Forward declarations for rcutiny_plugin.h. */ +struct rcu_ctrlblk; static void rcu_process_callbacks(struct rcu_ctrlblk *rcp); -static int rcu_cbs(void *arg); +static int rcu_kthread(void *arg); static void __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), struct rcu_ctrlblk *rcp); @@ -130,7 +108,7 @@ void rcu_sched_qs(int cpu) { if (rcu_qsctr_help(&rcu_sched_ctrlblk) + rcu_qsctr_help(&rcu_bh_ctrlblk)) - invoke_rcu_cbs(); + invoke_rcu_kthread(); } /* @@ -139,7 +117,7 @@ void rcu_sched_qs(int cpu) void rcu_bh_qs(int cpu) { if (rcu_qsctr_help(&rcu_bh_ctrlblk)) - invoke_rcu_cbs(); + invoke_rcu_kthread(); } /* @@ -201,37 +179,41 @@ static void rcu_process_callbacks(struct rcu_ctrlblk *rcp) * This is a kthread, but it is never stopped, at least not until * the system goes down. */ -static int rcu_cbs(void *arg) +static int rcu_kthread(void *arg) { unsigned long work; + unsigned long morework; unsigned long flags; for (;;) { - wait_event(rcu_cbs_wq, have_rcu_cbs != 0); + wait_event(rcu_kthread_wq, have_rcu_kthread_work != 0); + morework = rcu_boost(); local_irq_save(flags); - work = have_rcu_cbs; - have_rcu_cbs = 0; + work = have_rcu_kthread_work; + have_rcu_kthread_work = morework; local_irq_restore(flags); if (work) { rcu_process_callbacks(&rcu_sched_ctrlblk); rcu_process_callbacks(&rcu_bh_ctrlblk); rcu_preempt_process_callbacks(); } + schedule_timeout_interruptible(1); /* Leave CPU for others. */ } return 0; /* Not reached, but needed to shut gcc up. */ } /* - * Wake up rcu_cbs() to process callbacks now eligible for invocation. + * Wake up rcu_kthread() to process callbacks now eligible for invocation + * or to boost readers. */ -static void invoke_rcu_cbs(void) +static void invoke_rcu_kthread(void) { unsigned long flags; local_irq_save(flags); - have_rcu_cbs = 1; - wake_up(&rcu_cbs_wq); + have_rcu_kthread_work = 1; + wake_up(&rcu_kthread_wq); local_irq_restore(flags); } @@ -327,7 +309,11 @@ EXPORT_SYMBOL_GPL(rcu_barrier_sched); */ static int __init rcu_spawn_kthreads(void) { - rcu_cbs_task = kthread_run(rcu_cbs, NULL, "rcu_cbs"); + struct sched_param sp; + + rcu_kthread_task = kthread_run(rcu_kthread, NULL, "rcu_kthread"); + sp.sched_priority = RCU_BOOST_PRIO; + sched_setscheduler_nocheck(rcu_kthread_task, SCHED_FIFO, &sp); return 0; } early_initcall(rcu_spawn_kthreads); diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index 95f9239df51..24f43165f22 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h @@ -24,6 +24,29 @@ #include +/* Global control variables for rcupdate callback mechanism. */ +struct rcu_ctrlblk { + struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */ + struct rcu_head **donetail; /* ->next pointer of last "done" CB. */ + struct rcu_head **curtail; /* ->next pointer of last CB. */ +}; + +/* Definition for rcupdate control block. */ +static struct rcu_ctrlblk rcu_sched_ctrlblk = { + .donetail = &rcu_sched_ctrlblk.rcucblist, + .curtail = &rcu_sched_ctrlblk.rcucblist, +}; + +static struct rcu_ctrlblk rcu_bh_ctrlblk = { + .donetail = &rcu_bh_ctrlblk.rcucblist, + .curtail = &rcu_bh_ctrlblk.rcucblist, +}; + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +int rcu_scheduler_active __read_mostly; +EXPORT_SYMBOL_GPL(rcu_scheduler_active); +#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + #ifdef CONFIG_TINY_PREEMPT_RCU #include @@ -48,17 +71,27 @@ struct rcu_preempt_ctrlblk { struct list_head *gp_tasks; /* Pointer to the first task blocking the */ /* current grace period, or NULL if there */ - /* is not such task. */ + /* is no such task. */ struct list_head *exp_tasks; /* Pointer to first task blocking the */ /* current expedited grace period, or NULL */ /* if there is no such task. If there */ /* is no current expedited grace period, */ /* then there cannot be any such task. */ +#ifdef CONFIG_RCU_BOOST + struct list_head *boost_tasks; + /* Pointer to first task that needs to be */ + /* priority-boosted, or NULL if no priority */ + /* boosting is needed. If there is no */ + /* current or expedited grace period, there */ + /* can be no such task. */ +#endif /* #ifdef CONFIG_RCU_BOOST */ u8 gpnum; /* Current grace period. */ u8 gpcpu; /* Last grace period blocked by the CPU. */ u8 completed; /* Last grace period completed. */ /* If all three are equal, RCU is idle. */ + s8 boosted_this_gp; /* Has boosting already happened? */ + unsigned long boost_time; /* When to start boosting (jiffies) */ }; static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = { @@ -123,6 +156,130 @@ static int rcu_preempt_gp_in_progress(void) return rcu_preempt_ctrlblk.completed != rcu_preempt_ctrlblk.gpnum; } +/* + * Advance a ->blkd_tasks-list pointer to the next entry, instead + * returning NULL if at the end of the list. + */ +static struct list_head *rcu_next_node_entry(struct task_struct *t) +{ + struct list_head *np; + + np = t->rcu_node_entry.next; + if (np == &rcu_preempt_ctrlblk.blkd_tasks) + np = NULL; + return np; +} + +#ifdef CONFIG_RCU_BOOST + +#include "rtmutex_common.h" + +/* + * Carry out RCU priority boosting on the task indicated by ->boost_tasks, + * and advance ->boost_tasks to the next task in the ->blkd_tasks list. + */ +static int rcu_boost(void) +{ + unsigned long flags; + struct rt_mutex mtx; + struct list_head *np; + struct task_struct *t; + + if (rcu_preempt_ctrlblk.boost_tasks == NULL) + return 0; /* Nothing to boost. */ + raw_local_irq_save(flags); + rcu_preempt_ctrlblk.boosted_this_gp++; + t = container_of(rcu_preempt_ctrlblk.boost_tasks, struct task_struct, + rcu_node_entry); + np = rcu_next_node_entry(t); + rt_mutex_init_proxy_locked(&mtx, t); + t->rcu_boost_mutex = &mtx; + t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED; + raw_local_irq_restore(flags); + rt_mutex_lock(&mtx); + rt_mutex_unlock(&mtx); + return rcu_preempt_ctrlblk.boost_tasks != NULL; +} + +/* + * Check to see if it is now time to start boosting RCU readers blocking + * the current grace period, and, if so, tell the rcu_kthread_task to + * start boosting them. If there is an expedited boost in progress, + * we wait for it to complete. + */ +static void rcu_initiate_boost(void) +{ + if (rcu_preempt_ctrlblk.gp_tasks != NULL && + rcu_preempt_ctrlblk.boost_tasks == NULL && + rcu_preempt_ctrlblk.boosted_this_gp == 0 && + ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time)) { + rcu_preempt_ctrlblk.boost_tasks = rcu_preempt_ctrlblk.gp_tasks; + invoke_rcu_kthread(); + } +} + +/* + * Initiate boosting for an expedited grace period. + */ +static void rcu_initiate_expedited_boost(void) +{ + unsigned long flags; + + raw_local_irq_save(flags); + if (!list_empty(&rcu_preempt_ctrlblk.blkd_tasks)) { + rcu_preempt_ctrlblk.boost_tasks = + rcu_preempt_ctrlblk.blkd_tasks.next; + rcu_preempt_ctrlblk.boosted_this_gp = -1; + invoke_rcu_kthread(); + } + raw_local_irq_restore(flags); +} + +#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000); + +/* + * Do priority-boost accounting for the start of a new grace period. + */ +static void rcu_preempt_boost_start_gp(void) +{ + rcu_preempt_ctrlblk.boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES; + if (rcu_preempt_ctrlblk.boosted_this_gp > 0) + rcu_preempt_ctrlblk.boosted_this_gp = 0; +} + +#else /* #ifdef CONFIG_RCU_BOOST */ + +/* + * If there is no RCU priority boosting, we don't boost. + */ +static int rcu_boost(void) +{ + return 0; +} + +/* + * If there is no RCU priority boosting, we don't initiate boosting. + */ +static void rcu_initiate_boost(void) +{ +} + +/* + * If there is no RCU priority boosting, we don't initiate expedited boosting. + */ +static void rcu_initiate_expedited_boost(void) +{ +} + +/* + * If there is no RCU priority boosting, nothing to do at grace-period start. + */ +static void rcu_preempt_boost_start_gp(void) +{ +} + +#endif /* else #ifdef CONFIG_RCU_BOOST */ + /* * Record a preemptible-RCU quiescent state for the specified CPU. Note * that this just means that the task currently running on the CPU is @@ -150,12 +307,14 @@ static void rcu_preempt_cpu_qs(void) rcu_preempt_ctrlblk.gpcpu = rcu_preempt_ctrlblk.gpnum; current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; - /* - * If there is no GP, or if blocked readers are still blocking GP, - * then there is nothing more to do. - */ + /* If there is no GP then there is nothing more to do. */ if (!rcu_preempt_gp_in_progress() || rcu_preempt_blocked_readers_cgp()) return; + /* If there are blocked readers, go check up on boosting. */ + if (rcu_preempt_blocked_readers_cgp()) { + rcu_initiate_boost(); + return; + } /* Advance callbacks. */ rcu_preempt_ctrlblk.completed = rcu_preempt_ctrlblk.gpnum; @@ -168,7 +327,7 @@ static void rcu_preempt_cpu_qs(void) /* If there are done callbacks, cause them to be invoked. */ if (*rcu_preempt_ctrlblk.rcb.donetail != NULL) - invoke_rcu_cbs(); + invoke_rcu_kthread(); } /* @@ -186,6 +345,9 @@ static void rcu_preempt_start_gp(void) rcu_preempt_ctrlblk.gp_tasks = rcu_preempt_ctrlblk.blkd_tasks.next; + /* Set up for RCU priority boosting. */ + rcu_preempt_boost_start_gp(); + /* If there is no running reader, CPU is done with GP. */ if (!rcu_preempt_running_reader()) rcu_preempt_cpu_qs(); @@ -306,14 +468,16 @@ static void rcu_read_unlock_special(struct task_struct *t) */ empty = !rcu_preempt_blocked_readers_cgp(); empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL; - np = t->rcu_node_entry.next; - if (np == &rcu_preempt_ctrlblk.blkd_tasks) - np = NULL; + np = rcu_next_node_entry(t); list_del(&t->rcu_node_entry); if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks) rcu_preempt_ctrlblk.gp_tasks = np; if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks) rcu_preempt_ctrlblk.exp_tasks = np; +#ifdef CONFIG_RCU_BOOST + if (&t->rcu_node_entry == rcu_preempt_ctrlblk.boost_tasks) + rcu_preempt_ctrlblk.boost_tasks = np; +#endif /* #ifdef CONFIG_RCU_BOOST */ INIT_LIST_HEAD(&t->rcu_node_entry); /* @@ -333,6 +497,14 @@ static void rcu_read_unlock_special(struct task_struct *t) if (!empty_exp && rcu_preempt_ctrlblk.exp_tasks == NULL) rcu_report_exp_done(); } +#ifdef CONFIG_RCU_BOOST + /* Unboost self if was boosted. */ + if (special & RCU_READ_UNLOCK_BOOSTED) { + t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED; + rt_mutex_unlock(t->rcu_boost_mutex); + t->rcu_boost_mutex = NULL; + } +#endif /* #ifdef CONFIG_RCU_BOOST */ local_irq_restore(flags); } @@ -376,7 +548,7 @@ static void rcu_preempt_check_callbacks(void) rcu_preempt_cpu_qs(); if (&rcu_preempt_ctrlblk.rcb.rcucblist != rcu_preempt_ctrlblk.rcb.donetail) - invoke_rcu_cbs(); + invoke_rcu_kthread(); if (rcu_preempt_gp_in_progress() && rcu_cpu_blocking_cur_gp() && rcu_preempt_running_reader()) @@ -534,6 +706,7 @@ void synchronize_rcu_expedited(void) /* Wait for tail of ->blkd_tasks list to drain. */ if (rcu_preempted_readers_exp()) + rcu_initiate_expedited_boost(); wait_event(sync_rcu_preempt_exp_wq, !rcu_preempted_readers_exp()); @@ -574,6 +747,15 @@ void exit_rcu(void) #else /* #ifdef CONFIG_TINY_PREEMPT_RCU */ +/* + * Because preemptible RCU does not exist, it is never necessary to + * boost preempted RCU readers. + */ +static int rcu_boost(void) +{ + return 0; +} + /* * Because preemptible RCU does not exist, it never has any callbacks * to check. @@ -614,3 +796,9 @@ void __init rcu_scheduler_starting(void) } #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + +#ifdef CONFIG_RCU_BOOST +#define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO +#else /* #ifdef CONFIG_RCU_BOOST */ +#define RCU_BOOST_PRIO 1 +#endif /* #else #ifdef CONFIG_RCU_BOOST */ -- cgit v1.2.3-70-g09d2 From 7b27d5475f86186914e54e4a6bb994e9a985337b Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 21 Oct 2010 11:29:05 +0800 Subject: rcu,cleanup: move synchronize_sched_expedited() out of sched.c The first version of synchronize_sched_expedited() used the migration code in the scheduler, and was therefore implemented in kernel/sched.c. However, the more recent version of this code no longer uses the migration code, so this commit moves it to the main RCU source files. Signed-off-by: Lai Jiangshan Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 1 - include/linux/rcutiny.h | 5 ++++ include/linux/rcutree.h | 1 + kernel/rcutree_plugin.h | 71 ++++++++++++++++++++++++++++++++++++++++++++++++ kernel/sched.c | 69 ---------------------------------------------- 5 files changed, 77 insertions(+), 70 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 7142ee3304a..49e8e16308e 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -66,7 +66,6 @@ extern void call_rcu_sched(struct rcu_head *head, extern void synchronize_sched(void); extern void rcu_barrier_bh(void); extern void rcu_barrier_sched(void); -extern void synchronize_sched_expedited(void); extern int sched_expedited_torture_stats(char *page); static inline void __rcu_read_lock_bh(void) diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index ea025a611fc..30ebd7c8d87 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -60,6 +60,11 @@ static inline void synchronize_rcu_bh_expedited(void) synchronize_sched(); } +static inline void synchronize_sched_expedited(void) +{ + synchronize_sched(); +} + #ifdef CONFIG_TINY_RCU static inline void rcu_preempt_note_context_switch(void) diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index c0e96833aa7..3a933482734 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -48,6 +48,7 @@ static inline void exit_rcu(void) #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ extern void synchronize_rcu_bh(void); +extern void synchronize_sched_expedited(void); extern void synchronize_rcu_expedited(void); static inline void synchronize_rcu_bh_expedited(void) diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 71a4147473f..21df7f3e727 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -25,6 +25,7 @@ */ #include +#include /* * Check the RCU kernel configuration parameters and print informative @@ -1014,6 +1015,76 @@ static void __init __rcu_init_preempt(void) #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ +#ifndef CONFIG_SMP + +void synchronize_sched_expedited(void) +{ + cond_resched(); +} +EXPORT_SYMBOL_GPL(synchronize_sched_expedited); + +#else /* #ifndef CONFIG_SMP */ + +static atomic_t synchronize_sched_expedited_count = ATOMIC_INIT(0); + +static int synchronize_sched_expedited_cpu_stop(void *data) +{ + /* + * There must be a full memory barrier on each affected CPU + * between the time that try_stop_cpus() is called and the + * time that it returns. + * + * In the current initial implementation of cpu_stop, the + * above condition is already met when the control reaches + * this point and the following smp_mb() is not strictly + * necessary. Do smp_mb() anyway for documentation and + * robustness against future implementation changes. + */ + smp_mb(); /* See above comment block. */ + return 0; +} + +/* + * Wait for an rcu-sched grace period to elapse, but use "big hammer" + * approach to force grace period to end quickly. This consumes + * significant time on all CPUs, and is thus not recommended for + * any sort of common-case code. + * + * Note that it is illegal to call this function while holding any + * lock that is acquired by a CPU-hotplug notifier. Failing to + * observe this restriction will result in deadlock. + */ +void synchronize_sched_expedited(void) +{ + int snap, trycount = 0; + + smp_mb(); /* ensure prior mod happens before capturing snap. */ + snap = atomic_read(&synchronize_sched_expedited_count) + 1; + get_online_cpus(); + while (try_stop_cpus(cpu_online_mask, + synchronize_sched_expedited_cpu_stop, + NULL) == -EAGAIN) { + put_online_cpus(); + if (trycount++ < 10) + udelay(trycount * num_online_cpus()); + else { + synchronize_sched(); + return; + } + if (atomic_read(&synchronize_sched_expedited_count) - snap > 0) { + smp_mb(); /* ensure test happens before caller kfree */ + return; + } + get_online_cpus(); + } + atomic_inc(&synchronize_sched_expedited_count); + smp_mb__after_atomic_inc(); /* ensure post-GP actions seen after GP. */ + put_online_cpus(); +} +EXPORT_SYMBOL_GPL(synchronize_sched_expedited); + +#endif /* #else #ifndef CONFIG_SMP */ + #if !defined(CONFIG_RCU_FAST_NO_HZ) /* diff --git a/kernel/sched.c b/kernel/sched.c index ae8f75a5ceb..d1e8889872a 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -9131,72 +9131,3 @@ struct cgroup_subsys cpuacct_subsys = { }; #endif /* CONFIG_CGROUP_CPUACCT */ -#ifndef CONFIG_SMP - -void synchronize_sched_expedited(void) -{ - barrier(); -} -EXPORT_SYMBOL_GPL(synchronize_sched_expedited); - -#else /* #ifndef CONFIG_SMP */ - -static atomic_t synchronize_sched_expedited_count = ATOMIC_INIT(0); - -static int synchronize_sched_expedited_cpu_stop(void *data) -{ - /* - * There must be a full memory barrier on each affected CPU - * between the time that try_stop_cpus() is called and the - * time that it returns. - * - * In the current initial implementation of cpu_stop, the - * above condition is already met when the control reaches - * this point and the following smp_mb() is not strictly - * necessary. Do smp_mb() anyway for documentation and - * robustness against future implementation changes. - */ - smp_mb(); /* See above comment block. */ - return 0; -} - -/* - * Wait for an rcu-sched grace period to elapse, but use "big hammer" - * approach to force grace period to end quickly. This consumes - * significant time on all CPUs, and is thus not recommended for - * any sort of common-case code. - * - * Note that it is illegal to call this function while holding any - * lock that is acquired by a CPU-hotplug notifier. Failing to - * observe this restriction will result in deadlock. - */ -void synchronize_sched_expedited(void) -{ - int snap, trycount = 0; - - smp_mb(); /* ensure prior mod happens before capturing snap. */ - snap = atomic_read(&synchronize_sched_expedited_count) + 1; - get_online_cpus(); - while (try_stop_cpus(cpu_online_mask, - synchronize_sched_expedited_cpu_stop, - NULL) == -EAGAIN) { - put_online_cpus(); - if (trycount++ < 10) - udelay(trycount * num_online_cpus()); - else { - synchronize_sched(); - return; - } - if (atomic_read(&synchronize_sched_expedited_count) - snap > 0) { - smp_mb(); /* ensure test happens before caller kfree */ - return; - } - get_online_cpus(); - } - atomic_inc(&synchronize_sched_expedited_count); - smp_mb__after_atomic_inc(); /* ensure post-GP actions seen after GP. */ - put_online_cpus(); -} -EXPORT_SYMBOL_GPL(synchronize_sched_expedited); - -#endif /* #else #ifndef CONFIG_SMP */ -- cgit v1.2.3-70-g09d2 From 9833c39400c3e6ee19daeded6910df648741611e Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Fri, 19 Nov 2010 09:29:24 +0100 Subject: ARM: 6485/5: proc/vmcore - allow archs to override vmcore_elf_check_arch() Allow architectures to redefine this macro if needed. This is useful for example in architectures where 64-bit ELF vmcores are not supported. Specifying zero vmcore_elf64_check_arch() allows compiler to optimize away unnecessary parts of parse_crash_elf64_headers(). We also rename the macro to vmcore_elf64_check_arch() to reflect that it is used for 64-bit vmcores only. Signed-off-by: Mika Westerberg Signed-off-by: Russell King --- fs/proc/vmcore.c | 2 +- include/linux/crash_dump.h | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 2367fb3f70b..74802bc5ded 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -499,7 +499,7 @@ static int __init parse_crash_elf64_headers(void) /* Do some basic Verification. */ if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 || (ehdr.e_type != ET_CORE) || - !vmcore_elf_check_arch(&ehdr) || + !vmcore_elf64_check_arch(&ehdr) || ehdr.e_ident[EI_CLASS] != ELFCLASS64 || ehdr.e_ident[EI_VERSION] != EV_CURRENT || ehdr.e_version != EV_CURRENT || diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 0026f267da2..088cd4ace4e 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -20,7 +20,14 @@ extern ssize_t copy_oldmem_page(unsigned long, char *, size_t, #define vmcore_elf_check_arch_cross(x) 0 #endif -#define vmcore_elf_check_arch(x) (elf_check_arch(x) || vmcore_elf_check_arch_cross(x)) +/* + * Architecture code can redefine this if there are any special checks + * needed for 64-bit ELF vmcores. In case of 32-bit only architecture, + * this can be set to zero. + */ +#ifndef vmcore_elf64_check_arch +#define vmcore_elf64_check_arch(x) (elf_check_arch(x) || vmcore_elf_check_arch_cross(x)) +#endif /* * is_kdump_kernel() checks whether this kernel is booting after a panic of -- cgit v1.2.3-70-g09d2 From 5091faa449ee0b7d73bc296a93bca9540fc51d0a Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Tue, 30 Nov 2010 14:18:03 +0100 Subject: sched: Add 'autogroup' scheduling feature: automated per session task groups A recurring complaint from CFS users is that parallel kbuild has a negative impact on desktop interactivity. This patch implements an idea from Linus, to automatically create task groups. Currently, only per session autogroups are implemented, but the patch leaves the way open for enhancement. Implementation: each task's signal struct contains an inherited pointer to a refcounted autogroup struct containing a task group pointer, the default for all tasks pointing to the init_task_group. When a task calls setsid(), a new task group is created, the process is moved into the new task group, and a reference to the preveious task group is dropped. Child processes inherit this task group thereafter, and increase it's refcount. When the last thread of a process exits, the process's reference is dropped, such that when the last process referencing an autogroup exits, the autogroup is destroyed. At runqueue selection time, IFF a task has no cgroup assignment, its current autogroup is used. Autogroup bandwidth is controllable via setting it's nice level through the proc filesystem: cat /proc//autogroup Displays the task's group and the group's nice level. echo > /proc//autogroup Sets the task group's shares to the weight of nice task. Setting nice level is rate limited for !admin users due to the abuse risk of task group locking. The feature is enabled from boot by default if CONFIG_SCHED_AUTOGROUP=y is selected, but can be disabled via the boot option noautogroup, and can also be turned on/off on the fly via: echo [01] > /proc/sys/kernel/sched_autogroup_enabled ... which will automatically move tasks to/from the root task group. Signed-off-by: Mike Galbraith Acked-by: Linus Torvalds Acked-by: Peter Zijlstra Cc: Markus Trippelsdorf Cc: Mathieu Desnoyers Cc: Paul Turner Cc: Oleg Nesterov [ Removed the task_group_path() debug code, and fixed !EVENTFD build failure. ] Signed-off-by: Ingo Molnar LKML-Reference: <1290281700.28711.9.camel@maggy.simson.net> Signed-off-by: Ingo Molnar --- Documentation/kernel-parameters.txt | 2 + fs/proc/base.c | 79 +++++++++++++ include/linux/sched.h | 23 ++++ init/Kconfig | 13 ++ kernel/fork.c | 5 +- kernel/sched.c | 13 +- kernel/sched_autogroup.c | 229 ++++++++++++++++++++++++++++++++++++ kernel/sched_autogroup.h | 32 +++++ kernel/sched_debug.c | 47 +------- kernel/sys.c | 4 +- kernel/sysctl.c | 11 ++ 11 files changed, 409 insertions(+), 49 deletions(-) create mode 100644 kernel/sched_autogroup.c create mode 100644 kernel/sched_autogroup.h (limited to 'include/linux') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 92e83e53148..86820a727b0 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1622,6 +1622,8 @@ and is between 256 and 4096 characters. It is defined in the file noapic [SMP,APIC] Tells the kernel to not make use of any IOAPICs that may be present in the system. + noautogroup Disable scheduler automatic task group creation. + nobats [PPC] Do not use BATs for mapping kernel lowmem on "Classic" PPC cores. diff --git a/fs/proc/base.c b/fs/proc/base.c index f3d02ca461e..2fa0ce29b6d 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1407,6 +1407,82 @@ static const struct file_operations proc_pid_sched_operations = { #endif +#ifdef CONFIG_SCHED_AUTOGROUP +/* + * Print out autogroup related information: + */ +static int sched_autogroup_show(struct seq_file *m, void *v) +{ + struct inode *inode = m->private; + struct task_struct *p; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + proc_sched_autogroup_show_task(p, m); + + put_task_struct(p); + + return 0; +} + +static ssize_t +sched_autogroup_write(struct file *file, const char __user *buf, + size_t count, loff_t *offset) +{ + struct inode *inode = file->f_path.dentry->d_inode; + struct task_struct *p; + char buffer[PROC_NUMBUF]; + long nice; + int err; + + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) + return -EFAULT; + + err = strict_strtol(strstrip(buffer), 0, &nice); + if (err) + return -EINVAL; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + + err = nice; + err = proc_sched_autogroup_set_nice(p, &err); + if (err) + count = err; + + put_task_struct(p); + + return count; +} + +static int sched_autogroup_open(struct inode *inode, struct file *filp) +{ + int ret; + + ret = single_open(filp, sched_autogroup_show, NULL); + if (!ret) { + struct seq_file *m = filp->private_data; + + m->private = inode; + } + return ret; +} + +static const struct file_operations proc_pid_sched_autogroup_operations = { + .open = sched_autogroup_open, + .read = seq_read, + .write = sched_autogroup_write, + .llseek = seq_lseek, + .release = single_release, +}; + +#endif /* CONFIG_SCHED_AUTOGROUP */ + static ssize_t comm_write(struct file *file, const char __user *buf, size_t count, loff_t *offset) { @@ -2732,6 +2808,9 @@ static const struct pid_entry tgid_base_stuff[] = { INF("limits", S_IRUGO, proc_pid_limits), #ifdef CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), +#endif +#ifdef CONFIG_SCHED_AUTOGROUP + REG("autogroup", S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations), #endif REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), #ifdef CONFIG_HAVE_ARCH_TRACEHOOK diff --git a/include/linux/sched.h b/include/linux/sched.h index a5b92c70c73..9c2d46da486 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -509,6 +509,8 @@ struct thread_group_cputimer { spinlock_t lock; }; +struct autogroup; + /* * NOTE! "signal_struct" does not have it's own * locking, because a shared signal_struct always @@ -576,6 +578,9 @@ struct signal_struct { struct tty_struct *tty; /* NULL if no tty */ +#ifdef CONFIG_SCHED_AUTOGROUP + struct autogroup *autogroup; +#endif /* * Cumulative resource counters for dead threads in the group, * and for reaped dead child processes forked by this group. @@ -1927,6 +1932,24 @@ int sched_rt_handler(struct ctl_table *table, int write, extern unsigned int sysctl_sched_compat_yield; +#ifdef CONFIG_SCHED_AUTOGROUP +extern unsigned int sysctl_sched_autogroup_enabled; + +extern void sched_autogroup_create_attach(struct task_struct *p); +extern void sched_autogroup_detach(struct task_struct *p); +extern void sched_autogroup_fork(struct signal_struct *sig); +extern void sched_autogroup_exit(struct signal_struct *sig); +#ifdef CONFIG_PROC_FS +extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m); +extern int proc_sched_autogroup_set_nice(struct task_struct *p, int *nice); +#endif +#else +static inline void sched_autogroup_create_attach(struct task_struct *p) { } +static inline void sched_autogroup_detach(struct task_struct *p) { } +static inline void sched_autogroup_fork(struct signal_struct *sig) { } +static inline void sched_autogroup_exit(struct signal_struct *sig) { } +#endif + #ifdef CONFIG_RT_MUTEXES extern int rt_mutex_getprio(struct task_struct *p); extern void rt_mutex_setprio(struct task_struct *p, int prio); diff --git a/init/Kconfig b/init/Kconfig index 88c10468db4..f1bba0a1b05 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -728,6 +728,19 @@ config NET_NS endif # NAMESPACES +config SCHED_AUTOGROUP + bool "Automatic process group scheduling" + select EVENTFD + select CGROUPS + select CGROUP_SCHED + select FAIR_GROUP_SCHED + help + This option optimizes the scheduler for common desktop workloads by + automatically creating and populating task groups. This separation + of workloads isolates aggressive CPU burners (like build jobs) from + desktop applications. Task group autogeneration is currently based + upon task session. + config MM_OWNER bool diff --git a/kernel/fork.c b/kernel/fork.c index 3b159c5991b..b6f2475f1e8 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -174,8 +174,10 @@ static inline void free_signal_struct(struct signal_struct *sig) static inline void put_signal_struct(struct signal_struct *sig) { - if (atomic_dec_and_test(&sig->sigcnt)) + if (atomic_dec_and_test(&sig->sigcnt)) { + sched_autogroup_exit(sig); free_signal_struct(sig); + } } void __put_task_struct(struct task_struct *tsk) @@ -904,6 +906,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) posix_cpu_timers_init_group(sig); tty_audit_fork(sig); + sched_autogroup_fork(sig); sig->oom_adj = current->signal->oom_adj; sig->oom_score_adj = current->signal->oom_score_adj; diff --git a/kernel/sched.c b/kernel/sched.c index 66ef5790d93..b646dad4a40 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -79,6 +79,7 @@ #include "sched_cpupri.h" #include "workqueue_sched.h" +#include "sched_autogroup.h" #define CREATE_TRACE_POINTS #include @@ -271,6 +272,10 @@ struct task_group { struct task_group *parent; struct list_head siblings; struct list_head children; + +#ifdef CONFIG_SCHED_AUTOGROUP + struct autogroup *autogroup; +#endif }; #define root_task_group init_task_group @@ -603,11 +608,14 @@ static inline int cpu_of(struct rq *rq) */ static inline struct task_group *task_group(struct task_struct *p) { + struct task_group *tg; struct cgroup_subsys_state *css; css = task_subsys_state_check(p, cpu_cgroup_subsys_id, lockdep_is_held(&task_rq(p)->lock)); - return container_of(css, struct task_group, css); + tg = container_of(css, struct task_group, css); + + return autogroup_task_group(p, tg); } /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ @@ -1869,6 +1877,7 @@ static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time) { } #include "sched_idletask.c" #include "sched_fair.c" #include "sched_rt.c" +#include "sched_autogroup.c" #include "sched_stoptask.c" #ifdef CONFIG_SCHED_DEBUG # include "sched_debug.c" @@ -7750,7 +7759,7 @@ void __init sched_init(void) #ifdef CONFIG_CGROUP_SCHED list_add(&init_task_group.list, &task_groups); INIT_LIST_HEAD(&init_task_group.children); - + autogroup_init(&init_task); #endif /* CONFIG_CGROUP_SCHED */ for_each_possible_cpu(i) { diff --git a/kernel/sched_autogroup.c b/kernel/sched_autogroup.c new file mode 100644 index 00000000000..57a7ac286a0 --- /dev/null +++ b/kernel/sched_autogroup.c @@ -0,0 +1,229 @@ +#ifdef CONFIG_SCHED_AUTOGROUP + +#include +#include +#include +#include + +unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1; +static struct autogroup autogroup_default; +static atomic_t autogroup_seq_nr; + +static void autogroup_init(struct task_struct *init_task) +{ + autogroup_default.tg = &init_task_group; + init_task_group.autogroup = &autogroup_default; + kref_init(&autogroup_default.kref); + init_rwsem(&autogroup_default.lock); + init_task->signal->autogroup = &autogroup_default; +} + +static inline void autogroup_free(struct task_group *tg) +{ + kfree(tg->autogroup); +} + +static inline void autogroup_destroy(struct kref *kref) +{ + struct autogroup *ag = container_of(kref, struct autogroup, kref); + + sched_destroy_group(ag->tg); +} + +static inline void autogroup_kref_put(struct autogroup *ag) +{ + kref_put(&ag->kref, autogroup_destroy); +} + +static inline struct autogroup *autogroup_kref_get(struct autogroup *ag) +{ + kref_get(&ag->kref); + return ag; +} + +static inline struct autogroup *autogroup_create(void) +{ + struct autogroup *ag = kzalloc(sizeof(*ag), GFP_KERNEL); + struct task_group *tg; + + if (!ag) + goto out_fail; + + tg = sched_create_group(&init_task_group); + + if (IS_ERR(tg)) + goto out_free; + + kref_init(&ag->kref); + init_rwsem(&ag->lock); + ag->id = atomic_inc_return(&autogroup_seq_nr); + ag->tg = tg; + tg->autogroup = ag; + + return ag; + +out_free: + kfree(ag); +out_fail: + if (printk_ratelimit()) { + printk(KERN_WARNING "autogroup_create: %s failure.\n", + ag ? "sched_create_group()" : "kmalloc()"); + } + + return autogroup_kref_get(&autogroup_default); +} + +static inline bool +task_wants_autogroup(struct task_struct *p, struct task_group *tg) +{ + if (tg != &root_task_group) + return false; + + if (p->sched_class != &fair_sched_class) + return false; + + /* + * We can only assume the task group can't go away on us if + * autogroup_move_group() can see us on ->thread_group list. + */ + if (p->flags & PF_EXITING) + return false; + + return true; +} + +static inline struct task_group * +autogroup_task_group(struct task_struct *p, struct task_group *tg) +{ + int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled); + + if (enabled && task_wants_autogroup(p, tg)) + return p->signal->autogroup->tg; + + return tg; +} + +static void +autogroup_move_group(struct task_struct *p, struct autogroup *ag) +{ + struct autogroup *prev; + struct task_struct *t; + unsigned long flags; + + BUG_ON(!lock_task_sighand(p, &flags)); + + prev = p->signal->autogroup; + if (prev == ag) { + unlock_task_sighand(p, &flags); + return; + } + + p->signal->autogroup = autogroup_kref_get(ag); + + t = p; + do { + sched_move_task(t); + } while_each_thread(p, t); + + unlock_task_sighand(p, &flags); + autogroup_kref_put(prev); +} + +/* Allocates GFP_KERNEL, cannot be called under any spinlock */ +void sched_autogroup_create_attach(struct task_struct *p) +{ + struct autogroup *ag = autogroup_create(); + + autogroup_move_group(p, ag); + /* drop extra refrence added by autogroup_create() */ + autogroup_kref_put(ag); +} +EXPORT_SYMBOL(sched_autogroup_create_attach); + +/* Cannot be called under siglock. Currently has no users */ +void sched_autogroup_detach(struct task_struct *p) +{ + autogroup_move_group(p, &autogroup_default); +} +EXPORT_SYMBOL(sched_autogroup_detach); + +void sched_autogroup_fork(struct signal_struct *sig) +{ + struct task_struct *p = current; + + spin_lock_irq(&p->sighand->siglock); + sig->autogroup = autogroup_kref_get(p->signal->autogroup); + spin_unlock_irq(&p->sighand->siglock); +} + +void sched_autogroup_exit(struct signal_struct *sig) +{ + autogroup_kref_put(sig->autogroup); +} + +static int __init setup_autogroup(char *str) +{ + sysctl_sched_autogroup_enabled = 0; + + return 1; +} + +__setup("noautogroup", setup_autogroup); + +#ifdef CONFIG_PROC_FS + +/* Called with siglock held. */ +int proc_sched_autogroup_set_nice(struct task_struct *p, int *nice) +{ + static unsigned long next = INITIAL_JIFFIES; + struct autogroup *ag; + int err; + + if (*nice < -20 || *nice > 19) + return -EINVAL; + + err = security_task_setnice(current, *nice); + if (err) + return err; + + if (*nice < 0 && !can_nice(current, *nice)) + return -EPERM; + + /* this is a heavy operation taking global locks.. */ + if (!capable(CAP_SYS_ADMIN) && time_before(jiffies, next)) + return -EAGAIN; + + next = HZ / 10 + jiffies; + ag = autogroup_kref_get(p->signal->autogroup); + + down_write(&ag->lock); + err = sched_group_set_shares(ag->tg, prio_to_weight[*nice + 20]); + if (!err) + ag->nice = *nice; + up_write(&ag->lock); + + autogroup_kref_put(ag); + + return err; +} + +void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m) +{ + struct autogroup *ag = autogroup_kref_get(p->signal->autogroup); + + down_read(&ag->lock); + seq_printf(m, "/autogroup-%ld nice %d\n", ag->id, ag->nice); + up_read(&ag->lock); + + autogroup_kref_put(ag); +} +#endif /* CONFIG_PROC_FS */ + +#ifdef CONFIG_SCHED_DEBUG +static inline int autogroup_path(struct task_group *tg, char *buf, int buflen) +{ + return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id); +} +#endif /* CONFIG_SCHED_DEBUG */ + +#endif /* CONFIG_SCHED_AUTOGROUP */ diff --git a/kernel/sched_autogroup.h b/kernel/sched_autogroup.h new file mode 100644 index 00000000000..5358e241cb2 --- /dev/null +++ b/kernel/sched_autogroup.h @@ -0,0 +1,32 @@ +#ifdef CONFIG_SCHED_AUTOGROUP + +struct autogroup { + struct kref kref; + struct task_group *tg; + struct rw_semaphore lock; + unsigned long id; + int nice; +}; + +static inline struct task_group * +autogroup_task_group(struct task_struct *p, struct task_group *tg); + +#else /* !CONFIG_SCHED_AUTOGROUP */ + +static inline void autogroup_init(struct task_struct *init_task) { } +static inline void autogroup_free(struct task_group *tg) { } + +static inline struct task_group * +autogroup_task_group(struct task_struct *p, struct task_group *tg) +{ + return tg; +} + +#ifdef CONFIG_SCHED_DEBUG +static inline int autogroup_path(struct task_group *tg, char *buf, int buflen) +{ + return 0; +} +#endif + +#endif /* CONFIG_SCHED_AUTOGROUP */ diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index e95b77414a9..1dfae3d014b 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -54,8 +54,7 @@ static unsigned long nsec_low(unsigned long long nsec) #define SPLIT_NS(x) nsec_high(x), nsec_low(x) #ifdef CONFIG_FAIR_GROUP_SCHED -static void print_cfs_group_stats(struct seq_file *m, int cpu, - struct task_group *tg) +static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group *tg) { struct sched_entity *se = tg->se[cpu]; if (!se) @@ -110,16 +109,6 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) 0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L); #endif -#ifdef CONFIG_CGROUP_SCHED - { - char path[64]; - - rcu_read_lock(); - cgroup_path(task_group(p)->css.cgroup, path, sizeof(path)); - rcu_read_unlock(); - SEQ_printf(m, " %s", path); - } -#endif SEQ_printf(m, "\n"); } @@ -147,19 +136,6 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu) read_unlock_irqrestore(&tasklist_lock, flags); } -#if defined(CONFIG_CGROUP_SCHED) && \ - (defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED)) -static void task_group_path(struct task_group *tg, char *buf, int buflen) -{ - /* may be NULL if the underlying cgroup isn't fully-created yet */ - if (!tg->css.cgroup) { - buf[0] = '\0'; - return; - } - cgroup_path(tg->css.cgroup, buf, buflen); -} -#endif - void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) { s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1, @@ -168,16 +144,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) struct sched_entity *last; unsigned long flags; -#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED) - char path[128]; - struct task_group *tg = cfs_rq->tg; - - task_group_path(tg, path, sizeof(path)); - - SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path); -#else SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu); -#endif SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock", SPLIT_NS(cfs_rq->exec_clock)); @@ -215,7 +182,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) SEQ_printf(m, " .%-30s: %ld\n", "load_contrib", cfs_rq->load_contribution); SEQ_printf(m, " .%-30s: %d\n", "load_tg", - atomic_read(&tg->load_weight)); + atomic_read(&cfs_rq->tg->load_weight)); #endif print_cfs_group_stats(m, cpu, cfs_rq->tg); @@ -224,17 +191,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) { -#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_RT_GROUP_SCHED) - char path[128]; - struct task_group *tg = rt_rq->tg; - - task_group_path(tg, path, sizeof(path)); - - SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, path); -#else SEQ_printf(m, "\nrt_rq[%d]:\n", cpu); -#endif - #define P(x) \ SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rt_rq->x)) diff --git a/kernel/sys.c b/kernel/sys.c index 7f5a0cd296a..2745dcdb6c6 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1080,8 +1080,10 @@ SYSCALL_DEFINE0(setsid) err = session; out: write_unlock_irq(&tasklist_lock); - if (err > 0) + if (err > 0) { proc_sid_connector(group_leader); + sched_autogroup_create_attach(group_leader); + } return err; } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index a00fdefd24c..121e4fff03d 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -370,6 +370,17 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, +#ifdef CONFIG_SCHED_AUTOGROUP + { + .procname = "sched_autogroup_enabled", + .data = &sysctl_sched_autogroup_enabled, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + .extra1 = &zero, + .extra2 = &one, + }, +#endif #ifdef CONFIG_PROVE_LOCKING { .procname = "prove_locking", -- cgit v1.2.3-70-g09d2 From c320c7b7d380e630f595de1236d9d085b035d5b4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 20 Oct 2010 12:50:11 -0200 Subject: perf events: Precalculate the header space for PERF_SAMPLE_ fields MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PERF_SAMPLE_{CALLCHAIN,RAW} have variable lenghts per sample, but the others can be precalculated, reducing a bit the per sample cost. Acked-by: Peter Zijlstra Cc: Frédéric Weisbecker Cc: Ian Munsie Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Stephane Eranian LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 2 + kernel/perf_event.c | 150 +++++++++++++++++++++++++++------------------ 2 files changed, 93 insertions(+), 59 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index cbf04cc1e63..adf6d993164 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -758,6 +758,8 @@ struct perf_event { u64 shadow_ctx_time; struct perf_event_attr attr; + u16 header_size; + u16 read_size; struct hw_perf_event hw; struct perf_event_context *ctx; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index af1e63f249f..aede71245e9 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -312,9 +312,75 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) ctx->nr_stat++; } +/* + * Called at perf_event creation and when events are attached/detached from a + * group. + */ +static void perf_event__read_size(struct perf_event *event) +{ + int entry = sizeof(u64); /* value */ + int size = 0; + int nr = 1; + + if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + size += sizeof(u64); + + if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + size += sizeof(u64); + + if (event->attr.read_format & PERF_FORMAT_ID) + entry += sizeof(u64); + + if (event->attr.read_format & PERF_FORMAT_GROUP) { + nr += event->group_leader->nr_siblings; + size += sizeof(u64); + } + + size += entry * nr; + event->read_size = size; +} + +static void perf_event__header_size(struct perf_event *event) +{ + struct perf_sample_data *data; + u64 sample_type = event->attr.sample_type; + u16 size = 0; + + perf_event__read_size(event); + + if (sample_type & PERF_SAMPLE_IP) + size += sizeof(data->ip); + + if (sample_type & PERF_SAMPLE_TID) + size += sizeof(data->tid_entry); + + if (sample_type & PERF_SAMPLE_TIME) + size += sizeof(data->time); + + if (sample_type & PERF_SAMPLE_ADDR) + size += sizeof(data->addr); + + if (sample_type & PERF_SAMPLE_ID) + size += sizeof(data->id); + + if (sample_type & PERF_SAMPLE_STREAM_ID) + size += sizeof(data->stream_id); + + if (sample_type & PERF_SAMPLE_CPU) + size += sizeof(data->cpu_entry); + + if (sample_type & PERF_SAMPLE_PERIOD) + size += sizeof(data->period); + + if (sample_type & PERF_SAMPLE_READ) + size += event->read_size; + + event->header_size = size; +} + static void perf_group_attach(struct perf_event *event) { - struct perf_event *group_leader = event->group_leader; + struct perf_event *group_leader = event->group_leader, *pos; /* * We can have double attach due to group movement in perf_event_open. @@ -333,6 +399,11 @@ static void perf_group_attach(struct perf_event *event) list_add_tail(&event->group_entry, &group_leader->sibling_list); group_leader->nr_siblings++; + + perf_event__header_size(group_leader); + + list_for_each_entry(pos, &group_leader->sibling_list, group_entry) + perf_event__header_size(pos); } /* @@ -391,7 +462,7 @@ static void perf_group_detach(struct perf_event *event) if (event->group_leader != event) { list_del_init(&event->group_entry); event->group_leader->nr_siblings--; - return; + goto out; } if (!list_empty(&event->group_entry)) @@ -410,6 +481,12 @@ static void perf_group_detach(struct perf_event *event) /* Inherit group flags from the previous leader */ sibling->group_flags = event->group_flags; } + +out: + perf_event__header_size(event->group_leader); + + list_for_each_entry(tmp, &event->group_leader->sibling_list, group_entry) + perf_event__header_size(tmp); } static inline int @@ -2289,31 +2366,6 @@ static int perf_release(struct inode *inode, struct file *file) return perf_event_release_kernel(event); } -static int perf_event_read_size(struct perf_event *event) -{ - int entry = sizeof(u64); /* value */ - int size = 0; - int nr = 1; - - if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) - size += sizeof(u64); - - if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) - size += sizeof(u64); - - if (event->attr.read_format & PERF_FORMAT_ID) - entry += sizeof(u64); - - if (event->attr.read_format & PERF_FORMAT_GROUP) { - nr += event->group_leader->nr_siblings; - size += sizeof(u64); - } - - size += entry * nr; - - return size; -} - u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) { struct perf_event *child; @@ -2428,7 +2480,7 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count) if (event->state == PERF_EVENT_STATE_ERROR) return 0; - if (count < perf_event_read_size(event)) + if (count < event->read_size) return -ENOSPC; WARN_ON_ONCE(event->ctx->parent_ctx); @@ -3606,59 +3658,34 @@ void perf_prepare_sample(struct perf_event_header *header, data->type = sample_type; header->type = PERF_RECORD_SAMPLE; - header->size = sizeof(*header); + header->size = sizeof(*header) + event->header_size; header->misc = 0; header->misc |= perf_misc_flags(regs); - if (sample_type & PERF_SAMPLE_IP) { + if (sample_type & PERF_SAMPLE_IP) data->ip = perf_instruction_pointer(regs); - header->size += sizeof(data->ip); - } - if (sample_type & PERF_SAMPLE_TID) { /* namespace issues */ data->tid_entry.pid = perf_event_pid(event, current); data->tid_entry.tid = perf_event_tid(event, current); - - header->size += sizeof(data->tid_entry); } - if (sample_type & PERF_SAMPLE_TIME) { + if (sample_type & PERF_SAMPLE_TIME) data->time = perf_clock(); - header->size += sizeof(data->time); - } - - if (sample_type & PERF_SAMPLE_ADDR) - header->size += sizeof(data->addr); - - if (sample_type & PERF_SAMPLE_ID) { + if (sample_type & PERF_SAMPLE_ID) data->id = primary_event_id(event); - header->size += sizeof(data->id); - } - - if (sample_type & PERF_SAMPLE_STREAM_ID) { + if (sample_type & PERF_SAMPLE_STREAM_ID) data->stream_id = event->id; - header->size += sizeof(data->stream_id); - } - if (sample_type & PERF_SAMPLE_CPU) { data->cpu_entry.cpu = raw_smp_processor_id(); data->cpu_entry.reserved = 0; - - header->size += sizeof(data->cpu_entry); } - if (sample_type & PERF_SAMPLE_PERIOD) - header->size += sizeof(data->period); - - if (sample_type & PERF_SAMPLE_READ) - header->size += perf_event_read_size(event); - if (sample_type & PERF_SAMPLE_CALLCHAIN) { int size = 1; @@ -3726,7 +3753,7 @@ perf_event_read_event(struct perf_event *event, .header = { .type = PERF_RECORD_READ, .misc = 0, - .size = sizeof(read_event) + perf_event_read_size(event), + .size = sizeof(read_event) + event->read_size, }, .pid = perf_event_pid(event, task), .tid = perf_event_tid(event, task), @@ -5714,6 +5741,11 @@ SYSCALL_DEFINE5(perf_event_open, list_add_tail(&event->owner_entry, ¤t->perf_event_list); mutex_unlock(¤t->perf_event_mutex); + /* + * Precalculate sample_data sizes + */ + perf_event__header_size(event); + /* * Drop the reference on the group_event after placing the * new event on the sibling_list. This ensures destruction -- cgit v1.2.3-70-g09d2 From 498cb95175c29ed96bf32f30df2d11ec1c7f3879 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Tue, 30 Nov 2010 14:11:49 -0800 Subject: OMAP: Serial: Define OMAP uart MDR1 reg and remove magic numbers Define MDR1 register serial definitions used in serial and bluetooth drivers. Change magic number to ones defined in serial_reg for omap1/2 serial driver. Remove redefined MDR1 register definitions in omap-serial driver. Signed-off-by: Andrei Emeltchenko Acked-by: G, Manjunath Kondaiah Acked-by: Govindraj.R Acked-by: Greg Kroah-Hartman Signed-off-by: Tony Lindgren --- arch/arm/mach-omap1/serial.c | 6 ++++-- arch/arm/mach-omap2/serial.c | 15 +++++++++------ arch/arm/plat-omap/include/plat/omap-serial.h | 3 --- drivers/serial/omap-serial.c | 6 +++--- include/linux/serial_reg.h | 12 ++++++++++++ 5 files changed, 28 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-omap1/serial.c b/arch/arm/mach-omap1/serial.c index b78d0749f13..c73d1b77b36 100644 --- a/arch/arm/mach-omap1/serial.c +++ b/arch/arm/mach-omap1/serial.c @@ -52,9 +52,11 @@ static inline void omap_serial_outp(struct plat_serial8250_port *p, int offset, */ static void __init omap_serial_reset(struct plat_serial8250_port *p) { - omap_serial_outp(p, UART_OMAP_MDR1, 0x07); /* disable UART */ + omap_serial_outp(p, UART_OMAP_MDR1, + UART_OMAP_MDR1_DISABLE); /* disable UART */ omap_serial_outp(p, UART_OMAP_SCR, 0x08); /* TX watermark */ - omap_serial_outp(p, UART_OMAP_MDR1, 0x00); /* enable UART */ + omap_serial_outp(p, UART_OMAP_MDR1, + UART_OMAP_MDR1_16X_MODE); /* enable UART */ if (!cpu_is_omap15xx()) { omap_serial_outp(p, UART_OMAP_SYSC, 0x01); diff --git a/arch/arm/mach-omap2/serial.c b/arch/arm/mach-omap2/serial.c index d17960a1be2..fa9806250b5 100644 --- a/arch/arm/mach-omap2/serial.c +++ b/arch/arm/mach-omap2/serial.c @@ -169,9 +169,9 @@ static inline void serial_write_reg(struct omap_uart_state *uart, int offset, static inline void __init omap_uart_reset(struct omap_uart_state *uart) { - serial_write_reg(uart, UART_OMAP_MDR1, 0x07); + serial_write_reg(uart, UART_OMAP_MDR1, UART_OMAP_MDR1_DISABLE); serial_write_reg(uart, UART_OMAP_SCR, 0x08); - serial_write_reg(uart, UART_OMAP_MDR1, 0x00); + serial_write_reg(uart, UART_OMAP_MDR1, UART_OMAP_MDR1_16X_MODE); } #if defined(CONFIG_PM) && defined(CONFIG_ARCH_OMAP3) @@ -247,9 +247,10 @@ static void omap_uart_restore_context(struct omap_uart_state *uart) uart->context_valid = 0; if (uart->errata & UART_ERRATA_i202_MDR1_ACCESS) - omap_uart_mdr1_errataset(uart, 0x07, 0xA0); + omap_uart_mdr1_errataset(uart, UART_OMAP_MDR1_DISABLE, 0xA0); else - serial_write_reg(uart, UART_OMAP_MDR1, 0x7); + serial_write_reg(uart, UART_OMAP_MDR1, UART_OMAP_MDR1_DISABLE); + serial_write_reg(uart, UART_LCR, 0xBF); /* Config B mode */ efr = serial_read_reg(uart, UART_EFR); serial_write_reg(uart, UART_EFR, UART_EFR_ECB); @@ -268,11 +269,13 @@ static void omap_uart_restore_context(struct omap_uart_state *uart) serial_write_reg(uart, UART_OMAP_SCR, uart->scr); serial_write_reg(uart, UART_OMAP_WER, uart->wer); serial_write_reg(uart, UART_OMAP_SYSC, uart->sysc); + if (uart->errata & UART_ERRATA_i202_MDR1_ACCESS) - omap_uart_mdr1_errataset(uart, 0x00, 0xA1); + omap_uart_mdr1_errataset(uart, UART_OMAP_MDR1_16X_MODE, 0xA1); else /* UART 16x mode */ - serial_write_reg(uart, UART_OMAP_MDR1, 0x00); + serial_write_reg(uart, UART_OMAP_MDR1, + UART_OMAP_MDR1_16X_MODE); } #else static inline void omap_uart_save_context(struct omap_uart_state *uart) {} diff --git a/arch/arm/plat-omap/include/plat/omap-serial.h b/arch/arm/plat-omap/include/plat/omap-serial.h index c8dae02f070..6a178801461 100644 --- a/arch/arm/plat-omap/include/plat/omap-serial.h +++ b/arch/arm/plat-omap/include/plat/omap-serial.h @@ -31,9 +31,6 @@ */ #define OMAP_SERIAL_NAME "ttyO" -#define OMAP_MDR1_DISABLE 0x07 -#define OMAP_MDR1_MODE13X 0x03 -#define OMAP_MDR1_MODE16X 0x00 #define OMAP_MODE13X_SPEED 230400 /* diff --git a/drivers/serial/omap-serial.c b/drivers/serial/omap-serial.c index 14365f72b66..03a96db67de 100644 --- a/drivers/serial/omap-serial.c +++ b/drivers/serial/omap-serial.c @@ -753,7 +753,7 @@ serial_omap_set_termios(struct uart_port *port, struct ktermios *termios, /* Protocol, Baud Rate, and Interrupt Settings */ - serial_out(up, UART_OMAP_MDR1, OMAP_MDR1_DISABLE); + serial_out(up, UART_OMAP_MDR1, UART_OMAP_MDR1_DISABLE); serial_out(up, UART_LCR, OMAP_UART_LCR_CONF_MDB); up->efr = serial_in(up, UART_EFR); @@ -774,9 +774,9 @@ serial_omap_set_termios(struct uart_port *port, struct ktermios *termios, serial_out(up, UART_LCR, cval); if (baud > 230400 && baud != 3000000) - serial_out(up, UART_OMAP_MDR1, OMAP_MDR1_MODE13X); + serial_out(up, UART_OMAP_MDR1, UART_OMAP_MDR1_13X_MODE); else - serial_out(up, UART_OMAP_MDR1, OMAP_MDR1_MODE16X); + serial_out(up, UART_OMAP_MDR1, UART_OMAP_MDR1_16X_MODE); /* Hardware Flow Control Configuration */ diff --git a/include/linux/serial_reg.h b/include/linux/serial_reg.h index c7a0ce11cd4..6f3823474e6 100644 --- a/include/linux/serial_reg.h +++ b/include/linux/serial_reg.h @@ -341,5 +341,17 @@ #define UART_OMAP_SYSS 0x16 /* System status register */ #define UART_OMAP_WER 0x17 /* Wake-up enable register */ +/* + * These are the definitions for the MDR1 register + */ +#define UART_OMAP_MDR1_16X_MODE 0x00 /* UART 16x mode */ +#define UART_OMAP_MDR1_SIR_MODE 0x01 /* SIR mode */ +#define UART_OMAP_MDR1_16X_ABAUD_MODE 0x02 /* UART 16x auto-baud */ +#define UART_OMAP_MDR1_13X_MODE 0x03 /* UART 13x mode */ +#define UART_OMAP_MDR1_MIR_MODE 0x04 /* MIR mode */ +#define UART_OMAP_MDR1_FIR_MODE 0x05 /* FIR mode */ +#define UART_OMAP_MDR1_CIR_MODE 0x06 /* CIR mode */ +#define UART_OMAP_MDR1_DISABLE 0x07 /* Disable (default state) */ + #endif /* _LINUX_SERIAL_REG_H */ -- cgit v1.2.3-70-g09d2 From 662b083a87a3489f3f19c6e0651c1b99b0de5df0 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Tue, 30 Nov 2010 14:11:49 -0800 Subject: omap: Serial: Define register access modes in LCR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Access to some registers depends on register access mode Three different modes are available for OMAP (at least) • Operational mode LCR_REG[7] = 0x0 • Configuration mode A LCR_REG[7] = 0x1 and LCR_REG[7:0]! = 0xBF • Configuration mode B LCR_REG[7] = 0x1 and LCR_REG[7:0] = 0xBF Define access modes and remove redefinitions and magic numbers in serial drivers (and later in bluetooth driver). Signed-off-by: Andrei Emeltchenko Acked-by: Govindraj.R Acked-by: Greg Kroah-Hartman Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/serial.c | 12 +++++----- arch/arm/plat-omap/include/plat/omap-serial.h | 9 ------- drivers/serial/8250.c | 26 ++++++++++---------- drivers/serial/omap-serial.c | 34 +++++++++++++-------------- include/linux/serial_reg.h | 7 ++++++ 5 files changed, 43 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/serial.c b/arch/arm/mach-omap2/serial.c index fa9806250b5..9dc077e2d8a 100644 --- a/arch/arm/mach-omap2/serial.c +++ b/arch/arm/mach-omap2/serial.c @@ -219,7 +219,7 @@ static void omap_uart_save_context(struct omap_uart_state *uart) return; lcr = serial_read_reg(uart, UART_LCR); - serial_write_reg(uart, UART_LCR, 0xBF); + serial_write_reg(uart, UART_LCR, UART_LCR_CONF_MODE_B); uart->dll = serial_read_reg(uart, UART_DLL); uart->dlh = serial_read_reg(uart, UART_DLM); serial_write_reg(uart, UART_LCR, lcr); @@ -227,7 +227,7 @@ static void omap_uart_save_context(struct omap_uart_state *uart) uart->sysc = serial_read_reg(uart, UART_OMAP_SYSC); uart->scr = serial_read_reg(uart, UART_OMAP_SCR); uart->wer = serial_read_reg(uart, UART_OMAP_WER); - serial_write_reg(uart, UART_LCR, 0x80); + serial_write_reg(uart, UART_LCR, UART_LCR_CONF_MODE_A); uart->mcr = serial_read_reg(uart, UART_MCR); serial_write_reg(uart, UART_LCR, lcr); @@ -251,19 +251,19 @@ static void omap_uart_restore_context(struct omap_uart_state *uart) else serial_write_reg(uart, UART_OMAP_MDR1, UART_OMAP_MDR1_DISABLE); - serial_write_reg(uart, UART_LCR, 0xBF); /* Config B mode */ + serial_write_reg(uart, UART_LCR, UART_LCR_CONF_MODE_B); efr = serial_read_reg(uart, UART_EFR); serial_write_reg(uart, UART_EFR, UART_EFR_ECB); serial_write_reg(uart, UART_LCR, 0x0); /* Operational mode */ serial_write_reg(uart, UART_IER, 0x0); - serial_write_reg(uart, UART_LCR, 0xBF); /* Config B mode */ + serial_write_reg(uart, UART_LCR, UART_LCR_CONF_MODE_B); serial_write_reg(uart, UART_DLL, uart->dll); serial_write_reg(uart, UART_DLM, uart->dlh); serial_write_reg(uart, UART_LCR, 0x0); /* Operational mode */ serial_write_reg(uart, UART_IER, uart->ier); - serial_write_reg(uart, UART_LCR, 0x80); + serial_write_reg(uart, UART_LCR, UART_LCR_CONF_MODE_A); serial_write_reg(uart, UART_MCR, uart->mcr); - serial_write_reg(uart, UART_LCR, 0xBF); /* Config B mode */ + serial_write_reg(uart, UART_LCR, UART_LCR_CONF_MODE_B); serial_write_reg(uart, UART_EFR, efr); serial_write_reg(uart, UART_LCR, UART_LCR_WLEN8); serial_write_reg(uart, UART_OMAP_SCR, uart->scr); diff --git a/arch/arm/plat-omap/include/plat/omap-serial.h b/arch/arm/plat-omap/include/plat/omap-serial.h index 6a178801461..b3e0bad9b77 100644 --- a/arch/arm/plat-omap/include/plat/omap-serial.h +++ b/arch/arm/plat-omap/include/plat/omap-serial.h @@ -33,15 +33,6 @@ #define OMAP_MODE13X_SPEED 230400 -/* - * LCR = 0XBF: Switch to Configuration Mode B. - * In configuration mode b allow access - * to EFR,DLL,DLH. - * Reference OMAP TRM Chapter 17 - * Section: 1.4.3 Mode Selection - */ -#define OMAP_UART_LCR_CONF_MDB 0XBF - /* WER = 0x7F * Enable module level wakeup in WER reg */ diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c index 4d8e14b7aa9..aaf9907e601 100644 --- a/drivers/serial/8250.c +++ b/drivers/serial/8250.c @@ -653,13 +653,13 @@ static void serial8250_set_sleep(struct uart_8250_port *p, int sleep) { if (p->capabilities & UART_CAP_SLEEP) { if (p->capabilities & UART_CAP_EFR) { - serial_outp(p, UART_LCR, 0xBF); + serial_outp(p, UART_LCR, UART_LCR_CONF_MODE_B); serial_outp(p, UART_EFR, UART_EFR_ECB); serial_outp(p, UART_LCR, 0); } serial_outp(p, UART_IER, sleep ? UART_IERX_SLEEP : 0); if (p->capabilities & UART_CAP_EFR) { - serial_outp(p, UART_LCR, 0xBF); + serial_outp(p, UART_LCR, UART_LCR_CONF_MODE_B); serial_outp(p, UART_EFR, 0); serial_outp(p, UART_LCR, 0); } @@ -752,7 +752,7 @@ static int size_fifo(struct uart_8250_port *up) serial_outp(up, UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT); serial_outp(up, UART_MCR, UART_MCR_LOOP); - serial_outp(up, UART_LCR, UART_LCR_DLAB); + serial_outp(up, UART_LCR, UART_LCR_CONF_MODE_A); old_dl = serial_dl_read(up); serial_dl_write(up, 0x0001); serial_outp(up, UART_LCR, 0x03); @@ -764,7 +764,7 @@ static int size_fifo(struct uart_8250_port *up) serial_inp(up, UART_RX); serial_outp(up, UART_FCR, old_fcr); serial_outp(up, UART_MCR, old_mcr); - serial_outp(up, UART_LCR, UART_LCR_DLAB); + serial_outp(up, UART_LCR, UART_LCR_CONF_MODE_A); serial_dl_write(up, old_dl); serial_outp(up, UART_LCR, old_lcr); @@ -782,7 +782,7 @@ static unsigned int autoconfig_read_divisor_id(struct uart_8250_port *p) unsigned int id; old_lcr = serial_inp(p, UART_LCR); - serial_outp(p, UART_LCR, UART_LCR_DLAB); + serial_outp(p, UART_LCR, UART_LCR_CONF_MODE_A); old_dll = serial_inp(p, UART_DLL); old_dlm = serial_inp(p, UART_DLM); @@ -836,7 +836,7 @@ static void autoconfig_has_efr(struct uart_8250_port *up) * recommended for new designs). */ up->acr = 0; - serial_out(up, UART_LCR, 0xBF); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); serial_out(up, UART_EFR, UART_EFR_ECB); serial_out(up, UART_LCR, 0x00); id1 = serial_icr_read(up, UART_ID1); @@ -945,7 +945,7 @@ static void autoconfig_16550a(struct uart_8250_port *up) * Check for presence of the EFR when DLAB is set. * Only ST16C650V1 UARTs pass this test. */ - serial_outp(up, UART_LCR, UART_LCR_DLAB); + serial_outp(up, UART_LCR, UART_LCR_CONF_MODE_A); if (serial_in(up, UART_EFR) == 0) { serial_outp(up, UART_EFR, 0xA8); if (serial_in(up, UART_EFR) != 0) { @@ -963,7 +963,7 @@ static void autoconfig_16550a(struct uart_8250_port *up) * Maybe it requires 0xbf to be written to the LCR. * (other ST16C650V2 UARTs, TI16C752A, etc) */ - serial_outp(up, UART_LCR, 0xBF); + serial_outp(up, UART_LCR, UART_LCR_CONF_MODE_B); if (serial_in(up, UART_EFR) == 0 && !broken_efr(up)) { DEBUG_AUTOCONF("EFRv2 "); autoconfig_has_efr(up); @@ -1024,7 +1024,7 @@ static void autoconfig_16550a(struct uart_8250_port *up) serial_outp(up, UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR7_64BYTE); status1 = serial_in(up, UART_IIR) >> 5; serial_outp(up, UART_FCR, UART_FCR_ENABLE_FIFO); - serial_outp(up, UART_LCR, UART_LCR_DLAB); + serial_outp(up, UART_LCR, UART_LCR_CONF_MODE_A); serial_outp(up, UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR7_64BYTE); status2 = serial_in(up, UART_IIR) >> 5; serial_outp(up, UART_FCR, UART_FCR_ENABLE_FIFO); @@ -1183,7 +1183,7 @@ static void autoconfig(struct uart_8250_port *up, unsigned int probeflags) * We also initialise the EFR (if any) to zero for later. The * EFR occupies the same register location as the FCR and IIR. */ - serial_outp(up, UART_LCR, 0xBF); + serial_outp(up, UART_LCR, UART_LCR_CONF_MODE_B); serial_outp(up, UART_EFR, 0); serial_outp(up, UART_LCR, 0); @@ -1952,7 +1952,7 @@ static int serial8250_startup(struct uart_port *port) if (up->port.type == PORT_16C950) { /* Wake up and initialize UART */ up->acr = 0; - serial_outp(up, UART_LCR, 0xBF); + serial_outp(up, UART_LCR, UART_LCR_CONF_MODE_B); serial_outp(up, UART_EFR, UART_EFR_ECB); serial_outp(up, UART_IER, 0); serial_outp(up, UART_LCR, 0); @@ -2002,7 +2002,7 @@ static int serial8250_startup(struct uart_port *port) if (up->port.type == PORT_16850) { unsigned char fctr; - serial_outp(up, UART_LCR, 0xbf); + serial_outp(up, UART_LCR, UART_LCR_CONF_MODE_B); fctr = serial_inp(up, UART_FCTR) & ~(UART_FCTR_RX|UART_FCTR_TX); serial_outp(up, UART_FCTR, fctr | UART_FCTR_TRGD | UART_FCTR_RX); @@ -2363,7 +2363,7 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, if (termios->c_cflag & CRTSCTS) efr |= UART_EFR_CTS; - serial_outp(up, UART_LCR, 0xBF); + serial_outp(up, UART_LCR, UART_LCR_CONF_MODE_B); serial_outp(up, UART_EFR, efr); } diff --git a/drivers/serial/omap-serial.c b/drivers/serial/omap-serial.c index 03a96db67de..1201eff1831 100644 --- a/drivers/serial/omap-serial.c +++ b/drivers/serial/omap-serial.c @@ -570,7 +570,7 @@ serial_omap_configure_xonxoff unsigned char efr = 0; up->lcr = serial_in(up, UART_LCR); - serial_out(up, UART_LCR, OMAP_UART_LCR_CONF_MDB); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); up->efr = serial_in(up, UART_EFR); serial_out(up, UART_EFR, up->efr & ~UART_EFR_ECB); @@ -598,7 +598,7 @@ serial_omap_configure_xonxoff efr |= OMAP_UART_SW_RX; serial_out(up, UART_EFR, up->efr | UART_EFR_ECB); - serial_out(up, UART_LCR, UART_LCR_DLAB); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A); up->mcr = serial_in(up, UART_MCR); @@ -612,14 +612,14 @@ serial_omap_configure_xonxoff up->mcr |= UART_MCR_XONANY; serial_out(up, UART_MCR, up->mcr | UART_MCR_TCRTLR); - serial_out(up, UART_LCR, OMAP_UART_LCR_CONF_MDB); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); serial_out(up, UART_TI752_TCR, OMAP_UART_TCR_TRIG); /* Enable special char function UARTi.EFR_REG[5] and * load the new software flow control mode IXON or IXOFF * and restore the UARTi.EFR_REG[4] ENHANCED_EN value. */ serial_out(up, UART_EFR, efr | UART_EFR_SCD); - serial_out(up, UART_LCR, UART_LCR_DLAB); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A); serial_out(up, UART_MCR, up->mcr & ~UART_MCR_TCRTLR); serial_out(up, UART_LCR, up->lcr); @@ -724,22 +724,22 @@ serial_omap_set_termios(struct uart_port *port, struct ktermios *termios, * baud clock is not running * DLL_REG and DLH_REG set to 0. */ - serial_out(up, UART_LCR, UART_LCR_DLAB); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A); serial_out(up, UART_DLL, 0); serial_out(up, UART_DLM, 0); serial_out(up, UART_LCR, 0); - serial_out(up, UART_LCR, OMAP_UART_LCR_CONF_MDB); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); up->efr = serial_in(up, UART_EFR); serial_out(up, UART_EFR, up->efr | UART_EFR_ECB); - serial_out(up, UART_LCR, UART_LCR_DLAB); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A); up->mcr = serial_in(up, UART_MCR); serial_out(up, UART_MCR, up->mcr | UART_MCR_TCRTLR); /* FIFO ENABLE, DMA MODE */ serial_out(up, UART_FCR, up->fcr); - serial_out(up, UART_LCR, OMAP_UART_LCR_CONF_MDB); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); if (up->use_dma) { serial_out(up, UART_TI752_TLR, 0); @@ -748,27 +748,27 @@ serial_omap_set_termios(struct uart_port *port, struct ktermios *termios, } serial_out(up, UART_EFR, up->efr); - serial_out(up, UART_LCR, UART_LCR_DLAB); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A); serial_out(up, UART_MCR, up->mcr); /* Protocol, Baud Rate, and Interrupt Settings */ serial_out(up, UART_OMAP_MDR1, UART_OMAP_MDR1_DISABLE); - serial_out(up, UART_LCR, OMAP_UART_LCR_CONF_MDB); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); up->efr = serial_in(up, UART_EFR); serial_out(up, UART_EFR, up->efr | UART_EFR_ECB); serial_out(up, UART_LCR, 0); serial_out(up, UART_IER, 0); - serial_out(up, UART_LCR, OMAP_UART_LCR_CONF_MDB); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); serial_out(up, UART_DLL, quot & 0xff); /* LS of divisor */ serial_out(up, UART_DLM, quot >> 8); /* MS of divisor */ serial_out(up, UART_LCR, 0); serial_out(up, UART_IER, up->ier); - serial_out(up, UART_LCR, OMAP_UART_LCR_CONF_MDB); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); serial_out(up, UART_EFR, up->efr); serial_out(up, UART_LCR, cval); @@ -782,18 +782,18 @@ serial_omap_set_termios(struct uart_port *port, struct ktermios *termios, if (termios->c_cflag & CRTSCTS) { efr |= (UART_EFR_CTS | UART_EFR_RTS); - serial_out(up, UART_LCR, UART_LCR_DLAB); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A); up->mcr = serial_in(up, UART_MCR); serial_out(up, UART_MCR, up->mcr | UART_MCR_TCRTLR); - serial_out(up, UART_LCR, OMAP_UART_LCR_CONF_MDB); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); up->efr = serial_in(up, UART_EFR); serial_out(up, UART_EFR, up->efr | UART_EFR_ECB); serial_out(up, UART_TI752_TCR, OMAP_UART_TCR_TRIG); serial_out(up, UART_EFR, efr); /* Enable AUTORTS and AUTOCTS */ - serial_out(up, UART_LCR, UART_LCR_DLAB); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A); serial_out(up, UART_MCR, up->mcr | UART_MCR_RTS); serial_out(up, UART_LCR, cval); } @@ -815,13 +815,13 @@ serial_omap_pm(struct uart_port *port, unsigned int state, unsigned char efr; dev_dbg(up->port.dev, "serial_omap_pm+%d\n", up->pdev->id); - serial_out(up, UART_LCR, OMAP_UART_LCR_CONF_MDB); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); efr = serial_in(up, UART_EFR); serial_out(up, UART_EFR, efr | UART_EFR_ECB); serial_out(up, UART_LCR, 0); serial_out(up, UART_IER, (state != 0) ? UART_IERX_SLEEP : 0); - serial_out(up, UART_LCR, OMAP_UART_LCR_CONF_MDB); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); serial_out(up, UART_EFR, efr); serial_out(up, UART_LCR, 0); /* Enable module level wake up */ diff --git a/include/linux/serial_reg.h b/include/linux/serial_reg.h index 6f3823474e6..3ecb71a9e50 100644 --- a/include/linux/serial_reg.h +++ b/include/linux/serial_reg.h @@ -99,6 +99,13 @@ #define UART_LCR_WLEN7 0x02 /* Wordlength: 7 bits */ #define UART_LCR_WLEN8 0x03 /* Wordlength: 8 bits */ +/* + * Access to some registers depends on register access / configuration + * mode. + */ +#define UART_LCR_CONF_MODE_A UART_LCR_DLAB /* Configutation mode A */ +#define UART_LCR_CONF_MODE_B 0xBF /* Configutation mode B */ + #define UART_MCR 4 /* Out: Modem Control Register */ #define UART_MCR_CLKSEL 0x80 /* Divide clock by 4 (TI16C752, EFR[4]=1) */ #define UART_MCR_TCRTLR 0x40 /* Access TCR/TLR (TI16C752, EFR[4]=1) */ -- cgit v1.2.3-70-g09d2 From 7fc56f0d9908fe140a01387d59954e3d0a2e7744 Mon Sep 17 00:00:00 2001 From: Luo Andy Date: Tue, 23 Nov 2010 10:41:21 +0800 Subject: usb: gadget: langwell_udc: add usb test mode support This patch adds test mode support for Langwell gadget driver. Signed-off-by: Henry Yuan Signed-off-by: Andy Luo Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/langwell_udc.c | 23 +++++++++++++++++++++++ include/linux/usb/ch9.h | 10 ++++++++++ 2 files changed, 33 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/gadget/langwell_udc.c b/drivers/usb/gadget/langwell_udc.c index b8ec954c069..777972454e3 100644 --- a/drivers/usb/gadget/langwell_udc.c +++ b/drivers/usb/gadget/langwell_udc.c @@ -2225,6 +2225,7 @@ static void handle_setup_packet(struct langwell_udc *dev, u16 wValue = le16_to_cpu(setup->wValue); u16 wIndex = le16_to_cpu(setup->wIndex); u16 wLength = le16_to_cpu(setup->wLength); + u32 portsc1; dev_vdbg(&dev->pdev->dev, "---> %s()\n", __func__); @@ -2313,6 +2314,28 @@ static void handle_setup_packet(struct langwell_udc *dev, dev->dev_status &= ~(1 << wValue); } break; + case USB_DEVICE_TEST_MODE: + dev_dbg(&dev->pdev->dev, "SETUP: TEST MODE\n"); + if ((wIndex & 0xff) || + (dev->gadget.speed != USB_SPEED_HIGH)) + ep0_stall(dev); + + switch (wIndex >> 8) { + case TEST_J: + case TEST_K: + case TEST_SE0_NAK: + case TEST_PACKET: + case TEST_FORCE_EN: + if (prime_status_phase(dev, EP_DIR_IN)) + ep0_stall(dev); + portsc1 = readl(&dev->op_regs->portsc1); + portsc1 |= (wIndex & 0xf00) << 8; + writel(portsc1, &dev->op_regs->portsc1); + goto end; + default: + rc = -EOPNOTSUPP; + } + break; default: rc = -EOPNOTSUPP; break; diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h index f917bbbc890..ab461948b57 100644 --- a/include/linux/usb/ch9.h +++ b/include/linux/usb/ch9.h @@ -123,6 +123,16 @@ #define USB_DEVICE_A_ALT_HNP_SUPPORT 5 /* (otg) other RH port does */ #define USB_DEVICE_DEBUG_MODE 6 /* (special devices only) */ +/* + * Test Mode Selectors + * See USB 2.0 spec Table 9-7 + */ +#define TEST_J 1 +#define TEST_K 2 +#define TEST_SE0_NAK 3 +#define TEST_PACKET 4 +#define TEST_FORCE_EN 5 + /* * New Feature Selectors as added by USB 3.0 * See USB 3.0 spec Table 9-6 -- cgit v1.2.3-70-g09d2 From 3c77f845722158206a7209c45ccddc264d19319c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 30 Nov 2010 20:55:34 +0100 Subject: exec: make argv/envp memory visible to oom-killer Brad Spengler published a local memory-allocation DoS that evades the OOM-killer (though not the virtual memory RLIMIT): http://www.grsecurity.net/~spender/64bit_dos.c execve()->copy_strings() can allocate a lot of memory, but this is not visible to oom-killer, nobody can see the nascent bprm->mm and take it into account. With this patch get_arg_page() increments current's MM_ANONPAGES counter every time we allocate the new page for argv/envp. When do_execve() succeds or fails, we change this counter back. Technically this is not 100% correct, we can't know if the new page is swapped out and turn MM_ANONPAGES into MM_SWAPENTS, but I don't think this really matters and everything becomes correct once exec changes ->mm or fails. Reported-by: Brad Spengler Reviewed-and-discussed-by: KOSAKI Motohiro Signed-off-by: Oleg Nesterov Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- fs/exec.c | 32 ++++++++++++++++++++++++++++++-- include/linux/binfmts.h | 1 + 2 files changed, 31 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index 99d33a1371e..4303b9035fe 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -164,6 +164,25 @@ out: #ifdef CONFIG_MMU +static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) +{ + struct mm_struct *mm = current->mm; + long diff = (long)(pages - bprm->vma_pages); + + if (!mm || !diff) + return; + + bprm->vma_pages = pages; + +#ifdef SPLIT_RSS_COUNTING + add_mm_counter(mm, MM_ANONPAGES, diff); +#else + spin_lock(&mm->page_table_lock); + add_mm_counter(mm, MM_ANONPAGES, diff); + spin_unlock(&mm->page_table_lock); +#endif +} + static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, int write) { @@ -186,6 +205,8 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start; struct rlimit *rlim; + acct_arg_size(bprm, size / PAGE_SIZE); + /* * We've historically supported up to 32 pages (ARG_MAX) * of argument strings even with small stacks @@ -276,6 +297,10 @@ static bool valid_arg_len(struct linux_binprm *bprm, long len) #else +static inline void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) +{ +} + static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, int write) { @@ -1003,6 +1028,7 @@ int flush_old_exec(struct linux_binprm * bprm) /* * Release all of the old mmap stuff */ + acct_arg_size(bprm, 0); retval = exec_mmap(bprm->mm); if (retval) goto out; @@ -1426,8 +1452,10 @@ int do_execve(const char * filename, return retval; out: - if (bprm->mm) - mmput (bprm->mm); + if (bprm->mm) { + acct_arg_size(bprm, 0); + mmput(bprm->mm); + } out_file: if (bprm->file) { diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index a065612fc92..7c87796d20d 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -29,6 +29,7 @@ struct linux_binprm{ char buf[BINPRM_BUF_SIZE]; #ifdef CONFIG_MMU struct vm_area_struct *vma; + unsigned long vma_pages; #else # define MAX_ARG_PAGES 32 struct page *page[MAX_ARG_PAGES]; -- cgit v1.2.3-70-g09d2 From 114279be2120a916e8a04feeb2ac976a10016f2f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 30 Nov 2010 20:56:02 +0100 Subject: exec: copy-and-paste the fixes into compat_do_execve() paths Note: this patch targets 2.6.37 and tries to be as simple as possible. That is why it adds more copy-and-paste horror into fs/compat.c and uglifies fs/exec.c, this will be cleanuped later. compat_copy_strings() plays with bprm->vma/mm directly and thus has two problems: it lacks the RLIMIT_STACK check and argv/envp memory is not visible to oom killer. Export acct_arg_size() and get_arg_page(), change compat_copy_strings() to use get_arg_page(), change compat_do_execve() to do acct_arg_size(0) as do_execve() does. Add the fatal_signal_pending/cond_resched checks into compat_count() and compat_copy_strings(), this matches the code in fs/exec.c and certainly makes sense. Signed-off-by: Oleg Nesterov Cc: KOSAKI Motohiro Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- fs/compat.c | 28 +++++++++++++++------------- fs/exec.c | 8 ++++---- include/linux/binfmts.h | 4 ++++ 3 files changed, 23 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/fs/compat.c b/fs/compat.c index c580c322fa6..eb1740ac8c0 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1350,6 +1350,10 @@ static int compat_count(compat_uptr_t __user *argv, int max) argv++; if (i++ >= max) return -E2BIG; + + if (fatal_signal_pending(current)) + return -ERESTARTNOHAND; + cond_resched(); } } return i; @@ -1391,6 +1395,12 @@ static int compat_copy_strings(int argc, compat_uptr_t __user *argv, while (len > 0) { int offset, bytes_to_copy; + if (fatal_signal_pending(current)) { + ret = -ERESTARTNOHAND; + goto out; + } + cond_resched(); + offset = pos % PAGE_SIZE; if (offset == 0) offset = PAGE_SIZE; @@ -1407,18 +1417,8 @@ static int compat_copy_strings(int argc, compat_uptr_t __user *argv, if (!kmapped_page || kpos != (pos & PAGE_MASK)) { struct page *page; -#ifdef CONFIG_STACK_GROWSUP - ret = expand_stack_downwards(bprm->vma, pos); - if (ret < 0) { - /* We've exceed the stack rlimit. */ - ret = -E2BIG; - goto out; - } -#endif - ret = get_user_pages(current, bprm->mm, pos, - 1, 1, 1, &page, NULL); - if (ret <= 0) { - /* We've exceed the stack rlimit. */ + page = get_arg_page(bprm, pos, 1); + if (!page) { ret = -E2BIG; goto out; } @@ -1539,8 +1539,10 @@ int compat_do_execve(char * filename, return retval; out: - if (bprm->mm) + if (bprm->mm) { + acct_arg_size(bprm, 0); mmput(bprm->mm); + } out_file: if (bprm->file) { diff --git a/fs/exec.c b/fs/exec.c index 4303b9035fe..d68c378a313 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -164,7 +164,7 @@ out: #ifdef CONFIG_MMU -static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) +void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) { struct mm_struct *mm = current->mm; long diff = (long)(pages - bprm->vma_pages); @@ -183,7 +183,7 @@ static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) #endif } -static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, +struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, int write) { struct page *page; @@ -297,11 +297,11 @@ static bool valid_arg_len(struct linux_binprm *bprm, long len) #else -static inline void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) +void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) { } -static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, +struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, int write) { struct page *page; diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 7c87796d20d..64a7114a939 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -60,6 +60,10 @@ struct linux_binprm{ unsigned long loader, exec; }; +extern void acct_arg_size(struct linux_binprm *bprm, unsigned long pages); +extern struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, + int write); + #define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0 #define BINPRM_FLAGS_ENFORCE_NONDUMP (1 << BINPRM_FLAGS_ENFORCE_NONDUMP_BIT) -- cgit v1.2.3-70-g09d2 From b029ffafe89cf4b97cf39e0225a5205cbbf9e02f Mon Sep 17 00:00:00 2001 From: Hemanth V Date: Tue, 30 Nov 2010 23:03:54 -0800 Subject: Input: add CMA3000 accelerometer driver Add support for CMA3000 Tri-axis accelerometer, which supports Motion detect, Measurement and Free fall modes. CMA3000 supports both I2C/SPI bus for communication, currently the driver supports I2C based communication. Signed-off-by: Hemanth V Reviewed-by: Jonathan Cameron Reviewed-by: Sergio Aguirre Reviewed-by: Shubhrajyoti Signed-off-by: Dmitry Torokhov --- Documentation/input/cma3000_d0x.txt | 115 ++++++++++ drivers/input/misc/Kconfig | 24 +++ drivers/input/misc/Makefile | 2 + drivers/input/misc/cma3000_d0x.c | 398 +++++++++++++++++++++++++++++++++++ drivers/input/misc/cma3000_d0x.h | 42 ++++ drivers/input/misc/cma3000_d0x_i2c.c | 141 +++++++++++++ include/linux/input/cma3000.h | 59 ++++++ 7 files changed, 781 insertions(+) create mode 100644 Documentation/input/cma3000_d0x.txt create mode 100644 drivers/input/misc/cma3000_d0x.c create mode 100644 drivers/input/misc/cma3000_d0x.h create mode 100644 drivers/input/misc/cma3000_d0x_i2c.c create mode 100644 include/linux/input/cma3000.h (limited to 'include/linux') diff --git a/Documentation/input/cma3000_d0x.txt b/Documentation/input/cma3000_d0x.txt new file mode 100644 index 00000000000..29d088db4af --- /dev/null +++ b/Documentation/input/cma3000_d0x.txt @@ -0,0 +1,115 @@ +Kernel driver for CMA3000-D0x +============================ + +Supported chips: +* VTI CMA3000-D0x +Datasheet: + CMA3000-D0X Product Family Specification 8281000A.02.pdf + + +Author: Hemanth V + + +Description +----------- +CMA3000 Tri-axis accelerometer supports Motion detect, Measurement and +Free fall modes. + +Motion Detect Mode: Its the low power mode where interrupts are generated only +when motion exceeds the defined thresholds. + +Measurement Mode: This mode is used to read the acceleration data on X,Y,Z +axis and supports 400, 100, 40 Hz sample frequency. + +Free fall Mode: This mode is intended to save system resources. + +Threshold values: Chip supports defining threshold values for above modes +which includes time and g value. Refer product specifications for more details. + +CMA3000 chip supports mutually exclusive I2C and SPI interfaces for +communication, currently the driver supports I2C based communication only. +Initial configuration for bus mode is set in non volatile memory and can later +be modified through bus interface command. + +Driver reports acceleration data through input subsystem. It generates ABS_MISC +event with value 1 when free fall is detected. + +Platform data need to be configured for initial default values. + +Platform Data +------------- +fuzz_x: Noise on X Axis + +fuzz_y: Noise on Y Axis + +fuzz_z: Noise on Z Axis + +g_range: G range in milli g i.e 2000 or 8000 + +mode: Default Operating mode + +mdthr: Motion detect g range threshold value + +mdfftmr: Motion detect and free fall time threshold value + +ffthr: Free fall g range threshold value + +Input Interface +-------------- +Input driver version is 1.0.0 +Input device ID: bus 0x18 vendor 0x0 product 0x0 version 0x0 +Input device name: "cma3000-accelerometer" +Supported events: + Event type 0 (Sync) + Event type 3 (Absolute) + Event code 0 (X) + Value 47 + Min -8000 + Max 8000 + Fuzz 200 + Event code 1 (Y) + Value -28 + Min -8000 + Max 8000 + Fuzz 200 + Event code 2 (Z) + Value 905 + Min -8000 + Max 8000 + Fuzz 200 + Event code 40 (Misc) + Value 0 + Min 0 + Max 1 + Event type 4 (Misc) + + +Register/Platform parameters Description +---------------------------------------- + +mode: + 0: power down mode + 1: 100 Hz Measurement mode + 2: 400 Hz Measurement mode + 3: 40 Hz Measurement mode + 4: Motion Detect mode (default) + 5: 100 Hz Free fall mode + 6: 40 Hz Free fall mode + 7: Power off mode + +grange: + 2000: 2000 mg or 2G Range + 8000: 8000 mg or 8G Range + +mdthr: + X: X * 71mg (8G Range) + X: X * 18mg (2G Range) + +mdfftmr: + X: (X & 0x70) * 100 ms (MDTMR) + (X & 0x0F) * 2.5 ms (FFTMR 400 Hz) + (X & 0x0F) * 10 ms (FFTMR 100 Hz) + +ffthr: + X: (X >> 2) * 18mg (2G Range) + X: (X & 0x0F) * 71 mg (8G Range) diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig index b99b8cbde02..f0d90172a65 100644 --- a/drivers/input/misc/Kconfig +++ b/drivers/input/misc/Kconfig @@ -448,4 +448,28 @@ config INPUT_ADXL34X_SPI To compile this driver as a module, choose M here: the module will be called adxl34x-spi. +config INPUT_CMA3000 + tristate "VTI CMA3000 Tri-axis accelerometer" + help + Say Y here if you want to use VTI CMA3000_D0x Accelerometer + driver + + This driver currently only supports I2C interface to the + controller. Also select the I2C method. + + If unsure, say N + + To compile this driver as a module, choose M here: the + module will be called cma3000_d0x. + +config INPUT_CMA3000_I2C + tristate "Support I2C bus connection" + depends on INPUT_CMA3000 && I2C + help + Say Y here if you want to use VTI CMA3000_D0x Accelerometer + through I2C interface. + + To compile this driver as a module, choose M here: the + module will be called cma3000_d0x_i2c. + endif diff --git a/drivers/input/misc/Makefile b/drivers/input/misc/Makefile index 1fe1f6c8b73..35bcfe46555 100644 --- a/drivers/input/misc/Makefile +++ b/drivers/input/misc/Makefile @@ -18,6 +18,8 @@ obj-$(CONFIG_INPUT_ATI_REMOTE2) += ati_remote2.o obj-$(CONFIG_INPUT_ATLAS_BTNS) += atlas_btns.o obj-$(CONFIG_INPUT_BFIN_ROTARY) += bfin_rotary.o obj-$(CONFIG_INPUT_CM109) += cm109.o +obj-$(CONFIG_INPUT_CMA3000) += cma3000_d0x.o +obj-$(CONFIG_INPUT_CMA3000_I2C) += cma3000_d0x_i2c.o obj-$(CONFIG_INPUT_COBALT_BTNS) += cobalt_btns.o obj-$(CONFIG_INPUT_DM355EVM) += dm355evm_keys.o obj-$(CONFIG_HP_SDC_RTC) += hp_sdc_rtc.o diff --git a/drivers/input/misc/cma3000_d0x.c b/drivers/input/misc/cma3000_d0x.c new file mode 100644 index 00000000000..1633b634226 --- /dev/null +++ b/drivers/input/misc/cma3000_d0x.c @@ -0,0 +1,398 @@ +/* + * VTI CMA3000_D0x Accelerometer driver + * + * Copyright (C) 2010 Texas Instruments + * Author: Hemanth V + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include + +#include "cma3000_d0x.h" + +#define CMA3000_WHOAMI 0x00 +#define CMA3000_REVID 0x01 +#define CMA3000_CTRL 0x02 +#define CMA3000_STATUS 0x03 +#define CMA3000_RSTR 0x04 +#define CMA3000_INTSTATUS 0x05 +#define CMA3000_DOUTX 0x06 +#define CMA3000_DOUTY 0x07 +#define CMA3000_DOUTZ 0x08 +#define CMA3000_MDTHR 0x09 +#define CMA3000_MDFFTMR 0x0A +#define CMA3000_FFTHR 0x0B + +#define CMA3000_RANGE2G (1 << 7) +#define CMA3000_RANGE8G (0 << 7) +#define CMA3000_BUSI2C (0 << 4) +#define CMA3000_MODEMASK (7 << 1) +#define CMA3000_GRANGEMASK (1 << 7) + +#define CMA3000_STATUS_PERR 1 +#define CMA3000_INTSTATUS_FFDET (1 << 2) + +/* Settling time delay in ms */ +#define CMA3000_SETDELAY 30 + +/* Delay for clearing interrupt in us */ +#define CMA3000_INTDELAY 44 + + +/* + * Bit weights in mg for bit 0, other bits need + * multipy factor 2^n. Eight bit is the sign bit. + */ +#define BIT_TO_2G 18 +#define BIT_TO_8G 71 + +struct cma3000_accl_data { + const struct cma3000_bus_ops *bus_ops; + const struct cma3000_platform_data *pdata; + + struct device *dev; + struct input_dev *input_dev; + + int bit_to_mg; + int irq; + + int g_range; + u8 mode; + + struct mutex mutex; + bool opened; + bool suspended; +}; + +#define CMA3000_READ(data, reg, msg) \ + (data->bus_ops->read(data->dev, reg, msg)) +#define CMA3000_SET(data, reg, val, msg) \ + ((data)->bus_ops->write(data->dev, reg, val, msg)) + +/* + * Conversion for each of the eight modes to g, depending + * on G range i.e 2G or 8G. Some modes always operate in + * 8G. + */ + +static int mode_to_mg[8][2] = { + { 0, 0 }, + { BIT_TO_8G, BIT_TO_2G }, + { BIT_TO_8G, BIT_TO_2G }, + { BIT_TO_8G, BIT_TO_8G }, + { BIT_TO_8G, BIT_TO_8G }, + { BIT_TO_8G, BIT_TO_2G }, + { BIT_TO_8G, BIT_TO_2G }, + { 0, 0}, +}; + +static void decode_mg(struct cma3000_accl_data *data, int *datax, + int *datay, int *dataz) +{ + /* Data in 2's complement, convert to mg */ + *datax = ((s8)*datax) * data->bit_to_mg; + *datay = ((s8)*datay) * data->bit_to_mg; + *dataz = ((s8)*dataz) * data->bit_to_mg; +} + +static irqreturn_t cma3000_thread_irq(int irq, void *dev_id) +{ + struct cma3000_accl_data *data = dev_id; + int datax, datay, dataz; + u8 ctrl, mode, range, intr_status; + + intr_status = CMA3000_READ(data, CMA3000_INTSTATUS, "interrupt status"); + if (intr_status < 0) + return IRQ_NONE; + + /* Check if free fall is detected, report immediately */ + if (intr_status & CMA3000_INTSTATUS_FFDET) { + input_report_abs(data->input_dev, ABS_MISC, 1); + input_sync(data->input_dev); + } else { + input_report_abs(data->input_dev, ABS_MISC, 0); + } + + datax = CMA3000_READ(data, CMA3000_DOUTX, "X"); + datay = CMA3000_READ(data, CMA3000_DOUTY, "Y"); + dataz = CMA3000_READ(data, CMA3000_DOUTZ, "Z"); + + ctrl = CMA3000_READ(data, CMA3000_CTRL, "ctrl"); + mode = (ctrl & CMA3000_MODEMASK) >> 1; + range = (ctrl & CMA3000_GRANGEMASK) >> 7; + + data->bit_to_mg = mode_to_mg[mode][range]; + + /* Interrupt not for this device */ + if (data->bit_to_mg == 0) + return IRQ_NONE; + + /* Decode register values to milli g */ + decode_mg(data, &datax, &datay, &dataz); + + input_report_abs(data->input_dev, ABS_X, datax); + input_report_abs(data->input_dev, ABS_Y, datay); + input_report_abs(data->input_dev, ABS_Z, dataz); + input_sync(data->input_dev); + + return IRQ_HANDLED; +} + +static int cma3000_reset(struct cma3000_accl_data *data) +{ + int val; + + /* Reset sequence */ + CMA3000_SET(data, CMA3000_RSTR, 0x02, "Reset"); + CMA3000_SET(data, CMA3000_RSTR, 0x0A, "Reset"); + CMA3000_SET(data, CMA3000_RSTR, 0x04, "Reset"); + + /* Settling time delay */ + mdelay(10); + + val = CMA3000_READ(data, CMA3000_STATUS, "Status"); + if (val < 0) { + dev_err(data->dev, "Reset failed\n"); + return val; + } + + if (val & CMA3000_STATUS_PERR) { + dev_err(data->dev, "Parity Error\n"); + return -EIO; + } + + return 0; +} + +static int cma3000_poweron(struct cma3000_accl_data *data) +{ + const struct cma3000_platform_data *pdata = data->pdata; + u8 ctrl = 0; + int ret; + + if (data->g_range == CMARANGE_2G) { + ctrl = (data->mode << 1) | CMA3000_RANGE2G; + } else if (data->g_range == CMARANGE_8G) { + ctrl = (data->mode << 1) | CMA3000_RANGE8G; + } else { + dev_info(data->dev, + "Invalid G range specified, assuming 8G\n"); + ctrl = (data->mode << 1) | CMA3000_RANGE8G; + } + + ctrl |= data->bus_ops->ctrl_mod; + + CMA3000_SET(data, CMA3000_MDTHR, pdata->mdthr, + "Motion Detect Threshold"); + CMA3000_SET(data, CMA3000_MDFFTMR, pdata->mdfftmr, + "Time register"); + CMA3000_SET(data, CMA3000_FFTHR, pdata->ffthr, + "Free fall threshold"); + ret = CMA3000_SET(data, CMA3000_CTRL, ctrl, "Mode setting"); + if (ret < 0) + return -EIO; + + msleep(CMA3000_SETDELAY); + + return 0; +} + +static int cma3000_poweroff(struct cma3000_accl_data *data) +{ + int ret; + + ret = CMA3000_SET(data, CMA3000_CTRL, CMAMODE_POFF, "Mode setting"); + msleep(CMA3000_SETDELAY); + + return ret; +} + +static int cma3000_open(struct input_dev *input_dev) +{ + struct cma3000_accl_data *data = input_get_drvdata(input_dev); + + mutex_lock(&data->mutex); + + if (!data->suspended) + cma3000_poweron(data); + + data->opened = true; + + mutex_unlock(&data->mutex); + + return 0; +} + +static void cma3000_close(struct input_dev *input_dev) +{ + struct cma3000_accl_data *data = input_get_drvdata(input_dev); + + mutex_lock(&data->mutex); + + if (!data->suspended) + cma3000_poweroff(data); + + data->opened = false; + + mutex_unlock(&data->mutex); +} + +void cma3000_suspend(struct cma3000_accl_data *data) +{ + mutex_lock(&data->mutex); + + if (!data->suspended && data->opened) + cma3000_poweroff(data); + + data->suspended = true; + + mutex_unlock(&data->mutex); +} +EXPORT_SYMBOL(cma3000_suspend); + + +void cma3000_resume(struct cma3000_accl_data *data) +{ + mutex_lock(&data->mutex); + + if (data->suspended && data->opened) + cma3000_poweron(data); + + data->suspended = false; + + mutex_unlock(&data->mutex); +} +EXPORT_SYMBOL(cma3000_resume); + +struct cma3000_accl_data *cma3000_init(struct device *dev, int irq, + const struct cma3000_bus_ops *bops) +{ + const struct cma3000_platform_data *pdata = dev->platform_data; + struct cma3000_accl_data *data; + struct input_dev *input_dev; + int rev; + int error; + + if (!pdata) { + dev_err(dev, "platform data not found\n"); + error = -EINVAL; + goto err_out; + } + + + /* if no IRQ return error */ + if (irq == 0) { + error = -EINVAL; + goto err_out; + } + + data = kzalloc(sizeof(struct cma3000_accl_data), GFP_KERNEL); + input_dev = input_allocate_device(); + if (!data || !input_dev) { + error = -ENOMEM; + goto err_free_mem; + } + + data->dev = dev; + data->input_dev = input_dev; + data->bus_ops = bops; + data->pdata = pdata; + data->irq = irq; + mutex_init(&data->mutex); + + data->mode = pdata->mode; + if (data->mode < CMAMODE_DEFAULT || data->mode > CMAMODE_POFF) { + data->mode = CMAMODE_MOTDET; + dev_warn(dev, + "Invalid mode specified, assuming Motion Detect\n"); + } + + data->g_range = pdata->g_range; + if (data->g_range != CMARANGE_2G && data->g_range != CMARANGE_8G) { + dev_info(dev, + "Invalid G range specified, assuming 8G\n"); + data->g_range = CMARANGE_8G; + } + + input_dev->name = "cma3000-accelerometer"; + input_dev->id.bustype = bops->bustype; + input_dev->open = cma3000_open; + input_dev->close = cma3000_close; + + __set_bit(EV_ABS, input_dev->evbit); + + input_set_abs_params(input_dev, ABS_X, + -data->g_range, data->g_range, pdata->fuzz_x, 0); + input_set_abs_params(input_dev, ABS_Y, + -data->g_range, data->g_range, pdata->fuzz_y, 0); + input_set_abs_params(input_dev, ABS_Z, + -data->g_range, data->g_range, pdata->fuzz_z, 0); + input_set_abs_params(input_dev, ABS_MISC, 0, 1, 0, 0); + + input_set_drvdata(input_dev, data); + + error = cma3000_reset(data); + if (error) + goto err_free_mem; + + rev = CMA3000_READ(data, CMA3000_REVID, "Revid"); + if (rev < 0) { + error = rev; + goto err_free_mem; + } + + pr_info("CMA3000 Accelerometer: Revision %x\n", rev); + + error = request_threaded_irq(irq, NULL, cma3000_thread_irq, + pdata->irqflags | IRQF_ONESHOT, + "cma3000_d0x", data); + if (error) { + dev_err(dev, "request_threaded_irq failed\n"); + goto err_free_mem; + } + + error = input_register_device(data->input_dev); + if (error) { + dev_err(dev, "Unable to register input device\n"); + goto err_free_irq; + } + + return data; + +err_free_irq: + free_irq(irq, data); +err_free_mem: + input_free_device(input_dev); + kfree(data); +err_out: + return ERR_PTR(error); +} +EXPORT_SYMBOL(cma3000_init); + +void cma3000_exit(struct cma3000_accl_data *data) +{ + free_irq(data->irq, data); + input_unregister_device(data->input_dev); + kfree(data); +} +EXPORT_SYMBOL(cma3000_exit); + +MODULE_DESCRIPTION("CMA3000-D0x Accelerometer Driver"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Hemanth V "); diff --git a/drivers/input/misc/cma3000_d0x.h b/drivers/input/misc/cma3000_d0x.h new file mode 100644 index 00000000000..2304ce306e1 --- /dev/null +++ b/drivers/input/misc/cma3000_d0x.h @@ -0,0 +1,42 @@ +/* + * VTI CMA3000_D0x Accelerometer driver + * + * Copyright (C) 2010 Texas Instruments + * Author: Hemanth V + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef _INPUT_CMA3000_H +#define _INPUT_CMA3000_H + +#include +#include + +struct device; +struct cma3000_accl_data; + +struct cma3000_bus_ops { + u16 bustype; + u8 ctrl_mod; + int (*read)(struct device *, u8, char *); + int (*write)(struct device *, u8, u8, char *); +}; + +struct cma3000_accl_data *cma3000_init(struct device *dev, int irq, + const struct cma3000_bus_ops *bops); +void cma3000_exit(struct cma3000_accl_data *); +void cma3000_suspend(struct cma3000_accl_data *); +void cma3000_resume(struct cma3000_accl_data *); + +#endif diff --git a/drivers/input/misc/cma3000_d0x_i2c.c b/drivers/input/misc/cma3000_d0x_i2c.c new file mode 100644 index 00000000000..c52d278a794 --- /dev/null +++ b/drivers/input/misc/cma3000_d0x_i2c.c @@ -0,0 +1,141 @@ +/* + * Implements I2C interface for VTI CMA300_D0x Accelerometer driver + * + * Copyright (C) 2010 Texas Instruments + * Author: Hemanth V + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#include +#include +#include +#include "cma3000_d0x.h" + +static int cma3000_i2c_set(struct device *dev, + u8 reg, u8 val, char *msg) +{ + struct i2c_client *client = to_i2c_client(dev); + int ret; + + ret = i2c_smbus_write_byte_data(client, reg, val); + if (ret < 0) + dev_err(&client->dev, + "%s failed (%s, %d)\n", __func__, msg, ret); + return ret; +} + +static int cma3000_i2c_read(struct device *dev, u8 reg, char *msg) +{ + struct i2c_client *client = to_i2c_client(dev); + int ret; + + ret = i2c_smbus_read_byte_data(client, reg); + if (ret < 0) + dev_err(&client->dev, + "%s failed (%s, %d)\n", __func__, msg, ret); + return ret; +} + +static const struct cma3000_bus_ops cma3000_i2c_bops = { + .bustype = BUS_I2C, +#define CMA3000_BUSI2C (0 << 4) + .ctrl_mod = CMA3000_BUSI2C, + .read = cma3000_i2c_read, + .write = cma3000_i2c_set, +}; + +static int __devinit cma3000_i2c_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct cma3000_accl_data *data; + + data = cma3000_init(&client->dev, client->irq, &cma3000_i2c_bops); + if (IS_ERR(data)) + return PTR_ERR(data); + + i2c_set_clientdata(client, data); + + return 0; +} + +static int __devexit cma3000_i2c_remove(struct i2c_client *client) +{ + struct cma3000_accl_data *data = i2c_get_clientdata(client); + + cma3000_exit(data); + + return 0; +} + +#ifdef CONFIG_PM +static int cma3000_i2c_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct cma3000_accl_data *data = i2c_get_clientdata(client); + + cma3000_suspend(data); + + return 0; +} + +static int cma3000_i2c_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct cma3000_accl_data *data = i2c_get_clientdata(client); + + cma3000_resume(data); + + return 0; +} + +static const struct dev_pm_ops cma3000_i2c_pm_ops = { + .suspend = cma3000_i2c_suspend, + .resume = cma3000_i2c_resume, +}; +#endif + +static const struct i2c_device_id cma3000_i2c_id[] = { + { "cma3000_d01", 0 }, + { }, +}; + +static struct i2c_driver cma3000_i2c_driver = { + .probe = cma3000_i2c_probe, + .remove = __devexit_p(cma3000_i2c_remove), + .id_table = cma3000_i2c_id, + .driver = { + .name = "cma3000_i2c_accl", + .owner = THIS_MODULE, +#ifdef CONFIG_PM + .pm = &cma3000_i2c_pm_ops, +#endif + }, +}; + +static int __init cma3000_i2c_init(void) +{ + return i2c_add_driver(&cma3000_i2c_driver); +} + +static void __exit cma3000_i2c_exit(void) +{ + i2c_del_driver(&cma3000_i2c_driver); +} + +module_init(cma3000_i2c_init); +module_exit(cma3000_i2c_exit); + +MODULE_DESCRIPTION("CMA3000-D0x Accelerometer I2C Driver"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Hemanth V "); diff --git a/include/linux/input/cma3000.h b/include/linux/input/cma3000.h new file mode 100644 index 00000000000..cbbaac27d31 --- /dev/null +++ b/include/linux/input/cma3000.h @@ -0,0 +1,59 @@ +/* + * VTI CMA3000_Dxx Accelerometer driver + * + * Copyright (C) 2010 Texas Instruments + * Author: Hemanth V + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef _LINUX_CMA3000_H +#define _LINUX_CMA3000_H + +#define CMAMODE_DEFAULT 0 +#define CMAMODE_MEAS100 1 +#define CMAMODE_MEAS400 2 +#define CMAMODE_MEAS40 3 +#define CMAMODE_MOTDET 4 +#define CMAMODE_FF100 5 +#define CMAMODE_FF400 6 +#define CMAMODE_POFF 7 + +#define CMARANGE_2G 2000 +#define CMARANGE_8G 8000 + +/** + * struct cma3000_i2c_platform_data - CMA3000 Platform data + * @fuzz_x: Noise on X Axis + * @fuzz_y: Noise on Y Axis + * @fuzz_z: Noise on Z Axis + * @g_range: G range in milli g i.e 2000 or 8000 + * @mode: Operating mode + * @mdthr: Motion detect threshold value + * @mdfftmr: Motion detect and free fall time value + * @ffthr: Free fall threshold value + */ + +struct cma3000_platform_data { + int fuzz_x; + int fuzz_y; + int fuzz_z; + int g_range; + uint8_t mode; + uint8_t mdthr; + uint8_t mdfftmr; + uint8_t ffthr; + unsigned long irqflags; +}; + +#endif -- cgit v1.2.3-70-g09d2 From 0417596f66dd6621f4fd46563c7c56a95311dbe8 Mon Sep 17 00:00:00 2001 From: Bastien Nocera Date: Mon, 29 Nov 2010 23:33:05 -0800 Subject: Input: add keycodes for touchpad on/off keys Some laptops will have a "touchpad toggle" soft button, which expects user-space to turn off the touchpad themselves, some other devices will do this in hardware, but send key events telling us that the touchpad has been turned off/on. KEY_TOUCHPAD_ON/KEY_TOUCHPAD_OFF will be used by user-space to show a popup with the status of the touchpad. Signed-off-by: Bastien Nocera Signed-off-by: Dmitry Torokhov --- include/linux/input.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/input.h b/include/linux/input.h index 6ef44465db8..a50046f1537 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -624,6 +624,10 @@ struct input_keymap_entry { #define KEY_CAMERA_FOCUS 0x210 #define KEY_WPS_BUTTON 0x211 /* WiFi Protected Setup key */ +#define KEY_TOUCHPAD_TOGGLE 0x212 /* Request switch touchpad on or off */ +#define KEY_TOUCHPAD_ON 0x213 +#define KEY_TOUCHPAD_OFF 0x214 + #define BTN_TRIGGER_HAPPY 0x2c0 #define BTN_TRIGGER_HAPPY1 0x2c0 #define BTN_TRIGGER_HAPPY2 0x2c1 -- cgit v1.2.3-70-g09d2 From 86b17f76f462db460d6d916e105a4c44cb353e36 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 29 Nov 2010 23:33:04 -0800 Subject: Input: document struct input_absinfo Add documentation for struct input_absinfo that is used in EVIOCGABS and EVIOCSABS ioctl and specify units of measure used for reporting resolution for an axis. Acked-by: Henrik Rydberg Signed-off-by: Dmitry Torokhov --- include/linux/input.h | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/input.h b/include/linux/input.h index a50046f1537..a8af21d42bc 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -47,6 +47,25 @@ struct input_id { __u16 version; }; +/** + * struct input_absinfo - used by EVIOCGABS/EVIOCSABS ioctls + * @value: latest reported value for the axis. + * @minimum: specifies minimum value for the axis. + * @maximum: specifies maximum value for the axis. + * @fuzz: specifies fuzz value that is used to filter noise from + * the event stream. + * @flat: values that are within this value will be discarded by + * joydev interface and reported as 0 instead. + * @resolution: specifies resolution for the values reported for + * the axis. + * + * Note that input core does not clamp reported values to the + * [minimum, maximum] limits, such task is left to userspace. + * + * Resolution for main axes (ABS_X, ABS_Y, ABS_Z) is reported in + * units per millimeter (units/mm), resolution for rotational axes + * (ABS_RX, ABS_RY, ABS_RZ) is reported in units per radian. + */ struct input_absinfo { __s32 value; __s32 minimum; @@ -1134,7 +1153,7 @@ struct input_mt_slot { * of tracked contacts * @mtsize: number of MT slots the device uses * @slot: MT slot currently being transmitted - * @absinfo: array of &struct absinfo elements holding information + * @absinfo: array of &struct input_absinfo elements holding information * about absolute axes (current value, min, max, flat, fuzz, * resolution) * @key: reflects current state of device's keys/buttons -- cgit v1.2.3-70-g09d2 From 8348c259dd6a6019a8fa01b0a3443409480f7b9d Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 22 Nov 2010 17:12:15 -0800 Subject: arm/pxa2xx: reorgazine SSP and SPI header files The PXA-SPI driver relies on some files / defines which are arm specific and are within the ARM tree. The CE4100 SoC which is x86 has also the SPI core. This patch moves the ssp and spi files from arm/mach-pxa and plat-pxa to include/linux where the CE4100 can access them. This move got verified by building the following defconfigs: cm_x2xx_defconfig corgi_defconfig em_x270_defconfig ezx_defconfig imote2_defconfig pxa3xx_defconfig spitz_defconfig zeus_defconfig raumfeld_defconfig magician_defconfig Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Dirk Brandewie --- Documentation/spi/pxa2xx | 4 +- arch/arm/mach-pxa/cm-x255.c | 2 +- arch/arm/mach-pxa/cm-x270.c | 2 +- arch/arm/mach-pxa/corgi.c | 2 +- arch/arm/mach-pxa/devices.c | 2 +- arch/arm/mach-pxa/em-x270.c | 2 +- arch/arm/mach-pxa/hx4700.c | 2 +- arch/arm/mach-pxa/icontrol.c | 2 +- arch/arm/mach-pxa/include/mach/pxa2xx_spi.h | 47 ------- arch/arm/mach-pxa/littleton.c | 2 +- arch/arm/mach-pxa/lubbock.c | 2 +- arch/arm/mach-pxa/pcm027.c | 2 +- arch/arm/mach-pxa/poodle.c | 2 +- arch/arm/mach-pxa/spitz.c | 3 +- arch/arm/mach-pxa/stargate2.c | 2 +- arch/arm/mach-pxa/tosa.c | 2 +- arch/arm/mach-pxa/trizeps4.c | 1 - arch/arm/mach-pxa/z2.c | 2 +- arch/arm/mach-pxa/zeus.c | 2 +- arch/arm/plat-pxa/include/plat/ssp.h | 187 ---------------------------- arch/arm/plat-pxa/ssp.c | 2 +- drivers/spi/pxa2xx_spi.c | 4 +- include/linux/pxa2xx_ssp.h | 187 ++++++++++++++++++++++++++++ include/linux/spi/pxa2xx_spi.h | 49 ++++++++ sound/soc/pxa/pxa-ssp.c | 2 +- 25 files changed, 257 insertions(+), 259 deletions(-) delete mode 100644 arch/arm/mach-pxa/include/mach/pxa2xx_spi.h delete mode 100644 arch/arm/plat-pxa/include/plat/ssp.h create mode 100644 include/linux/pxa2xx_ssp.h create mode 100644 include/linux/spi/pxa2xx_spi.h (limited to 'include/linux') diff --git a/Documentation/spi/pxa2xx b/Documentation/spi/pxa2xx index 6bb916d57c9..68a4fe3818a 100644 --- a/Documentation/spi/pxa2xx +++ b/Documentation/spi/pxa2xx @@ -19,7 +19,7 @@ Declaring PXA2xx Master Controllers ----------------------------------- Typically a SPI master is defined in the arch/.../mach-*/board-*.c as a "platform device". The master configuration is passed to the driver via a table -found in arch/arm/mach-pxa/include/mach/pxa2xx_spi.h: +found in include/linux/spi/pxa2xx_spi.h: struct pxa2xx_spi_master { enum pxa_ssp_type ssp_type; @@ -94,7 +94,7 @@ using the "spi_board_info" structure found in "linux/spi/spi.h". See Each slave device attached to the PXA must provide slave specific configuration information via the structure "pxa2xx_spi_chip" found in -"arch/arm/mach-pxa/include/mach/pxa2xx_spi.h". The pxa2xx_spi master controller driver +"include/linux/spi/pxa2xx_spi.h". The pxa2xx_spi master controller driver will uses the configuration whenever the driver communicates with the slave device. All fields are optional. diff --git a/arch/arm/mach-pxa/cm-x255.c b/arch/arm/mach-pxa/cm-x255.c index f1a7703d771..93f59f877fc 100644 --- a/arch/arm/mach-pxa/cm-x255.c +++ b/arch/arm/mach-pxa/cm-x255.c @@ -17,13 +17,13 @@ #include #include +#include #include #include #include #include -#include #include "generic.h" diff --git a/arch/arm/mach-pxa/cm-x270.c b/arch/arm/mach-pxa/cm-x270.c index a9926bb7592..b88d601a809 100644 --- a/arch/arm/mach-pxa/cm-x270.c +++ b/arch/arm/mach-pxa/cm-x270.c @@ -19,12 +19,12 @@ #include