From 2527e681fd4fd4231c2e04f09d7b04d3cab8eefe Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Thu, 20 Jul 2006 11:25:50 +0300 Subject: IB/mad: Validate MADs for spec compliance Validate MADs sent by userspace clients for spec compliance with C13-18.1.1 (prevent duplicate requests and responses sent on the same port). Without this, RMPP transactions get aborted because of duplicate packets. This patch is similar to that provided by Jack Morgenstein. Signed-off-by: Sean Hefty Signed-off-by: Michael S. Tsirkin Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- include/rdma/ib_mad.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 5ff77558013..585d28e960d 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -75,6 +75,7 @@ #define IB_MGMT_METHOD_TRAP_REPRESS 0x07 #define IB_MGMT_METHOD_RESP 0x80 +#define IB_BM_ATTR_MOD_RESP cpu_to_be32(1) #define IB_MGMT_MAX_METHODS 128 @@ -246,6 +247,12 @@ struct ib_mad_send_buf { int retries; }; +/** + * ib_response_mad - Returns if the specified MAD has been generated in + * response to a sent request or trap. + */ +int ib_response_mad(struct ib_mad *mad); + /** * ib_get_rmpp_resptime - Returns the RMPP response time. * @rmpp_hdr: An RMPP header. -- cgit v1.2.3-70-g09d2 From 2266d8886f64c66e0a4e61e3e1c19dbc27ed00d4 Mon Sep 17 00:00:00 2001 From: Guillaume Chazarain Date: Sun, 23 Jul 2006 23:37:24 -0700 Subject: [PKT_SCHED]: Fix regression in PSCHED_TADD{,2}. In PSCHED_TADD and PSCHED_TADD2, if delta is less than tv.tv_usec (so, less than USEC_PER_SEC too) then tv_res will be smaller than tv. The assignment "(tv_res).tv_usec = __delta;" is wrong. The fix is to revert to the original code before 4ee303dfeac6451b402e3d8512723d3a0f861857 and change the 'if' into a 'while'. [Shuya MAEDA: "while (__delta >= USEC_PER_SEC){ ... }" instead of "while (__delta > USEC_PER_SEC){ ... 
}"] Signed-off-by: Guillaume Chazarain Signed-off-by: David S. Miller --- include/net/pkt_sched.h | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 1925c65e617..f6afee73235 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -169,23 +169,17 @@ psched_tod_diff(int delta_sec, int bound) #define PSCHED_TADD2(tv, delta, tv_res) \ ({ \ - int __delta = (delta); \ - (tv_res) = (tv); \ - while(__delta >= USEC_PER_SEC){ \ - (tv_res).tv_sec++; \ - __delta -= USEC_PER_SEC; \ - } \ + int __delta = (tv).tv_usec + (delta); \ + (tv_res).tv_sec = (tv).tv_sec; \ + while (__delta >= USEC_PER_SEC) { (tv_res).tv_sec++; __delta -= USEC_PER_SEC; } \ (tv_res).tv_usec = __delta; \ }) #define PSCHED_TADD(tv, delta) \ ({ \ - int __delta = (delta); \ - while(__delta >= USEC_PER_SEC){ \ - (tv).tv_sec++; \ - __delta -= USEC_PER_SEC; \ - } \ - (tv).tv_usec = __delta; \ + (tv).tv_usec += (delta); \ + while ((tv).tv_usec >= USEC_PER_SEC) { (tv).tv_sec++; \ + (tv).tv_usec -= USEC_PER_SEC; } \ }) /* Set/check that time is in the "past perfect"; -- cgit v1.2.3-70-g09d2 From 37182d1bd3264cf9c0dce3408bee48af0755de7e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jul 2006 15:30:28 -0700 Subject: [NET]: Remove CONFIG_HAVE_ARCH_DEV_ALLOC_SKB skbuff.h has an #ifndef CONFIG_HAVE_ARCH_DEV_ALLOC_SKB to allow architectures to reimplement __dev_alloc_skb. It's not set on any architecture and now that we have an architecture-overrideable NET_SKB_PAD there is no point at all in having one either. Signed-off-by: Christoph Hellwig Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0bf31b83578..f9875797664 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1066,7 +1066,6 @@ static inline void __skb_queue_purge(struct sk_buff_head *list) kfree_skb(skb); } -#ifndef CONFIG_HAVE_ARCH_DEV_ALLOC_SKB /** * __dev_alloc_skb - allocate an skbuff for sending * @length: length to allocate @@ -1087,9 +1086,6 @@ static inline struct sk_buff *__dev_alloc_skb(unsigned int length, skb_reserve(skb, NET_SKB_PAD); return skb; } -#else -extern struct sk_buff *__dev_alloc_skb(unsigned int length, int gfp_mask); -#endif /** * dev_alloc_skb - allocate an skbuff for sending -- cgit v1.2.3-70-g09d2 From b4e54de8d34afe7fcf08bfe91070d9dfeae6ed27 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jul 2006 15:31:14 -0700 Subject: [NET]: Correct dev_alloc_skb kerneldoc dev_alloc_skb is designated for RX descriptors, not TX. (Some drivers use it for the latter anyway, but that's a different story) Signed-off-by: Christoph Hellwig Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f9875797664..4307e764ef0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1067,7 +1067,7 @@ static inline void __skb_queue_purge(struct sk_buff_head *list) } /** - * __dev_alloc_skb - allocate an skbuff for sending + * __dev_alloc_skb - allocate an skbuff for receiving * @length: length to allocate * @gfp_mask: get_free_pages mask, passed to alloc_skb * @@ -1088,7 +1088,7 @@ static inline struct sk_buff *__dev_alloc_skb(unsigned int length, } /** - * dev_alloc_skb - allocate an skbuff for sending + * dev_alloc_skb - allocate an skbuff for receiving * @length: length to allocate * * Allocate a new &sk_buff and assign it a usage count of one. The -- cgit v1.2.3-70-g09d2 From 29ed46015dd61f99d203ec7ab307ccf92d2d0cf2 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 22 Jul 2006 02:05:07 -0700 Subject: [SPARC]: Fix SA_STATIC_ALLOC value. It aliases IRQF_SHARED which causes all kinds of problems. Signed-off-by: David S. Miller --- include/asm-sparc/signal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-sparc/signal.h b/include/asm-sparc/signal.h index 0ae5084c427..d03a21c97ab 100644 --- a/include/asm-sparc/signal.h +++ b/include/asm-sparc/signal.h @@ -168,7 +168,7 @@ struct sigstack { * statically allocated data.. which is NOT GOOD. * */ -#define SA_STATIC_ALLOC 0x80 +#define SA_STATIC_ALLOC 0x8000 #endif #include -- cgit v1.2.3-70-g09d2 From 10ea6ac895418bd0d23900e3330daa6ba0836d26 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 24 Jul 2006 22:54:55 -0700 Subject: [NETFILTER]: bridge netfilter: add deferred output hooks to feature-removal-schedule Add bridge netfilter deferred output hooks to feature-removal-schedule and disable them by default. 
Until their removal they will be activated by the physdev match when needed. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- Documentation/feature-removal-schedule.txt | 16 ++++++++++++++++ include/linux/netfilter_bridge.h | 2 ++ net/bridge/br_netfilter.c | 5 +++++ net/netfilter/xt_physdev.c | 15 +++++++++++++++ 4 files changed, 38 insertions(+) (limited to 'include') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 9d3a0775a11..87851efb022 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -258,3 +258,19 @@ Why: These drivers never compiled since they were added to the kernel Who: Jean Delvare --------------------------- + +What: Bridge netfilter deferred IPv4/IPv6 output hook calling +When: January 2007 +Why: The deferred output hooks are a layering violation causing unusual + and broken behaviour on bridge devices. Examples of things they + break include QoS classifation using the MARK or CLASSIFY targets, + the IPsec policy match and connection tracking with VLANs on a + bridge. Their only use is to enable bridge output port filtering + within iptables with the physdev match, which can also be done by + combining iptables and ebtables using netfilter marks. Until it + will get removed the hook deferral is disabled by default and is + only enabled when needed. 
+ +Who: Patrick McHardy + +--------------------------- diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index 87764022cc6..31f02ba036c 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -79,6 +79,8 @@ struct bridge_skb_cb { __u32 ipv4; } daddr; }; + +extern int brnf_deferred_hooks; #endif /* CONFIG_BRIDGE_NETFILTER */ #endif /* __KERNEL__ */ diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index cbc8a389a0a..05b3de88824 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -61,6 +61,9 @@ static int brnf_filter_vlan_tagged = 1; #define brnf_filter_vlan_tagged 1 #endif +int brnf_deferred_hooks; +EXPORT_SYMBOL_GPL(brnf_deferred_hooks); + static __be16 inline vlan_proto(const struct sk_buff *skb) { return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto; @@ -890,6 +893,8 @@ static unsigned int ip_sabotage_out(unsigned int hook, struct sk_buff **pskb, return NF_ACCEPT; else if (ip->version == 6 && !brnf_call_ip6tables) return NF_ACCEPT; + else if (!brnf_deferred_hooks) + return NF_ACCEPT; #endif if (hook == NF_IP_POST_ROUTING) return NF_ACCEPT; diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index 5fe4c9df17f..a9f4f6f3c62 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -113,6 +113,21 @@ checkentry(const char *tablename, if (!(info->bitmask & XT_PHYSDEV_OP_MASK) || info->bitmask & ~XT_PHYSDEV_OP_MASK) return 0; + if (brnf_deferred_hooks == 0 && + info->bitmask & XT_PHYSDEV_OP_OUT && + (!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) || + info->invert & XT_PHYSDEV_OP_BRIDGED) && + hook_mask & ((1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_FORWARD) | + (1 << NF_IP_POST_ROUTING))) { + printk(KERN_WARNING "physdev match: using --physdev-out in the " + "OUTPUT, FORWARD and POSTROUTING chains for non-bridged " + "traffic is deprecated and breaks other things, it will " + "be removed in January 2007. 
See Documentation/" + "feature-removal-schedule.txt for details. This doesn't " + "affect you in case you're using it for purely bridged " + "traffic.\n"); + brnf_deferred_hooks = 1; + } return 1; } -- cgit v1.2.3-70-g09d2 From 153d7f3fcae7ed4e19328549aa9467acdfbced10 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 26 Jul 2006 15:40:07 +0200 Subject: [PATCH] Reorganize the cpufreq cpu hotplug locking to not be totally bizare The patch below moves the cpu hotplugging higher up in the cpufreq layering; this is needed to avoid recursive taking of the cpu hotplug lock and to otherwise detangle the mess. The new rules are: 1. you must do lock_cpu_hotplug() around the following functions: __cpufreq_driver_target __cpufreq_governor (for CPUFREQ_GOV_LIMITS operation only) __cpufreq_set_policy 2. governor methods (.governor) must NOT take the lock_cpu_hotplug() lock in any way; they are called with the lock taken already 3. if your governor spawns a thread that does things, like calling __cpufreq_driver_target, your thread must honor rule #1. 4. the policy lock and other cpufreq internal locks nest within the lock_cpu_hotplug() lock. I'm not entirely happy about how the __cpufreq_governor rule ended up (conditional locking rule depending on the argument) but basically all callers pass this as a constant so it's not too horrible. The patch also removes the cpufreq_governor() function since during the locking audit it turned out to be entirely unused (so no need to fix it) The patch works on my testbox, but it could use more testing (otoh... 
it can't be much worse than the current code) Signed-off-by: Arjan van de Ven Signed-off-by: Linus Torvalds --- drivers/cpufreq/cpufreq.c | 40 +++++++++++++++------------------- drivers/cpufreq/cpufreq_conservative.c | 2 -- drivers/cpufreq/cpufreq_ondemand.c | 4 ++-- drivers/cpufreq/cpufreq_userspace.c | 3 +++ include/linux/cpufreq.h | 3 --- 5 files changed, 23 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 8d328186f77..bc1088d9b37 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -364,10 +364,12 @@ static ssize_t store_##file_name \ if (ret != 1) \ return -EINVAL; \ \ + lock_cpu_hotplug(); \ mutex_lock(&policy->lock); \ ret = __cpufreq_set_policy(policy, &new_policy); \ policy->user_policy.object = policy->object; \ mutex_unlock(&policy->lock); \ + unlock_cpu_hotplug(); \ \ return ret ? ret : count; \ } @@ -1197,20 +1199,18 @@ EXPORT_SYMBOL(cpufreq_unregister_notifier); *********************************************************************/ +/* Must be called with lock_cpu_hotplug held */ int __cpufreq_driver_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { int retval = -EINVAL; - lock_cpu_hotplug(); dprintk("target for CPU %u: %u kHz, relation %u\n", policy->cpu, target_freq, relation); if (cpu_online(policy->cpu) && cpufreq_driver->target) retval = cpufreq_driver->target(policy, target_freq, relation); - unlock_cpu_hotplug(); - return retval; } EXPORT_SYMBOL_GPL(__cpufreq_driver_target); @@ -1225,17 +1225,23 @@ int cpufreq_driver_target(struct cpufreq_policy *policy, if (!policy) return -EINVAL; + lock_cpu_hotplug(); mutex_lock(&policy->lock); ret = __cpufreq_driver_target(policy, target_freq, relation); mutex_unlock(&policy->lock); + unlock_cpu_hotplug(); cpufreq_cpu_put(policy); return ret; } EXPORT_SYMBOL_GPL(cpufreq_driver_target); +/* + * Locking: Must be called with the lock_cpu_hotplug() lock held + * when "event" 
is CPUFREQ_GOV_LIMITS + */ static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event) { @@ -1257,24 +1263,6 @@ static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event) } -int cpufreq_governor(unsigned int cpu, unsigned int event) -{ - int ret = 0; - struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); - - if (!policy) - return -EINVAL; - - mutex_lock(&policy->lock); - ret = __cpufreq_governor(policy, event); - mutex_unlock(&policy->lock); - - cpufreq_cpu_put(policy); - return ret; -} -EXPORT_SYMBOL_GPL(cpufreq_governor); - - int cpufreq_register_governor(struct cpufreq_governor *governor) { struct cpufreq_governor *t; @@ -1342,6 +1330,9 @@ int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu) EXPORT_SYMBOL(cpufreq_get_policy); +/* + * Locking: Must be called with the lock_cpu_hotplug() lock held + */ static int __cpufreq_set_policy(struct cpufreq_policy *data, struct cpufreq_policy *policy) { int ret = 0; @@ -1436,6 +1427,8 @@ int cpufreq_set_policy(struct cpufreq_policy *policy) if (!data) return -EINVAL; + lock_cpu_hotplug(); + /* lock this CPU */ mutex_lock(&data->lock); @@ -1446,6 +1439,8 @@ int cpufreq_set_policy(struct cpufreq_policy *policy) data->user_policy.governor = data->governor; mutex_unlock(&data->lock); + + unlock_cpu_hotplug(); cpufreq_cpu_put(data); return ret; @@ -1469,6 +1464,7 @@ int cpufreq_update_policy(unsigned int cpu) if (!data) return -ENODEV; + lock_cpu_hotplug(); mutex_lock(&data->lock); dprintk("updating policy for CPU %u\n", cpu); @@ -1494,7 +1490,7 @@ int cpufreq_update_policy(unsigned int cpu) ret = __cpufreq_set_policy(data, &policy); mutex_unlock(&data->lock); - + unlock_cpu_hotplug(); cpufreq_cpu_put(data); return ret; } diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index b3ebc8f0197..c4c578defab 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -525,7 +525,6 @@ static 
int cpufreq_governor_dbs(struct cpufreq_policy *policy, break; case CPUFREQ_GOV_LIMITS: - lock_cpu_hotplug(); mutex_lock(&dbs_mutex); if (policy->max < this_dbs_info->cur_policy->cur) __cpufreq_driver_target( @@ -536,7 +535,6 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, this_dbs_info->cur_policy, policy->min, CPUFREQ_RELATION_L); mutex_unlock(&dbs_mutex); - unlock_cpu_hotplug(); break; } return 0; diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 178f0c547eb..52cf1f02182 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -309,7 +309,9 @@ static void do_dbs_timer(void *data) if (!dbs_info->enable) return; + lock_cpu_hotplug(); dbs_check_cpu(dbs_info); + unlock_cpu_hotplug(); queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, usecs_to_jiffies(dbs_tuners_ins.sampling_rate)); } @@ -412,7 +414,6 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, break; case CPUFREQ_GOV_LIMITS: - lock_cpu_hotplug(); mutex_lock(&dbs_mutex); if (policy->max < this_dbs_info->cur_policy->cur) __cpufreq_driver_target(this_dbs_info->cur_policy, @@ -423,7 +424,6 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, policy->min, CPUFREQ_RELATION_L); mutex_unlock(&dbs_mutex); - unlock_cpu_hotplug(); break; } return 0; diff --git a/drivers/cpufreq/cpufreq_userspace.c b/drivers/cpufreq/cpufreq_userspace.c index 44ae5e5b94c..a06c204589c 100644 --- a/drivers/cpufreq/cpufreq_userspace.c +++ b/drivers/cpufreq/cpufreq_userspace.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -70,6 +71,7 @@ static int cpufreq_set(unsigned int freq, struct cpufreq_policy *policy) dprintk("cpufreq_set for cpu %u, freq %u kHz\n", policy->cpu, freq); + lock_cpu_hotplug(); mutex_lock(&userspace_mutex); if (!cpu_is_managed[policy->cpu]) goto err; @@ -92,6 +94,7 @@ static int cpufreq_set(unsigned int freq, struct cpufreq_policy *policy) err: 
mutex_unlock(&userspace_mutex); + unlock_cpu_hotplug(); return ret; } diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 35e137636b0..4ea39fee99c 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -172,9 +172,6 @@ extern int __cpufreq_driver_target(struct cpufreq_policy *policy, unsigned int relation); -/* pass an event to the cpufreq governor */ -int cpufreq_governor(unsigned int cpu, unsigned int event); - int cpufreq_register_governor(struct cpufreq_governor *governor); void cpufreq_unregister_governor(struct cpufreq_governor *governor); -- cgit v1.2.3-70-g09d2 From 92f282988b4ce3967ee8399f7d1184ebfa04e48b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 27 Jul 2006 16:49:21 -0700 Subject: [SPARC64]: Fix quad-float multiply emulation. Something is wrong with the 3-multiply (vs. 4-multiply) optimized version of _FP_MUL_MEAT_2_*(), so just use the slower version which actually computes correct values. Noticed by Rene Rebe Signed-off-by: David S. Miller --- include/asm-sparc64/sfp-machine.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-sparc64/sfp-machine.h b/include/asm-sparc64/sfp-machine.h index 5015bb8d6c3..89d42431efb 100644 --- a/include/asm-sparc64/sfp-machine.h +++ b/include/asm-sparc64/sfp-machine.h @@ -34,7 +34,7 @@ #define _FP_MUL_MEAT_D(R,X,Y) \ _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm) #define _FP_MUL_MEAT_Q(R,X,Y) \ - _FP_MUL_MEAT_2_wide_3mul(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm) + _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm) #define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_imm(S,R,X,Y,_FP_DIV_HELP_imm) #define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_1_udiv_norm(D,R,X,Y) -- cgit v1.2.3-70-g09d2 From b8cfac4c2f3d12d0f4cbe6f992d945f2fdfc098d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 27 Jul 2006 17:57:32 -0700 Subject: [SPARC64]: Fix typo in pgprot_noncached(). The sun4v code sequence was or'ing in the sun4u pte bits by mistake. 
Signed-off-by: David S. Miller --- include/asm-sparc64/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-sparc64/pgtable.h b/include/asm-sparc64/pgtable.h index 03f5bc9b6be..1ba19eb34ce 100644 --- a/include/asm-sparc64/pgtable.h +++ b/include/asm-sparc64/pgtable.h @@ -339,7 +339,7 @@ static inline pgprot_t pgprot_noncached(pgprot_t prot) " .section .sun4v_2insn_patch, \"ax\"\n" " .word 661b\n" " andn %0, %4, %0\n" - " or %0, %3, %0\n" + " or %0, %5, %0\n" " .previous\n" : "=r" (val) : "0" (val), "i" (_PAGE_CP_4U | _PAGE_CV_4U), "i" (_PAGE_E_4U), -- cgit v1.2.3-70-g09d2 From 361934849e9c0418950bedf667732f36337d88b9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 28 Jul 2006 08:54:59 +0200 Subject: [PATCH] ide: option to disable cache flushes for buggy drives Some drives claim they support cache flushing, but get seriously confused if you try. Add this option to be able to boot with barriers enabled by default. Signed-off-by: Jens Axboe --- drivers/ide/ide-disk.c | 2 +- drivers/ide/ide.c | 5 ++++- include/linux/ide.h | 1 + 3 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index f712e4cfd9d..7cf3eb02352 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -776,7 +776,7 @@ static void update_ordered(ide_drive_t *drive) * not available so we don't need to recheck that. 
*/ capacity = idedisk_capacity(drive); - barrier = ide_id_has_flush_cache(id) && + barrier = ide_id_has_flush_cache(id) && !drive->noflush && (drive->addressing == 0 || capacity <= (1ULL << 28) || ide_id_has_flush_cache_ext(id)); diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c index 05fbd9298db..defd4b4bd37 100644 --- a/drivers/ide/ide.c +++ b/drivers/ide/ide.c @@ -1539,7 +1539,7 @@ static int __init ide_setup(char *s) const char *hd_words[] = { "none", "noprobe", "nowerr", "cdrom", "serialize", "autotune", "noautotune", "minus8", "swapdata", "bswap", - "minus11", "remap", "remap63", "scsi", NULL }; + "noflush", "remap", "remap63", "scsi", NULL }; unit = s[2] - 'a'; hw = unit / MAX_DRIVES; unit = unit % MAX_DRIVES; @@ -1578,6 +1578,9 @@ static int __init ide_setup(char *s) case -10: /* "bswap" */ drive->bswap = 1; goto done; + case -11: /* noflush */ + drive->noflush = 1; + goto done; case -12: /* "remap" */ drive->remap_0_to_1 = 1; goto done; diff --git a/include/linux/ide.h b/include/linux/ide.h index dc7abef1096..99620451d95 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -571,6 +571,7 @@ typedef struct ide_drive_s { u8 waiting_for_dma; /* dma currently in progress */ u8 unmask; /* okay to unmask other irqs */ u8 bswap; /* byte swap data */ + u8 noflush; /* don't attempt flushes */ u8 dsc_overlap; /* DSC overlap */ u8 nice1; /* give potential excess bandwidth */ -- cgit v1.2.3-70-g09d2 From a4045dff782a8692637c24a0222120082c887caa Mon Sep 17 00:00:00 2001 From: bibo mao Date: Fri, 28 Jul 2006 14:44:48 +0200 Subject: [PATCH] x86_64: Enlarge debug stack for nested kprobes On the x86_64 platform, the INT1 and INT3 trap stack is an IST stack called DEBUG_STACK; when an INT1/INT3 trap happens, the hardware switches to DEBUG_STACK. The current DEBUG_STACK size is 4K; when an int1/int3 trap happens, the kernel subtracts 4K from the current DEBUG_STACK IST value. But if int3/int1 traps are nested, this will destroy another vector's IST stack. 
This patch fixes this: it sets the DEBUG_STACK size to 8K and allows two levels of nested int1/int3 traps. The kprobe DEBUG_STACK may be nested, because a kprobe handler may itself be probed by other kprobes. Thanks to jbeulich for pointing out an error in the first patch. [AK: nested kprobes are pretty dubious. Hopefully one nest will be enough. This will cost 8K per CPU (4K more than before)] Signed-off-by: bibo, mao Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/asm-x86_64/page.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-x86_64/page.h b/include/asm-x86_64/page.h index f7bf875aae4..10f346165ca 100644 --- a/include/asm-x86_64/page.h +++ b/include/asm-x86_64/page.h @@ -19,7 +19,7 @@ #define EXCEPTION_STACK_ORDER 0 #define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER) -#define DEBUG_STACK_ORDER EXCEPTION_STACK_ORDER +#define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1) #define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER) #define IRQSTACK_ORDER 2 -- cgit v1.2.3-70-g09d2 From e3f2ddeac718c768fdac4b7fe69d465172f788a8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 29 Jul 2006 05:17:57 +0200 Subject: [PATCH] pi-futex: robust-futex exit Fix robust PI-futexes to be properly unlocked on unexpected exit. For this to work the kernel has to know whether a futex is a PI or a non-PI one, because the semantics are different. Since the space in relevant glibc data structures is extremely scarce, the best solution is to encode the 'PI' information in bit 0 of the robust list pointer. Existing (non-PI) glibc robust futexes have this bit always zero, so the ABI is kept. New glibc with PI-robust-futexes will set this bit. 
Further fixes from Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Ulrich Drepper Signed-off-by: Thomas Gleixner Signed-off-by: Linus Torvalds --- include/linux/futex.h | 3 +- kernel/futex.c | 91 +++++++++++++++++++++++++++++++++++---------------- kernel/futex_compat.c | 34 ++++++++++++++----- 3 files changed, 89 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/include/linux/futex.h b/include/linux/futex.h index 34c3a215f2c..d097b5b72bc 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -96,7 +96,8 @@ struct robust_list_head { long do_futex(u32 __user *uaddr, int op, u32 val, unsigned long timeout, u32 __user *uaddr2, u32 val2, u32 val3); -extern int handle_futex_death(u32 __user *uaddr, struct task_struct *curr); +extern int +handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi); #ifdef CONFIG_FUTEX extern void exit_robust_list(struct task_struct *curr); diff --git a/kernel/futex.c b/kernel/futex.c index f59003b1d8f..dda2049692a 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -495,10 +495,13 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me) } /* - * We are the first waiter - try to look up the real owner and - * attach the new pi_state to it: + * We are the first waiter - try to look up the real owner and attach + * the new pi_state to it, but bail out when the owner died bit is set + * and TID = 0: */ pid = uval & FUTEX_TID_MASK; + if (!pid && (uval & FUTEX_OWNER_DIED)) + return -ESRCH; p = futex_find_get_task(pid); if (!p) return -ESRCH; @@ -579,16 +582,17 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) * kept enabled while there is PI state around. We must also * preserve the owner died bit.) 
*/ - newval = (uval & FUTEX_OWNER_DIED) | FUTEX_WAITERS | new_owner->pid; - - inc_preempt_count(); - curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval); - dec_preempt_count(); + if (!(uval & FUTEX_OWNER_DIED)) { + newval = FUTEX_WAITERS | new_owner->pid; - if (curval == -EFAULT) - return -EFAULT; - if (curval != uval) - return -EINVAL; + inc_preempt_count(); + curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval); + dec_preempt_count(); + if (curval == -EFAULT) + return -EFAULT; + if (curval != uval) + return -EINVAL; + } spin_lock_irq(&pi_state->owner->pi_lock); WARN_ON(list_empty(&pi_state->list)); @@ -1443,9 +1447,11 @@ retry_locked: * again. If it succeeds then we can return without waking * anyone else up: */ - inc_preempt_count(); - uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0); - dec_preempt_count(); + if (!(uval & FUTEX_OWNER_DIED)) { + inc_preempt_count(); + uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0); + dec_preempt_count(); + } if (unlikely(uval == -EFAULT)) goto pi_faulted; @@ -1478,9 +1484,11 @@ retry_locked: /* * No waiters - kernel unlocks the futex: */ - ret = unlock_futex_pi(uaddr, uval); - if (ret == -EFAULT) - goto pi_faulted; + if (!(uval & FUTEX_OWNER_DIED)) { + ret = unlock_futex_pi(uaddr, uval); + if (ret == -EFAULT) + goto pi_faulted; + } out_unlock: spin_unlock(&hb->lock); @@ -1699,9 +1707,9 @@ err_unlock: * Process a futex-list entry, check whether it's owned by the * dying task, and do notification if so: */ -int handle_futex_death(u32 __user *uaddr, struct task_struct *curr) +int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi) { - u32 uval, nval; + u32 uval, nval, mval; retry: if (get_user(uval, uaddr)) @@ -1718,20 +1726,44 @@ retry: * thread-death.) The rest of the cleanup is done in * userspace. 
*/ - nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, - uval | FUTEX_OWNER_DIED); + mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; + nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval); + if (nval == -EFAULT) return -1; if (nval != uval) goto retry; - if (uval & FUTEX_WAITERS) - futex_wake(uaddr, 1); + /* + * Wake robust non-PI futexes here. The wakeup of + * PI futexes happens in exit_pi_state(): + */ + if (!pi) { + if (uval & FUTEX_WAITERS) + futex_wake(uaddr, 1); + } } return 0; } +/* + * Fetch a robust-list pointer. Bit 0 signals PI futexes: + */ +static inline int fetch_robust_entry(struct robust_list __user **entry, + struct robust_list __user **head, int *pi) +{ + unsigned long uentry; + + if (get_user(uentry, (unsigned long *)head)) + return -EFAULT; + + *entry = (void *)(uentry & ~1UL); + *pi = uentry & 1; + + return 0; +} + /* * Walk curr->robust_list (very carefully, it's a userspace list!) * and mark any locks found there dead, and notify any waiters. @@ -1742,14 +1774,14 @@ void exit_robust_list(struct task_struct *curr) { struct robust_list_head __user *head = curr->robust_list; struct robust_list __user *entry, *pending; - unsigned int limit = ROBUST_LIST_LIMIT; + unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; unsigned long futex_offset; /* * Fetch the list head (which was registered earlier, via * sys_set_robust_list()): */ - if (get_user(entry, &head->list.next)) + if (fetch_robust_entry(&entry, &head->list.next, &pi)) return; /* * Fetch the relative futex offset: @@ -1760,10 +1792,11 @@ void exit_robust_list(struct task_struct *curr) * Fetch any possibly pending lock-add first, and handle it * if it exists: */ - if (get_user(pending, &head->list_op_pending)) + if (fetch_robust_entry(&pending, &head->list_op_pending, &pip)) return; + if (pending) - handle_futex_death((void *)pending + futex_offset, curr); + handle_futex_death((void *)pending + futex_offset, curr, pip); while (entry != &head->list) { /* @@ -1772,12 +1805,12 @@ void 
exit_robust_list(struct task_struct *curr) */ if (entry != pending) if (handle_futex_death((void *)entry + futex_offset, - curr)) + curr, pi)) return; /* * Fetch the next entry in the list: */ - if (get_user(entry, &entry->next)) + if (fetch_robust_entry(&entry, &entry->next, &pi)) return; /* * Avoid excessively long or circular lists: diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c index d1d92b441fb..d1aab1a452c 100644 --- a/kernel/futex_compat.c +++ b/kernel/futex_compat.c @@ -12,6 +12,23 @@ #include + +/* + * Fetch a robust-list pointer. Bit 0 signals PI futexes: + */ +static inline int +fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry, + compat_uptr_t *head, int *pi) +{ + if (get_user(*uentry, head)) + return -EFAULT; + + *entry = compat_ptr((*uentry) & ~1); + *pi = (unsigned int)(*uentry) & 1; + + return 0; +} + /* * Walk curr->robust_list (very carefully, it's a userspace list!) * and mark any locks found there dead, and notify any waiters. @@ -22,17 +39,16 @@ void compat_exit_robust_list(struct task_struct *curr) { struct compat_robust_list_head __user *head = curr->compat_robust_list; struct robust_list __user *entry, *pending; + unsigned int limit = ROBUST_LIST_LIMIT, pi; compat_uptr_t uentry, upending; - unsigned int limit = ROBUST_LIST_LIMIT; compat_long_t futex_offset; /* * Fetch the list head (which was registered earlier, via * sys_set_robust_list()): */ - if (get_user(uentry, &head->list.next)) + if (fetch_robust_entry(&uentry, &entry, &head->list.next, &pi)) return; - entry = compat_ptr(uentry); /* * Fetch the relative futex offset: */ @@ -42,11 +58,11 @@ void compat_exit_robust_list(struct task_struct *curr) * Fetch any possibly pending lock-add first, and handle it * if it exists: */ - if (get_user(upending, &head->list_op_pending)) + if (fetch_robust_entry(&upending, &pending, + &head->list_op_pending, &pi)) return; - pending = compat_ptr(upending); if (upending) - handle_futex_death((void *)pending + 
futex_offset, curr); + handle_futex_death((void *)pending + futex_offset, curr, pi); while (compat_ptr(uentry) != &head->list) { /* @@ -55,15 +71,15 @@ void compat_exit_robust_list(struct task_struct *curr) */ if (entry != pending) if (handle_futex_death((void *)entry + futex_offset, - curr)) + curr, pi)) return; /* * Fetch the next entry in the list: */ - if (get_user(uentry, (compat_uptr_t *)&entry->next)) + if (fetch_robust_entry(&uentry, &entry, + (compat_uptr_t *)&entry->next, &pi)) return; - entry = compat_ptr(uentry); /* * Avoid excessively long or circular lists: */ -- cgit v1.2.3-70-g09d2