From d57854bb1d78ba89ffbfdfd1c3e95b52ed7478ff Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Mon, 22 Dec 2008 15:35:31 +0100 Subject: minstrel: fix warning if lowest supported rate index is not 0 This patch fixes the following WARNING (caused by rix_to_ndx): " >WARNING: at net/mac80211/rc80211_minstrel.c:69 minstrel_rate_init+0xd2/0x33a [mac80211]() >[...] >Call Trace: > warn_on_slowpath+0x51/0x75 > _format_mac_addr+0x4c/0x88 > minstrel_rate_init+0xd2/0x33a [mac80211] > print_mac+0x16/0x1b > schedule_hrtimeout_range+0xdc/0x107 > ieee80211_add_station+0x158/0x1bd [mac80211] > nl80211_new_station+0x1b3/0x20b [cfg80211] The reason is that I'm experimenting with "g" only mode on a 802.11 b/g card. Therefore rate_lowest_index returns 4 (= 6Mbit, instead of usual 0 = 1Mbit). Since mi->r array is initialized with zeros in minstrel_alloc_sta, rix_to_ndx has a hard time to find the 6Mbit entry and will trigged the WARNING. Signed-off-by: Christian Lamparter Acked-by: Felix Fietkau Signed-off-by: John W. Linville --- net/mac80211/rc80211_minstrel.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index 2b3b490a607..3824990d340 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -395,13 +395,15 @@ minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband, { struct minstrel_sta_info *mi = priv_sta; struct minstrel_priv *mp = priv; - struct minstrel_rate *mr_ctl; + struct ieee80211_local *local = hw_to_local(mp->hw); + struct ieee80211_rate *ctl_rate; unsigned int i, n = 0; unsigned int t_slot = 9; /* FIXME: get real slot time */ mi->lowest_rix = rate_lowest_index(sband, sta); - mr_ctl = &mi->r[rix_to_ndx(mi, mi->lowest_rix)]; - mi->sp_ack_dur = mr_ctl->ack_time; + ctl_rate = &sband->bitrates[mi->lowest_rix]; + mi->sp_ack_dur = ieee80211_frame_duration(local, 10, ctl_rate->bitrate, + !!(ctl_rate->flags & IEEE80211_RATE_ERP_G), 1); for (i = 0; i < sband->n_bitrates; i++) { struct minstrel_rate *mr = &mi->r[n]; @@ -416,7 +418,7 @@ minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband, mr->rix = i; mr->bitrate = sband->bitrates[i].bitrate / 5; - calc_rate_durations(mi, hw_to_local(mp->hw), mr, + calc_rate_durations(mi, local, mr, &sband->bitrates[i]); /* calculate maximum number of retransmissions before -- cgit v1.2.3-70-g09d2 From dcebf45cdc8384be9009b2b9a825054b64742768 Mon Sep 17 00:00:00 2001 From: Pavel Roskin Date: Mon, 22 Dec 2008 16:39:36 -0500 Subject: mac80211: allow mode change if IBSS is not allowed Changing mode on an interface is not allowed if IBSS is disabled for the current channel. That restriction should only apply when switching to the ad-hoc mode, as it was prior to "cfg80211: handle SIOCGIWMODE/SIOCSIWMODE". Signed-off-by: Pavel Roskin Signed-off-by: John W. Linville --- net/mac80211/iface.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 5abbc3f07dd..b9074824862 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -699,7 +699,8 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, return 0; /* Setting ad-hoc mode on non-IBSS channel is not supported. */ - if (sdata->local->oper_channel->flags & IEEE80211_CHAN_NO_IBSS) + if (sdata->local->oper_channel->flags & IEEE80211_CHAN_NO_IBSS && + type == NL80211_IFTYPE_ADHOC) return -EOPNOTSUPP; /* -- cgit v1.2.3-70-g09d2 From b973c31a925c6753d84a100673f6b25546ec8b34 Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Sat, 27 Dec 2008 22:19:49 +0100 Subject: mac80211: initialize RC data for all mesh links upon allocation This patch fixes a panic that might occur, if the device is part of a mesh and tries to send with a higher rate index than "0". kernel BUG at net/mac80211/rate.c:239! invalid opcode: 0000 [#1] SMP [...] Call Trace: <0> ? invoke_tx_handlers+0x474/0xb57 [mac80211] ? __ieee80211_tx_prepare+0x260/0x2a8 [mac80211] ? ieee80211_master_start_xmit+0x300/0x43a [mac80211] ? __qdisc_run+0xde/0x1da ? net_tx_action+0xb4/0x102 Kernel panic - not syncing: Fatal exception in interrupt Signed-off-by: Christian Lamparter Signed-off-by: John W. Linville --- net/mac80211/mesh_plink.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 929ba542fd7..1159bdb4119 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -107,6 +107,7 @@ static struct sta_info *mesh_plink_alloc(struct ieee80211_sub_if_data *sdata, sta->flags = WLAN_STA_AUTHORIZED; sta->sta.supp_rates[local->hw.conf.channel->band] = rates; + rate_control_rate_init(sta); return sta; } -- cgit v1.2.3-70-g09d2 From 88843104a19d5896bf67ab6bd685e976240dd04a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 12 Jan 2009 00:06:00 +0000 Subject: netfilter 01/09: remove "happy cracking" message Don't spam logs for locally generated short packets. these can only be generated by root. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/iptable_filter.c | 7 +------ net/ipv4/netfilter/iptable_mangle.c | 6 +----- net/ipv4/netfilter/iptable_raw.c | 6 +----- net/ipv4/netfilter/iptable_security.c | 6 +----- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 5 +---- 5 files changed, 5 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index c9224310eba..52cb6939d09 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -93,13 +93,8 @@ ipt_local_out_hook(unsigned int hook, { /* root is playing with raw sockets. */ if (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr)) { - if (net_ratelimit()) - printk("iptable_filter: ignoring short SOCK_RAW " - "packet.\n"); + ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - } - return ipt_do_table(skb, hook, in, out, dev_net(out)->ipv4.iptable_filter); } diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 69f2c428714..3929d20b9e4 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -132,12 +132,8 @@ ipt_local_hook(unsigned int hook, /* root is playing with raw sockets. */ if (skb->len < sizeof(struct iphdr) - || ip_hdrlen(skb) < sizeof(struct iphdr)) { - if (net_ratelimit()) - printk("iptable_mangle: ignoring short SOCK_RAW " - "packet.\n"); + || ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - } /* Save things which could affect route */ mark = skb->mark; diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 8faebfe638f..7f65d18333e 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -65,12 +65,8 @@ ipt_local_hook(unsigned int hook, { /* root is playing with raw sockets. */ if (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr)) { - if (net_ratelimit()) - printk("iptable_raw: ignoring short SOCK_RAW " - "packet.\n"); + ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - } return ipt_do_table(skb, hook, in, out, dev_net(out)->ipv4.iptable_raw); } diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index 36f3be3cc42..a52a35f4a58 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -96,12 +96,8 @@ ipt_local_out_hook(unsigned int hook, { /* Somebody is playing with raw sockets. */ if (skb->len < sizeof(struct iphdr) - || ip_hdrlen(skb) < sizeof(struct iphdr)) { - if (net_ratelimit()) - printk(KERN_INFO "iptable_security: ignoring short " - "SOCK_RAW packet.\n"); + || ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - } return ipt_do_table(skb, hook, in, out, dev_net(out)->ipv4.iptable_security); } diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index b2141e11575..4beb04fac58 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -145,11 +145,8 @@ static unsigned int ipv4_conntrack_local(unsigned int hooknum, { /* root is playing with raw sockets. */ if (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr)) { - if (net_ratelimit()) - printk("ipt_hook: happy cracking.\n"); + ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - } return nf_conntrack_in(dev_net(out), PF_INET, hooknum, skb); } -- cgit v1.2.3-70-g09d2 From a2bd40ad3151d4d346fd167e01fb84b06f7247fc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 12 Jan 2009 00:06:02 +0000 Subject: netfilter 02/09: bridge: Fix handling of non-IP packets in FORWARD/POST_ROUTING Currently the bridge FORWARD/POST_ROUTING chains treats all non-IPv4 packets as IPv6. This packet fixes that by returning NF_ACCEPT on non-IP packets instead, just as is done in PRE_ROUTING. Signed-off-by: Herbert Xu Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/bridge/br_netfilter.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index a65e43a17fb..9a1cd757ec4 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -686,8 +686,11 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb, if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb) || IS_PPPOE_IP(skb)) pf = PF_INET; - else + else if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb) || + IS_PPPOE_IPV6(skb)) pf = PF_INET6; + else + return NF_ACCEPT; nf_bridge_pull_encap_header(skb); @@ -828,8 +831,11 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb) || IS_PPPOE_IP(skb)) pf = PF_INET; - else + else if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb) || + IS_PPPOE_IPV6(skb)) pf = PF_INET6; + else + return NF_ACCEPT; #ifdef CONFIG_NETFILTER_DEBUG if (skb->dst == NULL) { -- cgit v1.2.3-70-g09d2 From 47e0e1ca13d64eeeb687995fbe4e239e743d7544 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 12 Jan 2009 00:06:03 +0000 Subject: netfilter 03/09: bridge: Disable PPPOE/VLAN processing by default The PPPOE/VLAN processing code in the bridge netfilter is broken by design. The VLAN tag and the PPPOE session ID are an integral part of the packet flow information, yet they're completely ignored by the bridge netfilter. This is potentially a security hole as it treats all VLANs and PPPOE sessions as the same. What's more, it's actually broken for PPPOE as the bridge netfilter tries to trim the packets to the IP length without adjusting the PPPOE header (and adjusting the PPPOE header isn't much better since the PPPOE peer may require the padding to be present). Therefore we should disable this by default. It does mean that people relying on this feature may lose networking depending on how their bridge netfilter rules are configured. However, IMHO the problems this code causes are serious enough to warrant this. Signed-off-by: Herbert Xu Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/bridge/br_netfilter.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 9a1cd757ec4..cf754ace0b7 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -58,11 +58,11 @@ static struct ctl_table_header *brnf_sysctl_header; static int brnf_call_iptables __read_mostly = 1; static int brnf_call_ip6tables __read_mostly = 1; static int brnf_call_arptables __read_mostly = 1; -static int brnf_filter_vlan_tagged __read_mostly = 1; -static int brnf_filter_pppoe_tagged __read_mostly = 1; +static int brnf_filter_vlan_tagged __read_mostly = 0; +static int brnf_filter_pppoe_tagged __read_mostly = 0; #else -#define brnf_filter_vlan_tagged 1 -#define brnf_filter_pppoe_tagged 1 +#define brnf_filter_vlan_tagged 0 +#define brnf_filter_pppoe_tagged 0 #endif static inline __be16 vlan_proto(const struct sk_buff *skb) -- cgit v1.2.3-70-g09d2 From 656caff20e12ba6e07b4bf342641df5ab33b4e49 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 12 Jan 2009 00:06:04 +0000 Subject: netfilter 04/09: x_tables: fix match/target revision lookup Commit 55b69e91 (netfilter: implement NFPROTO_UNSPEC as a wildcard for extensions) broke revision probing for matches and targets that are registered with NFPROTO_UNSPEC. Fix by continuing the search on the NFPROTO_UNSPEC list if nothing is found on the af-specific lists. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/x_tables.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 89837a4eef7..bfbf521f6ea 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -273,6 +273,10 @@ static int match_revfn(u8 af, const char *name, u8 revision, int *bestp) have_rev = 1; } } + + if (af != NFPROTO_UNSPEC && !have_rev) + return match_revfn(NFPROTO_UNSPEC, name, revision, bestp); + return have_rev; } @@ -289,6 +293,10 @@ static int target_revfn(u8 af, const char *name, u8 revision, int *bestp) have_rev = 1; } } + + if (af != NFPROTO_UNSPEC && !have_rev) + return target_revfn(NFPROTO_UNSPEC, name, revision, bestp); + return have_rev; } -- cgit v1.2.3-70-g09d2 From d61ba9fd55b52a10b8e0ffd39bbc33587d3bfc8d Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 12 Jan 2009 00:06:06 +0000 Subject: netfilter 05/09: ebtables: fix inversion in match code Commit 8cc784ee (netfilter: change return types of match functions for ebtables extensions) broke ebtables matches by inverting the sense of match/nomatch. Reported-by: Matt Cross Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/bridge/netfilter/ebtables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 8a8743d7d6e..820252aee81 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -79,7 +79,7 @@ static inline int ebt_do_match (struct ebt_entry_match *m, { par->match = m->u.match; par->matchinfo = m->data; - return m->u.match->match(skb, par); + return m->u.match->match(skb, par) ? EBT_MATCH : EBT_NOMATCH; } static inline int ebt_dev_check(char *entry, const struct net_device *device) -- cgit v1.2.3-70-g09d2 From 71320afcdb33b3f0b754ba1fac6a8c77aa469041 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 12 Jan 2009 00:06:07 +0000 Subject: netfilter 06/09: nf_conntrack: fix ICMP/ICMPv6 timeout sysctls on big-endian An old bug crept back into the ICMP/ICMPv6 conntrack protocols: the timeout values are defined as unsigned longs, the sysctl's maxsize is set to sizeof(unsigned int). Use unsigned int for the timeout values as in the other conntrack protocols. Reported-by: Jean-Mickael Guerin Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 2 +- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 1fd3ef7718b..2a8bee26f43 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -20,7 +20,7 @@ #include #include -static unsigned long nf_ct_icmp_timeout __read_mostly = 30*HZ; +static unsigned int nf_ct_icmp_timeout __read_mostly = 30*HZ; static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct nf_conntrack_tuple *tuple) diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index bd52151d31e..c455cf4ee75 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -26,7 +26,7 @@ #include #include -static unsigned long nf_ct_icmpv6_timeout __read_mostly = 30*HZ; +static unsigned int nf_ct_icmpv6_timeout __read_mostly = 30*HZ; static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, -- cgit v1.2.3-70-g09d2 From cd7fcbf1cb6933bfb9171452b4a370c92923544d Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 12 Jan 2009 00:06:08 +0000 Subject: netfilter 07/09: simplify nf_conntrack_alloc() error handling nf_conntrack_alloc cannot return NULL, so there is no need to check for NULL before using the value. I have also removed the initialization of ct to NULL in nf_conntrack_alloc, since the value is never used, and since perhaps it might lead one to think that return ct at the end might return NULL. The semantic patch that finds this problem is as follows: (http://www.emn.fr/x-info/coccinelle/) // @match exists@ expression x, E; position p1,p2; statement S1, S2; @@ x@p1 = nf_conntrack_alloc(...) ... when != x = E ( if (x@p2 == NULL || ...) S1 else S2 | if (x@p2 == NULL && ...) S1 else S2 ) @other_match exists@ expression match.x, E1, E2; position p1!=match.p1,match.p2; @@ x@p1 = E1 ... when != x = E2 x@p2 @ script:python depends on !other_match@ p1 << match.p1; p2 << match.p2; @@ print "%s: call to nf_conntrack_alloc %s bad test %s" % (p1[0].file,p1[0].line,p2[0].line) // Signed-off-by: Julia Lawall Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_core.c | 4 ++-- net/netfilter/nf_conntrack_netlink.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 7e83f74cd5d..90ce9ddb945 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -469,7 +469,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, const struct nf_conntrack_tuple *repl, gfp_t gfp) { - struct nf_conn *ct = NULL; + struct nf_conn *ct; if (unlikely(!nf_conntrack_hash_rnd_initted)) { get_random_bytes(&nf_conntrack_hash_rnd, 4); @@ -551,7 +551,7 @@ init_conntrack(struct net *net, } ct = nf_conntrack_alloc(net, tuple, &repl_tuple, GFP_ATOMIC); - if (ct == NULL || IS_ERR(ct)) { + if (IS_ERR(ct)) { pr_debug("Can't allocate conntrack.\n"); return (struct nf_conntrack_tuple_hash *)ct; } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 00e8c27130f..3dddec6d2f7 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1134,7 +1134,7 @@ ctnetlink_create_conntrack(struct nlattr *cda[], struct nf_conntrack_helper *helper; ct = nf_conntrack_alloc(&init_net, otuple, rtuple, GFP_ATOMIC); - if (ct == NULL || IS_ERR(ct)) + if (IS_ERR(ct)) return -ENOMEM; if (!cda[CTA_TIMEOUT]) -- cgit v1.2.3-70-g09d2 From e6210f3be5b13b6cda9c8dad8926818a73c8e6ac Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 12 Jan 2009 00:06:10 +0000 Subject: netfilter 08/09: xt_time: print timezone for user information netfilter: xt_time: print timezone for user information Let users have a way to figure out if their distro set the kernel timezone at all. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/xt_time.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'net') diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c index 29375ba8db7..93acaa59d10 100644 --- a/net/netfilter/xt_time.c +++ b/net/netfilter/xt_time.c @@ -243,6 +243,17 @@ static struct xt_match xt_time_mt_reg __read_mostly = { static int __init time_mt_init(void) { + int minutes = sys_tz.tz_minuteswest; + + if (minutes < 0) /* east of Greenwich */ + printk(KERN_INFO KBUILD_MODNAME + ": kernel timezone is +%02d%02d\n", + -minutes / 60, -minutes % 60); + else /* west of Greenwich */ + printk(KERN_INFO KBUILD_MODNAME + ": kernel timezone is -%02d%02d\n", + minutes / 60, minutes % 60); + return xt_register_match(&xt_time_mt_reg); } -- cgit v1.2.3-70-g09d2 From c08513471911cf33cb50249a7ff12848374f7263 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Mon, 12 Jan 2009 21:54:16 -0800 Subject: pkt_sched: sch_htb: Consider used jiffies in htb_do_events() Next event time should consider jiffies used for recounting. Otherwise qdisc_watchdog_schedule() triggers hrtimer immediately with the event in the past, and may cause very high ksoftirqd cpu usage (if highres is on). There is also removed checking "event" for zero in htb_dequeue(): it's always true in this place. Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/sched/sch_htb.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 5070643ce53..9ca8a26ba50 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -685,8 +685,8 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level) if (cl->cmode != HTB_CAN_SEND) htb_add_to_wait_tree(q, cl, diff); } - /* too much load - let's continue on next jiffie */ - return q->now + PSCHED_TICKS_PER_SEC / HZ; + /* too much load - let's continue on next jiffie (including above) */ + return q->now + 2 * PSCHED_TICKS_PER_SEC / HZ; } /* Returns class->node+prio from id-tree where classe's id is >= id. NULL @@ -873,7 +873,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) } else event = q->near_ev_cache[level]; - if (event && next_event > event) + if (next_event > event) next_event = event; m = ~q->row_mask[level]; -- cgit v1.2.3-70-g09d2 From a73be040650463eacb95f83d2e6673ac57b4fc59 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Mon, 12 Jan 2009 21:54:40 -0800 Subject: pkt_sched: sch_htb: Break all htb_do_events() after 2 jiffies Currently htb_do_events() breaks events recounting for a level after 2 jiffies, but there is no reason to repeat this for next levels and increase delays even more (with softirqs disabled). htb_dequeue_tree() can add to this too, btw. In such a case q->now time is invalid anyway. Thanks to Patrick McHardy for spotting an error around earlier version of this patch. Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/sched/sch_htb.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 9ca8a26ba50..2f0f0b04d3f 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -661,12 +661,13 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, * next pending event (0 for no event in pq). * Note: Applied are events whose have cl->pq_key <= q->now. */ -static psched_time_t htb_do_events(struct htb_sched *q, int level) +static psched_time_t htb_do_events(struct htb_sched *q, int level, + unsigned long start) { /* don't run for longer than 2 jiffies; 2 is used instead of 1 to simplify things when jiffy is going to be incremented too soon */ - unsigned long stop_at = jiffies + 2; + unsigned long stop_at = start + 2; while (time_before(jiffies, stop_at)) { struct htb_class *cl; long diff; @@ -845,6 +846,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) struct htb_sched *q = qdisc_priv(sch); int level; psched_time_t next_event; + unsigned long start_at; /* try to dequeue direct packets as high prio (!) to minimize cpu work */ skb = __skb_dequeue(&q->direct_queue); @@ -857,6 +859,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) if (!sch->q.qlen) goto fin; q->now = psched_get_time(); + start_at = jiffies; next_event = q->now + 5 * PSCHED_TICKS_PER_SEC; @@ -866,7 +869,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) psched_time_t event; if (q->now >= q->near_ev_cache[level]) { - event = htb_do_events(q, level); + event = htb_do_events(q, level, start_at); if (!event) event = q->now + PSCHED_TICKS_PER_SEC; q->near_ev_cache[level] = event; -- cgit v1.2.3-70-g09d2 From 85b9e4fe13de9b35af1dbd50acc5f4978b9119ee Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Mon, 12 Jan 2009 14:37:44 -0500 Subject: mac80211: fix "‘ret’ may be used uninitialized" warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit net/mac80211/ht.c: In function ‘ieee80211_start_tx_ba_session’: net/mac80211/ht.c:472: warning: ‘ret’ may be used uninitialized in this function Signed-off-by: John W. Linville --- net/mac80211/ht.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 5f510a13b9f..c5c0c527109 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -469,7 +469,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) struct ieee80211_sub_if_data *sdata; u16 start_seq_num; u8 *state; - int ret; + int ret = 0; if ((tid >= STA_TID_NUM) || !(hw->flags & IEEE80211_HW_AMPDU_AGGREGATION)) return -EINVAL; -- cgit v1.2.3-70-g09d2 From 33966dd0e2f68f26943cd9ee93ec6abbc6547a8e Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Tue, 13 Jan 2009 16:04:36 -0800 Subject: tcp: splice as many packets as possible at once As spotted by Willy Tarreau, current splice() from tcp socket to pipe is not optimal. It processes at most one segment per call. This results in low performance and very high overhead due to syscall rate when splicing from interfaces which do not support LRO. Willy provided a patch inside tcp_splice_read(), but a better fix is to let tcp_read_sock() process as many segments as possible, so that tcp_rcv_space_adjust() and tcp_cleanup_rbuf() are called less often. With this change, splice() behaves like tcp_recvmsg(), being able to consume many skbs in one system call. With typical 1460 bytes of payload per frame, that means splice(SPLICE_F_NONBLOCK) can return 16*1460 = 23360 bytes. Signed-off-by: Willy Tarreau Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ce572f9dff0..48ada1b2d2c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -522,8 +522,12 @@ static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, unsigned int offset, size_t len) { struct tcp_splice_state *tss = rd_desc->arg.data; + int ret; - return skb_splice_bits(skb, offset, tss->pipe, tss->len, tss->flags); + ret = skb_splice_bits(skb, offset, tss->pipe, rd_desc->count, tss->flags); + if (ret > 0) + rd_desc->count -= ret; + return ret; } static int __tcp_splice_read(struct sock *sk, struct tcp_splice_state *tss) @@ -531,6 +535,7 @@ static int __tcp_splice_read(struct sock *sk, struct tcp_splice_state *tss) /* Store TCP splice context information in read_descriptor_t. */ read_descriptor_t rd_desc = { .arg.data = tss, + .count = tss->len, }; return tcp_read_sock(sk, &rd_desc, tcp_splice_data_recv); @@ -611,11 +616,13 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, tss.len -= ret; spliced += ret; + if (!timeo) + break; release_sock(sk); lock_sock(sk); if (sk->sk_err || sk->sk_state == TCP_CLOSE || - (sk->sk_shutdown & RCV_SHUTDOWN) || !timeo || + (sk->sk_shutdown & RCV_SHUTDOWN) || signal_pending(current)) break; } -- cgit v1.2.3-70-g09d2 From 7891cc818967e186be68caac32d84bfd0a3f0bd2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 13 Jan 2009 22:17:51 -0800 Subject: ipv6: Fix fib6_dump_table walker leak When a fib6 table dump is prematurely ended, we won't unlink its walker from the list. This causes all sorts of grief for other users of the list later. Reported-by: Chris Caputo Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 29c7c99e69f..52ee1dced2f 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -298,6 +298,10 @@ static void fib6_dump_end(struct netlink_callback *cb) struct fib6_walker_t *w = (void*)cb->args[2]; if (w) { + if (cb->args[4]) { + cb->args[4] = 0; + fib6_walker_unlink(w); + } cb->args[2] = 0; kfree(w); } @@ -330,15 +334,12 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, read_lock_bh(&table->tb6_lock); res = fib6_walk_continue(w); read_unlock_bh(&table->tb6_lock); - if (res != 0) { - if (res < 0) - fib6_walker_unlink(w); - goto end; + if (res <= 0) { + fib6_walker_unlink(w); + cb->args[4] = 0; } - fib6_walker_unlink(w); - cb->args[4] = 0; } -end: + return res; } -- cgit v1.2.3-70-g09d2 From 754fe8d297bfae7b77f7ce866e2fb0c5fb186506 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:09 +0100 Subject: [CVE-2009-0029] System call wrappers part 07 Signed-off-by: Heiko Carstens --- kernel/exit.c | 8 ++++---- kernel/kexec.c | 5 ++--- kernel/sched.c | 4 ++-- kernel/signal.c | 2 +- kernel/sys.c | 7 ++++--- net/socket.c | 2 +- 6 files changed, 14 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/kernel/exit.c b/kernel/exit.c index fac9b040af2..08895df0eab 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1141,7 +1141,7 @@ NORET_TYPE void complete_and_exit(struct completion *comp, long code) EXPORT_SYMBOL(complete_and_exit); -asmlinkage long sys_exit(int error_code) +SYSCALL_DEFINE1(exit, int, error_code) { do_exit((error_code&0xff)<<8); } @@ -1182,7 +1182,7 @@ do_group_exit(int exit_code) * wait4()-ing process will get the correct exit code - even if this * thread is not the thread group leader. */ -asmlinkage long sys_exit_group(int error_code) +SYSCALL_DEFINE1(exit_group, int, error_code) { do_group_exit((error_code & 0xff) << 8); /* NOTREACHED */ @@ -1795,8 +1795,8 @@ asmlinkage long sys_waitid(int which, pid_t upid, return ret; } -asmlinkage long sys_wait4(pid_t upid, int __user *stat_addr, - int options, struct rusage __user *ru) +SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr, + int, options, struct rusage __user *, ru) { struct pid *pid = NULL; enum pid_type type; diff --git a/kernel/kexec.c b/kernel/kexec.c index 3fb855ad6aa..8a6d7b08864 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -934,9 +934,8 @@ struct kimage *kexec_crash_image; static DEFINE_MUTEX(kexec_mutex); -asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, - struct kexec_segment __user *segments, - unsigned long flags) +SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, + struct kexec_segment __user *, segments, unsigned long, flags) { struct kimage **dest_image, *image; int result; diff --git a/kernel/sched.c b/kernel/sched.c index 1a0fdfa5ddf..65c02037b05 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5869,8 +5869,8 @@ SYSCALL_DEFINE1(sched_get_priority_min, int, policy) * this syscall writes the default timeslice value of a given process * into the user-space timespec buffer. A value of '0' means infinity. */ -asmlinkage -long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval) +SYSCALL_DEFINE4(sched_rr_get_interval, pid_t, pid, + struct timespec __user *, interval) { struct task_struct *p; unsigned int time_slice; diff --git a/kernel/signal.c b/kernel/signal.c index 3fe08eaa5de..41f32e08615 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1961,7 +1961,7 @@ EXPORT_SYMBOL(unblock_all_signals); * System call entry points. */ -asmlinkage long sys_restart_syscall(void) +SYSCALL_DEFINE0(restart_syscall) { struct restart_block *restart = ¤t_thread_info()->restart_block; return restart->fn(restart); diff --git a/kernel/sys.c b/kernel/sys.c index cbe4502c28a..39b192b4003 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -143,7 +143,7 @@ out: return error; } -asmlinkage long sys_setpriority(int which, int who, int niceval) +SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval) { struct task_struct *g, *p; struct user_struct *user; @@ -208,7 +208,7 @@ out: * has been offset by 20 (ie it returns 40..1 instead of -20..19) * to stay compatible. */ -asmlinkage long sys_getpriority(int which, int who) +SYSCALL_DEFINE2(getpriority, int, which, int, who) { struct task_struct *g, *p; struct user_struct *user; @@ -355,7 +355,8 @@ EXPORT_SYMBOL_GPL(kernel_power_off); * * reboot doesn't sync: do that yourself before calling this. */ -asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user * arg) +SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, + void __user *, arg) { char buffer[256]; diff --git a/net/socket.c b/net/socket.c index 06603d73c41..cc9b666e58f 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1789,7 +1789,7 @@ out_put: * Shutdown a socket. */ -asmlinkage long sys_shutdown(int fd, int how) +SYSCALL_DEFINE2(shutdown, int, fd, int, how) { int err, fput_needed; struct socket *sock; -- cgit v1.2.3-70-g09d2 From 20f37034fb966a1c35894f9fe529fda0b6440101 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:23 +0100 Subject: [CVE-2009-0029] System call wrappers part 21 Signed-off-by: Heiko Carstens --- fs/readdir.c | 6 ++++-- net/socket.c | 30 +++++++++++++++--------------- 2 files changed, 19 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/fs/readdir.c b/fs/readdir.c index 8b4c2a0051a..cf6a0e39819 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -187,7 +187,8 @@ efault: return -EFAULT; } -asmlinkage long sys_getdents(unsigned int fd, struct linux_dirent __user * dirent, unsigned int count) +SYSCALL_DEFINE3(getdents, unsigned int, fd, + struct linux_dirent __user *, dirent, unsigned int, count) { struct file * file; struct linux_dirent __user * lastdirent; @@ -268,7 +269,8 @@ efault: return -EFAULT; } -asmlinkage long sys_getdents64(unsigned int fd, struct linux_dirent64 __user * dirent, unsigned int count) +SYSCALL_DEFINE3(getdents64, unsigned int, fd, + struct linux_dirent64 __user *, dirent, unsigned int, count) { struct file * file; struct linux_dirent64 __user * lastdirent; diff --git a/net/socket.c b/net/socket.c index cc9b666e58f..fdd72c586a1 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1356,7 +1356,7 @@ out_fd1: * the protocol layer (having also checked the address is ok). */ -asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen) +SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen) { struct socket *sock; struct sockaddr_storage address; @@ -1418,8 +1418,8 @@ asmlinkage long sys_listen(int fd, int backlog) * clean when we restucture accept also. */ -asmlinkage long sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen, int flags) +SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, + int __user *, upeer_addrlen, int, flags) { struct socket *sock, *newsock; struct file *newfile; @@ -1502,8 +1502,8 @@ out_fd: goto out_put; } -asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen) +SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr, + int __user *, upeer_addrlen) { return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0); } @@ -1520,8 +1520,8 @@ asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, * include the -EINPROGRESS status for such sockets. */ -asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr, - int addrlen) +SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr, + int, addrlen) { struct socket *sock; struct sockaddr_storage address; @@ -1552,8 +1552,8 @@ out: * name to user space. */ -asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr, - int __user *usockaddr_len) +SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr, + int __user *, usockaddr_len) { struct socket *sock; struct sockaddr_storage address; @@ -1583,8 +1583,8 @@ out: * name to user space. */ -asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr, - int __user *usockaddr_len) +SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr, + int __user *, usockaddr_len) { struct socket *sock; struct sockaddr_storage address; @@ -1725,8 +1725,8 @@ asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size, * to pass the user mode parameter for the protocols to sort out. */ -asmlinkage long sys_setsockopt(int fd, int level, int optname, - char __user *optval, int optlen) +SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname, + char __user *, optval, int, optlen) { int err, fput_needed; struct socket *sock; @@ -1759,8 +1759,8 @@ out_put: * to pass a user mode parameter for the protocols to sort out. */ -asmlinkage long sys_getsockopt(int fd, int level, int optname, - char __user *optval, int __user *optlen) +SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname, + char __user *, optval, int __user *, optlen) { int err, fput_needed; struct socket *sock; -- cgit v1.2.3-70-g09d2 From 3e0fa65f8ba4fd24b3dcfaf14d5b15eaab0fdc61 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:24 +0100 Subject: [CVE-2009-0029] System call wrappers part 22 Signed-off-by: Heiko Carstens --- net/socket.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index fdd72c586a1..35dd7371752 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1214,7 +1214,7 @@ int sock_create_kern(int family, int type, int protocol, struct socket **res) return __sock_create(&init_net, family, type, protocol, res, 1); } -asmlinkage long sys_socket(int family, int type, int protocol) +SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol) { int retval; struct socket *sock; @@ -1255,8 +1255,8 @@ out_release: * Create a pair of connected sockets. */ -asmlinkage long sys_socketpair(int family, int type, int protocol, - int __user *usockvec) +SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol, + int __user *, usockvec) { struct socket *sock1, *sock2; int fd1, fd2, err; @@ -1385,7 +1385,7 @@ SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen) * ready for listening. */ -asmlinkage long sys_listen(int fd, int backlog) +SYSCALL_DEFINE2(listen, int, fd, int, backlog) { struct socket *sock; int err, fput_needed; @@ -1615,9 +1615,9 @@ SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr, * the protocol. */ -asmlinkage long sys_sendto(int fd, void __user *buff, size_t len, - unsigned flags, struct sockaddr __user *addr, - int addr_len) +SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len, + unsigned, flags, struct sockaddr __user *, addr, + int, addr_len) { struct socket *sock; struct sockaddr_storage address; @@ -1660,7 +1660,8 @@ out: * Send a datagram down a socket. */ -asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags) +SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len, + unsigned, flags) { return sys_sendto(fd, buff, len, flags, NULL, 0); } @@ -1671,9 +1672,9 @@ asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags) * sender address from kernel to user space. */ -asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size, - unsigned flags, struct sockaddr __user *addr, - int __user *addr_len) +SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, + unsigned, flags, struct sockaddr __user *, addr, + int __user *, addr_len) { struct socket *sock; struct iovec iov; @@ -1815,7 +1816,7 @@ SYSCALL_DEFINE2(shutdown, int, fd, int, how) * BSD sendmsg interface */ -asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) +SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) { struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg; @@ -1921,8 +1922,8 @@ out: * BSD recvmsg interface */ -asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, - unsigned int flags) +SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg, + unsigned int, flags) { struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg; @@ -2045,7 +2046,7 @@ static const unsigned char nargs[19]={ * it is set by the callees. */ -asmlinkage long sys_socketcall(int call, unsigned long __user *args) +SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) { unsigned long a[6]; unsigned long a0, a1; -- cgit v1.2.3-70-g09d2 From f17f5c91ae3bfeb5cfc37fa132a5fdfceb8927be Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 14 Jan 2009 14:36:12 -0800 Subject: gro: Check for GSO packets and packets with frag_list As GRO cannot be applied to packets with frag_list we need to make sure that we reject such packets if they are fed to us, e.g., through a tunnel device. Also there is no point in applying GRO on GSO packets so they too should be rejected. This allows GRO to be used in virtio-net which may produce GSO packets directly but may still benefit from GRO if the other end of it doesn't support GSO. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index b715a55cccc..7dec715293b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2392,6 +2392,9 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) if (!(skb->dev->features & NETIF_F_GRO)) goto normal; + if (skb_is_gso(skb) || skb_shinfo(skb)->frag_list) + goto normal; + rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { struct sk_buff *p; -- cgit v1.2.3-70-g09d2 From fc8c7dc1b29560c016a67a34ccff32a712b5aa86 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 14 Jan 2009 14:55:35 -0800 Subject: xfrm: For 32/64 compatability wrt. xfrm_usersa_info Reported by Jiri Klimes. Fix suggested by Patrick McHardy. Signed-off-by: David S. Miller --- net/xfrm/xfrm_user.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index b95a2d64eb5..7877e7975da 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1914,10 +1914,17 @@ static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, } #endif +/* For the xfrm_usersa_info cases we have to work around some 32-bit vs. + * 64-bit compatability issues. On 32-bit the structure is 220 bytes, but + * for 64-bit it gets padded out to 224 bytes. Those bytes are just + * padding and don't have any content we care about. Therefore as long + * as we have enough bytes for the content we can make both cases work. + */ + #define XMSGSIZE(type) sizeof(struct type) static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = { - [XFRM_MSG_NEWSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_info), + [XFRM_MSG_NEWSA - XFRM_MSG_BASE] = 220, /* see above */ [XFRM_MSG_DELSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_id), [XFRM_MSG_GETSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_id), [XFRM_MSG_NEWPOLICY - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_info), @@ -1927,7 +1934,7 @@ static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = { [XFRM_MSG_ACQUIRE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_acquire), [XFRM_MSG_EXPIRE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_expire), [XFRM_MSG_UPDPOLICY - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_info), - [XFRM_MSG_UPDSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_info), + [XFRM_MSG_UPDSA - XFRM_MSG_BASE] = 220, /* see above */ [XFRM_MSG_POLEXPIRE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_polexpire), [XFRM_MSG_FLUSHSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_flush), [XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = 0, -- cgit v1.2.3-70-g09d2 From f557206800801410c30e53ce7a27219b2c4cf0ba Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 14 Jan 2009 20:40:03 -0800 Subject: gro: Fix page ref count for skbs freed normally When an skb with page frags is merged into an existing one, we cannibalise its reference count. This is OK when the skb is reused because we set nr_frags to zero in that case. However, for the case where the skb is freed through kfree_skb, we didn't clear nr_frags which causes the page to be freed prematurely. This is fixed by moving the skb resetting into skb_gro_receive. Reported-by: Jeff Kirsher Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 6 ------ net/core/skbuff.c | 6 ++++++ 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 7dec715293b..60377b6c0a8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2491,12 +2491,6 @@ EXPORT_SYMBOL(napi_gro_receive); void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) { - skb_shinfo(skb)->nr_frags = 0; - - skb->len -= skb->data_len; - skb->truesize -= skb->data_len; - skb->data_len = 0; - __skb_pull(skb, skb_headlen(skb)); skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb)); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5110b359c75..65eac773903 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2602,6 +2602,12 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t)); skb_shinfo(p)->nr_frags += skb_shinfo(skb)->nr_frags; + skb_shinfo(skb)->nr_frags = 0; + + skb->truesize -= skb->data_len; + skb->len -= skb->data_len; + skb->data_len = 0; + NAPI_GRO_CB(skb)->free = 1; goto done; } -- cgit v1.2.3-70-g09d2 From 4e704ee3c2cd38748ca59d835435d6a7e7f6f613 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 14 Jan 2009 20:41:12 -0800 Subject: gso: Ensure that the packet is long enough When we get a GSO packet from an untrusted source, we need to ensure that it is sufficiently long so that we don't end up crashing. Based on discovery and patch by Ian Campbell. Signed-off-by: Herbert Xu Tested-by: Ian Campbell Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 48ada1b2d2c..0cd71b84e48 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2389,7 +2389,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) unsigned int seq; __be32 delta; unsigned int oldlen; - unsigned int len; + unsigned int mss; if (!pskb_may_pull(skb, sizeof(*th))) goto out; @@ -2405,10 +2405,13 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) oldlen = (u16)~skb->len; __skb_pull(skb, thlen); + mss = skb_shinfo(skb)->gso_size; + if (unlikely(skb->len <= mss)) + goto out; + if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { /* Packet is from an untrusted source, reset gso_segs. */ int type = skb_shinfo(skb)->gso_type; - int mss; if (unlikely(type & ~(SKB_GSO_TCPV4 | @@ -2419,7 +2422,6 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) goto out; - mss = skb_shinfo(skb)->gso_size; skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); segs = NULL; @@ -2430,8 +2432,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) if (IS_ERR(segs)) goto out; - len = skb_shinfo(skb)->gso_size; - delta = htonl(oldlen + (thlen + len)); + delta = htonl(oldlen + (thlen + mss)); skb = segs; th = tcp_hdr(skb); @@ -2447,7 +2448,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) csum_fold(csum_partial(skb_transport_header(skb), thlen, skb->csum)); - seq += len; + seq += mss; skb = skb->next; th = tcp_hdr(skb); -- cgit v1.2.3-70-g09d2 From 937f1ba56b4be37d9e2ad77412f95048662058d2 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 14 Jan 2009 21:05:05 -0800 Subject: net: Add init_dummy_netdev() and fix EMAC driver using it This adds an init_dummy_netdev() function that gets a network device structure (allocation and lifetime entirely under caller's control) and initialize the minimum amount of fields so it can be used to schedule NAPI polls without registering a full blown interface. This is to be used by drivers that need to tie several hardware interfaces to a single NAPI poll scheduler due to HW limitations. It also updates the ibm_newemac driver to use that, this fixing the oops on 2.6.29 due to passing NULL as "dev" to netif_napi_add() Symbol is exported GPL only a I don't think we want binary drivers doing that sort of acrobatics (if we want them at all). Signed-off-by: Benjamin Herrenschmidt Tested-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- drivers/net/ibm_newemac/mal.c | 4 +++- drivers/net/ibm_newemac/mal.h | 2 ++ include/linux/netdevice.h | 3 +++ net/core/dev.c | 39 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 47 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/drivers/net/ibm_newemac/mal.c b/drivers/net/ibm_newemac/mal.c index ecf9798987f..2a2fc17b287 100644 --- a/drivers/net/ibm_newemac/mal.c +++ b/drivers/net/ibm_newemac/mal.c @@ -613,7 +613,9 @@ static int __devinit mal_probe(struct of_device *ofdev, INIT_LIST_HEAD(&mal->list); spin_lock_init(&mal->lock); - netif_napi_add(NULL, &mal->napi, mal_poll, + init_dummy_netdev(&mal->dummy_dev); + + netif_napi_add(&mal->dummy_dev, &mal->napi, mal_poll, CONFIG_IBM_NEW_EMAC_POLL_WEIGHT); /* Load power-on reset defaults */ diff --git a/drivers/net/ibm_newemac/mal.h b/drivers/net/ibm_newemac/mal.h index 2f0a8736084..9ededfbf072 100644 --- a/drivers/net/ibm_newemac/mal.h +++ b/drivers/net/ibm_newemac/mal.h @@ -214,6 +214,8 @@ struct mal_instance { int index; spinlock_t lock; + struct net_device dummy_dev; + unsigned int features; }; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4647604c7ca..ec54785d34f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -795,6 +795,7 @@ struct net_device NETREG_UNREGISTERING, /* called unregister_netdevice */ NETREG_UNREGISTERED, /* completed unregister todo */ NETREG_RELEASED, /* called free_netdev */ + NETREG_DUMMY, /* dummy device for NAPI poll */ } reg_state; /* Called from unregister, can be used to call free_netdev */ @@ -1077,6 +1078,8 @@ extern void free_netdev(struct net_device *dev); extern void synchronize_net(void); extern int register_netdevice_notifier(struct notifier_block *nb); extern int unregister_netdevice_notifier(struct notifier_block *nb); +extern int init_dummy_netdev(struct net_device *dev); + extern int call_netdevice_notifiers(unsigned long val, struct net_device *dev); extern struct net_device *dev_get_by_index(struct net *net, int ifindex); extern struct net_device *__dev_get_by_index(struct net *net, int ifindex); diff --git a/net/core/dev.c b/net/core/dev.c index 60377b6c0a8..8d675975d85 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4430,6 +4430,45 @@ err_uninit: goto out; } +/** + * init_dummy_netdev - init a dummy network device for NAPI + * @dev: device to init + * + * This takes a network device structure and initialize the minimum + * amount of fields so it can be used to schedule NAPI polls without + * registering a full blown interface. This is to be used by drivers + * that need to tie several hardware interfaces to a single NAPI + * poll scheduler due to HW limitations. + */ +int init_dummy_netdev(struct net_device *dev) +{ + /* Clear everything. Note we don't initialize spinlocks + * are they aren't supposed to be taken by any of the + * NAPI code and this dummy netdev is supposed to be + * only ever used for NAPI polls + */ + memset(dev, 0, sizeof(struct net_device)); + + /* make sure we BUG if trying to hit standard + * register/unregister code path + */ + dev->reg_state = NETREG_DUMMY; + + /* initialize the ref count */ + atomic_set(&dev->refcnt, 1); + + /* NAPI wants this */ + INIT_LIST_HEAD(&dev->napi_list); + + /* a dummy interface is started by default */ + set_bit(__LINK_STATE_PRESENT, &dev->state); + set_bit(__LINK_STATE_START, &dev->state); + + return 0; +} +EXPORT_SYMBOL_GPL(init_dummy_netdev); + + /** * register_netdev - register a network device * @dev: device to register -- cgit v1.2.3-70-g09d2 From c53a6ee88b0a91bd012ef1b7988c0b93dae6f24d Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Wed, 14 Jan 2009 21:06:55 -0800 Subject: can: fix slowpath issue in hrtimer callback function Due to the loopback functionality in can_send() we can not invoke it from hardirq context which was done inside the bcm_tx_timeout_handler() hrtimer callback: [ 700.361154] [] warn_slowpath+0x80/0xb6 [ 700.361163] [] valid_state+0x125/0x136 [ 700.361171] [] mark_lock+0x18e/0x332 [ 700.361180] [] __lock_acquire+0x12e/0xb1e [ 700.361189] [] bcm_tx_timeout_handler+0x0/0xbc [can_bcm] [ 700.361198] [] dev_queue_xmit+0x191/0x479 [ 700.361206] [] __local_bh_disable+0x2b/0x64 [ 700.361213] [] dev_queue_xmit+0x191/0x479 [ 700.361225] [] can_send+0xd7/0x11a [can] [ 700.361235] [] bcm_can_tx+0x9d/0xd9 [can_bcm] [ 700.361245] [] bcm_tx_timeout_handler+0x6a/0xbc [can_bcm] [ 700.361255] [] bcm_tx_timeout_handler+0x0/0xbc [can_bcm] [ 700.361263] [] __run_hrtimer+0x5a/0x86 [ 700.361273] [] bcm_tx_timeout_handler+0x0/0xbc [can_bcm] [ 700.361282] [] hrtimer_interrupt+0xb9/0x110 This patch moves the rest of the functionality from the hrtimer callback to the already existing tasklet to fix this slowpath problem. Signed-off-by: Oliver Hartkopp Signed-off-by: David S. Miller --- net/can/bcm.c | 57 ++++++++++++++++++++++++++++++--------------------------- 1 file changed, 30 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/can/bcm.c b/net/can/bcm.c index 1649c8ab2c2..b7c7d465113 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -347,51 +347,54 @@ static void bcm_tx_timeout_tsklet(unsigned long data) struct bcm_op *op = (struct bcm_op *)data; struct bcm_msg_head msg_head; - /* create notification to user */ - msg_head.opcode = TX_EXPIRED; - msg_head.flags = op->flags; - msg_head.count = op->count; - msg_head.ival1 = op->ival1; - msg_head.ival2 = op->ival2; - msg_head.can_id = op->can_id; - msg_head.nframes = 0; - - bcm_send_to_user(op, &msg_head, NULL, 0); -} - -/* - * bcm_tx_timeout_handler - performes cyclic CAN frame transmissions - */ -static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer) -{ - struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer); - enum hrtimer_restart ret = HRTIMER_NORESTART; - if (op->kt_ival1.tv64 && (op->count > 0)) { op->count--; - if (!op->count && (op->flags & TX_COUNTEVT)) - tasklet_schedule(&op->tsklet); + if (!op->count && (op->flags & TX_COUNTEVT)) { + + /* create notification to user */ + msg_head.opcode = TX_EXPIRED; + msg_head.flags = op->flags; + msg_head.count = op->count; + msg_head.ival1 = op->ival1; + msg_head.ival2 = op->ival2; + msg_head.can_id = op->can_id; + msg_head.nframes = 0; + + bcm_send_to_user(op, &msg_head, NULL, 0); + } } if (op->kt_ival1.tv64 && (op->count > 0)) { /* send (next) frame */ bcm_can_tx(op); - hrtimer_forward(hrtimer, ktime_get(), op->kt_ival1); - ret = HRTIMER_RESTART; + hrtimer_start(&op->timer, + ktime_add(ktime_get(), op->kt_ival1), + HRTIMER_MODE_ABS); } else { if (op->kt_ival2.tv64) { /* send (next) frame */ bcm_can_tx(op); - hrtimer_forward(hrtimer, ktime_get(), op->kt_ival2); - ret = HRTIMER_RESTART; + hrtimer_start(&op->timer, + ktime_add(ktime_get(), op->kt_ival2), + HRTIMER_MODE_ABS); } } +} - return ret; +/* + * bcm_tx_timeout_handler - performes cyclic CAN frame transmissions + */ +static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer) +{ + struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer); + + tasklet_schedule(&op->tsklet); + + return HRTIMER_NORESTART; } /* -- cgit v1.2.3-70-g09d2 From 6364853dabe78dda7ffdfb8803c1e56c0fff2e43 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 15 Jan 2009 13:51:29 -0800 Subject: 9p: disallow RDMA if RDMA CM isn't available If INET=y and INFINIBAND=y, but IPV6=m then INFINIBAND_ADDR_TRANS is set to n and the RDMA CM functions rdma_connect() et al are not built. However, the current config dependencies allow NET_9P_RDMA to be selected in this, which leads to a build failure. Fix this by adding a dependency on INFINIBAND_ADDR_TRANS to disallow NET_9P_RDMA in this case. Reported-by: Randy Dunlap Signed-off-by: Roland Dreier Acked-by: Randy Dunlap Tested-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/9p/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/9p/Kconfig b/net/9p/Kconfig index 0663f99e977..7ed75c7bd5d 100644 --- a/net/9p/Kconfig +++ b/net/9p/Kconfig @@ -23,7 +23,7 @@ config NET_9P_VIRTIO guest partitions and a host partition. config NET_9P_RDMA - depends on INET && INFINIBAND && EXPERIMENTAL + depends on INET && INFINIBAND && INFINIBAND_ADDR_TRANS && EXPERIMENTAL tristate "9P RDMA Transport (Experimental)" help This builds support for an RDMA transport. -- cgit v1.2.3-70-g09d2