From fa86d322d89995fef1bfb5cc768b89d8c22ea0d9 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 24 Mar 2008 14:48:59 -0700 Subject: [NEIGH]: Fix race between pneigh deletion and ipv6's ndisc_recv_ns (v3). Proxy neighbors do not have any reference counting, so any caller of pneigh_lookup (unless it's a netlink triggered add/del routine) should _not_ perform any actions on the found proxy entry. There's one exception from this rule - the ipv6's ndisc_recv_ns() uses found entry to check the flags for NTF_ROUTER. This creates a race between the ndisc and pneigh_delete - after the pneigh is returned to the caller, the nd_tbl.lock is dropped and the deleting procedure may proceed. One of the fixes would be to add a reference counting, but this problem exists for ndisc only. Besides such a patch would be too big for -rc4. So I propose to introduce a __pneigh_lookup() which is supposed to be called with the lock held and use it in ndisc code to check the flags on alive pneigh entry. Changes from v2: As David noticed, Exported the __pneigh_lookup() to ipv6 module. The checkpatch generates a warning on it, since the EXPORT_SYMBOL does not follow the symbol itself, but in this file all the exports come at the end, so I decided no to break this harmony. Changes from v1: Fixed comments from YOSHIFUJI - indentation of prototype in header and the pndisc_check_router() name - and a compilation fix, pointed by Daniel - the is_routed was (falsely) considered as uninitialized by gcc. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/net/neighbour.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/net') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index ebbfb509822..64a5f0120b5 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -218,6 +218,10 @@ extern unsigned long neigh_rand_reach_time(unsigned long base); extern void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, struct sk_buff *skb); extern struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl, struct net *net, const void *key, struct net_device *dev, int creat); +extern struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl, + struct net *net, + const void *key, + struct net_device *dev); extern int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *key, struct net_device *dev); extern void neigh_app_ns(struct neighbour *n); -- cgit v1.2.3-70-g09d2 From df9dcb4588aca9cc243cf1f3f454361a84e1cbdb Mon Sep 17 00:00:00 2001 From: Kazunori MIYAZAWA Date: Mon, 24 Mar 2008 14:51:51 -0700 Subject: [IPSEC]: Fix inter address family IPsec tunnel handling. Signed-off-by: Kazunori MIYAZAWA Signed-off-by: David S. Miller --- include/net/xfrm.h | 23 +++++++++++++++++++ net/ipv4/xfrm4_mode_tunnel.c | 2 +- net/ipv4/xfrm4_output.c | 2 +- net/ipv6/xfrm6_mode_tunnel.c | 2 +- net/ipv6/xfrm6_output.c | 2 +- net/key/af_key.c | 2 +- net/xfrm/xfrm_input.c | 22 +++++++++++++++--- net/xfrm/xfrm_output.c | 18 ++++++++++++++- net/xfrm/xfrm_state.c | 54 ++++++++++++++++++++++++++++++++++++++------ net/xfrm/xfrm_user.c | 7 ++---- 10 files changed, 113 insertions(+), 21 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 619c53bc3cd..4e6f9568cbe 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -204,6 +204,7 @@ struct xfrm_state * transformer. */ const struct xfrm_type *type; struct xfrm_mode *inner_mode; + struct xfrm_mode *inner_mode_iaf; struct xfrm_mode *outer_mode; /* Security context */ @@ -387,6 +388,27 @@ enum { extern int xfrm_register_mode(struct xfrm_mode *mode, int family); extern int xfrm_unregister_mode(struct xfrm_mode *mode, int family); +static inline int xfrm_af2proto(unsigned int family) +{ + switch(family) { + case AF_INET: + return IPPROTO_IPIP; + case AF_INET6: + return IPPROTO_IPV6; + default: + return 0; + } +} + +static inline struct xfrm_mode *xfrm_ip2inner_mode(struct xfrm_state *x, int ipproto) +{ + if ((ipproto == IPPROTO_IPIP && x->props.family == AF_INET) || + (ipproto == IPPROTO_IPV6 && x->props.family == AF_INET6)) + return x->inner_mode; + else + return x->inner_mode_iaf; +} + struct xfrm_tmpl { /* id in template is interpreted as: @@ -1253,6 +1275,7 @@ extern int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, extern int xfrm_input_resume(struct sk_buff *skb, int nexthdr); extern int xfrm_output_resume(struct sk_buff *skb, int err); extern int xfrm_output(struct sk_buff *skb); +extern int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm4_extract_header(struct sk_buff *skb); extern int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 8dee617ee90..584e6d74e3a 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -41,7 +41,7 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) top_iph->ihl = 5; top_iph->version = 4; - top_iph->protocol = x->inner_mode->afinfo->proto; + top_iph->protocol = xfrm_af2proto(skb->dst->ops->family); /* DS disclosed */ top_iph->tos = INET_ECN_encapsulate(XFRM_MODE_SKB_CB(skb)->tos, diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index d5a58a81802..8c3180adddb 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -56,7 +56,7 @@ int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb) { int err; - err = x->inner_mode->afinfo->extract_output(x, skb); + err = xfrm_inner_extract_output(x, skb); if (err) return err; diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 0c742faaa30..e20529b4c82 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -45,7 +45,7 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) memcpy(top_iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl, sizeof(top_iph->flow_lbl)); - top_iph->nexthdr = x->inner_mode->afinfo->proto; + top_iph->nexthdr = xfrm_af2proto(skb->dst->ops->family); dsfield = XFRM_MODE_SKB_CB(skb)->tos; dsfield = INET_ECN_encapsulate(dsfield, dsfield); diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 79ccfb08073..0af823cf7f1 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -62,7 +62,7 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) { int err; - err = x->inner_mode->afinfo->extract_output(x, skb); + err = xfrm_inner_extract_output(x, skb); if (err) return err; diff --git a/net/key/af_key.c b/net/key/af_key.c index 8b5f486ac80..e9ef9af4a53 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1219,7 +1219,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr, x->sel.prefixlen_s = addr->sadb_address_prefixlen; } - if (!x->sel.family) + if (x->props.mode == XFRM_MODE_TRANSPORT) x->sel.family = x->props.family; if (ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1]) { diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 62188c6a06d..75279402ccf 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -84,14 +84,21 @@ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq) int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb) { + struct xfrm_mode *inner_mode = x->inner_mode; int err; err = x->outer_mode->afinfo->extract_input(x, skb); if (err) return err; - skb->protocol = x->inner_mode->afinfo->eth_proto; - return x->inner_mode->input2(x, skb); + if (x->sel.family == AF_UNSPEC) { + inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); + if (inner_mode == NULL) + return -EAFNOSUPPORT; + } + + skb->protocol = inner_mode->afinfo->eth_proto; + return inner_mode->input2(x, skb); } EXPORT_SYMBOL(xfrm_prepare_input); @@ -101,6 +108,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) __be32 seq; struct xfrm_state *x; xfrm_address_t *daddr; + struct xfrm_mode *inner_mode; unsigned int family; int decaps = 0; int async = 0; @@ -207,7 +215,15 @@ resume: XFRM_MODE_SKB_CB(skb)->protocol = nexthdr; - if (x->inner_mode->input(x, skb)) { + inner_mode = x->inner_mode; + + if (x->sel.family == AF_UNSPEC) { + inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); + if (inner_mode == NULL) + goto drop; + } + + if (inner_mode->input(x, skb)) { XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEMODEERROR); goto drop; } diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 569d377932c..2519129c6d2 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -124,7 +124,7 @@ int xfrm_output_resume(struct sk_buff *skb, int err) if (!x) return dst_output(skb); - err = nf_hook(x->inner_mode->afinfo->family, + err = nf_hook(skb->dst->ops->family, NF_INET_POST_ROUTING, skb, NULL, skb->dst->dev, xfrm_output2); if (unlikely(err != 1)) @@ -193,4 +193,20 @@ int xfrm_output(struct sk_buff *skb) return xfrm_output2(skb); } + +int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb) +{ + struct xfrm_mode *inner_mode; + if (x->sel.family == AF_UNSPEC) + inner_mode = xfrm_ip2inner_mode(x, + xfrm_af2proto(skb->dst->ops->family)); + else + inner_mode = x->inner_mode; + + if (inner_mode == NULL) + return -EAFNOSUPPORT; + return inner_mode->afinfo->extract_output(x, skb); +} + EXPORT_SYMBOL_GPL(xfrm_output); +EXPORT_SYMBOL_GPL(xfrm_inner_extract_output); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 7ba65e82941..58f1f9347b5 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -388,6 +388,8 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) kfree(x->coaddr); if (x->inner_mode) xfrm_put_mode(x->inner_mode); + if (x->inner_mode_iaf) + xfrm_put_mode(x->inner_mode_iaf); if (x->outer_mode) xfrm_put_mode(x->outer_mode); if (x->type) { @@ -523,6 +525,8 @@ struct xfrm_state *xfrm_state_alloc(void) x->lft.hard_packet_limit = XFRM_INF; x->replay_maxage = 0; x->replay_maxdiff = 0; + x->inner_mode = NULL; + x->inner_mode_iaf = NULL; spin_lock_init(&x->lock); } return x; @@ -796,7 +800,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, selector. */ if (x->km.state == XFRM_STATE_VALID) { - if (!xfrm_selector_match(&x->sel, fl, x->sel.family) || + if ((x->sel.family && !xfrm_selector_match(&x->sel, fl, x->sel.family)) || !security_xfrm_state_pol_flow_match(x, pol, fl)) continue; if (!best || @@ -1944,6 +1948,7 @@ int xfrm_state_mtu(struct xfrm_state *x, int mtu) int xfrm_init_state(struct xfrm_state *x) { struct xfrm_state_afinfo *afinfo; + struct xfrm_mode *inner_mode; int family = x->props.family; int err; @@ -1962,13 +1967,48 @@ int xfrm_init_state(struct xfrm_state *x) goto error; err = -EPROTONOSUPPORT; - x->inner_mode = xfrm_get_mode(x->props.mode, x->sel.family); - if (x->inner_mode == NULL) - goto error; - if (!(x->inner_mode->flags & XFRM_MODE_FLAG_TUNNEL) && - family != x->sel.family) - goto error; + if (x->sel.family != AF_UNSPEC) { + inner_mode = xfrm_get_mode(x->props.mode, x->sel.family); + if (inner_mode == NULL) + goto error; + + if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL) && + family != x->sel.family) { + xfrm_put_mode(inner_mode); + goto error; + } + + x->inner_mode = inner_mode; + } else { + struct xfrm_mode *inner_mode_iaf; + + inner_mode = xfrm_get_mode(x->props.mode, AF_INET); + if (inner_mode == NULL) + goto error; + + if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL)) { + xfrm_put_mode(inner_mode); + goto error; + } + + inner_mode_iaf = xfrm_get_mode(x->props.mode, AF_INET6); + if (inner_mode_iaf == NULL) + goto error; + + if (!(inner_mode_iaf->flags & XFRM_MODE_FLAG_TUNNEL)) { + xfrm_put_mode(inner_mode_iaf); + goto error; + } + + if (x->props.family == AF_INET) { + x->inner_mode = inner_mode; + x->inner_mode_iaf = inner_mode_iaf; + } else { + x->inner_mode = inner_mode_iaf; + x->inner_mode_iaf = inner_mode; + } + } x->type = xfrm_get_type(x->id.proto, family); if (x->type == NULL) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index f971ca5645f..5d96f2728dc 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -288,12 +288,9 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info * memcpy(&x->props.saddr, &p->saddr, sizeof(x->props.saddr)); x->props.flags = p->flags; - /* - * Set inner address family if the KM left it as zero. - * See comment in validate_tmpl. - */ - if (!x->sel.family) + if (x->props.mode == XFRM_MODE_TRANSPORT) x->sel.family = p->family; + } /* -- cgit v1.2.3-70-g09d2 From 732c8bd590625e8bc0b88313b82930e336b2bec4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 26 Mar 2008 16:51:09 -0700 Subject: [IPSEC]: Fix BEET output The IPv6 BEET output function is incorrectly including the inner header in the payload to be protected. This causes a crash as the packet doesn't actually have that many bytes for a second header. The IPv4 BEET output on the other hand is broken when it comes to handling an inner IPv6 header since it always assumes an inner IPv4 header. This patch fixes both by making sure that neither BEET output function touches the inner header at all. All access is now done through the protocol-independent cb structure. Two new attributes are added to make this work, the IP header length and the IPv4 option length. They're filled in by the inner mode's output function. Thanks to Joakim Koskela for finding this problem. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/xfrm.h | 6 ++++++ net/ipv4/xfrm4_mode_beet.c | 11 +++++------ net/ipv4/xfrm4_state.c | 2 ++ net/ipv6/xfrm6_mode_beet.c | 1 + net/ipv6/xfrm6_state.c | 2 ++ 5 files changed, 16 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 4e6f9568cbe..0d255ae008b 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -552,6 +552,9 @@ struct xfrm_mode_skb_cb { __be16 id; __be16 frag_off; + /* IP header length (excluding options or extension headers). */ + u8 ihl; + /* TOS for IPv4, class for IPv6. */ u8 tos; @@ -561,6 +564,9 @@ struct xfrm_mode_skb_cb { /* Protocol for IPv4, NH for IPv6. */ u8 protocol; + /* Option length for IPv4, zero for IPv6. */ + u8 optlen; + /* Used by IPv6 only, zero for IPv4. */ u8 flow_lbl[3]; }; diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c index b47030ba162..9c798abce73 100644 --- a/net/ipv4/xfrm4_mode_beet.c +++ b/net/ipv4/xfrm4_mode_beet.c @@ -39,13 +39,11 @@ static void xfrm4_beet_make_header(struct sk_buff *skb) static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb) { struct ip_beet_phdr *ph; - struct iphdr *iph, *top_iph; + struct iphdr *top_iph; int hdrlen, optlen; - iph = ip_hdr(skb); - hdrlen = 0; - optlen = iph->ihl * 4 - sizeof(*iph); + optlen = XFRM_MODE_SKB_CB(skb)->optlen; if (unlikely(optlen)) hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4); @@ -53,11 +51,12 @@ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb) hdrlen); skb->mac_header = skb->network_header + offsetof(struct iphdr, protocol); - skb->transport_header = skb->network_header + sizeof(*iph); + skb->transport_header = skb->network_header + sizeof(*top_iph); xfrm4_beet_make_header(skb); - ph = (struct ip_beet_phdr *)__skb_pull(skb, sizeof(*iph) - hdrlen); + ph = (struct ip_beet_phdr *) + __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdrlen); top_iph = ip_hdr(skb); diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index fdeebe68a37..07735ed280d 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -52,10 +52,12 @@ int xfrm4_extract_header(struct sk_buff *skb) { struct iphdr *iph = ip_hdr(skb); + XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph); XFRM_MODE_SKB_CB(skb)->id = iph->id; XFRM_MODE_SKB_CB(skb)->frag_off = iph->frag_off; XFRM_MODE_SKB_CB(skb)->tos = iph->tos; XFRM_MODE_SKB_CB(skb)->ttl = iph->ttl; + XFRM_MODE_SKB_CB(skb)->optlen = iph->ihl * 4 - sizeof(*iph); memset(XFRM_MODE_SKB_CB(skb)->flow_lbl, 0, sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl)); diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c index 0527d11c1ae..d6ce400f585 100644 --- a/net/ipv6/xfrm6_mode_beet.c +++ b/net/ipv6/xfrm6_mode_beet.c @@ -45,6 +45,7 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb) skb->mac_header = skb->network_header + offsetof(struct ipv6hdr, nexthdr); skb->transport_header = skb->network_header + sizeof(*top_iph); + __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl); xfrm6_beet_make_header(skb); diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index dc817e035e2..ff1e1db8e23 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -174,10 +174,12 @@ int xfrm6_extract_header(struct sk_buff *skb) { struct ipv6hdr *iph = ipv6_hdr(skb); + XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph); XFRM_MODE_SKB_CB(skb)->id = 0; XFRM_MODE_SKB_CB(skb)->frag_off = htons(IP_DF); XFRM_MODE_SKB_CB(skb)->tos = ipv6_get_dsfield(iph); XFRM_MODE_SKB_CB(skb)->ttl = iph->hop_limit; + XFRM_MODE_SKB_CB(skb)->optlen = 0; memcpy(XFRM_MODE_SKB_CB(skb)->flow_lbl, iph->flow_lbl, sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl)); -- cgit v1.2.3-70-g09d2 From a5a04819c5740cb1aa217af2cc8f5ef26f33d744 Mon Sep 17 00:00:00 2001 From: Joonwoo Park Date: Fri, 28 Mar 2008 16:28:36 -0700 Subject: [LLC]: station source mac address kill unnecessary llc_station_mac_sa. Signed-off-by: Joonwoo Park Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/llc.h | 1 - net/llc/llc_core.c | 8 +------- net/llc/llc_station.c | 6 +++--- 3 files changed, 4 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/llc.h b/include/net/llc.h index f5024583fc8..7940da1606e 100644 --- a/include/net/llc.h +++ b/include/net/llc.h @@ -65,7 +65,6 @@ struct llc_sap { extern struct list_head llc_sap_list; extern rwlock_t llc_sap_list_lock; -extern unsigned char llc_station_mac_sa[ETH_ALEN]; extern int llc_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index 248b5903bb1..00de27cef46 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -25,8 +25,6 @@ LIST_HEAD(llc_sap_list); DEFINE_RWLOCK(llc_sap_list_lock); -unsigned char llc_station_mac_sa[ETH_ALEN]; - /** * llc_sap_alloc - allocates and initializes sap. * @@ -37,8 +35,8 @@ static struct llc_sap *llc_sap_alloc(void) struct llc_sap *sap = kzalloc(sizeof(*sap), GFP_ATOMIC); if (sap) { + /* sap->laddr.mac - leave as a null, it's filled by bind */ sap->state = LLC_SAP_STATE_ACTIVE; - memcpy(sap->laddr.mac, llc_station_mac_sa, ETH_ALEN); rwlock_init(&sap->sk_list.lock); atomic_set(&sap->refcnt, 1); } @@ -167,10 +165,6 @@ static int __init llc_init(void) if (dev != NULL) dev = next_net_device(dev); - if (dev != NULL) - memcpy(llc_station_mac_sa, dev->dev_addr, ETH_ALEN); - else - memset(llc_station_mac_sa, 0, ETH_ALEN); dev_add_pack(&llc_packet_type); dev_add_pack(&llc_tr_packet_type); return 0; diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index 6f2ea209032..959e7f31833 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -259,7 +259,7 @@ static int llc_station_ac_send_null_dsap_xid_c(struct sk_buff *skb) goto out; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, 0, 0, LLC_PDU_CMD); llc_pdu_init_as_xid_cmd(nskb, LLC_XID_NULL_CLASS_2, 127); - rc = llc_mac_hdr_init(nskb, llc_station_mac_sa, llc_station_mac_sa); + rc = llc_mac_hdr_init(nskb, skb->dev->dev_addr, skb->dev->dev_addr); if (unlikely(rc)) goto free; llc_station_send_pdu(nskb); @@ -283,7 +283,7 @@ static int llc_station_ac_send_xid_r(struct sk_buff *skb) llc_pdu_decode_ssap(skb, &dsap); llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, 0, dsap, LLC_PDU_RSP); llc_pdu_init_as_xid_rsp(nskb, LLC_XID_NULL_CLASS_2, 127); - rc = llc_mac_hdr_init(nskb, llc_station_mac_sa, mac_da); + rc = llc_mac_hdr_init(nskb, skb->dev->dev_addr, mac_da); if (unlikely(rc)) goto free; llc_station_send_pdu(nskb); @@ -307,7 +307,7 @@ static int llc_station_ac_send_test_r(struct sk_buff *skb) llc_pdu_decode_ssap(skb, &dsap); llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, 0, dsap, LLC_PDU_RSP); llc_pdu_init_as_test_rsp(nskb, skb); - rc = llc_mac_hdr_init(nskb, llc_station_mac_sa, mac_da); + rc = llc_mac_hdr_init(nskb, skb->dev->dev_addr, mac_da); if (unlikely(rc)) goto free; llc_station_send_pdu(nskb); -- cgit v1.2.3-70-g09d2 From f83f1768f833cb45bc93429fdc552252a4f55ac3 Mon Sep 17 00:00:00 2001 From: Joonwoo Park Date: Mon, 31 Mar 2008 21:02:47 -0700 Subject: [LLC]: skb allocation size for responses Allocate the skb for llc responses with the received packet size by using the size adjustable llc_frame_alloc. Don't allocate useless extra payload. Cleanup magic numbers. So, this fixes oops. Reported by Jim Westfall: kernel: skb_over_panic: text:c0541fc7 len:1000 put:997 head:c166ac00 data:c166ac2f tail:0xc166b017 end:0xc166ac80 dev:eth0 kernel: ------------[ cut here ]------------ kernel: kernel BUG at net/core/skbuff.c:95! Signed-off-by: Joonwoo Park Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/llc_pdu.h | 4 ++-- include/net/llc_sap.h | 7 +++++-- net/llc/llc_c_ac.c | 47 +++++++++++++++++++++++++---------------------- net/llc/llc_pdu.c | 2 +- net/llc/llc_s_ac.c | 9 +++++++-- net/llc/llc_sap.c | 27 ++++++++++++++++++++++++--- net/llc/llc_station.c | 13 ++++++++++--- 7 files changed, 74 insertions(+), 35 deletions(-) (limited to 'include/net') diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h index 4a8f58b17e4..75b8e2968c9 100644 --- a/include/net/llc_pdu.h +++ b/include/net/llc_pdu.h @@ -381,7 +381,7 @@ static inline void llc_pdu_init_as_xid_cmd(struct sk_buff *skb, xid_info->fmt_id = LLC_XID_FMT_ID; /* 0x81 */ xid_info->type = svcs_supported; xid_info->rw = rx_window << 1; /* size of receive window */ - skb_put(skb, 3); + skb_put(skb, sizeof(struct llc_xid_info)); } /** @@ -406,7 +406,7 @@ static inline void llc_pdu_init_as_xid_rsp(struct sk_buff *skb, xid_info->fmt_id = LLC_XID_FMT_ID; xid_info->type = svcs_supported; xid_info->rw = rx_window << 1; - skb_put(skb, 3); + skb_put(skb, sizeof(struct llc_xid_info)); } /* LLC Type 2 FRMR response information field format */ diff --git a/include/net/llc_sap.h b/include/net/llc_sap.h index 2c56dbece72..ed25bec2f64 100644 --- a/include/net/llc_sap.h +++ b/include/net/llc_sap.h @@ -1,5 +1,8 @@ #ifndef LLC_SAP_H #define LLC_SAP_H + +#include + /* * Copyright (c) 1997 by Procom Technology,Inc. * 2001-2003 by Arnaldo Carvalho de Melo @@ -19,8 +22,8 @@ struct sock; extern void llc_sap_rtn_pdu(struct llc_sap *sap, struct sk_buff *skb); extern void llc_save_primitive(struct sock *sk, struct sk_buff* skb, unsigned char prim); -extern struct sk_buff *llc_alloc_frame(struct sock *sk, - struct net_device *dev); +extern struct sk_buff *llc_alloc_frame(struct sock *sk, struct net_device *dev, + u8 type, u32 data_size); extern void llc_build_and_send_test_pkt(struct llc_sap *sap, struct sk_buff *skb, diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index 860140caa6e..71a00225bdb 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -198,7 +198,7 @@ int llc_conn_ac_send_disc_cmd_p_set_x(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_U, 0); if (nskb) { struct llc_sap *sap = llc->sap; @@ -223,7 +223,7 @@ int llc_conn_ac_send_dm_rsp_f_set_p(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_U, 0); if (nskb) { struct llc_sap *sap = llc->sap; @@ -249,7 +249,7 @@ int llc_conn_ac_send_dm_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_U, 0); if (nskb) { struct llc_sap *sap = llc->sap; @@ -282,7 +282,8 @@ int llc_conn_ac_send_frmr_rsp_f_set_x(struct sock *sk, struct sk_buff *skb) llc_pdu_decode_pf_bit(skb, &f_bit); else f_bit = 0; - nskb = llc_alloc_frame(sk, llc->dev); + nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_U, + sizeof(struct llc_frmr_info)); if (nskb) { struct llc_sap *sap = llc->sap; @@ -306,7 +307,8 @@ int llc_conn_ac_resend_frmr_rsp_f_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_U, + sizeof(struct llc_frmr_info)); if (nskb) { struct llc_sap *sap = llc->sap; @@ -336,7 +338,8 @@ int llc_conn_ac_resend_frmr_rsp_f_set_p(struct sock *sk, struct sk_buff *skb) struct llc_sock *llc = llc_sk(sk); llc_pdu_decode_pf_bit(skb, &f_bit); - nskb = llc_alloc_frame(sk, llc->dev); + nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_U, + sizeof(struct llc_frmr_info)); if (nskb) { struct llc_sap *sap = llc->sap; struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); @@ -424,7 +427,7 @@ int llc_conn_ac_resend_i_xxx_x_set_0_or_send_rr(struct sock *sk, struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_U, 0); if (nskb) { struct llc_sap *sap = llc->sap; @@ -459,7 +462,7 @@ int llc_conn_ac_send_rej_cmd_p_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_S, 0); if (nskb) { struct llc_sap *sap = llc->sap; @@ -483,7 +486,7 @@ int llc_conn_ac_send_rej_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_S, 0); if (nskb) { struct llc_sap *sap = llc->sap; @@ -507,7 +510,7 @@ int llc_conn_ac_send_rej_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_S, 0); if (nskb) { struct llc_sap *sap = llc->sap; @@ -531,7 +534,7 @@ int llc_conn_ac_send_rnr_cmd_p_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_S, 0); if (nskb) { struct llc_sap *sap = llc->sap; @@ -555,7 +558,7 @@ int llc_conn_ac_send_rnr_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_S, 0); if (nskb) { struct llc_sap *sap = llc->sap; @@ -579,7 +582,7 @@ int llc_conn_ac_send_rnr_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_S, 0); if (nskb) { struct llc_sap *sap = llc->sap; @@ -615,7 +618,7 @@ int llc_conn_ac_opt_send_rnr_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_S, 0); if (nskb) { struct llc_sap *sap = llc->sap; @@ -639,7 +642,7 @@ int llc_conn_ac_send_rr_cmd_p_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_S, 0); if (nskb) { struct llc_sap *sap = llc->sap; @@ -663,7 +666,7 @@ int llc_conn_ac_send_rr_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_S, 0); if (nskb) { struct llc_sap *sap = llc->sap; @@ -688,7 +691,7 @@ int llc_conn_ac_send_ack_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_S, 0); if (nskb) { struct llc_sap *sap = llc->sap; @@ -712,7 +715,7 @@ int llc_conn_ac_send_rr_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_S, 0); if (nskb) { struct llc_sap *sap = llc->sap; @@ -736,7 +739,7 @@ int llc_conn_ac_send_ack_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_S, 0); if (nskb) { struct llc_sap *sap = llc->sap; @@ -770,7 +773,7 @@ int llc_conn_ac_send_sabme_cmd_p_set_x(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_U, 0); if (nskb) { struct llc_sap *sap = llc->sap; @@ -799,7 +802,7 @@ int llc_conn_ac_send_ua_rsp_f_set_p(struct sock *sk, struct sk_buff *skb) u8 f_bit; int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_U, 0); llc_pdu_decode_pf_bit(skb, &f_bit); if (nskb) { @@ -956,7 +959,7 @@ static int llc_conn_ac_send_rr_rsp_f_set_ackpf(struct sock *sk, { int rc = -ENOBUFS; struct llc_sock *llc = llc_sk(sk); - struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev, LLC_PDU_TYPE_S, 0); if (nskb) { struct llc_sap *sap = llc->sap; diff --git a/net/llc/llc_pdu.c b/net/llc/llc_pdu.c index fa8324396db..2e6cb79196b 100644 --- a/net/llc/llc_pdu.c +++ b/net/llc/llc_pdu.c @@ -241,7 +241,7 @@ void llc_pdu_init_as_frmr_rsp(struct sk_buff *skb, struct llc_pdu_sn *prev_pdu, FRMR_INFO_SET_PDU_INFO_2LONG_IND(frmr_info, vzyxw); FRMR_INFO_SET_PDU_INVALID_Nr_IND(frmr_info, vzyxw); FRMR_INFO_SET_PDU_INVALID_Ns_IND(frmr_info, vzyxw); - skb_put(skb, 5); + skb_put(skb, sizeof(struct llc_frmr_info)); } /** diff --git a/net/llc/llc_s_ac.c b/net/llc/llc_s_ac.c index ac3d93b210d..a94bd56bcac 100644 --- a/net/llc/llc_s_ac.c +++ b/net/llc/llc_s_ac.c @@ -103,7 +103,8 @@ int llc_sap_action_send_xid_r(struct llc_sap *sap, struct sk_buff *skb) llc_pdu_decode_sa(skb, mac_da); llc_pdu_decode_da(skb, mac_sa); llc_pdu_decode_ssap(skb, &dsap); - nskb = llc_alloc_frame(NULL, skb->dev); + nskb = llc_alloc_frame(NULL, skb->dev, LLC_PDU_TYPE_U, + sizeof(struct llc_xid_info)); if (!nskb) goto out; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, dsap, @@ -144,11 +145,15 @@ int llc_sap_action_send_test_r(struct llc_sap *sap, struct sk_buff *skb) u8 mac_da[ETH_ALEN], mac_sa[ETH_ALEN], dsap; struct sk_buff *nskb; int rc = 1; + u32 data_size; llc_pdu_decode_sa(skb, mac_da); llc_pdu_decode_da(skb, mac_sa); llc_pdu_decode_ssap(skb, &dsap); - nskb = llc_alloc_frame(NULL, skb->dev); + + /* The test request command is type U (llc_len = 3) */ + data_size = ntohs(eth_hdr(skb)->h_proto) - 3; + nskb = llc_alloc_frame(NULL, skb->dev, LLC_PDU_TYPE_U, data_size); if (!nskb) goto out; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, dsap, diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index 2525165e2e8..e2ddde75501 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -24,20 +24,41 @@ #include #include +static int llc_mac_header_len(unsigned short devtype) +{ + switch (devtype) { + case ARPHRD_ETHER: + case ARPHRD_LOOPBACK: + return sizeof(struct ethhdr); +#ifdef CONFIG_TR + case ARPHRD_IEEE802_TR: + return sizeof(struct trh_hdr); +#endif + } + return 0; +} + /** * llc_alloc_frame - allocates sk_buff for frame * @dev: network device this skb will be sent over + * @type: pdu type to allocate + * @data_size: data size to allocate * * Allocates an sk_buff for frame and initializes sk_buff fields. * Returns allocated skb or %NULL when out of memory. */ -struct sk_buff *llc_alloc_frame(struct sock *sk, struct net_device *dev) +struct sk_buff *llc_alloc_frame(struct sock *sk, struct net_device *dev, + u8 type, u32 data_size) { - struct sk_buff *skb = alloc_skb(128, GFP_ATOMIC); + int hlen = type == LLC_PDU_TYPE_U ? 3 : 4; + struct sk_buff *skb; + + hlen += llc_mac_header_len(dev->type); + skb = alloc_skb(hlen + data_size, GFP_ATOMIC); if (skb) { skb_reset_mac_header(skb); - skb_reserve(skb, 50); + skb_reserve(skb, hlen); skb_reset_network_header(skb); skb_reset_transport_header(skb); skb->protocol = htons(ETH_P_802_2); diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index 959e7f31833..83da1333949 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -253,7 +253,8 @@ static int llc_station_ac_inc_xid_r_cnt_by_1(struct sk_buff *skb) static int llc_station_ac_send_null_dsap_xid_c(struct sk_buff *skb) { int rc = 1; - struct sk_buff *nskb = llc_alloc_frame(NULL, skb->dev); + struct sk_buff *nskb = llc_alloc_frame(NULL, skb->dev, LLC_PDU_TYPE_U, + sizeof(struct llc_xid_info)); if (!nskb) goto out; @@ -274,7 +275,8 @@ static int llc_station_ac_send_xid_r(struct sk_buff *skb) { u8 mac_da[ETH_ALEN], dsap; int rc = 1; - struct sk_buff* nskb = llc_alloc_frame(NULL, skb->dev); + struct sk_buff *nskb = llc_alloc_frame(NULL, skb->dev, LLC_PDU_TYPE_U, + sizeof(struct llc_xid_info)); if (!nskb) goto out; @@ -298,7 +300,12 @@ static int llc_station_ac_send_test_r(struct sk_buff *skb) { u8 mac_da[ETH_ALEN], dsap; int rc = 1; - struct sk_buff *nskb = llc_alloc_frame(NULL, skb->dev); + u32 data_size; + struct sk_buff *nskb; + + /* The test request command is type U (llc_len = 3) */ + data_size = ntohs(eth_hdr(skb)->h_proto) - 3; + nskb = llc_alloc_frame(NULL, skb->dev, LLC_PDU_TYPE_U, data_size); if (!nskb) goto out; -- cgit v1.2.3-70-g09d2 From 882bebaaca4bb1484078d44ef011f918c0e1e14e Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Mon, 7 Apr 2008 22:33:07 -0700 Subject: [TCP]: tcp_simple_retransmit can cause S+L MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes Bugzilla #10384 tcp_simple_retransmit does L increment without any checking whatsoever for overflowing S+L when Reno is in use. The simplest scenario I can currently think of is rather complex in practice (there might be some more straightforward cases though). Ie., if mss is reduced during mtu probing, it may end up marking everything lost and if some duplicate ACKs arrived prior to that sacked_out will be non-zero as well, leading to S+L > packets_out, tcp_clean_rtx_queue on the next cumulative ACK or tcp_fastretrans_alert on the next duplicate ACK will fix the S counter. More straightforward (but questionable) solution would be to just call tcp_reset_reno_sack() in tcp_simple_retransmit but it would negatively impact the probe's retransmission, ie., the retransmissions would not occur if some duplicate ACKs had arrived. So I had to add reno sacked_out reseting to CA_Loss state when the first cumulative ACK arrives (this stale sacked_out might actually be the explanation for the reports of left_out overflows in kernel prior to 2.6.23 and S+L overflow reports of 2.6.24). However, this alone won't be enough to fix kernel before 2.6.24 because it is building on top of the commit 1b6d427bb7e ([TCP]: Reduce sacked_out with reno when purging write_queue) to keep the sacked_out from overflowing. Signed-off-by: Ilpo Järvinen Reported-by: Alessandro Suardi Signed-off-by: David S. Miller --- include/net/tcp.h | 2 ++ net/ipv4/tcp_input.c | 24 ++++++++++++++++++------ net/ipv4/tcp_output.c | 3 +++ 3 files changed, 23 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 7de4ea3a04d..4fd3eb2f8ec 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -752,6 +752,8 @@ static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp) return tp->packets_out - tcp_left_out(tp) + tp->retrans_out; } +extern int tcp_limit_reno_sacked(struct tcp_sock *tp); + /* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd. * The exception is rate halving phase, when cwnd is decreasing towards * ssthresh. diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 7d0958785bf..b4812c3cbbc 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1625,13 +1625,11 @@ out: return flag; } -/* If we receive more dupacks than we expected counting segments - * in assumption of absent reordering, interpret this as reordering. - * The only another reason could be bug in receiver TCP. +/* Limits sacked_out so that sum with lost_out isn't ever larger than + * packets_out. Returns zero if sacked_out adjustement wasn't necessary. */ -static void tcp_check_reno_reordering(struct sock *sk, const int addend) +int tcp_limit_reno_sacked(struct tcp_sock *tp) { - struct tcp_sock *tp = tcp_sk(sk); u32 holes; holes = max(tp->lost_out, 1U); @@ -1639,8 +1637,20 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend) if ((tp->sacked_out + holes) > tp->packets_out) { tp->sacked_out = tp->packets_out - holes; - tcp_update_reordering(sk, tp->packets_out + addend, 0); + return 1; } + return 0; +} + +/* If we receive more dupacks than we expected counting segments + * in assumption of absent reordering, interpret this as reordering. + * The only another reason could be bug in receiver TCP. + */ +static void tcp_check_reno_reordering(struct sock *sk, const int addend) +{ + struct tcp_sock *tp = tcp_sk(sk); + if (tcp_limit_reno_sacked(tp)) + tcp_update_reordering(sk, tp->packets_out + addend, 0); } /* Emulate SACKs for SACKless connection: account for a new dupack. */ @@ -2600,6 +2610,8 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) case TCP_CA_Loss: if (flag & FLAG_DATA_ACKED) icsk->icsk_retransmits = 0; + if (tcp_is_reno(tp) && flag & FLAG_SND_UNA_ADVANCED) + tcp_reset_reno_sack(tp); if (!tcp_try_undo_loss(sk)) { tcp_moderate_cwnd(tp); tcp_xmit_retransmit_queue(sk); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 72b9350006f..d29ef79c00c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1808,6 +1808,9 @@ void tcp_simple_retransmit(struct sock *sk) if (!lost) return; + if (tcp_is_reno(tp)) + tcp_limit_reno_sacked(tp); + tcp_verify_left_out(tp); /* Don't muck with the congestion window here. -- cgit v1.2.3-70-g09d2