From 1fbc78439291627642517f15b9b91f3125588143 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 25 Mar 2011 20:33:23 -0700 Subject: ipv4: do not ignore route errors The "ipv4: Inline fib_semantic_match into check_leaf" change forgets to return the route errors. check_leaf should return the same results as fib_table_lookup. Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 90a3ff60559..b92c86f6e9b 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1365,9 +1365,9 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, err = fib_props[fa->fa_type].error; if (err) { #ifdef CONFIG_IP_FIB_TRIE_STATS - t->stats.semantic_match_miss++; + t->stats.semantic_match_passed++; #endif - return 1; + return err; } if (fi->fib_flags & RTNH_F_DEAD) continue; -- cgit v1.2.3-70-g09d2 From 3bc07321ccc236f693ce1b6a8786f0a2e38bb87e Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 15 Mar 2011 21:08:28 +0000 Subject: xfrm: Force a dst refcount before entering the xfrm type handlers Crypto requests might return asynchronous. In this case we leave the rcu protected region, so force a refcount on the skb's destination entry before we enter the xfrm type input/output handlers. This fixes a crash when a route is deleted whilst sending IPsec data that is transformed by an asynchronous algorithm. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/xfrm/xfrm_input.c | 2 ++ net/xfrm/xfrm_output.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 872065ca7f8..341cd1189f8 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -190,6 +190,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) XFRM_SKB_CB(skb)->seq.input.low = seq; XFRM_SKB_CB(skb)->seq.input.hi = seq_hi; + skb_dst_force(skb); + nexthdr = x->type->input(x, skb); if (nexthdr == -EINPROGRESS) diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 1aba03f449c..8f3f0eedc5a 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -78,6 +78,8 @@ static int xfrm_output_one(struct sk_buff *skb, int err) spin_unlock_bh(&x->lock); + skb_dst_force(skb); + err = x->type->output(x, skb); if (err == -EINPROGRESS) goto out_exit; -- cgit v1.2.3-70-g09d2 From e433430a0ca9cc1b851a83ac3b305e955b64880a Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 15 Mar 2011 21:09:32 +0000 Subject: dst: Clone child entry in skb_dst_pop We clone the child entry in skb_dst_pop before we call skb_dst_drop(). Otherwise we might kill the child right before we return it to the caller. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- include/net/dst.h | 2 +- net/xfrm/xfrm_output.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/dst.h b/include/net/dst.h index 2a46cbaef92..75b95df4afe 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -345,7 +345,7 @@ static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev) static inline struct dst_entry *skb_dst_pop(struct sk_buff *skb) { - struct dst_entry *child = skb_dst(skb)->child; + struct dst_entry *child = dst_clone(skb_dst(skb)->child); skb_dst_drop(skb); return child; diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 8f3f0eedc5a..47bacd8c025 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -96,7 +96,7 @@ resume: err = -EHOSTUNREACH; goto error_nolock; } - skb_dst_set(skb, dst_clone(dst)); + skb_dst_set(skb, dst); x = dst->xfrm; } while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL)); -- cgit v1.2.3-70-g09d2 From d50e7e3604778bfc2dc40f440e0742dbae399d54 Mon Sep 17 00:00:00 2001 From: Dan Rosenberg Date: Sat, 19 Mar 2011 20:14:30 +0000 Subject: irda: prevent heap corruption on invalid nickname Invalid nicknames containing only spaces will result in an underflow in a memcpy size calculation, subsequently destroying the heap and panicking. v2 also catches the case where the provided nickname is longer than the buffer size, which can result in controllable heap corruption. Signed-off-by: Dan Rosenberg Cc: stable@kernel.org Signed-off-by: David S. Miller --- net/irda/irnet/irnet_ppp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index 7c567b8aa89..2bb2beb6a37 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -105,6 +105,9 @@ irnet_ctrl_write(irnet_socket * ap, while(isspace(start[length - 1])) length--; + DABORT(length < 5 || length > NICKNAME_MAX_LEN + 5, + -EINVAL, CTRL_ERROR, "Invalid nickname.\n"); + /* Copy the name for later reuse */ memcpy(ap->rname, start + 5, length - 5); ap->rname[length - 5] = '\0'; -- cgit v1.2.3-70-g09d2 From d370af0ef7951188daeb15bae75db7ba57c67846 Mon Sep 17 00:00:00 2001 From: Dan Rosenberg Date: Sun, 20 Mar 2011 15:32:06 +0000 Subject: irda: validate peer name and attribute lengths Length fields provided by a peer for names and attributes may be longer than the destination array sizes. Validate lengths to prevent stack buffer overflows. Signed-off-by: Dan Rosenberg Cc: stable@kernel.org Signed-off-by: David S. Miller --- net/irda/iriap.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/irda/iriap.c b/net/irda/iriap.c index 5b743bdd89b..36477538cea 100644 --- a/net/irda/iriap.c +++ b/net/irda/iriap.c @@ -656,10 +656,16 @@ static void iriap_getvaluebyclass_indication(struct iriap_cb *self, n = 1; name_len = fp[n++]; + + IRDA_ASSERT(name_len < IAS_MAX_CLASSNAME + 1, return;); + memcpy(name, fp+n, name_len); n+=name_len; name[name_len] = '\0'; attr_len = fp[n++]; + + IRDA_ASSERT(attr_len < IAS_MAX_ATTRIBNAME + 1, return;); + memcpy(attr, fp+n, attr_len); n+=attr_len; attr[attr_len] = '\0'; -- cgit v1.2.3-70-g09d2 From be20250c13f88375345ad99950190685eda51eb8 Mon Sep 17 00:00:00 2001 From: Dan Rosenberg Date: Sat, 19 Mar 2011 20:43:43 +0000 Subject: ROSE: prevent heap corruption with bad facilities When parsing the FAC_NATIONAL_DIGIS facilities field, it's possible for a remote host to provide more digipeaters than expected, resulting in heap corruption. Check against ROSE_MAX_DIGIS to prevent overflows, and abort facilities parsing on failure. Additionally, when parsing the FAC_CCITT_DEST_NSAP and FAC_CCITT_SRC_NSAP facilities fields, a remote host can provide a length of less than 10, resulting in an underflow in a memcpy size, causing a kernel panic due to massive heap corruption. A length of greater than 20 results in a stack overflow of the callsign array. Abort facilities parsing on these invalid length values. Signed-off-by: Dan Rosenberg Cc: stable@kernel.org Signed-off-by: David S. Miller --- net/rose/rose_subr.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c index 1734abba26a..174d51c9ce3 100644 --- a/net/rose/rose_subr.c +++ b/net/rose/rose_subr.c @@ -290,10 +290,15 @@ static int rose_parse_national(unsigned char *p, struct rose_facilities_struct * facilities->source_ndigis = 0; facilities->dest_ndigis = 0; for (pt = p + 2, lg = 0 ; lg < l ; pt += AX25_ADDR_LEN, lg += AX25_ADDR_LEN) { - if (pt[6] & AX25_HBIT) + if (pt[6] & AX25_HBIT) { + if (facilities->dest_ndigis >= ROSE_MAX_DIGIS) + return -1; memcpy(&facilities->dest_digis[facilities->dest_ndigis++], pt, AX25_ADDR_LEN); - else + } else { + if (facilities->source_ndigis >= ROSE_MAX_DIGIS) + return -1; memcpy(&facilities->source_digis[facilities->source_ndigis++], pt, AX25_ADDR_LEN); + } } } p += l + 2; @@ -333,6 +338,11 @@ static int rose_parse_ccitt(unsigned char *p, struct rose_facilities_struct *fac case 0xC0: l = p[1]; + + /* Prevent overflows*/ + if (l < 10 || l > 20) + return -1; + if (*p == FAC_CCITT_DEST_NSAP) { memcpy(&facilities->source_addr, p + 7, ROSE_ADDR_LEN); memcpy(callsign, p + 12, l - 10); @@ -373,12 +383,16 @@ int rose_parse_facilities(unsigned char *p, switch (*p) { case FAC_NATIONAL: /* National */ len = rose_parse_national(p + 1, facilities, facilities_len - 1); + if (len < 0) + return 0; facilities_len -= len + 1; p += len + 1; break; case FAC_CCITT: /* CCITT */ len = rose_parse_ccitt(p + 1, facilities, facilities_len - 1); + if (len < 0) + return 0; facilities_len -= len + 1; p += len + 1; break; -- cgit v1.2.3-70-g09d2 From e0bccd315db0c2f919e7fcf9cb60db21d9986f52 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 20 Mar 2011 06:48:05 +0000 Subject: rose: Add length checks to CALL_REQUEST parsing Define some constant offsets for CALL_REQUEST based on the description at and the definition of ROSE as using 10-digit (5-byte) addresses. Use them consistently. Validate all implicit and explicit facilities lengths. Validate the address length byte rather than either trusting or assuming its value. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- include/net/rose.h | 8 ++++- net/rose/af_rose.c | 8 ++--- net/rose/rose_loopback.c | 13 ++++++- net/rose/rose_route.c | 20 +++++++---- net/rose/rose_subr.c | 91 ++++++++++++++++++++++++++++++------------------ 5 files changed, 93 insertions(+), 47 deletions(-) diff --git a/include/net/rose.h b/include/net/rose.h index 5ba9f02731e..555dd198aab 100644 --- a/include/net/rose.h +++ b/include/net/rose.h @@ -14,6 +14,12 @@ #define ROSE_MIN_LEN 3 +#define ROSE_CALL_REQ_ADDR_LEN_OFF 3 +#define ROSE_CALL_REQ_ADDR_LEN_VAL 0xAA /* each address is 10 digits */ +#define ROSE_CALL_REQ_DEST_ADDR_OFF 4 +#define ROSE_CALL_REQ_SRC_ADDR_OFF 9 +#define ROSE_CALL_REQ_FACILITIES_OFF 14 + #define ROSE_GFI 0x10 #define ROSE_Q_BIT 0x80 #define ROSE_D_BIT 0x40 @@ -214,7 +220,7 @@ extern void rose_requeue_frames(struct sock *); extern int rose_validate_nr(struct sock *, unsigned short); extern void rose_write_internal(struct sock *, int); extern int rose_decode(struct sk_buff *, int *, int *, int *, int *, int *); -extern int rose_parse_facilities(unsigned char *, struct rose_facilities_struct *); +extern int rose_parse_facilities(unsigned char *, unsigned int, struct rose_facilities_struct *); extern void rose_disconnect(struct sock *, int, int, int); /* rose_timer.c */ diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 5ee0c62046a..a80aef6e3d1 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -978,7 +978,7 @@ int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct ros struct sock *make; struct rose_sock *make_rose; struct rose_facilities_struct facilities; - int n, len; + int n; skb->sk = NULL; /* Initially we don't know who it's for */ @@ -987,9 +987,9 @@ int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct ros */ memset(&facilities, 0x00, sizeof(struct rose_facilities_struct)); - len = (((skb->data[3] >> 4) & 0x0F) + 1) >> 1; - len += (((skb->data[3] >> 0) & 0x0F) + 1) >> 1; - if (!rose_parse_facilities(skb->data + len + 4, &facilities)) { + if (!rose_parse_facilities(skb->data + ROSE_CALL_REQ_FACILITIES_OFF, + skb->len - ROSE_CALL_REQ_FACILITIES_OFF, + &facilities)) { rose_transmit_clear_request(neigh, lci, ROSE_INVALID_FACILITY, 76); return 0; } diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c index ae4a9d99aec..344456206b7 100644 --- a/net/rose/rose_loopback.c +++ b/net/rose/rose_loopback.c @@ -73,9 +73,20 @@ static void rose_loopback_timer(unsigned long param) unsigned int lci_i, lci_o; while ((skb = skb_dequeue(&loopback_queue)) != NULL) { + if (skb->len < ROSE_MIN_LEN) { + kfree_skb(skb); + continue; + } lci_i = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF); frametype = skb->data[2]; - dest = (rose_address *)(skb->data + 4); + if (frametype == ROSE_CALL_REQUEST && + (skb->len <= ROSE_CALL_REQ_FACILITIES_OFF || + skb->data[ROSE_CALL_REQ_ADDR_LEN_OFF] != + ROSE_CALL_REQ_ADDR_LEN_VAL)) { + kfree_skb(skb); + continue; + } + dest = (rose_address *)(skb->data + ROSE_CALL_REQ_DEST_ADDR_OFF); lci_o = ROSE_DEFAULT_MAXVC + 1 - lci_i; skb_reset_transport_header(skb); diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 88a77e90e7e..08dcd2f29cd 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -861,7 +861,7 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25) unsigned int lci, new_lci; unsigned char cause, diagnostic; struct net_device *dev; - int len, res = 0; + int res = 0; char buf[11]; #if 0 @@ -869,10 +869,17 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25) return res; #endif + if (skb->len < ROSE_MIN_LEN) + return res; frametype = skb->data[2]; lci = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF); - src_addr = (rose_address *)(skb->data + 9); - dest_addr = (rose_address *)(skb->data + 4); + if (frametype == ROSE_CALL_REQUEST && + (skb->len <= ROSE_CALL_REQ_FACILITIES_OFF || + skb->data[ROSE_CALL_REQ_ADDR_LEN_OFF] != + ROSE_CALL_REQ_ADDR_LEN_VAL)) + return res; + src_addr = (rose_address *)(skb->data + ROSE_CALL_REQ_SRC_ADDR_OFF); + dest_addr = (rose_address *)(skb->data + ROSE_CALL_REQ_DEST_ADDR_OFF); spin_lock_bh(&rose_neigh_list_lock); spin_lock_bh(&rose_route_list_lock); @@ -1010,12 +1017,11 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25) goto out; } - len = (((skb->data[3] >> 4) & 0x0F) + 1) >> 1; - len += (((skb->data[3] >> 0) & 0x0F) + 1) >> 1; - memset(&facilities, 0x00, sizeof(struct rose_facilities_struct)); - if (!rose_parse_facilities(skb->data + len + 4, &facilities)) { + if (!rose_parse_facilities(skb->data + ROSE_CALL_REQ_FACILITIES_OFF, + skb->len - ROSE_CALL_REQ_FACILITIES_OFF, + &facilities)) { rose_transmit_clear_request(rose_neigh, lci, ROSE_INVALID_FACILITY, 76); goto out; } diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c index 174d51c9ce3..f6c71caa94b 100644 --- a/net/rose/rose_subr.c +++ b/net/rose/rose_subr.c @@ -142,7 +142,7 @@ void rose_write_internal(struct sock *sk, int frametype) *dptr++ = ROSE_GFI | lci1; *dptr++ = lci2; *dptr++ = frametype; - *dptr++ = 0xAA; + *dptr++ = ROSE_CALL_REQ_ADDR_LEN_VAL; memcpy(dptr, &rose->dest_addr, ROSE_ADDR_LEN); dptr += ROSE_ADDR_LEN; memcpy(dptr, &rose->source_addr, ROSE_ADDR_LEN); @@ -246,12 +246,16 @@ static int rose_parse_national(unsigned char *p, struct rose_facilities_struct * do { switch (*p & 0xC0) { case 0x00: + if (len < 2) + return -1; p += 2; n += 2; len -= 2; break; case 0x40: + if (len < 3) + return -1; if (*p == FAC_NATIONAL_RAND) facilities->rand = ((p[1] << 8) & 0xFF00) + ((p[2] << 0) & 0x00FF); p += 3; @@ -260,32 +264,48 @@ static int rose_parse_national(unsigned char *p, struct rose_facilities_struct * break; case 0x80: + if (len < 4) + return -1; p += 4; n += 4; len -= 4; break; case 0xC0: + if (len < 2) + return -1; l = p[1]; + if (len < 2 + l) + return -1; if (*p == FAC_NATIONAL_DEST_DIGI) { if (!fac_national_digis_received) { + if (l < AX25_ADDR_LEN) + return -1; memcpy(&facilities->source_digis[0], p + 2, AX25_ADDR_LEN); facilities->source_ndigis = 1; } } else if (*p == FAC_NATIONAL_SRC_DIGI) { if (!fac_national_digis_received) { + if (l < AX25_ADDR_LEN) + return -1; memcpy(&facilities->dest_digis[0], p + 2, AX25_ADDR_LEN); facilities->dest_ndigis = 1; } } else if (*p == FAC_NATIONAL_FAIL_CALL) { + if (l < AX25_ADDR_LEN) + return -1; memcpy(&facilities->fail_call, p + 2, AX25_ADDR_LEN); } else if (*p == FAC_NATIONAL_FAIL_ADD) { + if (l < 1 + ROSE_ADDR_LEN) + return -1; memcpy(&facilities->fail_addr, p + 3, ROSE_ADDR_LEN); } else if (*p == FAC_NATIONAL_DIGIS) { + if (l % AX25_ADDR_LEN) + return -1; fac_national_digis_received = 1; facilities->source_ndigis = 0; facilities->dest_ndigis = 0; @@ -319,24 +339,32 @@ static int rose_parse_ccitt(unsigned char *p, struct rose_facilities_struct *fac do { switch (*p & 0xC0) { case 0x00: + if (len < 2) + return -1; p += 2; n += 2; len -= 2; break; case 0x40: + if (len < 3) + return -1; p += 3; n += 3; len -= 3; break; case 0x80: + if (len < 4) + return -1; p += 4; n += 4; len -= 4; break; case 0xC0: + if (len < 2) + return -1; l = p[1]; /* Prevent overflows*/ @@ -365,49 +393,44 @@ static int rose_parse_ccitt(unsigned char *p, struct rose_facilities_struct *fac return n; } -int rose_parse_facilities(unsigned char *p, +int rose_parse_facilities(unsigned char *p, unsigned packet_len, struct rose_facilities_struct *facilities) { int facilities_len, len; facilities_len = *p++; - if (facilities_len == 0) + if (facilities_len == 0 || (unsigned)facilities_len > packet_len) return 0; - while (facilities_len > 0) { - if (*p == 0x00) { - facilities_len--; - p++; - - switch (*p) { - case FAC_NATIONAL: /* National */ - len = rose_parse_national(p + 1, facilities, facilities_len - 1); - if (len < 0) - return 0; - facilities_len -= len + 1; - p += len + 1; - break; - - case FAC_CCITT: /* CCITT */ - len = rose_parse_ccitt(p + 1, facilities, facilities_len - 1); - if (len < 0) - return 0; - facilities_len -= len + 1; - p += len + 1; - break; - - default: - printk(KERN_DEBUG "ROSE: rose_parse_facilities - unknown facilities family %02X\n", *p); - facilities_len--; - p++; - break; - } - } else - break; /* Error in facilities format */ + while (facilities_len >= 3 && *p == 0x00) { + facilities_len--; + p++; + + switch (*p) { + case FAC_NATIONAL: /* National */ + len = rose_parse_national(p + 1, facilities, facilities_len - 1); + break; + + case FAC_CCITT: /* CCITT */ + len = rose_parse_ccitt(p + 1, facilities, facilities_len - 1); + break; + + default: + printk(KERN_DEBUG "ROSE: rose_parse_facilities - unknown facilities family %02X\n", *p); + len = 1; + break; + } + + if (len < 0) + return 0; + if (WARN_ON(len >= facilities_len)) + return 0; + facilities_len -= len + 1; + p += len + 1; } - return 1; + return facilities_len == 0; } static int rose_create_facilities(unsigned char *buffer, struct rose_sock *rose) -- cgit v1.2.3-70-g09d2 From 3b261ade4224852ed841ecfd13876db812846e96 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Tue, 22 Mar 2011 01:59:47 +0000 Subject: net: remove useless comments in net/core/dev.c The code itself can explain what it is doing, no need these comments. Signed-off-by: WANG Cong Signed-off-by: David S. Miller --- net/core/dev.c | 54 ------------------------------------------------------ 1 file changed, 54 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index f453370131a..9b23dcefae6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1140,9 +1140,6 @@ static int __dev_open(struct net_device *dev) ASSERT_RTNL(); - /* - * Is it even present? - */ if (!netif_device_present(dev)) return -ENODEV; @@ -1151,9 +1148,6 @@ static int __dev_open(struct net_device *dev) if (ret) return ret; - /* - * Call device private open method - */ set_bit(__LINK_STATE_START, &dev->state); if (ops->ndo_validate_addr) @@ -1162,31 +1156,12 @@ static int __dev_open(struct net_device *dev) if (!ret && ops->ndo_open) ret = ops->ndo_open(dev); - /* - * If it went open OK then: - */ - if (ret) clear_bit(__LINK_STATE_START, &dev->state); else { - /* - * Set the flags. - */ dev->flags |= IFF_UP; - - /* - * Enable NET_DMA - */ net_dmaengine_get(); - - /* - * Initialize multicasting status - */ dev_set_rx_mode(dev); - - /* - * Wakeup transmit queue engine - */ dev_activate(dev); } @@ -1209,22 +1184,13 @@ int dev_open(struct net_device *dev) { int ret; - /* - * Is it already up? - */ if (dev->flags & IFF_UP) return 0; - /* - * Open device - */ ret = __dev_open(dev); if (ret < 0) return ret; - /* - * ... and announce new interface. - */ rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); call_netdevice_notifiers(NETDEV_UP, dev); @@ -1240,10 +1206,6 @@ static int __dev_close_many(struct list_head *head) might_sleep(); list_for_each_entry(dev, head, unreg_list) { - /* - * Tell people we are going down, so that they can - * prepare to death, when device is still operating. - */ call_netdevice_notifiers(NETDEV_GOING_DOWN, dev); clear_bit(__LINK_STATE_START, &dev->state); @@ -1272,15 +1234,7 @@ static int __dev_close_many(struct list_head *head) if (ops->ndo_stop) ops->ndo_stop(dev); - /* - * Device is now down. - */ - dev->flags &= ~IFF_UP; - - /* - * Shutdown NET_DMA - */ net_dmaengine_put(); } @@ -1309,9 +1263,6 @@ static int dev_close_many(struct list_head *head) __dev_close_many(head); - /* - * Tell people we are down - */ list_for_each_entry(dev, head, unreg_list) { rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); call_netdevice_notifiers(NETDEV_DOWN, dev); @@ -1371,11 +1322,6 @@ EXPORT_SYMBOL(dev_disable_lro); static int dev_boot_phase = 1; -/* - * Device change register/unregister. These are not inline or static - * as we export them to the world. - */ - /** * register_netdevice_notifier - register a network notifier block * @nb: notifier -- cgit v1.2.3-70-g09d2 From 53914b67993c724cec585863755c9ebc8446e83b Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 22 Mar 2011 08:27:25 +0000 Subject: can: make struct proto const can_ioctl is the only reason for struct proto to be non-const. script/check-patch.pl suggests struct proto be const. Setting the reference to the common can_ioctl() in all CAN protocols directly removes the need to make the struct proto writable in af_can.c Signed-off-by: Kurt Van Dijck Signed-off-by: Oliver Hartkopp Signed-off-by: David S. Miller --- include/linux/can/core.h | 9 +++++---- net/can/af_can.c | 9 +++------ net/can/bcm.c | 4 ++-- net/can/raw.c | 4 ++-- 4 files changed, 12 insertions(+), 14 deletions(-) diff --git a/include/linux/can/core.h b/include/linux/can/core.h index 6c507bea275..6f70a6d3a16 100644 --- a/include/linux/can/core.h +++ b/include/linux/can/core.h @@ -36,10 +36,10 @@ * @prot: pointer to struct proto structure. */ struct can_proto { - int type; - int protocol; - struct proto_ops *ops; - struct proto *prot; + int type; + int protocol; + const struct proto_ops *ops; + struct proto *prot; }; /* function prototypes for the CAN networklayer core (af_can.c) */ @@ -58,5 +58,6 @@ extern void can_rx_unregister(struct net_device *dev, canid_t can_id, void *data); extern int can_send(struct sk_buff *skb, int loop); +extern int can_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); #endif /* CAN_CORE_H */ diff --git a/net/can/af_can.c b/net/can/af_can.c index 702be5a2c95..733d66f1b05 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -95,7 +95,7 @@ struct s_pstats can_pstats; /* receive list statistics */ * af_can socket functions */ -static int can_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +int can_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; @@ -108,6 +108,7 @@ static int can_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return -ENOIOCTLCMD; } } +EXPORT_SYMBOL(can_ioctl); static void can_sock_destruct(struct sock *sk) { @@ -698,13 +699,9 @@ int can_proto_register(struct can_proto *cp) printk(KERN_ERR "can: protocol %d already registered\n", proto); err = -EBUSY; - } else { + } else proto_tab[proto] = cp; - /* use generic ioctl function if not defined by module */ - if (!cp->ops->ioctl) - cp->ops->ioctl = can_ioctl; - } spin_unlock(&proto_tab_lock); if (err < 0) diff --git a/net/can/bcm.c b/net/can/bcm.c index 092dc88a7c6..871a0ad5102 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1569,7 +1569,7 @@ static int bcm_recvmsg(struct kiocb *iocb, struct socket *sock, return size; } -static struct proto_ops bcm_ops __read_mostly = { +static const struct proto_ops bcm_ops = { .family = PF_CAN, .release = bcm_release, .bind = sock_no_bind, @@ -1578,7 +1578,7 @@ static struct proto_ops bcm_ops __read_mostly = { .accept = sock_no_accept, .getname = sock_no_getname, .poll = datagram_poll, - .ioctl = NULL, /* use can_ioctl() from af_can.c */ + .ioctl = can_ioctl, /* use can_ioctl() from af_can.c */ .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = sock_no_setsockopt, diff --git a/net/can/raw.c b/net/can/raw.c index 883e9d74fdd..649acfa7c70 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -742,7 +742,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct socket *sock, return size; } -static struct proto_ops raw_ops __read_mostly = { +static const struct proto_ops raw_ops = { .family = PF_CAN, .release = raw_release, .bind = raw_bind, @@ -751,7 +751,7 @@ static struct proto_ops raw_ops __read_mostly = { .accept = sock_no_accept, .getname = raw_getname, .poll = datagram_poll, - .ioctl = NULL, /* use can_ioctl() from af_can.c */ + .ioctl = can_ioctl, /* use can_ioctl() from af_can.c */ .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = raw_setsockopt, -- cgit v1.2.3-70-g09d2 From ee6f0988a69b3a81bcea0871418ecf5db332149c Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Thu, 24 Mar 2011 02:34:32 +0000 Subject: can: c_can: disable one shot mode until driver is fixed This patch disables the one shot mode, until the driver has been fixed and tested to support it. > I'm quite sure I've seen a situation where msg_obj 17 "seemed" to be > pending, while msg_obj 18 and 19 already have been transmitted. But > in that case, I enabled ONESHOT for the can interface, which enables > the DA mode (automatic retransmission is disabled). Reported-by: Jan Altenberg Signed-off-by: Marc Kleine-Budde Signed-off-by: Kurt Van Dijck Cc: Bhupesh Sharma Acked-by: Wolfgang Grandegger Signed-off-by: David S. Miller --- drivers/net/can/c_can/c_can.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c index 110eda01843..d0bffb08aef 100644 --- a/drivers/net/can/c_can/c_can.c +++ b/drivers/net/can/c_can/c_can.c @@ -588,14 +588,9 @@ static void c_can_chip_config(struct net_device *dev) { struct c_can_priv *priv = netdev_priv(dev); - if (priv->can.ctrlmode & CAN_CTRLMODE_ONE_SHOT) - /* disable automatic retransmission */ - priv->write_reg(priv, &priv->regs->control, - CONTROL_DISABLE_AR); - else - /* enable automatic retransmission */ - priv->write_reg(priv, &priv->regs->control, - CONTROL_ENABLE_AR); + /* enable automatic retransmission */ + priv->write_reg(priv, &priv->regs->control, + CONTROL_ENABLE_AR); if (priv->can.ctrlmode & (CAN_CTRLMODE_LISTENONLY & CAN_CTRLMODE_LOOPBACK)) { @@ -1112,8 +1107,7 @@ struct net_device *alloc_c_can_dev(void) priv->can.bittiming_const = &c_can_bittiming_const; priv->can.do_set_mode = c_can_set_mode; priv->can.do_get_berr_counter = c_can_get_berr_counter; - priv->can.ctrlmode_supported = CAN_CTRLMODE_ONE_SHOT | - CAN_CTRLMODE_LOOPBACK | + priv->can.ctrlmode_supported = CAN_CTRLMODE_LOOPBACK | CAN_CTRLMODE_LISTENONLY | CAN_CTRLMODE_BERR_REPORTING; -- cgit v1.2.3-70-g09d2 From b0052b088cf0cb688b4630c1d520c57276da71a5 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Thu, 24 Mar 2011 02:34:33 +0000 Subject: can: c_can_platform: fix irq check in probe This patch fixes the check in the probe function whether a IRQ was supplied to the driver. The original driver check the irq "struct resource *" against <= 0. Use "platform_get_irq" instead. Signed-off-by: Marc Kleine-Budde Cc: Bhupesh Sharma Signed-off-by: David S. Miller --- drivers/net/can/c_can/c_can_platform.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/c_can/c_can_platform.c b/drivers/net/can/c_can/c_can_platform.c index e629b961ae2..cc90824f2c9 100644 --- a/drivers/net/can/c_can/c_can_platform.c +++ b/drivers/net/can/c_can/c_can_platform.c @@ -73,7 +73,8 @@ static int __devinit c_can_plat_probe(struct platform_device *pdev) void __iomem *addr; struct net_device *dev; struct c_can_priv *priv; - struct resource *mem, *irq; + struct resource *mem; + int irq; #ifdef CONFIG_HAVE_CLK struct clk *clk; @@ -88,8 +89,8 @@ static int __devinit c_can_plat_probe(struct platform_device *pdev) /* get the platform data */ mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); - irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0); - if (!mem || (irq <= 0)) { + irq = platform_get_irq(pdev, 0); + if (!mem || irq <= 0) { ret = -ENODEV; goto exit_free_clk; } @@ -117,7 +118,7 @@ static int __devinit c_can_plat_probe(struct platform_device *pdev) priv = netdev_priv(dev); - dev->irq = irq->start; + dev->irq = irq; priv->regs = addr; #ifdef CONFIG_HAVE_CLK priv->can.clock.freq = clk_get_rate(clk); -- cgit v1.2.3-70-g09d2 From dc760b375e50a47847d4942811bd9679beeb5535 Mon Sep 17 00:00:00 2001 From: Jan Altenberg Date: Sun, 27 Mar 2011 18:24:10 -0700 Subject: can: c_can: Fix tx_bytes accounting The current SocketCAN implementation for the Bosch c_can cell doesn't account the TX bytes correctly, because it calls c_can_inval_msg_object() (which clears the msg ctrl register) before reading the DLC value: for (/* nix */; (priv->tx_next - priv->tx_echo) > 0; priv->tx_echo++) { msg_obj_no = get_tx_echo_msg_obj(priv); c_can_inval_msg_object(dev, 0, msg_obj_no); val = c_can_read_reg32(priv, &priv->regs->txrqst1); if (!(val & (1 << msg_obj_no))) { can_get_echo_skb(dev, msg_obj_no - C_CAN_MSG_OBJ_TX_FIRST); stats->tx_bytes += priv->read_reg(priv, &priv->regs->ifregs[0].msg_cntrl) & IF_MCONT_DLC_MASK; stats->tx_packets++; } } So, we will always read 0 for the DLC value and "ifconfig" will report *0* TX Bytes. The fix is quite easy: Just move c_can_inval_msg_object() to the end of the if() statement. So: * We only call c_can_inval_msg_object() if the message was actually transmitted * We read out the DLC value _before_ clearing the msg ctrl register Signed-off-by: Jan Altenberg Acked-by: Kurt Van Dijck Acked-by: Wolfgang Grandegger Signed-off-by: David S. Miller --- drivers/net/can/c_can/c_can.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c index d0bffb08aef..31552959aed 100644 --- a/drivers/net/can/c_can/c_can.c +++ b/drivers/net/can/c_can/c_can.c @@ -699,7 +699,6 @@ static void c_can_do_tx(struct net_device *dev) for (/* nix */; (priv->tx_next - priv->tx_echo) > 0; priv->tx_echo++) { msg_obj_no = get_tx_echo_msg_obj(priv); - c_can_inval_msg_object(dev, 0, msg_obj_no); val = c_can_read_reg32(priv, &priv->regs->txrqst1); if (!(val & (1 << msg_obj_no))) { can_get_echo_skb(dev, @@ -708,6 +707,7 @@ static void c_can_do_tx(struct net_device *dev) &priv->regs->ifregs[0].msg_cntrl) & IF_MCONT_DLC_MASK; stats->tx_packets++; + c_can_inval_msg_object(dev, 0, msg_obj_no); } } -- cgit v1.2.3-70-g09d2 From 8628bd8af7c4c14f40f5183f80f5744c4e682439 Mon Sep 17 00:00:00 2001 From: Jan Luebbe Date: Thu, 24 Mar 2011 07:44:22 +0000 Subject: ipv4: Fix IP timestamp option (IPOPT_TS_PRESPEC) handling in ip_options_echo() The current handling of echoed IP timestamp options with prespecified addresses is rather broken since the 2.2.x kernels. As far as i understand it, it should behave like when originating packets. Currently it will only timestamp the next free slot if: - there is space for *two* timestamps - some random data from the echoed packet taken as an IP is *not* a local IP This first is caused by an off-by-one error. 'soffset' points to the next free slot and so we only need to have 'soffset + 7 <= optlen'. The second bug is using sptr as the start of the option, when it really is set to 'skb_network_header(skb)'. I just use dptr instead which points to the timestamp option. Finally it would only timestamp for non-local IPs, which we shouldn't do. So instead we exclude all unicast destinations, similar to what we do in ip_options_compile(). Signed-off-by: Jan Luebbe Signed-off-by: David S. Miller --- net/ipv4/ip_options.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 1906fa35860..28a736f3442 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -140,11 +140,11 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) } else { dopt->ts_needtime = 0; - if (soffset + 8 <= optlen) { + if (soffset + 7 <= optlen) { __be32 addr; - memcpy(&addr, sptr+soffset-1, 4); - if (inet_addr_type(dev_net(skb_dst(skb)->dev), addr) != RTN_LOCAL) { + memcpy(&addr, dptr+soffset-1, 4); + if (inet_addr_type(dev_net(skb_dst(skb)->dev), addr) != RTN_UNICAST) { dopt->ts_needtime = 1; soffset += 8; } -- cgit v1.2.3-70-g09d2 From edf947f10074fea27fdb1730524dca59355a1c40 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Thu, 24 Mar 2011 13:24:01 +0000 Subject: bridge: notify applications if address of bridge device changes The mac address of the bridge device may be changed when a new interface is added to the bridge. If this happens, then the bridge needs to call the network notifiers to tickle any other systems that care. Since bridge can be a module, this also means exporting the notifier function. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_if.c | 6 +++++- net/bridge/br_private.h | 2 +- net/bridge/br_stp_if.c | 9 ++++++--- net/core/dev.c | 1 + 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index dce8f0009a1..718b60366df 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -389,6 +389,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) { struct net_bridge_port *p; int err = 0; + bool changed_addr; /* Don't allow bridging non-ethernet like devices */ if ((dev->flags & IFF_LOOPBACK) || @@ -446,7 +447,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) list_add_rcu(&p->list, &br->port_list); spin_lock_bh(&br->lock); - br_stp_recalculate_bridge_id(br); + changed_addr = br_stp_recalculate_bridge_id(br); br_features_recompute(br); if ((dev->flags & IFF_UP) && netif_carrier_ok(dev) && @@ -456,6 +457,9 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) br_ifinfo_notify(RTM_NEWLINK, p); + if (changed_addr) + call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); + dev_set_mtu(br->dev, br_min_mtu(br)); kobject_uevent(&p->kobj, KOBJ_ADD); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 19e2f46ed08..387013d3374 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -497,7 +497,7 @@ extern void br_stp_disable_bridge(struct net_bridge *br); extern void br_stp_set_enabled(struct net_bridge *br, unsigned long val); extern void br_stp_enable_port(struct net_bridge_port *p); extern void br_stp_disable_port(struct net_bridge_port *p); -extern void br_stp_recalculate_bridge_id(struct net_bridge *br); +extern bool br_stp_recalculate_bridge_id(struct net_bridge *br); extern void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *a); extern void br_stp_set_bridge_priority(struct net_bridge *br, u16 newprio); diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 79372d4a405..5593f5aec94 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -204,7 +204,7 @@ void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *addr) static const unsigned short br_mac_zero_aligned[ETH_ALEN >> 1]; /* called under bridge lock */ -void br_stp_recalculate_bridge_id(struct net_bridge *br) +bool br_stp_recalculate_bridge_id(struct net_bridge *br) { const unsigned char *br_mac_zero = (const unsigned char *)br_mac_zero_aligned; @@ -222,8 +222,11 @@ void br_stp_recalculate_bridge_id(struct net_bridge *br) } - if (compare_ether_addr(br->bridge_id.addr, addr)) - br_stp_change_bridge_id(br, addr); + if (compare_ether_addr(br->bridge_id.addr, addr) == 0) + return false; /* no change */ + + br_stp_change_bridge_id(br, addr); + return true; } /* called under bridge lock */ diff --git a/net/core/dev.c b/net/core/dev.c index 9b23dcefae6..563ddc28139 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1423,6 +1423,7 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev) ASSERT_RTNL(); return raw_notifier_call_chain(&netdev_chain, val, dev); } +EXPORT_SYMBOL(call_netdevice_notifiers); /* When > 0 there are consumers of rx skb time stamps */ static atomic_t netstamp_needed = ATOMIC_INIT(0); -- cgit v1.2.3-70-g09d2 From b3cd965739b8677f6e04913aa535fa156b09e73d Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Fri, 25 Mar 2011 01:21:51 +0000 Subject: myri10ge: small rx_done refactoring Avoid theoretical race condition regarding accessing dev->features NETIF_F_LRO flag, which is illustrated below. CPU1 CPU2 myri10ge_clean_rx_done(): myri10ge_set_flags(): or myri10ge_set_rx_csum(): if (dev->features & NETIF_F_LRO) setup lro dev->features |= NETIF_F_LRO or dev->features &= ~NETIF_F_LRO; if (dev->features & NETIF_F_LRO) flush lro On the way reduce myri10ge_rx_done() number of arguments and calls by moving mgp->small_bytes check into that function. That reduce code size from: text data bss dec hex filename 36644 248 100 36992 9080 drivers/net/myri10ge/myri10ge.o to: text data bss dec hex filename 36037 247 100 36384 8e20 drivers/net/myri10ge/myri10ge.o on my i686 system, what should also make myri10ge_clean_rx_done() being faster. Signed-off-by: Stanislaw Gruszka Signed-off-by: David S. Miller --- drivers/net/myri10ge/myri10ge.c | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c index 1f4e8680a96..673dc600c89 100644 --- a/drivers/net/myri10ge/myri10ge.c +++ b/drivers/net/myri10ge/myri10ge.c @@ -1312,17 +1312,26 @@ myri10ge_unmap_rx_page(struct pci_dev *pdev, * page into an skb */ static inline int -myri10ge_rx_done(struct myri10ge_slice_state *ss, struct myri10ge_rx_buf *rx, - int bytes, int len, __wsum csum) +myri10ge_rx_done(struct myri10ge_slice_state *ss, int len, __wsum csum, + int lro_enabled) { struct myri10ge_priv *mgp = ss->mgp; struct sk_buff *skb; struct skb_frag_struct rx_frags[MYRI10GE_MAX_FRAGS_PER_FRAME]; - int i, idx, hlen, remainder; + struct myri10ge_rx_buf *rx; + int i, idx, hlen, remainder, bytes; struct pci_dev *pdev = mgp->pdev; struct net_device *dev = mgp->dev; u8 *va; + if (len <= mgp->small_bytes) { + rx = &ss->rx_small; + bytes = mgp->small_bytes; + } else { + rx = &ss->rx_big; + bytes = mgp->big_bytes; + } + len += MXGEFW_PAD; idx = rx->cnt & rx->mask; va = page_address(rx->info[idx].page) + rx->info[idx].page_offset; @@ -1341,7 +1350,7 @@ myri10ge_rx_done(struct myri10ge_slice_state *ss, struct myri10ge_rx_buf *rx, remainder -= MYRI10GE_ALLOC_SIZE; } - if (dev->features & NETIF_F_LRO) { + if (lro_enabled) { rx_frags[0].page_offset += MXGEFW_PAD; rx_frags[0].size -= MXGEFW_PAD; len -= MXGEFW_PAD; @@ -1463,7 +1472,7 @@ myri10ge_clean_rx_done(struct myri10ge_slice_state *ss, int budget) { struct myri10ge_rx_done *rx_done = &ss->rx_done; struct myri10ge_priv *mgp = ss->mgp; - struct net_device *netdev = mgp->dev; + unsigned long rx_bytes = 0; unsigned long rx_packets = 0; unsigned long rx_ok; @@ -1474,18 +1483,18 @@ myri10ge_clean_rx_done(struct myri10ge_slice_state *ss, int budget) u16 length; __wsum checksum; + /* + * Prevent compiler from generating more than one ->features memory + * access to avoid theoretical race condition with functions that + * change NETIF_F_LRO flag at runtime. + */ + bool lro_enabled = ACCESS_ONCE(mgp->dev->features) & NETIF_F_LRO; + while (rx_done->entry[idx].length != 0 && work_done < budget) { length = ntohs(rx_done->entry[idx].length); rx_done->entry[idx].length = 0; checksum = csum_unfold(rx_done->entry[idx].checksum); - if (length <= mgp->small_bytes) - rx_ok = myri10ge_rx_done(ss, &ss->rx_small, - mgp->small_bytes, - length, checksum); - else - rx_ok = myri10ge_rx_done(ss, &ss->rx_big, - mgp->big_bytes, - length, checksum); + rx_ok = myri10ge_rx_done(ss, length, checksum, lro_enabled); rx_packets += rx_ok; rx_bytes += rx_ok * (unsigned long)length; cnt++; @@ -1497,7 +1506,7 @@ myri10ge_clean_rx_done(struct myri10ge_slice_state *ss, int budget) ss->stats.rx_packets += rx_packets; ss->stats.rx_bytes += rx_bytes; - if (netdev->features & NETIF_F_LRO) + if (lro_enabled) lro_flush_all(&rx_done->lro_mgr); /* restock receive rings if needed */ -- cgit v1.2.3-70-g09d2 From f4e5bd4f5784c0608078805f29d4a4897ee482f7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 26 Mar 2011 01:34:06 +0000 Subject: Net / jme: Do not use legacy PCI power management The jme driver uses the legacy PCI power management, so it has to do some PCI-specific things in its ->suspend() and ->resume() callbacks, which isn't necessary and should better be done by the PCI sybsystem-level power management code. It also doesn't use device wakeup flags correctly. Convert jme to the new PCI power management framework and make it let the PCI subsystem take care of all the PCI-specific aspects of device handling during system power transitions. Signed-off-by: Rafael J. Wysocki Signed-off-by: David S. Miller --- drivers/net/jme.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/drivers/net/jme.c b/drivers/net/jme.c index f690474f440..994c80939c7 100644 --- a/drivers/net/jme.c +++ b/drivers/net/jme.c @@ -273,7 +273,7 @@ jme_clear_pm(struct jme_adapter *jme) { jwrite32(jme, JME_PMCS, 0xFFFF0000 | jme->reg_pmcs); pci_set_power_state(jme->pdev, PCI_D0); - pci_enable_wake(jme->pdev, PCI_D0, false); + device_set_wakeup_enable(&jme->pdev->dev, false); } static int @@ -2538,6 +2538,8 @@ jme_set_wol(struct net_device *netdev, jwrite32(jme, JME_PMCS, jme->reg_pmcs); + device_set_wakeup_enable(&jme->pdev->dev, jme->reg_pmcs); + return 0; } @@ -3172,9 +3174,9 @@ jme_shutdown(struct pci_dev *pdev) } #ifdef CONFIG_PM -static int -jme_suspend(struct pci_dev *pdev, pm_message_t state) +static int jme_suspend(struct device *dev) { + struct pci_dev *pdev = to_pci_dev(dev); struct net_device *netdev = pci_get_drvdata(pdev); struct jme_adapter *jme = netdev_priv(netdev); @@ -3206,22 +3208,18 @@ jme_suspend(struct pci_dev *pdev, pm_message_t state) tasklet_hi_enable(&jme->rxclean_task); tasklet_hi_enable(&jme->rxempty_task); - pci_save_state(pdev); jme_powersave_phy(jme); - pci_enable_wake(jme->pdev, PCI_D3hot, true); - pci_set_power_state(pdev, PCI_D3hot); return 0; } -static int -jme_resume(struct pci_dev *pdev) +static int jme_resume(struct device *dev) { + struct pci_dev *pdev = to_pci_dev(dev); struct net_device *netdev = pci_get_drvdata(pdev); struct jme_adapter *jme = netdev_priv(netdev); - jme_clear_pm(jme); - pci_restore_state(pdev); + jwrite32(jme, JME_PMCS, 0xFFFF0000 | jme->reg_pmcs); jme_phy_on(jme); if (test_bit(JME_FLAG_SSET, &jme->flags)) @@ -3238,6 +3236,13 @@ jme_resume(struct pci_dev *pdev) return 0; } + +static SIMPLE_DEV_PM_OPS(jme_pm_ops, jme_suspend, jme_resume); +#define JME_PM_OPS (&jme_pm_ops) + +#else + +#define JME_PM_OPS NULL #endif static DEFINE_PCI_DEVICE_TABLE(jme_pci_tbl) = { @@ -3251,11 +3256,8 @@ static struct pci_driver jme_driver = { .id_table = jme_pci_tbl, .probe = jme_init_one, .remove = __devexit_p(jme_remove_one), -#ifdef CONFIG_PM - .suspend = jme_suspend, - .resume = jme_resume, -#endif /* CONFIG_PM */ .shutdown = jme_shutdown, + .driver.pm = JME_PM_OPS, }; static int __init -- cgit v1.2.3-70-g09d2 From 3e49e6d520401e1d25ec8d366520aad2c01adc1c Mon Sep 17 00:00:00 2001 From: Cesar Eduardo Barros Date: Sat, 26 Mar 2011 05:10:30 +0000 Subject: net: use CHECKSUM_NONE instead of magic number Two places in the kernel were doing skb->ip_summed = 0. Change both to skb->ip_summed = CHECKSUM_NONE, which is more readable. Signed-off-by: Cesar Eduardo Barros Signed-off-by: David S. Miller --- drivers/net/ksz884x.c | 2 +- net/ipv6/ip6mr.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ksz884x.c b/drivers/net/ksz884x.c index 540a8dcbcc4..7f7d5708a65 100644 --- a/drivers/net/ksz884x.c +++ b/drivers/net/ksz884x.c @@ -4898,7 +4898,7 @@ static netdev_tx_t netdev_tx(struct sk_buff *skb, struct net_device *dev) goto unlock; } skb_copy_and_csum_dev(org_skb, skb->data); - org_skb->ip_summed = 0; + org_skb->ip_summed = CHECKSUM_NONE; skb->len = org_skb->len; copy_old_skb(org_skb, skb); } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 7ff0343e05c..29e48593bf2 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -663,7 +663,7 @@ static int pim6_rcv(struct sk_buff *skb) skb_pull(skb, (u8 *)encap - skb->data); skb_reset_network_header(skb); skb->protocol = htons(ETH_P_IPV6); - skb->ip_summed = 0; + skb->ip_summed = CHECKSUM_NONE; skb->pkt_type = PACKET_HOST; skb_tunnel_rx(skb, reg_dev); -- cgit v1.2.3-70-g09d2 From 27fd9de8eb9bf48e4a09c8f02d13f56e9a074a1e Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Sat, 26 Mar 2011 16:42:31 +0000 Subject: tg3: use and instead and It is proper style to include linux/foo.h instead asm/foo.h if both exist Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller --- drivers/net/tg3.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index ebec88882c3..513565503f9 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -48,9 +48,9 @@ #include #include -#include +#include #include -#include +#include #ifdef CONFIG_SPARC #include -- cgit v1.2.3-70-g09d2 From 6303e6e8da770e62a4d38f7206fe046514276d14 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Sat, 26 Mar 2011 16:42:33 +0000 Subject: tg3: Fix inline keyword usage The correct usage is "static inline void" not "static void inline". Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller --- drivers/net/tg3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 513565503f9..73c942d85f0 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -13118,7 +13118,7 @@ done: static struct pci_dev * __devinit tg3_find_peer(struct tg3 *); -static void inline vlan_features_add(struct net_device *dev, unsigned long flags) +static inline void vlan_features_add(struct net_device *dev, unsigned long flags) { dev->vlan_features |= flags; } -- cgit v1.2.3-70-g09d2 From b5845f9834d8f4c79d324bc59b99dbcf0a40f428 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 27 Mar 2011 01:01:26 +0000 Subject: mlx4_en: Fix loss of promiscuity The mlx4_en driver uses the combination stop_port/start_port in a number of places. Unfortunately that causes any promiscuous mode settings on the hardware to be lost. This patch fixes that problem. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/net/mlx4/en_netdev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/mlx4/en_netdev.c b/drivers/net/mlx4/en_netdev.c index 5762ebde445..4f158baa024 100644 --- a/drivers/net/mlx4/en_netdev.c +++ b/drivers/net/mlx4/en_netdev.c @@ -742,6 +742,9 @@ int mlx4_en_start_port(struct net_device *dev) 0, MLX4_PROT_ETH)) mlx4_warn(mdev, "Failed Attaching Broadcast\n"); + /* Must redo promiscuous mode setup. */ + priv->flags &= ~(MLX4_EN_FLAG_PROMISC | MLX4_EN_FLAG_MC_PROMISC); + /* Schedule multicast task to populate multicast list */ queue_work(mdev->workqueue, &priv->mcast_task); -- cgit v1.2.3-70-g09d2 From 673e63c688f43104c73aad8ea4237f7ad41fa14d Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Tue, 22 Mar 2011 23:54:49 +0000 Subject: net: fix ethtool->set_flags not intended -EINVAL return value After commit d5dbda23804156ae6f35025ade5307a49d1db6d7 "ethtool: Add support for vlan accleration.", drivers that have NETIF_F_HW_VLAN_TX, and/or NETIF_F_HW_VLAN_RX feature, but do not allow enable/disable vlan acceleration via ethtool set_flags, always return -EINVAL from that function. Fix by returning -EINVAL only if requested features do not match current settings and can not be changed by driver. Change any driver that define ethtool->set_flags to use ethtool_invalid_flags() to avoid similar problems in the future (also on drivers that do not have the problem). Tested with modified (to reproduce this bug) myri10ge driver. Cc: stable@kernel.org # 2.6.37+ Signed-off-by: Stanislaw Gruszka Signed-off-by: David S. Miller --- drivers/net/netxen/netxen_nic_ethtool.c | 2 +- drivers/net/qlcnic/qlcnic_ethtool.c | 2 +- drivers/net/s2io.c | 2 +- drivers/net/vmxnet3/vmxnet3_ethtool.c | 4 ++-- drivers/net/vxge/vxge-ethtool.c | 4 ++-- include/linux/ethtool.h | 1 + net/core/ethtool.c | 17 ++++++++++++++++- 7 files changed, 24 insertions(+), 8 deletions(-) diff --git a/drivers/net/netxen/netxen_nic_ethtool.c b/drivers/net/netxen/netxen_nic_ethtool.c index 653d308e0f5..3bdcc803ec6 100644 --- a/drivers/net/netxen/netxen_nic_ethtool.c +++ b/drivers/net/netxen/netxen_nic_ethtool.c @@ -871,7 +871,7 @@ static int netxen_nic_set_flags(struct net_device *netdev, u32 data) struct netxen_adapter *adapter = netdev_priv(netdev); int hw_lro; - if (data & ~ETH_FLAG_LRO) + if (ethtool_invalid_flags(netdev, data, ETH_FLAG_LRO)) return -EINVAL; if (!(adapter->capabilities & NX_FW_CAPABILITY_HW_LRO)) diff --git a/drivers/net/qlcnic/qlcnic_ethtool.c b/drivers/net/qlcnic/qlcnic_ethtool.c index 4c14510e2a8..45b2755d6cb 100644 --- a/drivers/net/qlcnic/qlcnic_ethtool.c +++ b/drivers/net/qlcnic/qlcnic_ethtool.c @@ -1003,7 +1003,7 @@ static int qlcnic_set_flags(struct net_device *netdev, u32 data) struct qlcnic_adapter *adapter = netdev_priv(netdev); int hw_lro; - if (data & ~ETH_FLAG_LRO) + if (ethtool_invalid_flags(netdev, data, ETH_FLAG_LRO)) return -EINVAL; if (!(adapter->capabilities & QLCNIC_FW_CAPABILITY_HW_LRO)) diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index 2ad6364103e..356e74d20b8 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -6726,7 +6726,7 @@ static int s2io_ethtool_set_flags(struct net_device *dev, u32 data) int rc = 0; int changed = 0; - if (data & ~ETH_FLAG_LRO) + if (ethtool_invalid_flags(dev, data, ETH_FLAG_LRO)) return -EINVAL; if (data & ETH_FLAG_LRO) { diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c index 81254be85b9..51f2ef142a5 100644 --- a/drivers/net/vmxnet3/vmxnet3_ethtool.c +++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c @@ -304,8 +304,8 @@ vmxnet3_set_flags(struct net_device *netdev, u32 data) u8 lro_present = (netdev->features & NETIF_F_LRO) == 0 ? 0 : 1; unsigned long flags; - if (data & ~ETH_FLAG_LRO) - return -EOPNOTSUPP; + if (ethtool_invalid_flags(netdev, data, ETH_FLAG_LRO)) + return -EINVAL; if (lro_requested ^ lro_present) { /* toggle the LRO feature*/ diff --git a/drivers/net/vxge/vxge-ethtool.c b/drivers/net/vxge/vxge-ethtool.c index 1dd3a21b3a4..c5eb034107f 100644 --- a/drivers/net/vxge/vxge-ethtool.c +++ b/drivers/net/vxge/vxge-ethtool.c @@ -1117,8 +1117,8 @@ static int vxge_set_flags(struct net_device *dev, u32 data) struct vxgedev *vdev = netdev_priv(dev); enum vxge_hw_status status; - if (data & ~ETH_FLAG_RXHASH) - return -EOPNOTSUPP; + if (ethtool_invalid_flags(dev, data, ETH_FLAG_RXHASH)) + return -EINVAL; if (!!(data & ETH_FLAG_RXHASH) == vdev->devh->config.rth_en) return 0; diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index ae757bcf128..c8fcbdd2b0e 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -680,6 +680,7 @@ int ethtool_op_set_ufo(struct net_device *dev, u32 data); u32 ethtool_op_get_flags(struct net_device *dev); int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported); void ethtool_ntuple_flush(struct net_device *dev); +bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported); /** * ðtool_ops - Alter and report network device settings diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 24bd57493c0..74ead9eca12 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -141,9 +141,24 @@ u32 ethtool_op_get_flags(struct net_device *dev) } EXPORT_SYMBOL(ethtool_op_get_flags); +/* Check if device can enable (or disable) particular feature coded in "data" + * argument. Flags "supported" describe features that can be toggled by device. + * If feature can not be toggled, it state (enabled or disabled) must match + * hardcoded device features state, otherwise flags are marked as invalid. + */ +bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported) +{ + u32 features = dev->features & flags_dup_features; + /* "data" can contain only flags_dup_features bits, + * see __ethtool_set_flags */ + + return (features & ~supported) != (data & ~supported); +} +EXPORT_SYMBOL(ethtool_invalid_flags); + int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported) { - if (data & ~supported) + if (ethtool_invalid_flags(dev, data, supported)) return -EINVAL; dev->features = ((dev->features & ~flags_dup_features) | -- cgit v1.2.3-70-g09d2 From 4910ac6c526d2868adcb5893e0c428473de862b5 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 28 Mar 2011 16:51:15 -0700 Subject: ipv4: Don't ip_rt_put() an error pointer in RAW sockets. Reported-by: Marc Kleine-Budde Signed-off-by: David S. Miller --- net/ipv4/raw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index e837ffd3edc..2d3c72e5bbb 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -569,6 +569,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, rt = ip_route_output_flow(sock_net(sk), &fl4, sk); if (IS_ERR(rt)) { err = PTR_ERR(rt); + rt = NULL; goto done; } } -- cgit v1.2.3-70-g09d2 From a715dea3c8e9ef2771c534e05ee1d36f65987e64 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 27 Mar 2011 14:57:26 +0000 Subject: net: Always allocate at least 16 skb frags regardless of page size When analysing performance of the cxgb3 on a ppc64 box I noticed that we weren't doing much GRO merging. It turns out we are limited by the number of SKB frags: #define MAX_SKB_FRAGS (65536/PAGE_SIZE + 2) With a 4kB page size we have 18 frags, but with a 64kB page size we only have 3 frags. I ran a single stream TCP bandwidth test to compare the performance of Signed-off-by: David S. Miller --- include/linux/skbuff.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 24cfa626931..239083bfea1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -122,8 +122,14 @@ struct sk_buff_head { struct sk_buff; -/* To allow 64K frame to be packed as single skb without frag_list */ +/* To allow 64K frame to be packed as single skb without frag_list. Since + * GRO uses frags we allocate at least 16 regardless of page size. + */ +#if (65536/PAGE_SIZE + 2) < 16 +#define MAX_SKB_FRAGS 16 +#else #define MAX_SKB_FRAGS (65536/PAGE_SIZE + 2) +#endif typedef struct skb_frag_struct skb_frag_t; -- cgit v1.2.3-70-g09d2 From c211c9698920d2b114bd8fbf913c8bdab3918461 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 27 Mar 2011 16:50:41 +0000 Subject: cxgb3: Apply interrupt coalescing settings to all queues While testing the performance of different receive interrupt coalescing settings on a single stream TCP benchmark, I noticed two very different results. With rx-usecs=50, most of the time a connection would hit 8280 Mbps but once in a while it would hit 9330 Mbps. It turns out we are only applying the interrupt coalescing settings to the first queue and whenever the rx hash would direct us onto that queue we ran faster. With this patch applied and rx-usecs=50, I get 9330 Mbps consistently. Signed-off-by: Anton Blanchard Acked-by: Divy Le Ray Signed-off-by: David S. Miller --- drivers/net/cxgb3/cxgb3_main.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c index 4d538a4e9d5..91089314329 100644 --- a/drivers/net/cxgb3/cxgb3_main.c +++ b/drivers/net/cxgb3/cxgb3_main.c @@ -1983,14 +1983,20 @@ static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c) { struct port_info *pi = netdev_priv(dev); struct adapter *adapter = pi->adapter; - struct qset_params *qsp = &adapter->params.sge.qset[0]; - struct sge_qset *qs = &adapter->sge.qs[0]; + struct qset_params *qsp; + struct sge_qset *qs; + int i; if (c->rx_coalesce_usecs * 10 > M_NEWTIMER) return -EINVAL; - qsp->coalesce_usecs = c->rx_coalesce_usecs; - t3_update_qset_coalesce(qs, qsp); + for (i = 0; i < pi->nqsets; i++) { + qsp = &adapter->params.sge.qset[i]; + qs = &adapter->sge.qs[i]; + qsp->coalesce_usecs = c->rx_coalesce_usecs; + t3_update_qset_coalesce(qs, qsp); + } + return 0; } -- cgit v1.2.3-70-g09d2 From bc8d7da3f19191f86dcc8274cf1a3f2d6aeb0aaa Mon Sep 17 00:00:00 2001 From: Balaji G Date: Sun, 27 Mar 2011 17:15:19 +0000 Subject: drivers net: Fix declaration ordering in inline functions. The correct usage should be "static inline void" instead of "static void inline" Signed-off-by: G.Balaji Signed-off-by: David S. Miller --- drivers/net/bnx2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index d1865cc9731..8e6d618b530 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -8317,7 +8317,7 @@ static const struct net_device_ops bnx2_netdev_ops = { #endif }; -static void inline vlan_features_add(struct net_device *dev, u32 flags) +static inline void vlan_features_add(struct net_device *dev, u32 flags) { dev->vlan_features |= flags; } -- cgit v1.2.3-70-g09d2 From 72f49050ba18959472aac723cd9d094bc3547e89 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Sun, 27 Mar 2011 22:33:13 +0000 Subject: netdev: bfin_mac: document TE setting in RMII modes The current code sometimes generates build warnings due to how it checks the silicon revision, so clean it up and properly document things. Signed-off-by: Mike Frysinger Signed-off-by: David S. Miller --- drivers/net/bfin_mac.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/bfin_mac.c b/drivers/net/bfin_mac.c index 22abfb39d81..68d45ba2d9b 100644 --- a/drivers/net/bfin_mac.c +++ b/drivers/net/bfin_mac.c @@ -1237,8 +1237,17 @@ static int bfin_mac_enable(struct phy_device *phydev) if (phydev->interface == PHY_INTERFACE_MODE_RMII) { opmode |= RMII; /* For Now only 100MBit are supported */ -#if (defined(CONFIG_BF537) || defined(CONFIG_BF536)) && CONFIG_BF_REV_0_2 - opmode |= TE; +#if defined(CONFIG_BF537) || defined(CONFIG_BF536) + if (__SILICON_REVISION__ < 3) { + /* + * This isn't publicly documented (fun times!), but in + * silicon <=0.2, the RX and TX pins are clocked together. + * So in order to recv, we must enable the transmit side + * as well. This will cause a spurious TX interrupt too, + * but we can easily consume that. + */ + opmode |= TE; + } #endif } -- cgit v1.2.3-70-g09d2 From 36ae0148dbb6b9e15d8f067bb7523fd2b765a6af Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 28 Mar 2011 19:45:52 +0000 Subject: xfrm: Move the test on replay window size into the replay check functions As it is, the replay check is just performed if the replay window of the legacy implementation is nonzero. So we move the test on a nonzero replay window inside the replay check functions to be sure we are testing for the right implementation. Signed-off-by: Steffen Klassert Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/xfrm/xfrm_input.c | 2 +- net/xfrm/xfrm_replay.c | 17 +++++++++++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 341cd1189f8..a026b0ef244 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -173,7 +173,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) goto drop_unlock; } - if (x->props.replay_window && x->repl->check(x, skb, seq)) { + if (x->repl->check(x, skb, seq)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR); goto drop_unlock; } diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 2f5be5b1574..f218385950c 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -118,6 +118,9 @@ static int xfrm_replay_check(struct xfrm_state *x, u32 diff; u32 seq = ntohl(net_seq); + if (!x->props.replay_window) + return 0; + if (unlikely(seq == 0)) goto err; @@ -193,9 +196,14 @@ static int xfrm_replay_check_bmp(struct xfrm_state *x, { unsigned int bitnr, nr; struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + u32 pos; u32 seq = ntohl(net_seq); u32 diff = replay_esn->seq - seq; - u32 pos = (replay_esn->seq - 1) % replay_esn->replay_window; + + if (!replay_esn->replay_window) + return 0; + + pos = (replay_esn->seq - 1) % replay_esn->replay_window; if (unlikely(seq == 0)) goto err; @@ -373,12 +381,17 @@ static int xfrm_replay_check_esn(struct xfrm_state *x, unsigned int bitnr, nr; u32 diff; struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + u32 pos; u32 seq = ntohl(net_seq); - u32 pos = (replay_esn->seq - 1) % replay_esn->replay_window; u32 wsize = replay_esn->replay_window; u32 top = replay_esn->seq; u32 bottom = top - wsize + 1; + if (!wsize) + return 0; + + pos = (replay_esn->seq - 1) % replay_esn->replay_window; + if (unlikely(seq == 0 && replay_esn->seq_hi == 0 && (replay_esn->seq < replay_esn->replay_window - 1))) goto err; -- cgit v1.2.3-70-g09d2 From af2f464e326ebad57284cfdecb03f1606e89bbc7 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 28 Mar 2011 19:46:39 +0000 Subject: xfrm: Assign esn pointers when cloning a state When we clone a xfrm state we have to assign the replay_esn and the preplay_esn pointers to the state if we use the new replay detection method. To this end, we add a xfrm_replay_clone() function that allocates memory for the replay detection and takes over the necessary values from the original state. Signed-off-by: Steffen Klassert Acked-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/xfrm.h | 22 ++++++++++++++++++++++ net/xfrm/xfrm_state.c | 6 ++++++ 2 files changed, 28 insertions(+) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index cffa5dc6644..6ae4bc5ce8a 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1601,6 +1601,28 @@ static inline int xfrm_replay_state_esn_len(struct xfrm_replay_state_esn *replay } #ifdef CONFIG_XFRM_MIGRATE +static inline int xfrm_replay_clone(struct xfrm_state *x, + struct xfrm_state *orig) +{ + x->replay_esn = kzalloc(xfrm_replay_state_esn_len(orig->replay_esn), + GFP_KERNEL); + if (!x->replay_esn) + return -ENOMEM; + + x->replay_esn->bmp_len = orig->replay_esn->bmp_len; + x->replay_esn->replay_window = orig->replay_esn->replay_window; + + x->preplay_esn = kmemdup(x->replay_esn, + xfrm_replay_state_esn_len(x->replay_esn), + GFP_KERNEL); + if (!x->preplay_esn) { + kfree(x->replay_esn); + return -ENOMEM; + } + + return 0; +} + static inline struct xfrm_algo *xfrm_algo_clone(struct xfrm_algo *orig) { return kmemdup(orig, xfrm_alg_len(orig), GFP_KERNEL); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index f83a3d1da81..dd78536d40d 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1181,6 +1181,12 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp) goto error; } + if (orig->replay_esn) { + err = xfrm_replay_clone(x, orig); + if (err) + goto error; + } + memcpy(&x->mark, &orig->mark, sizeof(x->mark)); err = xfrm_init_state(x); -- cgit v1.2.3-70-g09d2 From e2b19125e94124daaeda1ddcf9b85b04575ad86f Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 28 Mar 2011 19:47:30 +0000 Subject: xfrm: Check for esn buffer len in xfrm_new_ae In xfrm_new_ae() we may overwrite the allocated esn replay state buffer with a wrong size. So check that the new size matches the original allocated size and return an error if this is not the case. Signed-off-by: Steffen Klassert Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/xfrm/xfrm_user.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index fc152d28753..ccc4c0c8ef0 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -360,6 +360,23 @@ static int attach_aead(struct xfrm_algo_aead **algpp, u8 *props, return 0; } +static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_esn, + struct nlattr *rp) +{ + struct xfrm_replay_state_esn *up; + + if (!replay_esn || !rp) + return 0; + + up = nla_data(rp); + + if (xfrm_replay_state_esn_len(replay_esn) != + xfrm_replay_state_esn_len(up)) + return -EINVAL; + + return 0; +} + static int xfrm_alloc_replay_state_esn(struct xfrm_replay_state_esn **replay_esn, struct xfrm_replay_state_esn **preplay_esn, struct nlattr *rta) @@ -1766,6 +1783,10 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, if (x->km.state != XFRM_STATE_VALID) goto out; + err = xfrm_replay_verify_len(x->replay_esn, rp); + if (err) + goto out; + spin_lock_bh(&x->lock); xfrm_update_ae_params(x, attrs); spin_unlock_bh(&x->lock); -- cgit v1.2.3-70-g09d2 From 02aadf72fe2c83f145e3437734e66be53abae481 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 28 Mar 2011 19:48:09 +0000 Subject: xfrm: Restrict extended sequence numbers to esp The IPsec extended sequence numbers are fully implemented just for esp. So restrict the usage to esp until other protocols have support too. Signed-off-by: Steffen Klassert Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/xfrm/xfrm_user.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index ccc4c0c8ef0..3d15d3e1b2c 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -127,6 +127,9 @@ static inline int verify_replay(struct xfrm_usersa_info *p, if (!rt) return 0; + if (p->id.proto != IPPROTO_ESP) + return -EINVAL; + if (p->replay_window != 0) return -EINVAL; -- cgit v1.2.3-70-g09d2