diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2010-10-07 10:44:07 +0000 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-10-11 12:54:04 -0700 |
commit | 0ed8ddf4045fcfcac36bad753dc4046118c603ec (patch) | |
tree | cf1d9eb14668c4d2257b3519ed7deec8c5cb396d | |
parent | d122179a3c0fdc71b88cb9e3605f372b1651a9ff (diff) |
neigh: Protect neigh->ha[] with a seqlock
Add a seqlock in struct neighbour to protect neigh->ha[], and avoid
dirtying neighbour in stress situation (many different flows / dsts)
Dirtying takes place because of read_lock(&n->lock) and n->used writes.
Switching to a seqlock, and writing n->used only on jiffies changes
permits less dirtying.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/neighbour.h | 16 | ||||
-rw-r--r-- | net/core/neighbour.c | 47 | ||||
-rw-r--r-- | net/ipv4/arp.c | 6 | ||||
-rw-r--r-- | net/sched/sch_teql.c | 8 |
4 files changed, 51 insertions, 26 deletions
diff --git a/include/net/neighbour.h b/include/net/neighbour.h index a4538d55370..f04e7a2522c 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -105,6 +105,7 @@ struct neighbour { atomic_t refcnt; atomic_t probes; rwlock_t lock; + seqlock_t ha_lock; unsigned char ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))]; struct hh_cache *hh; int (*output)(struct sk_buff *skb); @@ -302,7 +303,10 @@ static inline void neigh_confirm(struct neighbour *neigh) static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) { - neigh->used = jiffies; + unsigned long now = ACCESS_ONCE(jiffies); + + if (neigh->used != now) + neigh->used = now; if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE))) return __neigh_event_send(neigh, skb); return 0; @@ -373,4 +377,14 @@ struct neighbour_cb { #define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb) +static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n, + const struct net_device *dev) +{ + unsigned int seq; + + do { + seq = read_seqbegin(&n->ha_lock); + memcpy(dst, n->ha, dev->addr_len); + } while (read_seqretry(&n->ha_lock, seq)); +} #endif diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 2044906ecd1..b165b96355b 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -294,6 +294,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl) skb_queue_head_init(&n->arp_queue); rwlock_init(&n->lock); + seqlock_init(&n->ha_lock); n->updated = n->used = now; n->nud_state = NUD_NONE; n->output = neigh_blackhole; @@ -1015,7 +1016,7 @@ out_unlock_bh: } EXPORT_SYMBOL(__neigh_event_send); -static void neigh_update_hhs(struct neighbour *neigh) +static void neigh_update_hhs(const struct neighbour *neigh) { struct hh_cache *hh; void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *) @@ -1151,7 +1152,9 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, } if (lladdr != neigh->ha) { + write_seqlock(&neigh->ha_lock); memcpy(&neigh->ha, lladdr, dev->addr_len); + write_sequnlock(&neigh->ha_lock); neigh_update_hhs(neigh); if (!(new & NUD_CONNECTED)) neigh->confirmed = jiffies - @@ -1214,6 +1217,7 @@ static inline bool neigh_hh_lookup(struct neighbour *n, struct dst_entry *dst, { struct hh_cache *hh; + smp_rmb(); /* paired with smp_wmb() in neigh_hh_init() */ for (hh = n->hh; hh; hh = hh->hh_next) { if (hh->hh_type == protocol) { atomic_inc(&hh->hh_refcnt); @@ -1248,8 +1252,8 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, kfree(hh); return; } - read_unlock(&n->lock); - write_lock(&n->lock); + + write_lock_bh(&n->lock); /* must check if another thread already did the insert */ if (neigh_hh_lookup(n, dst, protocol)) { @@ -1263,13 +1267,13 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, hh->hh_output = n->ops->output; hh->hh_next = n->hh; + smp_wmb(); /* paired with smp_rmb() in neigh_hh_lookup() */ n->hh = hh; if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL)) hh_cache_put(hh); end: - write_unlock(&n->lock); - read_lock(&n->lock); + write_unlock_bh(&n->lock); } /* This function can be used in contexts, where only old dev_queue_xmit @@ -1308,16 +1312,18 @@ int neigh_resolve_output(struct sk_buff *skb) if (!neigh_event_send(neigh, skb)) { int err; struct net_device *dev = neigh->dev; + unsigned int seq; - read_lock_bh(&neigh->lock); if (dev->header_ops->cache && !dst->hh && !(dst->flags & DST_NOCACHE)) neigh_hh_init(neigh, dst, dst->ops->protocol); - err = dev_hard_header(skb, dev, ntohs(skb->protocol), - neigh->ha, NULL, skb->len); - read_unlock_bh(&neigh->lock); + do { + seq = read_seqbegin(&neigh->ha_lock); + err = dev_hard_header(skb, dev, ntohs(skb->protocol), + neigh->ha, NULL, skb->len); + } while (read_seqretry(&neigh->ha_lock, seq)); if (err >= 0) rc = neigh->ops->queue_xmit(skb); @@ -1344,13 +1350,16 @@ int neigh_connected_output(struct sk_buff *skb) struct dst_entry *dst = skb_dst(skb); struct neighbour *neigh = dst->neighbour; struct net_device *dev = neigh->dev; + unsigned int seq; __skb_pull(skb, skb_network_offset(skb)); - read_lock_bh(&neigh->lock); - err = dev_hard_header(skb, dev, ntohs(skb->protocol), - neigh->ha, NULL, skb->len); - read_unlock_bh(&neigh->lock); + do { + seq = read_seqbegin(&neigh->ha_lock); + err = dev_hard_header(skb, dev, ntohs(skb->protocol), + neigh->ha, NULL, skb->len); + } while (read_seqretry(&neigh->ha_lock, seq)); + if (err >= 0) err = neigh->ops->queue_xmit(skb); else { @@ -2148,10 +2157,14 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, read_lock_bh(&neigh->lock); ndm->ndm_state = neigh->nud_state; - if ((neigh->nud_state & NUD_VALID) && - nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, neigh->ha) < 0) { - read_unlock_bh(&neigh->lock); - goto nla_put_failure; + if (neigh->nud_state & NUD_VALID) { + char haddr[MAX_ADDR_LEN]; + + neigh_ha_snapshot(haddr, neigh, neigh->dev); + if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) { + read_unlock_bh(&neigh->lock); + goto nla_put_failure; + } } ci.ndm_used = jiffies_to_clock_t(now - neigh->used); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index f3530957817..d8e540c5b07 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -502,10 +502,8 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb) if (n) { n->used = jiffies; - if (n->nud_state&NUD_VALID || neigh_event_send(n, skb) == 0) { - read_lock_bh(&n->lock); - memcpy(haddr, n->ha, dev->addr_len); - read_unlock_bh(&n->lock); + if (n->nud_state & NUD_VALID || neigh_event_send(n, skb) == 0) { + neigh_ha_snapshot(haddr, n, dev); neigh_release(n); return 0; } diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index feaabc103ce..401af959670 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -241,11 +241,11 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device * } if (neigh_event_send(n, skb_res) == 0) { int err; + char haddr[MAX_ADDR_LEN]; - read_lock(&n->lock); - err = dev_hard_header(skb, dev, ntohs(skb->protocol), - n->ha, NULL, skb->len); - read_unlock(&n->lock); + neigh_ha_snapshot(haddr, n, dev); + err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr, + NULL, skb->len); if (err < 0) { neigh_release(n); |