From dc99f600698dcac69b8f56dda9a8a00d645c5ffc Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 5 Jul 2011 01:45:05 -0700 Subject: packet: Add fanout support. Fanouts allow packet capturing to be demuxed to a set of AF_PACKET sockets. Two fanout policies are implemented: 1) Hashing based upon skb->rxhash 2) Pure round-robin An AF_PACKET socket must be fully bound before it tries to add itself to a fanout. All AF_PACKET sockets trying to join the same fanout must all have the same bind settings. Fanouts are identified (within a network namespace) by a 16-bit ID. The first socket to try to add itself to a fanout with a particular ID, creates that fanout. When the last socket leaves the fanout (which happens only when the socket is closed), that fanout is destroyed. Signed-off-by: David S. Miller --- include/linux/if_packet.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux/if_packet.h') diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h index 7b318630139..1efa1cb827f 100644 --- a/include/linux/if_packet.h +++ b/include/linux/if_packet.h @@ -49,6 +49,10 @@ struct sockaddr_ll { #define PACKET_VNET_HDR 15 #define PACKET_TX_TIMESTAMP 16 #define PACKET_TIMESTAMP 17 +#define PACKET_FANOUT 18 + +#define PACKET_FANOUT_HASH 0 +#define PACKET_FANOUT_LB 1 struct tpacket_stats { unsigned int tp_packets; -- cgit v1.2.3-70-g09d2 From 7736d33f4262d437c51ed7a28114eacbfca236ff Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 5 Jul 2011 01:43:20 -0700 Subject: packet: Add pre-defragmentation support for ipv4 fanouts. The skb->rxhash cannot be properly computed if the packet is a fragment. To alleviate this, allow the AF_PACKET client to ask for defragmentation to be done at demux time. Signed-off-by: David S. Miller --- include/linux/if_packet.h | 1 + net/packet/af_packet.c | 50 +++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 49 insertions(+), 2 deletions(-) (limited to 'include/linux/if_packet.h') diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h index 1efa1cb827f..84e684e6935 100644 --- a/include/linux/if_packet.h +++ b/include/linux/if_packet.h @@ -53,6 +53,7 @@ struct sockaddr_ll { #define PACKET_FANOUT_HASH 0 #define PACKET_FANOUT_LB 1 +#define PACKET_FANOUT_FLAG_DEFRAG 0x8000 struct tpacket_stats { unsigned int tp_packets; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 3350f1d3c9a..7ba6871a194 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -223,7 +223,7 @@ struct packet_fanout { unsigned int num_members; u16 id; u8 type; - u8 pad; + u8 defrag; atomic_t rr_cur; struct list_head list; struct sock *arr[PACKET_FANOUT_MAX]; @@ -447,6 +447,41 @@ static struct sock *fanout_demux_lb(struct packet_fanout *f, struct sk_buff *skb return f->arr[cur]; } +static struct sk_buff *fanout_check_defrag(struct sk_buff *skb) +{ + const struct iphdr *iph; + u32 len; + + if (skb->protocol != htons(ETH_P_IP)) + return skb; + + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + return skb; + + iph = ip_hdr(skb); + if (iph->ihl < 5 || iph->version != 4) + return skb; + if (!pskb_may_pull(skb, iph->ihl*4)) + return skb; + iph = ip_hdr(skb); + len = ntohs(iph->tot_len); + if (skb->len < len || len < (iph->ihl * 4)) + return skb; + + if (ip_is_fragment(ip_hdr(skb))) { + skb = skb_clone(skb, GFP_ATOMIC); + if (skb) { + if (pskb_trim_rcsum(skb, len)) + return skb; + memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); + if (ip_defrag(skb, IP_DEFRAG_AF_PACKET)) + return NULL; + skb->rxhash = 0; + } + } + return skb; +} + static int packet_rcv_fanout_hash(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { @@ -461,6 +496,12 @@ static int packet_rcv_fanout_hash(struct sk_buff *skb, struct net_device *dev, return 0; } + if (f->defrag) { + skb = fanout_check_defrag(skb); + if (!skb) + return 0; + } + skb_get_rxhash(skb); sk = fanout_demux_hash(f, skb, num); @@ -519,10 +560,12 @@ static void __fanout_unlink(struct sock *sk, struct packet_sock *po) spin_unlock(&f->lock); } -static int fanout_add(struct sock *sk, u16 id, u8 type) +static int fanout_add(struct sock *sk, u16 id, u16 type_flags) { struct packet_sock *po = pkt_sk(sk); struct packet_fanout *f, *match; + u8 type = type_flags & 0xff; + u8 defrag = (type_flags & PACKET_FANOUT_FLAG_DEFRAG) ? 1 : 0; int err; switch (type) { @@ -548,12 +591,15 @@ static int fanout_add(struct sock *sk, u16 id, u8 type) break; } } + if (match && match->defrag != defrag) + return -EINVAL; if (!match) { match = kzalloc(sizeof(*match), GFP_KERNEL); if (match) { write_pnet(&match->net, sock_net(sk)); match->id = id; match->type = type; + match->defrag = defrag; atomic_set(&match->rr_cur, 0); INIT_LIST_HEAD(&match->list); spin_lock_init(&match->lock); -- cgit v1.2.3-70-g09d2 From 95ec3eb417115fbb2c73b59e2825f6dd5d2f6cf6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 6 Jul 2011 01:56:38 -0700 Subject: packet: Add 'cpu' fanout policy. Unfortunately we have to use a real modulus here as the multiply trick won't work as effectively with cpu numbers as it does with rxhash values. Signed-off-by: David S. Miller --- include/linux/if_packet.h | 1 + net/packet/af_packet.c | 65 ++++++++++++++++++++--------------------------- 2 files changed, 29 insertions(+), 37 deletions(-) (limited to 'include/linux/if_packet.h') diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h index 84e684e6935..c1486060f5e 100644 --- a/include/linux/if_packet.h +++ b/include/linux/if_packet.h @@ -53,6 +53,7 @@ struct sockaddr_ll { #define PACKET_FANOUT_HASH 0 #define PACKET_FANOUT_LB 1 +#define PACKET_FANOUT_CPU 2 #define PACKET_FANOUT_FLAG_DEFRAG 0x8000 struct tpacket_stats { diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 7ba6871a194..41f0489ff66 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -447,6 +447,13 @@ static struct sock *fanout_demux_lb(struct packet_fanout *f, struct sk_buff *skb return f->arr[cur]; } +static struct sock *fanout_demux_cpu(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) +{ + unsigned int cpu = smp_processor_id(); + + return f->arr[cpu % num]; +} + static struct sk_buff *fanout_check_defrag(struct sk_buff *skb) { const struct iphdr *iph; @@ -482,8 +489,8 @@ static struct sk_buff *fanout_check_defrag(struct sk_buff *skb) return skb; } -static int packet_rcv_fanout_hash(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) +static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) { struct packet_fanout *f = pt->af_packet_priv; unsigned int num = f->num_members; @@ -496,35 +503,25 @@ static int packet_rcv_fanout_hash(struct sk_buff *skb, struct net_device *dev, return 0; } - if (f->defrag) { - skb = fanout_check_defrag(skb); - if (!skb) - return 0; - } - - skb_get_rxhash(skb); - - sk = fanout_demux_hash(f, skb, num); - po = pkt_sk(sk); - - return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev); -} - -static int packet_rcv_fanout_lb(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) -{ - struct packet_fanout *f = pt->af_packet_priv; - unsigned int num = f->num_members; - struct packet_sock *po; - struct sock *sk; - - if (!net_eq(dev_net(dev), read_pnet(&f->net)) || - !num) { - kfree_skb(skb); - return 0; + switch (f->type) { + case PACKET_FANOUT_HASH: + default: + if (f->defrag) { + skb = fanout_check_defrag(skb); + if (!skb) + return 0; + } + skb_get_rxhash(skb); + sk = fanout_demux_hash(f, skb, num); + break; + case PACKET_FANOUT_LB: + sk = fanout_demux_lb(f, skb, num); + break; + case PACKET_FANOUT_CPU: + sk = fanout_demux_cpu(f, skb, num); + break; } - sk = fanout_demux_lb(f, skb, num); po = pkt_sk(sk); return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev); @@ -571,6 +568,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) switch (type) { case PACKET_FANOUT_HASH: case PACKET_FANOUT_LB: + case PACKET_FANOUT_CPU: break; default: return -EINVAL; @@ -606,14 +604,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) atomic_set(&match->sk_ref, 0); match->prot_hook.type = po->prot_hook.type; match->prot_hook.dev = po->prot_hook.dev; - switch (type) { - case PACKET_FANOUT_HASH: - match->prot_hook.func = packet_rcv_fanout_hash; - break; - case PACKET_FANOUT_LB: - match->prot_hook.func = packet_rcv_fanout_lb; - break; - } + match->prot_hook.func = packet_rcv_fanout; match->prot_hook.af_packet_priv = match; dev_add_pack(&match->prot_hook); list_add(&match->list, &fanout_list); -- cgit v1.2.3-70-g09d2