diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-12-12 18:07:07 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-12-12 18:07:07 -0800 |
commit | 6be35c700f742e911ecedd07fcc43d4439922334 (patch) | |
tree | ca9f37214d204465fcc2d79c82efd291e357c53c /net/openvswitch | |
parent | e37aa63e87bd581f9be5555ed0ba83f5295c92fc (diff) | |
parent | 520dfe3a3645257bf83660f672c47f8558f3d4c4 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking changes from David Miller:
1) Allow to dump, monitor, and change the bridge multicast database
using netlink. From Cong Wang.
2) RFC 5961 TCP blind data injection attack mitigation, from Eric
Dumazet.
3) Networking user namespace support from Eric W. Biederman.
4) tuntap/virtio-net multiqueue support by Jason Wang.
5) Support for checksum offload of encapsulated packets (basically,
tunneled traffic can still be checksummed by HW). From Joseph
Gasparakis.
6) Allow BPF filter access to VLAN tags, from Eric Dumazet and
Daniel Borkmann.
7) Bridge port parameters over netlink and BPDU blocking support
from Stephen Hemminger.
8) Improve data access patterns during inet socket demux by rearranging
socket layout, from Eric Dumazet.
9) TIPC protocol updates and cleanups from Ying Xue, Paul Gortmaker, and
Jon Maloy.
10) Update TCP socket hash sizing to be more in line with current day
realities. The existing heurstics were choosen a decade ago.
From Eric Dumazet.
11) Fix races, queue bloat, and excessive wakeups in ATM and
associated drivers, from Krzysztof Mazur and David Woodhouse.
12) Support DOVE (Distributed Overlay Virtual Ethernet) extensions
in VXLAN driver, from David Stevens.
13) Add "oops_only" mode to netconsole, from Amerigo Wang.
14) Support set and query of VEB/VEPA bridge mode via PF_BRIDGE, also
allow DCB netlink to work on namespaces other than the initial
namespace. From John Fastabend.
15) Support PTP in the Tigon3 driver, from Matt Carlson.
16) tun/vhost zero copy fixes and improvements, plus turn it on
by default, from Michael S. Tsirkin.
17) Support per-association statistics in SCTP, from Michele
Baldessari.
And many, many, driver updates, cleanups, and improvements. Too
numerous to mention individually.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1722 commits)
net/mlx4_en: Add support for destination MAC in steering rules
net/mlx4_en: Use generic etherdevice.h functions.
net: ethtool: Add destination MAC address to flow steering API
bridge: add support of adding and deleting mdb entries
bridge: notify mdb changes via netlink
ndisc: Unexport ndisc_{build,send}_skb().
uapi: add missing netconf.h to export list
pkt_sched: avoid requeues if possible
solos-pci: fix double-free of TX skb in DMA mode
bnx2: Fix accidental reversions.
bna: Driver Version Updated to 3.1.2.1
bna: Firmware update
bna: Add RX State
bna: Rx Page Based Allocation
bna: TX Intr Coalescing Fix
bna: Tx and Rx Optimizations
bna: Code Cleanup and Enhancements
ath9k: check pdata variable before dereferencing it
ath5k: RX timestamp is reported at end of frame
ath9k_htc: RX timestamp is reported at end of frame
...
Diffstat (limited to 'net/openvswitch')
-rw-r--r-- | net/openvswitch/actions.c | 97 | ||||
-rw-r--r-- | net/openvswitch/datapath.c | 27 | ||||
-rw-r--r-- | net/openvswitch/flow.c | 28 | ||||
-rw-r--r-- | net/openvswitch/flow.h | 8 | ||||
-rw-r--r-- | net/openvswitch/vport-netdev.c | 14 | ||||
-rw-r--r-- | net/openvswitch/vport-netdev.h | 3 | ||||
-rw-r--r-- | net/openvswitch/vport.c | 5 |
7 files changed, 166 insertions, 16 deletions
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 08114478cb8..ac2defeeba8 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -28,6 +28,7 @@ #include <linux/if_arp.h> #include <linux/if_vlan.h> #include <net/ip.h> +#include <net/ipv6.h> #include <net/checksum.h> #include <net/dsfield.h> @@ -162,6 +163,53 @@ static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh, *addr = new_addr; } +static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto, + __be32 addr[4], const __be32 new_addr[4]) +{ + int transport_len = skb->len - skb_transport_offset(skb); + + if (l4_proto == IPPROTO_TCP) { + if (likely(transport_len >= sizeof(struct tcphdr))) + inet_proto_csum_replace16(&tcp_hdr(skb)->check, skb, + addr, new_addr, 1); + } else if (l4_proto == IPPROTO_UDP) { + if (likely(transport_len >= sizeof(struct udphdr))) { + struct udphdr *uh = udp_hdr(skb); + + if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) { + inet_proto_csum_replace16(&uh->check, skb, + addr, new_addr, 1); + if (!uh->check) + uh->check = CSUM_MANGLED_0; + } + } + } +} + +static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto, + __be32 addr[4], const __be32 new_addr[4], + bool recalculate_csum) +{ + if (recalculate_csum) + update_ipv6_checksum(skb, l4_proto, addr, new_addr); + + skb->rxhash = 0; + memcpy(addr, new_addr, sizeof(__be32[4])); +} + +static void set_ipv6_tc(struct ipv6hdr *nh, u8 tc) +{ + nh->priority = tc >> 4; + nh->flow_lbl[0] = (nh->flow_lbl[0] & 0x0F) | ((tc & 0x0F) << 4); +} + +static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl) +{ + nh->flow_lbl[0] = (nh->flow_lbl[0] & 0xF0) | (fl & 0x000F0000) >> 16; + nh->flow_lbl[1] = (fl & 0x0000FF00) >> 8; + nh->flow_lbl[2] = fl & 0x000000FF; +} + static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl) { csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8)); @@ -195,6 +243,47 @@ static int set_ipv4(struct sk_buff *skb, const struct ovs_key_ipv4 *ipv4_key) return 0; } +static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *ipv6_key) +{ + struct ipv6hdr *nh; + int err; + __be32 *saddr; + __be32 *daddr; + + err = make_writable(skb, skb_network_offset(skb) + + sizeof(struct ipv6hdr)); + if (unlikely(err)) + return err; + + nh = ipv6_hdr(skb); + saddr = (__be32 *)&nh->saddr; + daddr = (__be32 *)&nh->daddr; + + if (memcmp(ipv6_key->ipv6_src, saddr, sizeof(ipv6_key->ipv6_src))) + set_ipv6_addr(skb, ipv6_key->ipv6_proto, saddr, + ipv6_key->ipv6_src, true); + + if (memcmp(ipv6_key->ipv6_dst, daddr, sizeof(ipv6_key->ipv6_dst))) { + unsigned int offset = 0; + int flags = IP6_FH_F_SKIP_RH; + bool recalc_csum = true; + + if (ipv6_ext_hdr(nh->nexthdr)) + recalc_csum = ipv6_find_hdr(skb, &offset, + NEXTHDR_ROUTING, NULL, + &flags) != NEXTHDR_ROUTING; + + set_ipv6_addr(skb, ipv6_key->ipv6_proto, daddr, + ipv6_key->ipv6_dst, recalc_csum); + } + + set_ipv6_tc(nh, ipv6_key->ipv6_tclass); + set_ipv6_fl(nh, ntohl(ipv6_key->ipv6_label)); + nh->hop_limit = ipv6_key->ipv6_hlimit; + + return 0; +} + /* Must follow make_writable() since that can move the skb data. */ static void set_tp_port(struct sk_buff *skb, __be16 *port, __be16 new_port, __sum16 *check) @@ -339,6 +428,10 @@ static int execute_set_action(struct sk_buff *skb, skb->priority = nla_get_u32(nested_attr); break; + case OVS_KEY_ATTR_SKB_MARK: + skb->mark = nla_get_u32(nested_attr); + break; + case OVS_KEY_ATTR_ETHERNET: err = set_eth_addr(skb, nla_data(nested_attr)); break; @@ -347,6 +440,10 @@ static int execute_set_action(struct sk_buff *skb, err = set_ipv4(skb, nla_data(nested_attr)); break; + case OVS_KEY_ATTR_IPV6: + err = set_ipv6(skb, nla_data(nested_attr)); + break; + case OVS_KEY_ATTR_TCP: err = set_tcp(skb, nla_data(nested_attr)); break; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 4c4b62ccc7d..f996db34324 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -208,7 +208,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb) int error; int key_len; - stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id()); + stats = this_cpu_ptr(dp->stats_percpu); /* Extract flow from 'skb' into 'key'. */ error = ovs_flow_extract(skb, p->port_no, &key, &key_len); @@ -282,7 +282,7 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, return 0; err: - stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id()); + stats = this_cpu_ptr(dp->stats_percpu); u64_stats_update_begin(&stats->sync); stats->n_lost++; @@ -479,8 +479,10 @@ static int validate_set(const struct nlattr *a, switch (key_type) { const struct ovs_key_ipv4 *ipv4_key; + const struct ovs_key_ipv6 *ipv6_key; case OVS_KEY_ATTR_PRIORITY: + case OVS_KEY_ATTR_SKB_MARK: case OVS_KEY_ATTR_ETHERNET: break; @@ -500,6 +502,25 @@ static int validate_set(const struct nlattr *a, break; + case OVS_KEY_ATTR_IPV6: + if (flow_key->eth.type != htons(ETH_P_IPV6)) + return -EINVAL; + + if (!flow_key->ip.proto) + return -EINVAL; + + ipv6_key = nla_data(ovs_key); + if (ipv6_key->ipv6_proto != flow_key->ip.proto) + return -EINVAL; + + if (ipv6_key->ipv6_frag != flow_key->ip.frag) + return -EINVAL; + + if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000) + return -EINVAL; + + break; + case OVS_KEY_ATTR_TCP: if (flow_key->ip.proto != IPPROTO_TCP) return -EINVAL; @@ -675,6 +696,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) goto err_flow_free; err = ovs_flow_metadata_from_nlattrs(&flow->key.phy.priority, + &flow->key.phy.skb_mark, &flow->key.phy.in_port, a[OVS_PACKET_ATTR_KEY]); if (err) @@ -694,6 +716,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) OVS_CB(packet)->flow = flow; packet->priority = flow->key.phy.priority; + packet->mark = flow->key.phy.skb_mark; rcu_read_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 733cbf49ed1..c3294cebc4f 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -604,6 +604,7 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, key->phy.priority = skb->priority; key->phy.in_port = in_port; + key->phy.skb_mark = skb->mark; skb_reset_mac_header(skb); @@ -689,7 +690,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, } } - } else if (key->eth.type == htons(ETH_P_ARP) && arphdr_ok(skb)) { + } else if ((key->eth.type == htons(ETH_P_ARP) || + key->eth.type == htons(ETH_P_RARP)) && arphdr_ok(skb)) { struct arp_eth_header *arp; arp = (struct arp_eth_header *)skb_network_header(skb); @@ -802,6 +804,7 @@ const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { [OVS_KEY_ATTR_ENCAP] = -1, [OVS_KEY_ATTR_PRIORITY] = sizeof(u32), [OVS_KEY_ATTR_IN_PORT] = sizeof(u32), + [OVS_KEY_ATTR_SKB_MARK] = sizeof(u32), [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet), [OVS_KEY_ATTR_VLAN] = sizeof(__be16), [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16), @@ -987,6 +990,10 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, } else { swkey->phy.in_port = DP_MAX_PORTS; } + if (attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) { + swkey->phy.skb_mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]); + attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK); + } /* Data attributes. */ if (!(attrs & (1 << OVS_KEY_ATTR_ETHERNET))) @@ -1086,7 +1093,8 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, if (err) return err; } - } else if (swkey->eth.type == htons(ETH_P_ARP)) { + } else if (swkey->eth.type == htons(ETH_P_ARP) || + swkey->eth.type == htons(ETH_P_RARP)) { const struct ovs_key_arp *arp_key; if (!(attrs & (1 << OVS_KEY_ATTR_ARP))) @@ -1113,6 +1121,8 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, /** * ovs_flow_metadata_from_nlattrs - parses Netlink attributes into a flow key. + * @priority: receives the skb priority + * @mark: receives the skb mark * @in_port: receives the extracted input port. * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute * sequence. @@ -1122,7 +1132,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, * get the metadata, that is, the parts of the flow key that cannot be * extracted from the packet itself. */ -int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, +int ovs_flow_metadata_from_nlattrs(u32 *priority, u32 *mark, u16 *in_port, const struct nlattr *attr) { const struct nlattr *nla; @@ -1130,6 +1140,7 @@ int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, *in_port = DP_MAX_PORTS; *priority = 0; + *mark = 0; nla_for_each_nested(nla, attr, rem) { int type = nla_type(nla); @@ -1148,6 +1159,10 @@ int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, return -EINVAL; *in_port = nla_get_u32(nla); break; + + case OVS_KEY_ATTR_SKB_MARK: + *mark = nla_get_u32(nla); + break; } } } @@ -1169,6 +1184,10 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb) nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port)) goto nla_put_failure; + if (swkey->phy.skb_mark && + nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, swkey->phy.skb_mark)) + goto nla_put_failure; + nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); if (!nla) goto nla_put_failure; @@ -1222,7 +1241,8 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb) ipv6_key->ipv6_tclass = swkey->ip.tos; ipv6_key->ipv6_hlimit = swkey->ip.ttl; ipv6_key->ipv6_frag = swkey->ip.frag; - } else if (swkey->eth.type == htons(ETH_P_ARP)) { + } else if (swkey->eth.type == htons(ETH_P_ARP) || + swkey->eth.type == htons(ETH_P_RARP)) { struct ovs_key_arp *arp_key; nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key)); diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 14a324eb017..a7bb60ff3b5 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -43,6 +43,7 @@ struct sw_flow_actions { struct sw_flow_key { struct { u32 priority; /* Packet QoS priority. */ + u32 skb_mark; /* SKB mark. */ u16 in_port; /* Input switch port (or DP_MAX_PORTS). */ } phy; struct { @@ -144,6 +145,7 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies); * ------ --- ------ ----- * OVS_KEY_ATTR_PRIORITY 4 -- 4 8 * OVS_KEY_ATTR_IN_PORT 4 -- 4 8 + * OVS_KEY_ATTR_SKB_MARK 4 -- 4 8 * OVS_KEY_ATTR_ETHERNET 12 -- 4 16 * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (outer VLAN ethertype) * OVS_KEY_ATTR_8021Q 4 -- 4 8 @@ -153,14 +155,14 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies); * OVS_KEY_ATTR_ICMPV6 2 2 4 8 * OVS_KEY_ATTR_ND 28 -- 4 32 * ------------------------------------------------- - * total 144 + * total 152 */ -#define FLOW_BUFSIZE 144 +#define FLOW_BUFSIZE 152 int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *); int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, const struct nlattr *); -int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, +int ovs_flow_metadata_from_nlattrs(u32 *priority, u32 *mark, u16 *in_port, const struct nlattr *); #define MAX_ACTIONS_BUFSIZE (16 * 1024) diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index a9033481fa5..a9327e2e48c 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -114,6 +114,15 @@ error: return ERR_PTR(err); } +static void free_port_rcu(struct rcu_head *rcu) +{ + struct netdev_vport *netdev_vport = container_of(rcu, + struct netdev_vport, rcu); + + dev_put(netdev_vport->dev); + ovs_vport_free(vport_from_priv(netdev_vport)); +} + static void netdev_destroy(struct vport *vport) { struct netdev_vport *netdev_vport = netdev_vport_priv(vport); @@ -122,10 +131,7 @@ static void netdev_destroy(struct vport *vport) netdev_rx_handler_unregister(netdev_vport->dev); dev_set_promiscuity(netdev_vport->dev, -1); - synchronize_rcu(); - - dev_put(netdev_vport->dev); - ovs_vport_free(vport); + call_rcu(&netdev_vport->rcu, free_port_rcu); } const char *ovs_netdev_get_name(const struct vport *vport) diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h index f7072a25c60..6478079b341 100644 --- a/net/openvswitch/vport-netdev.h +++ b/net/openvswitch/vport-netdev.h @@ -20,12 +20,15 @@ #define VPORT_NETDEV_H 1 #include <linux/netdevice.h> +#include <linux/rcupdate.h> #include "vport.h" struct vport *ovs_netdev_get_vport(struct net_device *dev); struct netdev_vport { + struct rcu_head rcu; + struct net_device *dev; }; diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 03779e8a262..70af0bedbac 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -333,8 +333,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb) { struct vport_percpu_stats *stats; - stats = per_cpu_ptr(vport->percpu_stats, smp_processor_id()); - + stats = this_cpu_ptr(vport->percpu_stats); u64_stats_update_begin(&stats->sync); stats->rx_packets++; stats->rx_bytes += skb->len; @@ -359,7 +358,7 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb) if (likely(sent)) { struct vport_percpu_stats *stats; - stats = per_cpu_ptr(vport->percpu_stats, smp_processor_id()); + stats = this_cpu_ptr(vport->percpu_stats); u64_stats_update_begin(&stats->sync); stats->tx_packets++; |