From 2eb32b0a6f9d73fafc6b2c00ac0b705de13ae143 Mon Sep 17 00:00:00 2001 From: Mugunthan V N Date: Mon, 30 Jul 2012 10:17:14 +0000 Subject: drivers: net: ethernet: cpsw: Add device tree support to CPSW This patch adds device tree support for cpsw driver Signed-off-by: Mugunthan V N Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/cpsw.txt | 104 +++++++++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/cpsw.txt (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/cpsw.txt b/Documentation/devicetree/bindings/net/cpsw.txt new file mode 100644 index 00000000000..acca48c4246 --- /dev/null +++ b/Documentation/devicetree/bindings/net/cpsw.txt @@ -0,0 +1,104 @@ +TI SoC Ethernet Switch Controller Device Tree Bindings +------------------------------------------------------ + +Required properties: +- compatible : Should be "ti,cpsw" +- reg : physical base address and size of the cpsw + registers map +- interrupts : property with a value describing the interrupt + number +- interrupt-parent : The parent interrupt controller +- cpdma_channels : Specifies number of channels in CPDMA +- host_port_no : Specifies host port shift +- cpdma_reg_ofs : Specifies CPDMA submodule register offset +- ale_reg_ofs : Specifies ALE submodule register offset +- ale_entries : Specifies No of entries ALE can hold +- host_port_reg_ofs : Specifies host port register offset +- hw_stats_reg_ofs : Specifies hardware statistics register offset +- bd_ram_ofs : Specifies internal desciptor RAM offset +- bd_ram_size : Specifies internal descriptor RAM size +- rx_descs : Specifies number of Rx descriptors +- mac_control : Specifies Default MAC control register content + for the specific platform +- slaves : Specifies number for slaves +- slave_reg_ofs : Specifies slave register offset +- sliver_reg_ofs : Specifies slave sliver register offset +- phy_id : Specifies slave phy id +- mac-address : Specifies slave MAC address + +Optional properties: +- ti,hwmods : Must be "cpgmac0" +- no_bd_ram : Must be 0 or 1 + +Note: "ti,hwmods" field is used to fetch the base address and irq +resources from TI, omap hwmod data base during device registration. +Future plan is to migrate hwmod data base contents into device tree +blob so that, all the required data will be used from device tree dts +file. + +Examples: + + mac: ethernet@4A100000 { + compatible = "ti,cpsw"; + reg = <0x4A100000 0x1000>; + interrupts = <55 0x4>; + interrupt-parent = <&intc>; + cpdma_channels = 8; + host_port_no = 0; + cpdma_reg_ofs = 0x800; + ale_reg_ofs = 0xd00; + ale_entries = 1024; + host_port_reg_ofs = 0x108; + hw_stats_reg_ofs = 0x900; + bd_ram_ofs = 0x2000; + bd_ram_size = 0x2000; + no_bd_ram = 0; + rx_descs = 64; + mac_control = 0x20; + slaves = 2; + slave@0 { + slave_reg_ofs = 0x208; + sliver_reg_ofs = 0xd80; + phy_id = "davinci_mdio-0:00" + mac-address = [00 04 9F 01 1B B8]; + }; + slave@1 { + slave_reg_ofs = 0x208; + sliver_reg_ofs = 0xd80; + phy_id = "davinci_mdio-0:01" + mac-address = [00 04 9F 01 1B B9]; + }; + }; + +(or) + + mac: ethernet@4A100000 { + compatible = "ti,cpsw"; + ti,hwmods = "cpgmac0"; + cpdma_channels = 8; + host_port_no = 0; + cpdma_reg_ofs = 0x800; + ale_reg_ofs = 0xd00; + ale_entries = 1024; + host_port_reg_ofs = 0x108; + hw_stats_reg_ofs = 0x900; + bd_ram_ofs = 0x2000; + bd_ram_size = 0x2000; + no_bd_ram = 0; + rx_descs = 64; + mac_control = 0x20; + slaves = 2; + slave@0 { + slave_reg_ofs = 0x208; + sliver_reg_ofs = 0xd80; + phy_id = "davinci_mdio-0:00" + mac-address = [00 04 9F 01 1B B8]; + }; + slave@1 { + slave_reg_ofs = 0x208; + sliver_reg_ofs = 0xd80; + phy_id = "davinci_mdio-0:01" + mac-address = [00 04 9F 01 1B B9]; + }; + + }; -- cgit v1.2.3-70-g09d2 From ec03e6a89e5168c92581a769681207c29ad2030f Mon Sep 17 00:00:00 2001 From: Mugunthan V N Date: Mon, 6 Aug 2012 05:05:57 +0000 Subject: drivers: net: ethernet: davince_mdio: device tree implementation device tree implementation for davinci mdio driver Signed-off-by: Mugunthan V N Signed-off-by: David S. Miller --- .../devicetree/bindings/net/davinci-mdio.txt | 33 +++++++++++++++++ drivers/net/ethernet/ti/davinci_mdio.c | 41 +++++++++++++++++++--- 2 files changed, 70 insertions(+), 4 deletions(-) create mode 100644 Documentation/devicetree/bindings/net/davinci-mdio.txt (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/davinci-mdio.txt b/Documentation/devicetree/bindings/net/davinci-mdio.txt new file mode 100644 index 00000000000..72efaaf764f --- /dev/null +++ b/Documentation/devicetree/bindings/net/davinci-mdio.txt @@ -0,0 +1,33 @@ +TI SoC Davinci MDIO Controller Device Tree Bindings +--------------------------------------------------- + +Required properties: +- compatible : Should be "ti,davinci_mdio" +- reg : physical base address and size of the davinci mdio + registers map +- bus_freq : Mdio Bus frequency + +Optional properties: +- ti,hwmods : Must be "davinci_mdio" + +Note: "ti,hwmods" field is used to fetch the base address and irq +resources from TI, omap hwmod data base during device registration. +Future plan is to migrate hwmod data base contents into device tree +blob so that, all the required data will be used from device tree dts +file. + +Examples: + + mdio: davinci_mdio@4A101000 { + compatible = "ti,cpsw"; + reg = <0x4A101000 0x1000>; + bus_freq = <1000000>; + }; + +(or) + + mdio: davinci_mdio@4A101000 { + compatible = "ti,cpsw"; + ti,hwmods = "davinci_mdio"; + bus_freq = <1000000>; + }; diff --git a/drivers/net/ethernet/ti/davinci_mdio.c b/drivers/net/ethernet/ti/davinci_mdio.c index cd7ee204e94..573f3be5f42 100644 --- a/drivers/net/ethernet/ti/davinci_mdio.c +++ b/drivers/net/ethernet/ti/davinci_mdio.c @@ -36,6 +36,8 @@ #include #include #include +#include +#include /* * This timeout definition is a worst-case ultra defensive measure against @@ -289,6 +291,25 @@ static int davinci_mdio_write(struct mii_bus *bus, int phy_id, return 0; } +static int davinci_mdio_probe_dt(struct mdio_platform_data *data, + struct platform_device *pdev) +{ + struct device_node *node = pdev->dev.of_node; + u32 prop; + + if (!node) + return -EINVAL; + + if (of_property_read_u32(node, "bus_freq", &prop)) { + pr_err("Missing bus_freq property in the DT.\n"); + return -EINVAL; + } + data->bus_freq = prop; + + return 0; +} + + static int __devinit davinci_mdio_probe(struct platform_device *pdev) { struct mdio_platform_data *pdata = pdev->dev.platform_data; @@ -304,8 +325,6 @@ static int __devinit davinci_mdio_probe(struct platform_device *pdev) return -ENOMEM; } - data->pdata = pdata ? (*pdata) : default_pdata; - data->bus = mdiobus_alloc(); if (!data->bus) { dev_err(dev, "failed to alloc mii bus\n"); @@ -313,14 +332,22 @@ static int __devinit davinci_mdio_probe(struct platform_device *pdev) goto bail_out; } + if (dev->of_node) { + if (davinci_mdio_probe_dt(&data->pdata, pdev)) + data->pdata = default_pdata; + snprintf(data->bus->id, MII_BUS_ID_SIZE, "%s", pdev->name); + } else { + data->pdata = pdata ? (*pdata) : default_pdata; + snprintf(data->bus->id, MII_BUS_ID_SIZE, "%s-%x", + pdev->name, pdev->id); + } + data->bus->name = dev_name(dev); data->bus->read = davinci_mdio_read, data->bus->write = davinci_mdio_write, data->bus->reset = davinci_mdio_reset, data->bus->parent = dev; data->bus->priv = data; - snprintf(data->bus->id, MII_BUS_ID_SIZE, "%s-%x", - pdev->name, pdev->id); pm_runtime_enable(&pdev->dev); pm_runtime_get_sync(&pdev->dev); @@ -454,11 +481,17 @@ static const struct dev_pm_ops davinci_mdio_pm_ops = { .resume = davinci_mdio_resume, }; +static const struct of_device_id davinci_mdio_of_mtable[] = { + { .compatible = "ti,davinci_mdio", }, + { /* sentinel */ }, +}; + static struct platform_driver davinci_mdio_driver = { .driver = { .name = "davinci_mdio", .owner = THIS_MODULE, .pm = &davinci_mdio_pm_ops, + .of_match_table = of_match_ptr(davinci_mdio_of_mtable), }, .probe = davinci_mdio_probe, .remove = __devexit_p(davinci_mdio_remove), -- cgit v1.2.3-70-g09d2 From e07b94f1352723994d8b588ac5ed8af91bcc9fb6 Mon Sep 17 00:00:00 2001 From: Mugunthan V N Date: Mon, 6 Aug 2012 05:05:58 +0000 Subject: documentation: dt: bindings: cpsw: fixing the examples for directly using it in dts file Fixing the cpsw device tree example to make it simpler to copy pastable to dts file and use it directly. Signed-off-by: Mugunthan V N Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/cpsw.txt | 101 +++++++++++++------------ 1 file changed, 53 insertions(+), 48 deletions(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/cpsw.txt b/Documentation/devicetree/bindings/net/cpsw.txt index acca48c4246..dcaabe9fe86 100644 --- a/Documentation/devicetree/bindings/net/cpsw.txt +++ b/Documentation/devicetree/bindings/net/cpsw.txt @@ -11,6 +11,7 @@ Required properties: - cpdma_channels : Specifies number of channels in CPDMA - host_port_no : Specifies host port shift - cpdma_reg_ofs : Specifies CPDMA submodule register offset +- cpdma_sram_ofs : Specifies CPDMA SRAM offset - ale_reg_ofs : Specifies ALE submodule register offset - ale_entries : Specifies No of entries ALE can hold - host_port_reg_ofs : Specifies host port register offset @@ -43,62 +44,66 @@ Examples: reg = <0x4A100000 0x1000>; interrupts = <55 0x4>; interrupt-parent = <&intc>; - cpdma_channels = 8; - host_port_no = 0; - cpdma_reg_ofs = 0x800; - ale_reg_ofs = 0xd00; - ale_entries = 1024; - host_port_reg_ofs = 0x108; - hw_stats_reg_ofs = 0x900; - bd_ram_ofs = 0x2000; - bd_ram_size = 0x2000; - no_bd_ram = 0; - rx_descs = 64; - mac_control = 0x20; - slaves = 2; - slave@0 { - slave_reg_ofs = 0x208; - sliver_reg_ofs = 0xd80; - phy_id = "davinci_mdio-0:00" - mac-address = [00 04 9F 01 1B B8]; + cpdma_channels = <8>; + host_port_no = <0>; + cpdma_reg_ofs = <0x800>; + cpdma_sram_ofs = <0xa00>; + ale_reg_ofs = <0xd00>; + ale_entries = <1024>; + host_port_reg_ofs = <0x108>; + hw_stats_reg_ofs = <0x900>; + bd_ram_ofs = <0x2000>; + bd_ram_size = <0x2000>; + no_bd_ram = <0>; + rx_descs = <64>; + mac_control = <0x20>; + slaves = <2>; + cpsw_emac0: slave@0 { + slave_reg_ofs = <0x208>; + sliver_reg_ofs = <0xd80>; + phy_id = "davinci_mdio.16:00"; + /* Filled in by U-Boot */ + mac-address = [ 00 00 00 00 00 00 ]; }; - slave@1 { - slave_reg_ofs = 0x208; - sliver_reg_ofs = 0xd80; - phy_id = "davinci_mdio-0:01" - mac-address = [00 04 9F 01 1B B9]; + cpsw_emac1: slave@1 { + slave_reg_ofs = <0x308>; + sliver_reg_ofs = <0xdc0>; + phy_id = "davinci_mdio.16:01"; + /* Filled in by U-Boot */ + mac-address = [ 00 00 00 00 00 00 ]; }; }; (or) - mac: ethernet@4A100000 { compatible = "ti,cpsw"; ti,hwmods = "cpgmac0"; - cpdma_channels = 8; - host_port_no = 0; - cpdma_reg_ofs = 0x800; - ale_reg_ofs = 0xd00; - ale_entries = 1024; - host_port_reg_ofs = 0x108; - hw_stats_reg_ofs = 0x900; - bd_ram_ofs = 0x2000; - bd_ram_size = 0x2000; - no_bd_ram = 0; - rx_descs = 64; - mac_control = 0x20; - slaves = 2; - slave@0 { - slave_reg_ofs = 0x208; - sliver_reg_ofs = 0xd80; - phy_id = "davinci_mdio-0:00" - mac-address = [00 04 9F 01 1B B8]; + cpdma_channels = <8>; + host_port_no = <0>; + cpdma_reg_ofs = <0x800>; + cpdma_sram_ofs = <0xa00>; + ale_reg_ofs = <0xd00>; + ale_entries = <1024>; + host_port_reg_ofs = <0x108>; + hw_stats_reg_ofs = <0x900>; + bd_ram_ofs = <0x2000>; + bd_ram_size = <0x2000>; + no_bd_ram = <0>; + rx_descs = <64>; + mac_control = <0x20>; + slaves = <2>; + cpsw_emac0: slave@0 { + slave_reg_ofs = <0x208>; + sliver_reg_ofs = <0xd80>; + phy_id = "davinci_mdio.16:00"; + /* Filled in by U-Boot */ + mac-address = [ 00 00 00 00 00 00 ]; }; - slave@1 { - slave_reg_ofs = 0x208; - sliver_reg_ofs = 0xd80; - phy_id = "davinci_mdio-0:01" - mac-address = [00 04 9F 01 1B B9]; + cpsw_emac1: slave@1 { + slave_reg_ofs = <0x308>; + sliver_reg_ofs = <0xdc0>; + phy_id = "davinci_mdio.16:01"; + /* Filled in by U-Boot */ + mac-address = [ 00 00 00 00 00 00 ]; }; - }; -- cgit v1.2.3-70-g09d2 From 6b923cb7188d46905f43fa84210c4c3e5f9cd8fb Mon Sep 17 00:00:00 2001 From: John Eaglesham Date: Tue, 21 Aug 2012 20:43:35 +0000 Subject: bonding: support for IPv6 transmit hashing Currently the "bonding" driver does not support load balancing outgoing traffic in LACP mode for IPv6 traffic. IPv4 (and TCP or UDP over IPv4) are currently supported; this patch adds transmit hashing for IPv6 (and TCP or UDP over IPv6), bringing IPv6 up to par with IPv4 support in the bonding driver. In addition, bounds checking has been added to all transmit hashing functions. The algorithm chosen (xor'ing the bottom three quads of the source and destination addresses together, then xor'ing each byte of that result into the bottom byte, finally xor'ing with the last bytes of the MAC addresses) was selected after testing almost 400,000 unique IPv6 addresses harvested from server logs. This algorithm had the most even distribution for both big- and little-endian architectures while still using few instructions. Its behavior also attempts to closely match that of the IPv4 algorithm. The IPv6 flow label was intentionally not included in the hash as it appears to be unset in the vast majority of IPv6 traffic sampled, and the current algorithm not using the flow label already offers a very even distribution. Fragmented IPv6 packets are handled the same way as fragmented IPv4 packets, ie, they are not balanced based on layer 4 information. Additionally, IPv6 packets with intermediate headers are not balanced based on layer 4 information. In practice these intermediate headers are not common and this should not cause any problems, and the alternative (a packet-parsing loop and look-up table) seemed slow and complicated for little gain. Tested-by: John Eaglesham Signed-off-by: John Eaglesham Signed-off-by: David S. Miller --- Documentation/networking/bonding.txt | 30 ++++++++++-- drivers/net/bonding/bond_main.c | 89 +++++++++++++++++++++++++----------- 2 files changed, 88 insertions(+), 31 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt index 6b1c7110534..10a015c384b 100644 --- a/Documentation/networking/bonding.txt +++ b/Documentation/networking/bonding.txt @@ -752,12 +752,22 @@ xmit_hash_policy protocol information to generate the hash. Uses XOR of hardware MAC addresses and IP addresses to - generate the hash. The formula is + generate the hash. The IPv4 formula is (((source IP XOR dest IP) AND 0xffff) XOR ( source MAC XOR destination MAC )) modulo slave count + The IPv6 formula is + + hash = (source ip quad 2 XOR dest IP quad 2) XOR + (source ip quad 3 XOR dest IP quad 3) XOR + (source ip quad 4 XOR dest IP quad 4) + + (((hash >> 24) XOR (hash >> 16) XOR (hash >> 8) XOR hash) + XOR (source MAC XOR destination MAC)) + modulo slave count + This algorithm will place all traffic to a particular network peer on the same slave. For non-IP traffic, the formula is the same as for the layer2 transmit @@ -778,19 +788,29 @@ xmit_hash_policy slaves, although a single connection will not span multiple slaves. - The formula for unfragmented TCP and UDP packets is + The formula for unfragmented IPv4 TCP and UDP packets is ((source port XOR dest port) XOR ((source IP XOR dest IP) AND 0xffff) modulo slave count - For fragmented TCP or UDP packets and all other IP - protocol traffic, the source and destination port + The formula for unfragmented IPv6 TCP and UDP packets is + + hash = (source port XOR dest port) XOR + ((source ip quad 2 XOR dest IP quad 2) XOR + (source ip quad 3 XOR dest IP quad 3) XOR + (source ip quad 4 XOR dest IP quad 4)) + + ((hash >> 24) XOR (hash >> 16) XOR (hash >> 8) XOR hash) + modulo slave count + + For fragmented TCP or UDP packets and all other IPv4 and + IPv6 protocol traffic, the source and destination port information is omitted. For non-IP traffic, the formula is the same as for the layer2 transmit hash policy. - This policy is intended to mimic the behavior of + The IPv4 policy is intended to mimic the behavior of certain switches, notably Cisco switches with PFC2 as well as some Foundry and IBM products. diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index a86174c9fed..b24ce257ac7 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3351,57 +3351,94 @@ static struct notifier_block bond_netdev_notifier = { /*---------------------------- Hashing Policies -----------------------------*/ +/* + * Hash for the output device based upon layer 2 data + */ +static int bond_xmit_hash_policy_l2(struct sk_buff *skb, int count) +{ + struct ethhdr *data = (struct ethhdr *)skb->data; + + if (skb_headlen(skb) >= offsetof(struct ethhdr, h_proto)) + return (data->h_dest[5] ^ data->h_source[5]) % count; + + return 0; +} + /* * Hash for the output device based upon layer 2 and layer 3 data. If - * the packet is not IP mimic bond_xmit_hash_policy_l2() + * the packet is not IP, fall back on bond_xmit_hash_policy_l2() */ static int bond_xmit_hash_policy_l23(struct sk_buff *skb, int count) { struct ethhdr *data = (struct ethhdr *)skb->data; - struct iphdr *iph = ip_hdr(skb); - - if (skb->protocol == htons(ETH_P_IP)) { + struct iphdr *iph; + struct ipv6hdr *ipv6h; + u32 v6hash; + __be32 *s, *d; + + if (skb->protocol == htons(ETH_P_IP) && + skb_network_header_len(skb) >= sizeof(*iph)) { + iph = ip_hdr(skb); return ((ntohl(iph->saddr ^ iph->daddr) & 0xffff) ^ (data->h_dest[5] ^ data->h_source[5])) % count; + } else if (skb->protocol == htons(ETH_P_IPV6) && + skb_network_header_len(skb) >= sizeof(*ipv6h)) { + ipv6h = ipv6_hdr(skb); + s = &ipv6h->saddr.s6_addr32[0]; + d = &ipv6h->daddr.s6_addr32[0]; + v6hash = (s[1] ^ d[1]) ^ (s[2] ^ d[2]) ^ (s[3] ^ d[3]); + v6hash ^= (v6hash >> 24) ^ (v6hash >> 16) ^ (v6hash >> 8); + return (v6hash ^ data->h_dest[5] ^ data->h_source[5]) % count; } - return (data->h_dest[5] ^ data->h_source[5]) % count; + return bond_xmit_hash_policy_l2(skb, count); } /* * Hash for the output device based upon layer 3 and layer 4 data. If * the packet is a frag or not TCP or UDP, just use layer 3 data. If it is - * altogether not IP, mimic bond_xmit_hash_policy_l2() + * altogether not IP, fall back on bond_xmit_hash_policy_l2() */ static int bond_xmit_hash_policy_l34(struct sk_buff *skb, int count) { - struct ethhdr *data = (struct ethhdr *)skb->data; - struct iphdr *iph = ip_hdr(skb); - __be16 *layer4hdr = (__be16 *)((u32 *)iph + iph->ihl); - int layer4_xor = 0; - - if (skb->protocol == htons(ETH_P_IP)) { + u32 layer4_xor = 0; + struct iphdr *iph; + struct ipv6hdr *ipv6h; + __be32 *s, *d; + __be16 *layer4hdr; + + if (skb->protocol == htons(ETH_P_IP) && + skb_network_header_len(skb) >= sizeof(*iph)) { + iph = ip_hdr(skb); if (!ip_is_fragment(iph) && (iph->protocol == IPPROTO_TCP || - iph->protocol == IPPROTO_UDP)) { - layer4_xor = ntohs((*layer4hdr ^ *(layer4hdr + 1))); + iph->protocol == IPPROTO_UDP) && + (skb_headlen(skb) - skb_network_offset(skb) >= + iph->ihl * sizeof(u32) + sizeof(*layer4hdr) * 2)) { + layer4hdr = (__be16 *)((u32 *)iph + iph->ihl); + layer4_xor = ntohs(*layer4hdr ^ *(layer4hdr + 1)); } return (layer4_xor ^ ((ntohl(iph->saddr ^ iph->daddr)) & 0xffff)) % count; - + } else if (skb->protocol == htons(ETH_P_IPV6) && + skb_network_header_len(skb) >= sizeof(*ipv6h)) { + ipv6h = ipv6_hdr(skb); + if ((ipv6h->nexthdr == IPPROTO_TCP || + ipv6h->nexthdr == IPPROTO_UDP) && + (skb_headlen(skb) - skb_network_offset(skb) >= + sizeof(*ipv6h) + sizeof(*layer4hdr) * 2)) { + layer4hdr = (__be16 *)(ipv6h + 1); + layer4_xor = ntohs(*layer4hdr ^ *(layer4hdr + 1)); + } + s = &ipv6h->saddr.s6_addr32[0]; + d = &ipv6h->daddr.s6_addr32[0]; + layer4_xor ^= (s[1] ^ d[1]) ^ (s[2] ^ d[2]) ^ (s[3] ^ d[3]); + layer4_xor ^= (layer4_xor >> 24) ^ (layer4_xor >> 16) ^ + (layer4_xor >> 8); + return layer4_xor % count; } - return (data->h_dest[5] ^ data->h_source[5]) % count; -} - -/* - * Hash for the output device based upon layer 2 data - */ -static int bond_xmit_hash_policy_l2(struct sk_buff *skb, int count) -{ - struct ethhdr *data = (struct ethhdr *)skb->data; - - return (data->h_dest[5] ^ data->h_source[5]) % count; + return bond_xmit_hash_policy_l2(skb, count); } /*-------------------------- Device entry points ----------------------------*/ -- cgit v1.2.3-70-g09d2 From 536a23f119e35e58c762a219bafd398ba2ed7980 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Mon, 18 Jun 2012 18:39:26 +0200 Subject: batman-adv: Add the backbone gateway list to debugfs This is especially useful if there are no claims yet, but we still want to know which gateways are using bridge loop avoidance in the network. Signed-off-by: Simon Wunderlich Signed-off-by: Antonio Quartulli --- Documentation/networking/batman-adv.txt | 7 ++-- net/batman-adv/bridge_loop_avoidance.c | 65 +++++++++++++++++++++++++++++++++ net/batman-adv/bridge_loop_avoidance.h | 8 ++++ net/batman-adv/debugfs.c | 12 ++++++ 4 files changed, 89 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/batman-adv.txt b/Documentation/networking/batman-adv.txt index 8f3ae4a6147..a173d2a879f 100644 --- a/Documentation/networking/batman-adv.txt +++ b/Documentation/networking/batman-adv.txt @@ -75,9 +75,10 @@ folder: There is a special folder for debugging information: -# ls /sys/kernel/debug/batman_adv/bat0/ -# bla_claim_table log socket transtable_local -# gateways originators transtable_global vis_data +# ls /sys/kernel/debug/batman_adv/bat0/ +# bla_backbone_table log transtable_global +# bla_claim_table originators transtable_local +# gateways socket vis_data Some of the files contain all sort of status information regard- ing the mesh network. For example, you can view the table of diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 6705d35b17c..7e60466f571 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -1616,3 +1616,68 @@ out: batadv_hardif_free_ref(primary_if); return ret; } + +int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) +{ + struct net_device *net_dev = (struct net_device *)seq->private; + struct batadv_priv *bat_priv = netdev_priv(net_dev); + struct batadv_hashtable *hash = bat_priv->backbone_hash; + struct batadv_backbone_gw *backbone_gw; + struct batadv_hard_iface *primary_if; + struct hlist_node *node; + struct hlist_head *head; + int secs, msecs; + uint32_t i; + bool is_own; + int ret = 0; + uint8_t *primary_addr; + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if) { + ret = seq_printf(seq, + "BATMAN mesh %s disabled - please specify interfaces to enable it\n", + net_dev->name); + goto out; + } + + if (primary_if->if_status != BATADV_IF_ACTIVE) { + ret = seq_printf(seq, + "BATMAN mesh %s disabled - primary interface not active\n", + net_dev->name); + goto out; + } + + primary_addr = primary_if->net_dev->dev_addr; + seq_printf(seq, + "Backbones announced for the mesh %s (orig %pM, group id %04x)\n", + net_dev->name, primary_addr, + ntohs(bat_priv->claim_dest.group)); + seq_printf(seq, " %-17s %-5s %-9s (%-4s)\n", + "Originator", "VID", "last seen", "CRC"); + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(backbone_gw, node, head, hash_entry) { + msecs = jiffies_to_msecs(jiffies - + backbone_gw->lasttime); + secs = msecs / 1000; + msecs = msecs % 1000; + + is_own = batadv_compare_eth(backbone_gw->orig, + primary_addr); + if (is_own) + continue; + + seq_printf(seq, + " * %pM on % 5d % 4i.%03is (%04x)\n", + backbone_gw->orig, backbone_gw->vid, + secs, msecs, backbone_gw->crc); + } + rcu_read_unlock(); + } +out: + if (primary_if) + batadv_hardif_free_ref(primary_if); + return ret; +} diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h index 3f0dfa2f6d7..789cb73bde6 100644 --- a/net/batman-adv/bridge_loop_avoidance.h +++ b/net/batman-adv/bridge_loop_avoidance.h @@ -27,6 +27,8 @@ int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid); int batadv_bla_is_backbone_gw(struct sk_buff *skb, struct batadv_orig_node *orig_node, int hdr_size); int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset); +int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, + void *offset); int batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig); int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, struct batadv_bcast_packet *bcast_packet, @@ -65,6 +67,12 @@ static inline int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, return 0; } +static inline int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, + void *offset) +{ + return 0; +} + static inline int batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig) { diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index 34fbb1667bc..391d4fb2026 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -267,6 +267,15 @@ static int batadv_bla_claim_table_open(struct inode *inode, struct file *file) return single_open(file, batadv_bla_claim_table_seq_print_text, net_dev); } + +static int batadv_bla_backbone_table_open(struct inode *inode, + struct file *file) +{ + struct net_device *net_dev = (struct net_device *)inode->i_private; + return single_open(file, batadv_bla_backbone_table_seq_print_text, + net_dev); +} + #endif static int batadv_transtable_local_open(struct inode *inode, struct file *file) @@ -305,6 +314,8 @@ static BATADV_DEBUGINFO(transtable_global, S_IRUGO, batadv_transtable_global_open); #ifdef CONFIG_BATMAN_ADV_BLA static BATADV_DEBUGINFO(bla_claim_table, S_IRUGO, batadv_bla_claim_table_open); +static BATADV_DEBUGINFO(bla_backbone_table, S_IRUGO, + batadv_bla_backbone_table_open); #endif static BATADV_DEBUGINFO(transtable_local, S_IRUGO, batadv_transtable_local_open); @@ -316,6 +327,7 @@ static struct batadv_debuginfo *batadv_mesh_debuginfos[] = { &batadv_debuginfo_transtable_global, #ifdef CONFIG_BATMAN_ADV_BLA &batadv_debuginfo_bla_claim_table, + &batadv_debuginfo_bla_backbone_table, #endif &batadv_debuginfo_transtable_local, &batadv_debuginfo_vis_data, -- cgit v1.2.3-70-g09d2 From 6cc2ff82492d89f763e69ea6b8681926aceda610 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Fri, 24 Aug 2012 09:10:53 +0000 Subject: netdev/phy: add MDIO bus multiplexer driven by a memory-mapped device Add support for an MDIO bus multiplexer controlled by a simple memory-mapped device, like an FPGA. The device must be memory-mapped and contain only 8-bit registers (which keeps things simple). Tested on a Freescale P5020DS board which uses the "PIXIS" FPGA attached to the localbus. Signed-off-by: Timur Tabi Signed-off-by: David S. Miller --- .../devicetree/bindings/net/mdio-mux-mmioreg.txt | 75 +++++++++ drivers/net/phy/Kconfig | 13 ++ drivers/net/phy/Makefile | 1 + drivers/net/phy/mdio-mux-mmioreg.c | 170 +++++++++++++++++++++ 4 files changed, 259 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/mdio-mux-mmioreg.txt create mode 100644 drivers/net/phy/mdio-mux-mmioreg.c (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/mdio-mux-mmioreg.txt b/Documentation/devicetree/bindings/net/mdio-mux-mmioreg.txt new file mode 100644 index 00000000000..8516929c725 --- /dev/null +++ b/Documentation/devicetree/bindings/net/mdio-mux-mmioreg.txt @@ -0,0 +1,75 @@ +Properties for an MDIO bus multiplexer controlled by a memory-mapped device + +This is a special case of a MDIO bus multiplexer. A memory-mapped device, +like an FPGA, is used to control which child bus is connected. The mdio-mux +node must be a child of the memory-mapped device. The driver currently only +supports devices with eight-bit registers. + +Required properties in addition to the generic multiplexer properties: + +- compatible : string, must contain "mdio-mux-mmioreg" + +- reg : integer, contains the offset of the register that controls the bus + multiplexer. The size field in the 'reg' property is the size of + register, and must therefore be 1. + +- mux-mask : integer, contains an eight-bit mask that specifies which + bits in the register control the actual bus multiplexer. The + 'reg' property of each child mdio-mux node must be constrained by + this mask. + +Example: + +The FPGA node defines a memory-mapped FPGA with a register space of 0x30 bytes. +For the "EMI2" MDIO bus, register 9 (BRDCFG1) controls the mux on that bus. +A bitmask of 0x6 means that bits 1 and 2 (bit 0 is lsb) are the bits on +BRDCFG1 that control the actual mux. + + /* The FPGA node */ + fpga: board-control@3,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,p5020ds-fpga", "fsl,fpga-ngpixis"; + reg = <3 0 0x30>; + ranges = <0 3 0 0x30>; + + mdio-mux-emi2 { + compatible = "mdio-mux-mmioreg", "mdio-mux"; + mdio-parent-bus = <&xmdio0>; + #address-cells = <1>; + #size-cells = <0>; + reg = <9 1>; // BRDCFG1 + mux-mask = <0x6>; // EMI2 + + emi2_slot1: mdio@0 { // Slot 1 XAUI (FM2) + reg = <0>; + #address-cells = <1>; + #size-cells = <0>; + + phy_xgmii_slot1: ethernet-phy@0 { + compatible = "ethernet-phy-ieee802.3-c45"; + reg = <4>; + }; + }; + + emi2_slot2: mdio@2 { // Slot 2 XAUI (FM1) + reg = <2>; + #address-cells = <1>; + #size-cells = <0>; + + phy_xgmii_slot2: ethernet-phy@4 { + compatible = "ethernet-phy-ieee802.3-c45"; + reg = <0>; + }; + }; + }; + }; + + /* The parent MDIO bus. */ + xmdio0: mdio@f1000 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,fman-xmdio"; + reg = <0xf1000 0x1000>; + interrupts = <100 1 0 0>; + }; diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 3090dc65a6f..983bbf4d5ef 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -159,6 +159,19 @@ config MDIO_BUS_MUX_GPIO several child MDIO busses to a parent bus. Child bus selection is under the control of GPIO lines. +config MDIO_BUS_MUX_MMIOREG + tristate "Support for MMIO device-controlled MDIO bus multiplexers" + depends on OF_MDIO + select MDIO_BUS_MUX + help + This module provides a driver for MDIO bus multiplexers that + are controlled via a simple memory-mapped device, like an FPGA. + The multiplexer connects one of several child MDIO busses to a + parent bus. Child bus selection is under the control of one of + the FPGA's registers. + + Currently, only 8-bit registers are supported. + endif # PHYLIB config MICREL_KS8995MA diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index 6d2dc6c94f2..426674debae 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -28,3 +28,4 @@ obj-$(CONFIG_MICREL_KS8995MA) += spi_ks8995.o obj-$(CONFIG_AMD_PHY) += amd.o obj-$(CONFIG_MDIO_BUS_MUX) += mdio-mux.o obj-$(CONFIG_MDIO_BUS_MUX_GPIO) += mdio-mux-gpio.o +obj-$(CONFIG_MDIO_BUS_MUX_MMIOREG) += mdio-mux-mmioreg.o diff --git a/drivers/net/phy/mdio-mux-mmioreg.c b/drivers/net/phy/mdio-mux-mmioreg.c new file mode 100644 index 00000000000..098239a98b1 --- /dev/null +++ b/drivers/net/phy/mdio-mux-mmioreg.c @@ -0,0 +1,170 @@ +/* + * Simple memory-mapped device MDIO MUX driver + * + * Author: Timur Tabi + * + * Copyright 2012 Freescale Semiconductor, Inc. + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#include +#include +#include +#include +#include +#include +#include + +struct mdio_mux_mmioreg_state { + void *mux_handle; + phys_addr_t phys; + uint8_t mask; +}; + +/* + * MDIO multiplexing switch function + * + * This function is called by the mdio-mux layer when it thinks the mdio bus + * multiplexer needs to switch. + * + * 'current_child' is the current value of the mux register (masked via + * s->mask). + * + * 'desired_child' is the value of the 'reg' property of the target child MDIO + * node. + * + * The first time this function is called, current_child == -1. + * + * If current_child == desired_child, then the mux is already set to the + * correct bus. + */ +static int mdio_mux_mmioreg_switch_fn(int current_child, int desired_child, + void *data) +{ + struct mdio_mux_mmioreg_state *s = data; + + if (current_child ^ desired_child) { + void *p = ioremap(s->phys, 1); + uint8_t x, y; + + if (!p) + return -ENOMEM; + + x = ioread8(p); + y = (x & ~s->mask) | desired_child; + if (x != y) { + iowrite8((x & ~s->mask) | desired_child, p); + pr_debug("%s: %02x -> %02x\n", __func__, x, y); + } + + iounmap(p); + } + + return 0; +} + +static int __devinit mdio_mux_mmioreg_probe(struct platform_device *pdev) +{ + struct device_node *np2, *np = pdev->dev.of_node; + struct mdio_mux_mmioreg_state *s; + struct resource res; + const __be32 *iprop; + int len, ret; + + dev_dbg(&pdev->dev, "probing node %s\n", np->full_name); + + s = devm_kzalloc(&pdev->dev, sizeof(*s), GFP_KERNEL); + if (!s) + return -ENOMEM; + + ret = of_address_to_resource(np, 0, &res); + if (ret) { + dev_err(&pdev->dev, "could not obtain memory map for node %s\n", + np->full_name); + return ret; + } + s->phys = res.start; + + if (resource_size(&res) != sizeof(uint8_t)) { + dev_err(&pdev->dev, "only 8-bit registers are supported\n"); + return -EINVAL; + } + + iprop = of_get_property(np, "mux-mask", &len); + if (!iprop || len != sizeof(uint32_t)) { + dev_err(&pdev->dev, "missing or invalid mux-mask property\n"); + return -ENODEV; + } + if (be32_to_cpup(iprop) > 255) { + dev_err(&pdev->dev, "only 8-bit registers are supported\n"); + return -EINVAL; + } + s->mask = be32_to_cpup(iprop); + + /* + * Verify that the 'reg' property of each child MDIO bus does not + * set any bits outside of the 'mask'. + */ + for_each_available_child_of_node(np, np2) { + iprop = of_get_property(np2, "reg", &len); + if (!iprop || len != sizeof(uint32_t)) { + dev_err(&pdev->dev, "mdio-mux child node %s is " + "missing a 'reg' property\n", np2->full_name); + return -ENODEV; + } + if (be32_to_cpup(iprop) & ~s->mask) { + dev_err(&pdev->dev, "mdio-mux child node %s has " + "a 'reg' value with unmasked bits\n", + np2->full_name); + return -ENODEV; + } + } + + ret = mdio_mux_init(&pdev->dev, mdio_mux_mmioreg_switch_fn, + &s->mux_handle, s); + if (ret) { + dev_err(&pdev->dev, "failed to register mdio-mux bus %s\n", + np->full_name); + return ret; + } + + pdev->dev.platform_data = s; + + return 0; +} + +static int __devexit mdio_mux_mmioreg_remove(struct platform_device *pdev) +{ + struct mdio_mux_mmioreg_state *s = dev_get_platdata(&pdev->dev); + + mdio_mux_uninit(s->mux_handle); + + return 0; +} + +static struct of_device_id mdio_mux_mmioreg_match[] = { + { + .compatible = "mdio-mux-mmioreg", + }, + {}, +}; +MODULE_DEVICE_TABLE(of, mdio_mux_mmioreg_match); + +static struct platform_driver mdio_mux_mmioreg_driver = { + .driver = { + .name = "mdio-mux-mmioreg", + .owner = THIS_MODULE, + .of_match_table = mdio_mux_mmioreg_match, + }, + .probe = mdio_mux_mmioreg_probe, + .remove = __devexit_p(mdio_mux_mmioreg_remove), +}; + +module_platform_driver(mdio_mux_mmioreg_driver); + +MODULE_AUTHOR("Timur Tabi "); +MODULE_DESCRIPTION("Memory-mapped device MDIO MUX driver"); +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3-70-g09d2 From 6c9ff979d1921e9fd05d89e1383121c2503759b9 Mon Sep 17 00:00:00 2001 From: Alex Bergmann Date: Fri, 31 Aug 2012 02:48:31 +0000 Subject: tcp: Increase timeout for SYN segments Commit 9ad7c049 ("tcp: RFC2988bis + taking RTT sample from 3WHS for the passive open side") changed the initRTO from 3secs to 1sec in accordance to RFC6298 (former RFC2988bis). This reduced the time till the last SYN retransmission packet gets sent from 93secs to 31secs. RFC1122 is stating that the retransmission should be done for at least 3 minutes, but this seems to be quite high. "However, the values of R1 and R2 may be different for SYN and data segments. In particular, R2 for a SYN segment MUST be set large enough to provide retransmission of the segment for at least 3 minutes. The application can close the connection (i.e., give up on the open attempt) sooner, of course." This patch increases the value of TCP_SYN_RETRIES to the value of 6, providing a retransmission window of 63secs. The comments for SYN and SYNACK retries have also been updated to describe the current settings. The same goes for the documentation file "Documentation/networking/ip-sysctl.txt". Signed-off-by: Alexander Bergmann Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 8 ++++++-- include/net/tcp.h | 18 ++++++++++++++---- 2 files changed, 20 insertions(+), 6 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index ca447b35b83..d64e53124b8 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -439,7 +439,9 @@ tcp_stdurg - BOOLEAN tcp_synack_retries - INTEGER Number of times SYNACKs for a passive TCP connection attempt will be retransmitted. Should not be higher than 255. Default value - is 5, which corresponds to ~180seconds. + is 5, which corresponds to 31seconds till the last retransmission + with the current initial RTO of 1second. With this the final timeout + for a passive TCP connection will happen after 63seconds. tcp_syncookies - BOOLEAN Only valid when the kernel was compiled with CONFIG_SYNCOOKIES @@ -478,7 +480,9 @@ tcp_fastopen - INTEGER tcp_syn_retries - INTEGER Number of times initial SYNs for an active TCP connection attempt will be retransmitted. Should not be higher than 255. Default value - is 5, which corresponds to ~180seconds. + is 6, which corresponds to 63seconds till the last restransmission + with the current initial RTO of 1second. With this the final timeout + for an active TCP connection attempt will happen after 127seconds. tcp_timestamps - BOOLEAN Enable timestamps as defined in RFC1323. diff --git a/include/net/tcp.h b/include/net/tcp.h index 9a0021d16d9..0fca06f1646 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -98,11 +98,21 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); * 15 is ~13-30min depending on RTO. */ -#define TCP_SYN_RETRIES 5 /* number of times to retry active opening a - * connection: ~180sec is RFC minimum */ +#define TCP_SYN_RETRIES 6 /* This is how many retries are done + * when active opening a connection. + * RFC1122 says the minimum retry MUST + * be at least 180secs. Nevertheless + * this value is corresponding to + * 63secs of retransmission with the + * current initial RTO. + */ -#define TCP_SYNACK_RETRIES 5 /* number of times to retry passive opening a - * connection: ~180sec is RFC minimum */ +#define TCP_SYNACK_RETRIES 5 /* This is how may retries are done + * when passive opening a connection. + * This is corresponding to 31secs of + * retransmission with the current + * initial RTO. + */ #define TCP_TIMEWAIT_LEN (60*HZ) /* how long to wait to destroy TIME-WAIT * state, about 60 seconds */ -- cgit v1.2.3-70-g09d2 From d56631a66c0d0c9d662abfb38cd1f6326eeebd7c Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Thu, 30 Aug 2012 05:50:43 +0000 Subject: net:stmmac: Remove bus_id from mdio platform data. This patch removes bus_id from mdio platform data, The reason to remove bus_id is, stmmac mdio bus_id is always same as stmmac bus-id, so there is no point in passing this in different variable. Also stmmac ethernet driver connects to phy with bus_id passed its platform data. So, having single bus-id is much simpler. Signed-off-by: Srinivas Kandagatla Signed-off-by: David S. Miller --- Documentation/networking/stmmac.txt | 5 ----- drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 8 +++----- drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c | 1 - include/linux/stmmac.h | 1 - 4 files changed, 3 insertions(+), 12 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/stmmac.txt b/Documentation/networking/stmmac.txt index c676b9cedbd..ef9ee71b4d7 100644 --- a/Documentation/networking/stmmac.txt +++ b/Documentation/networking/stmmac.txt @@ -173,7 +173,6 @@ Where: For MDIO bus The we have: struct stmmac_mdio_bus_data { - int bus_id; int (*phy_reset)(void *priv); unsigned int phy_mask; int *irqs; @@ -181,7 +180,6 @@ For MDIO bus The we have: }; Where: - o bus_id: bus identifier; o phy_reset: hook to reset the phy device attached to the bus. o phy_mask: phy mask passed when register the MDIO bus within the driver. o irqs: list of IRQs, one per PHY. @@ -230,9 +228,6 @@ there are two MAC cores: one MAC is for MDIO Bus/PHY emulation with fixed_link support. static struct stmmac_mdio_bus_data stmmac1_mdio_bus = { - .bus_id = 1, - | - |-> phy device on the bus_id 1 .phy_reset = phy_reset; | |-> function to provide the phy_reset on this board diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index e1f3d04a8c9..0376a5e6b2b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -177,7 +177,7 @@ int stmmac_mdio_register(struct net_device *ndev) new_bus->write = &stmmac_mdio_write; new_bus->reset = &stmmac_mdio_reset; snprintf(new_bus->id, MII_BUS_ID_SIZE, "%s-%x", - new_bus->name, mdio_bus_data->bus_id); + new_bus->name, priv->plat->bus_id); new_bus->priv = ndev; new_bus->irq = irqlist; new_bus->phy_mask = mdio_bus_data->phy_mask; @@ -213,12 +213,10 @@ int stmmac_mdio_register(struct net_device *ndev) * and no PHY number was provided to the MAC, * use the one probed here. */ - if ((priv->plat->bus_id == mdio_bus_data->bus_id) && - (priv->plat->phy_addr == -1)) + if (priv->plat->phy_addr == -1) priv->plat->phy_addr = addr; - act = (priv->plat->bus_id == mdio_bus_data->bus_id) && - (priv->plat->phy_addr == addr); + act = (priv->plat->phy_addr == addr); switch (phydev->irq) { case PHY_POLL: irq_str = "POLL"; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c index 13afb8edfad..1f069b0f6af 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c @@ -40,7 +40,6 @@ static void stmmac_default_data(void) plat_dat.has_gmac = 1; plat_dat.force_sf_dma_mode = 1; - mdio_data.bus_id = 1; mdio_data.phy_reset = NULL; mdio_data.phy_mask = 0; plat_dat.mdio_bus_data = &mdio_data; diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index b69bdb1e08b..a1547ea3920 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -76,7 +76,6 @@ /* Platfrom data for platform device structure's platform_data field */ struct stmmac_mdio_bus_data { - int bus_id; int (*phy_reset)(void *priv); unsigned int phy_mask; int *irqs; -- cgit v1.2.3-70-g09d2 From 1046716368979dee857a2b8a91c4a8833f21b9cb Mon Sep 17 00:00:00 2001 From: Jerry Chu Date: Fri, 31 Aug 2012 12:29:11 +0000 Subject: tcp: TCP Fast Open Server - header & support functions This patch adds all the necessary data structure and support functions to implement TFO server side. It also documents a number of flags for the sysctl_tcp_fastopen knob, and adds a few Linux extension MIBs. In addition, it includes the following: 1. a new TCP_FASTOPEN socket option an application must call to supply a max backlog allowed in order to enable TFO on its listener. 2. A number of key data structures: "fastopen_rsk" in tcp_sock - for a big socket to access its request_sock for retransmission and ack processing purpose. It is non-NULL iff 3WHS not completed. "fastopenq" in request_sock_queue - points to a per Fast Open listener data structure "fastopen_queue" to keep track of qlen (# of outstanding Fast Open requests) and max_qlen, among other things. "listener" in tcp_request_sock - to point to the original listener for book-keeping purpose, i.e., to maintain qlen against max_qlen as part of defense against IP spoofing attack. 3. various data structure and functions, many in tcp_fastopen.c, to support server side Fast Open cookie operations, including /proc/sys/net/ipv4/tcp_fastopen_key to allow manual rekeying. Signed-off-by: H.K. Jerry Chu Cc: Yuchung Cheng Cc: Neal Cardwell Cc: Eric Dumazet Cc: Tom Herbert Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 29 +++++++++--- include/linux/snmp.h | 4 ++ include/linux/tcp.h | 45 ++++++++++++++++-- include/net/request_sock.h | 36 +++++++++++++++ include/net/tcp.h | 46 ++++++++++++++++--- net/ipv4/proc.c | 4 ++ net/ipv4/sysctl_net_ipv4.c | 45 ++++++++++++++++++ net/ipv4/tcp_fastopen.c | 83 +++++++++++++++++++++++++++++++++- net/ipv4/tcp_input.c | 4 +- 9 files changed, 276 insertions(+), 20 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index d64e53124b8..c7fc1072494 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -467,16 +467,31 @@ tcp_syncookies - BOOLEAN tcp_fastopen - INTEGER Enable TCP Fast Open feature (draft-ietf-tcpm-fastopen) to send data in the opening SYN packet. To use this feature, the client application - must not use connect(). Instead, it should use sendmsg() or sendto() - with MSG_FASTOPEN flag which performs a TCP handshake automatically. - - The values (bitmap) are: - 1: Enables sending data in the opening SYN on the client - 5: Enables sending data in the opening SYN on the client regardless - of cookie availability. + must use sendmsg() or sendto() with MSG_FASTOPEN flag rather than + connect() to perform a TCP handshake automatically. + + The values (bitmap) are + 1: Enables sending data in the opening SYN on the client. + 2: Enables TCP Fast Open on the server side, i.e., allowing data in + a SYN packet to be accepted and passed to the application before + 3-way hand shake finishes. + 4: Send data in the opening SYN regardless of cookie availability and + without a cookie option. + 0x100: Accept SYN data w/o validating the cookie. + 0x200: Accept data-in-SYN w/o any cookie option present. + 0x400/0x800: Enable Fast Open on all listeners regardless of the + TCP_FASTOPEN socket option. The two different flags designate two + different ways of setting max_qlen without the TCP_FASTOPEN socket + option. Default: 0 + Note that the client & server side Fast Open flags (1 and 2 + respectively) must be also enabled before the rest of flags can take + effect. + + See include/net/tcp.h and the code for more details. + tcp_syn_retries - INTEGER Number of times initial SYNs for an active TCP connection attempt will be retransmitted. Should not be higher than 255. Default value diff --git a/include/linux/snmp.h b/include/linux/snmp.h index ad6e3a6bf9f..fdfba235f9f 100644 --- a/include/linux/snmp.h +++ b/include/linux/snmp.h @@ -241,6 +241,10 @@ enum LINUX_MIB_TCPCHALLENGEACK, /* TCPChallengeACK */ LINUX_MIB_TCPSYNCHALLENGE, /* TCPSYNChallenge */ LINUX_MIB_TCPFASTOPENACTIVE, /* TCPFastOpenActive */ + LINUX_MIB_TCPFASTOPENPASSIVE, /* TCPFastOpenPassive*/ + LINUX_MIB_TCPFASTOPENPASSIVEFAIL, /* TCPFastOpenPassiveFail */ + LINUX_MIB_TCPFASTOPENLISTENOVERFLOW, /* TCPFastOpenListenOverflow */ + LINUX_MIB_TCPFASTOPENCOOKIEREQD, /* TCPFastOpenCookieReqd */ __LINUX_MIB_MAX }; diff --git a/include/linux/tcp.h b/include/linux/tcp.h index eb125a4c30b..ae46df59062 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -110,6 +110,7 @@ enum { #define TCP_REPAIR_QUEUE 20 #define TCP_QUEUE_SEQ 21 #define TCP_REPAIR_OPTIONS 22 +#define TCP_FASTOPEN 23 /* Enable FastOpen on listeners */ struct tcp_repair_opt { __u32 opt_code; @@ -246,6 +247,7 @@ static inline unsigned int tcp_optlen(const struct sk_buff *skb) /* TCP Fast Open */ #define TCP_FASTOPEN_COOKIE_MIN 4 /* Min Fast Open Cookie size in bytes */ #define TCP_FASTOPEN_COOKIE_MAX 16 /* Max Fast Open Cookie size in bytes */ +#define TCP_FASTOPEN_COOKIE_SIZE 8 /* the size employed by this impl. */ /* TCP Fast Open Cookie as stored in memory */ struct tcp_fastopen_cookie { @@ -312,9 +314,14 @@ struct tcp_request_sock { /* Only used by TCP MD5 Signature so far. */ const struct tcp_request_sock_ops *af_specific; #endif + struct sock *listener; /* needed for TFO */ u32 rcv_isn; u32 snt_isn; u32 snt_synack; /* synack sent time */ + u32 rcv_nxt; /* the ack # by SYNACK. For + * FastOpen it's the seq# + * after data-in-SYN. + */ }; static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req) @@ -505,14 +512,18 @@ struct tcp_sock { struct tcp_md5sig_info __rcu *md5sig_info; #endif -/* TCP fastopen related information */ - struct tcp_fastopen_request *fastopen_req; - /* When the cookie options are generated and exchanged, then this * object holds a reference to them (cookie_values->kref). Also * contains related tcp_cookie_transactions fields. */ struct tcp_cookie_values *cookie_values; + +/* TCP fastopen related information */ + struct tcp_fastopen_request *fastopen_req; + /* fastopen_rsk points to request_sock that resulted in this big + * socket. Used to retransmit SYNACKs etc. + */ + struct request_sock *fastopen_rsk; }; enum tsq_flags { @@ -552,6 +563,34 @@ static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk) return (struct tcp_timewait_sock *)sk; } +static inline bool tcp_passive_fastopen(const struct sock *sk) +{ + return (sk->sk_state == TCP_SYN_RECV && + tcp_sk(sk)->fastopen_rsk != NULL); +} + +static inline bool fastopen_cookie_present(struct tcp_fastopen_cookie *foc) +{ + return foc->len != -1; +} + +static inline int fastopen_init_queue(struct sock *sk, int backlog) +{ + struct request_sock_queue *queue = + &inet_csk(sk)->icsk_accept_queue; + + if (queue->fastopenq == NULL) { + queue->fastopenq = kzalloc( + sizeof(struct fastopen_queue), + sk->sk_allocation); + if (queue->fastopenq == NULL) + return -ENOMEM; + spin_lock_init(&queue->fastopenq->lock); + } + queue->fastopenq->max_qlen = backlog; + return 0; +} + #endif /* __KERNEL__ */ #endif /* _LINUX_TCP_H */ diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 4c0766e201e..c3cdd6c9f44 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -106,6 +106,34 @@ struct listen_sock { struct request_sock *syn_table[0]; }; +/* + * For a TCP Fast Open listener - + * lock - protects the access to all the reqsk, which is co-owned by + * the listener and the child socket. + * qlen - pending TFO requests (still in TCP_SYN_RECV). + * max_qlen - max TFO reqs allowed before TFO is disabled. + * + * XXX (TFO) - ideally these fields can be made as part of "listen_sock" + * structure above. But there is some implementation difficulty due to + * listen_sock being part of request_sock_queue hence will be freed when + * a listener is stopped. But TFO related fields may continue to be + * accessed even after a listener is closed, until its sk_refcnt drops + * to 0 implying no more outstanding TFO reqs. One solution is to keep + * listen_opt around until sk_refcnt drops to 0. But there is some other + * complexity that needs to be resolved. E.g., a listener can be disabled + * temporarily through shutdown()->tcp_disconnect(), and re-enabled later. + */ +struct fastopen_queue { + struct request_sock *rskq_rst_head; /* Keep track of past TFO */ + struct request_sock *rskq_rst_tail; /* requests that caused RST. + * This is part of the defense + * against spoofing attack. + */ + spinlock_t lock; + int qlen; /* # of pending (TCP_SYN_RECV) reqs */ + int max_qlen; /* != 0 iff TFO is currently enabled */ +}; + /** struct request_sock_queue - queue of request_socks * * @rskq_accept_head - FIFO head of established children @@ -129,6 +157,12 @@ struct request_sock_queue { u8 rskq_defer_accept; /* 3 bytes hole, try to pack */ struct listen_sock *listen_opt; + struct fastopen_queue *fastopenq; /* This is non-NULL iff TFO has been + * enabled on this listener. Check + * max_qlen != 0 in fastopen_queue + * to determine if TFO is enabled + * right at this moment. + */ }; extern int reqsk_queue_alloc(struct request_sock_queue *queue, @@ -136,6 +170,8 @@ extern int reqsk_queue_alloc(struct request_sock_queue *queue, extern void __reqsk_queue_destroy(struct request_sock_queue *queue); extern void reqsk_queue_destroy(struct request_sock_queue *queue); +extern void reqsk_fastopen_remove(struct sock *sk, + struct request_sock *req, bool reset); static inline struct request_sock * reqsk_queue_yank_acceptq(struct request_sock_queue *queue) diff --git a/include/net/tcp.h b/include/net/tcp.h index 0fca06f1646..9f8821e3293 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -224,8 +224,24 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); /* Bit Flags for sysctl_tcp_fastopen */ #define TFO_CLIENT_ENABLE 1 +#define TFO_SERVER_ENABLE 2 #define TFO_CLIENT_NO_COOKIE 4 /* Data in SYN w/o cookie option */ +/* Process SYN data but skip cookie validation */ +#define TFO_SERVER_COOKIE_NOT_CHKED 0x100 +/* Accept SYN data w/o any cookie option */ +#define TFO_SERVER_COOKIE_NOT_REQD 0x200 + +/* Force enable TFO on all listeners, i.e., not requiring the + * TCP_FASTOPEN socket option. SOCKOPT1/2 determine how to set max_qlen. + */ +#define TFO_SERVER_WO_SOCKOPT1 0x400 +#define TFO_SERVER_WO_SOCKOPT2 0x800 +/* Always create TFO child sockets on a TFO listener even when + * cookie/data not present. (For testing purpose!) + */ +#define TFO_SERVER_ALWAYS 0x1000 + extern struct inet_timewait_death_row tcp_death_row; /* sysctl variables for tcp */ @@ -421,12 +437,6 @@ extern void tcp_metrics_init(void); extern bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, bool paws_check); extern bool tcp_remember_stamp(struct sock *sk); extern bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw); -extern void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, - struct tcp_fastopen_cookie *cookie, - int *syn_loss, unsigned long *last_syn_loss); -extern void tcp_fastopen_cache_set(struct sock *sk, u16 mss, - struct tcp_fastopen_cookie *cookie, - bool syn_lost); extern void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst); extern void tcp_disable_fack(struct tcp_sock *tp); extern void tcp_close(struct sock *sk, long timeout); @@ -537,6 +547,7 @@ extern void tcp_send_delayed_ack(struct sock *sk); extern void tcp_cwnd_application_limited(struct sock *sk); extern void tcp_resume_early_retransmit(struct sock *sk); extern void tcp_rearm_rto(struct sock *sk); +extern void tcp_reset(struct sock *sk); /* tcp_timer.c */ extern void tcp_init_xmit_timers(struct sock *); @@ -586,6 +597,7 @@ extern int tcp_mtu_to_mss(struct sock *sk, int pmtu); extern int tcp_mss_to_mtu(struct sock *sk, int mss); extern void tcp_mtup_init(struct sock *sk); extern void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt); +extern void tcp_init_buffer_space(struct sock *sk); static inline void tcp_bound_rto(const struct sock *sk) { @@ -1104,6 +1116,7 @@ static inline void tcp_openreq_init(struct request_sock *req, req->rcv_wnd = 0; /* So that tcp_send_synack() knows! */ req->cookie_ts = 0; tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq; + tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; req->mss = rx_opt->mss_clamp; req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0; ireq->tstamp_ok = rx_opt->tstamp_ok; @@ -1308,15 +1321,34 @@ extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, const struct sk_buff extern int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *key); +/* From tcp_fastopen.c */ +extern void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, + struct tcp_fastopen_cookie *cookie, + int *syn_loss, unsigned long *last_syn_loss); +extern void tcp_fastopen_cache_set(struct sock *sk, u16 mss, + struct tcp_fastopen_cookie *cookie, + bool syn_lost); struct tcp_fastopen_request { /* Fast Open cookie. Size 0 means a cookie request */ struct tcp_fastopen_cookie cookie; struct msghdr *data; /* data in MSG_FASTOPEN */ u16 copied; /* queued in tcp_connect() */ }; - void tcp_free_fastopen_req(struct tcp_sock *tp); +extern struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; +int tcp_fastopen_reset_cipher(void *key, unsigned int len); +void tcp_fastopen_cookie_gen(__be32 addr, struct tcp_fastopen_cookie *foc); + +#define TCP_FASTOPEN_KEY_LENGTH 16 + +/* Fastopen key context */ +struct tcp_fastopen_context { + struct crypto_cipher __rcu *tfm; + __u8 key[TCP_FASTOPEN_KEY_LENGTH]; + struct rcu_head rcu; +}; + /* write queue abstraction */ static inline void tcp_write_queue_purge(struct sock *sk) { diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 957acd12250..8de53e1ddd5 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -263,6 +263,10 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPChallengeACK", LINUX_MIB_TCPCHALLENGEACK), SNMP_MIB_ITEM("TCPSYNChallenge", LINUX_MIB_TCPSYNCHALLENGE), SNMP_MIB_ITEM("TCPFastOpenActive", LINUX_MIB_TCPFASTOPENACTIVE), + SNMP_MIB_ITEM("TCPFastOpenPassive", LINUX_MIB_TCPFASTOPENPASSIVE), + SNMP_MIB_ITEM("TCPFastOpenPassiveFail", LINUX_MIB_TCPFASTOPENPASSIVEFAIL), + SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW), + SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 3e78c79b558..9205e492dc9 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -232,6 +232,45 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write, return 0; } +int proc_tcp_fastopen_key(ctl_table *ctl, int write, void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) }; + struct tcp_fastopen_context *ctxt; + int ret; + u32 user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */ + + tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL); + if (!tbl.data) + return -ENOMEM; + + rcu_read_lock(); + ctxt = rcu_dereference(tcp_fastopen_ctx); + if (ctxt) + memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH); + rcu_read_unlock(); + + snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x", + user_key[0], user_key[1], user_key[2], user_key[3]); + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); + + if (write && ret == 0) { + if (sscanf(tbl.data, "%x-%x-%x-%x", user_key, user_key + 1, + user_key + 2, user_key + 3) != 4) { + ret = -EINVAL; + goto bad_key; + } + tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH); + } + +bad_key: + pr_debug("proc FO key set 0x%x-%x-%x-%x <- 0x%s: %u\n", + user_key[0], user_key[1], user_key[2], user_key[3], + (char *)tbl.data, ret); + kfree(tbl.data); + return ret; +} + static struct ctl_table ipv4_table[] = { { .procname = "tcp_timestamps", @@ -385,6 +424,12 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "tcp_fastopen_key", + .mode = 0600, + .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10), + .proc_handler = proc_tcp_fastopen_key, + }, { .procname = "tcp_tw_recycle", .data = &tcp_death_row.sysctl_tw_recycle, diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index a7f729c409d..8f7ef0ad80e 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -1,10 +1,91 @@ +#include #include #include +#include +#include +#include +#include +#include +#include -int sysctl_tcp_fastopen; +int sysctl_tcp_fastopen __read_mostly; + +struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; + +static DEFINE_SPINLOCK(tcp_fastopen_ctx_lock); + +static void tcp_fastopen_ctx_free(struct rcu_head *head) +{ + struct tcp_fastopen_context *ctx = + container_of(head, struct tcp_fastopen_context, rcu); + crypto_free_cipher(ctx->tfm); + kfree(ctx); +} + +int tcp_fastopen_reset_cipher(void *key, unsigned int len) +{ + int err; + struct tcp_fastopen_context *ctx, *octx; + + ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + ctx->tfm = crypto_alloc_cipher("aes", 0, 0); + + if (IS_ERR(ctx->tfm)) { + err = PTR_ERR(ctx->tfm); +error: kfree(ctx); + pr_err("TCP: TFO aes cipher alloc error: %d\n", err); + return err; + } + err = crypto_cipher_setkey(ctx->tfm, key, len); + if (err) { + pr_err("TCP: TFO cipher key error: %d\n", err); + crypto_free_cipher(ctx->tfm); + goto error; + } + memcpy(ctx->key, key, len); + + spin_lock(&tcp_fastopen_ctx_lock); + + octx = rcu_dereference_protected(tcp_fastopen_ctx, + lockdep_is_held(&tcp_fastopen_ctx_lock)); + rcu_assign_pointer(tcp_fastopen_ctx, ctx); + spin_unlock(&tcp_fastopen_ctx_lock); + + if (octx) + call_rcu(&octx->rcu, tcp_fastopen_ctx_free); + return err; +} + +/* Computes the fastopen cookie for the peer. + * The peer address is a 128 bits long (pad with zeros for IPv4). + * + * The caller must check foc->len to determine if a valid cookie + * has been generated successfully. +*/ +void tcp_fastopen_cookie_gen(__be32 addr, struct tcp_fastopen_cookie *foc) +{ + __be32 peer_addr[4] = { addr, 0, 0, 0 }; + struct tcp_fastopen_context *ctx; + + rcu_read_lock(); + ctx = rcu_dereference(tcp_fastopen_ctx); + if (ctx) { + crypto_cipher_encrypt_one(ctx->tfm, + foc->val, + (__u8 *)peer_addr); + foc->len = TCP_FASTOPEN_COOKIE_SIZE; + } + rcu_read_unlock(); +} static int __init tcp_fastopen_init(void) { + __u8 key[TCP_FASTOPEN_KEY_LENGTH]; + + get_random_bytes(key, sizeof(key)); + tcp_fastopen_reset_cipher(key, sizeof(key)); return 0; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ce4ffe9ed55..d47d5fe8f3f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -378,7 +378,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk) /* 4. Try to fixup all. It is made immediately after connection enters * established state. */ -static void tcp_init_buffer_space(struct sock *sk) +void tcp_init_buffer_space(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); int maxwin; @@ -4038,7 +4038,7 @@ static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq) } /* When we get a reset we do this. */ -static void tcp_reset(struct sock *sk) +void tcp_reset(struct sock *sk) { /* We want the right error as BSD sees it (and indeed as we do). */ switch (sk->sk_state) { -- cgit v1.2.3-70-g09d2 From 965505015beccc4ec900798070165875b8e8dccf Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 25 Aug 2012 20:23:39 +0000 Subject: netfilter: remove xt_NOTRACK It was scheduled to be removed for a long time. Cc: Pablo Neira Ayuso Cc: Patrick McHardy Cc: "David S. Miller" Cc: netfilter@vger.kernel.org Signed-off-by: Cong Wang Signed-off-by: Pablo Neira Ayuso --- Documentation/feature-removal-schedule.txt | 8 ----- arch/m68k/configs/amiga_defconfig | 1 - arch/m68k/configs/apollo_defconfig | 1 - arch/m68k/configs/atari_defconfig | 1 - arch/m68k/configs/bvme6000_defconfig | 1 - arch/m68k/configs/hp300_defconfig | 1 - arch/m68k/configs/mac_defconfig | 1 - arch/m68k/configs/multi_defconfig | 1 - arch/m68k/configs/mvme147_defconfig | 1 - arch/m68k/configs/mvme16x_defconfig | 1 - arch/m68k/configs/q40_defconfig | 1 - arch/m68k/configs/sun3_defconfig | 1 - arch/m68k/configs/sun3x_defconfig | 1 - arch/mips/configs/ar7_defconfig | 1 - arch/mips/configs/bcm47xx_defconfig | 1 - arch/mips/configs/ip22_defconfig | 1 - arch/mips/configs/jazz_defconfig | 1 - arch/mips/configs/malta_defconfig | 1 - arch/mips/configs/markeins_defconfig | 1 - arch/mips/configs/nlm_xlp_defconfig | 1 - arch/mips/configs/nlm_xlr_defconfig | 1 - arch/mips/configs/rm200_defconfig | 1 - arch/powerpc/configs/pmac32_defconfig | 1 - arch/powerpc/configs/ppc64_defconfig | 1 - arch/powerpc/configs/ppc64e_defconfig | 1 - arch/powerpc/configs/ppc6xx_defconfig | 1 - arch/tile/configs/tilegx_defconfig | 1 - arch/tile/configs/tilepro_defconfig | 1 - net/netfilter/Kconfig | 13 -------- net/netfilter/Makefile | 1 - net/netfilter/xt_NOTRACK.c | 53 ------------------------------ 31 files changed, 102 deletions(-) delete mode 100644 net/netfilter/xt_NOTRACK.c (limited to 'Documentation') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index afaff312bf4..b4aab82f52f 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -353,14 +353,6 @@ Why: Internal alias support has been present in module-init-tools for some Who: Wey-Yi Guy ---------------------------- - -What: xt_NOTRACK -Files: net/netfilter/xt_NOTRACK.c -When: April 2011 -Why: Superseded by xt_CT -Who: Netfilter developer team - ---------------------------- What: IRQF_DISABLED diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index e93fdae10b2..90d3109c82f 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -67,7 +67,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index 66b26c1e848..8f4f657fdbc 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -67,7 +67,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 15133251598..4571d33903f 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -65,7 +65,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index 67bb6fc117f..12f211733ba 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -65,7 +65,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index 3e35ce5fa46..215389a5407 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -66,7 +66,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index ae81e2d190c..cb9dfb30b67 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -61,7 +61,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 55d394edf63..8d5def4a31e 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -80,7 +80,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index af773743ee1..e2af46f530c 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -64,7 +64,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index cdb70d66e53..7c9402b2097 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -65,7 +65,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index 46bed78d065..19d23db690a 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -61,7 +61,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 86f7772bafb..ca6c0b4cab7 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -62,7 +62,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index 288261456e1..c80941c7759 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -62,7 +62,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m diff --git a/arch/mips/configs/ar7_defconfig b/arch/mips/configs/ar7_defconfig index 6cd5a519ce5..80e012fa409 100644 --- a/arch/mips/configs/ar7_defconfig +++ b/arch/mips/configs/ar7_defconfig @@ -56,7 +56,6 @@ CONFIG_NF_CONNTRACK_MARK=y CONFIG_NF_CONNTRACK_FTP=m CONFIG_NF_CONNTRACK_IRC=m CONFIG_NF_CONNTRACK_TFTP=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_MATCH_LIMIT=m CONFIG_NETFILTER_XT_MATCH_MAC=m diff --git a/arch/mips/configs/bcm47xx_defconfig b/arch/mips/configs/bcm47xx_defconfig index ad15fb10322..b6fde2bb51b 100644 --- a/arch/mips/configs/bcm47xx_defconfig +++ b/arch/mips/configs/bcm47xx_defconfig @@ -96,7 +96,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_SECMARK=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m diff --git a/arch/mips/configs/ip22_defconfig b/arch/mips/configs/ip22_defconfig index d1606569b00..936ec5a5ed8 100644 --- a/arch/mips/configs/ip22_defconfig +++ b/arch/mips/configs/ip22_defconfig @@ -87,7 +87,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TPROXY=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_SECMARK=m diff --git a/arch/mips/configs/jazz_defconfig b/arch/mips/configs/jazz_defconfig index 92a60aecad5..0315ee37a20 100644 --- a/arch/mips/configs/jazz_defconfig +++ b/arch/mips/configs/jazz_defconfig @@ -60,7 +60,6 @@ CONFIG_NETFILTER_XT_TARGET_CONNMARK=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_SECMARK=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_MATCH_COMMENT=m diff --git a/arch/mips/configs/malta_defconfig b/arch/mips/configs/malta_defconfig index 5527abbb7de..cd732e5b4fd 100644 --- a/arch/mips/configs/malta_defconfig +++ b/arch/mips/configs/malta_defconfig @@ -86,7 +86,6 @@ CONFIG_NETFILTER_XT_TARGET_CONNMARK=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TPROXY=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_SECMARK=m diff --git a/arch/mips/configs/markeins_defconfig b/arch/mips/configs/markeins_defconfig index 9c9a123016c..636f82b89fd 100644 --- a/arch/mips/configs/markeins_defconfig +++ b/arch/mips/configs/markeins_defconfig @@ -59,7 +59,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_SECMARK=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_MATCH_COMMENT=m diff --git a/arch/mips/configs/nlm_xlp_defconfig b/arch/mips/configs/nlm_xlp_defconfig index 28c6b276c21..84624b17b76 100644 --- a/arch/mips/configs/nlm_xlp_defconfig +++ b/arch/mips/configs/nlm_xlp_defconfig @@ -108,7 +108,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TPROXY=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_SECMARK=m diff --git a/arch/mips/configs/nlm_xlr_defconfig b/arch/mips/configs/nlm_xlr_defconfig index 138f698d7c0..44b473420d5 100644 --- a/arch/mips/configs/nlm_xlr_defconfig +++ b/arch/mips/configs/nlm_xlr_defconfig @@ -109,7 +109,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TPROXY=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_SECMARK=m diff --git a/arch/mips/configs/rm200_defconfig b/arch/mips/configs/rm200_defconfig index 2c0230e76d2..59d9d2fdcd4 100644 --- a/arch/mips/configs/rm200_defconfig +++ b/arch/mips/configs/rm200_defconfig @@ -68,7 +68,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_SECMARK=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_MATCH_COMMENT=m diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig index f8b394a76ac..29767a8dfea 100644 --- a/arch/powerpc/configs/pmac32_defconfig +++ b/arch/powerpc/configs/pmac32_defconfig @@ -55,7 +55,6 @@ CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index db27c82e054..06b56245d78 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -92,7 +92,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TPROXY=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig index 7bd1763877b..f55c27609fc 100644 --- a/arch/powerpc/configs/ppc64e_defconfig +++ b/arch/powerpc/configs/ppc64e_defconfig @@ -66,7 +66,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TPROXY=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index c47f2becfbc..be1cb6ea3a3 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -167,7 +167,6 @@ CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TPROXY=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_SECMARK=m diff --git a/arch/tile/configs/tilegx_defconfig b/arch/tile/configs/tilegx_defconfig index 0270620a169..8c5eff6d6df 100644 --- a/arch/tile/configs/tilegx_defconfig +++ b/arch/tile/configs/tilegx_defconfig @@ -134,7 +134,6 @@ CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TEE=m CONFIG_NETFILTER_XT_TARGET_TPROXY=m CONFIG_NETFILTER_XT_TARGET_TRACE=m diff --git a/arch/tile/configs/tilepro_defconfig b/arch/tile/configs/tilepro_defconfig index c11de27a9bc..e7a3dfcbcda 100644 --- a/arch/tile/configs/tilepro_defconfig +++ b/arch/tile/configs/tilepro_defconfig @@ -132,7 +132,6 @@ CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m CONFIG_NETFILTER_XT_TARGET_TEE=m CONFIG_NETFILTER_XT_TARGET_TPROXY=m CONFIG_NETFILTER_XT_TARGET_TRACE=m diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 052836e2490..3f4b3b4a776 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -670,19 +670,6 @@ config NETFILTER_XT_TARGET_NFQUEUE To compile it as a module, choose M here. If unsure, say N. -config NETFILTER_XT_TARGET_NOTRACK - tristate '"NOTRACK" target support' - depends on IP_NF_RAW || IP6_NF_RAW - depends on NF_CONNTRACK - help - The NOTRACK target allows a select rule to specify - which packets *not* to enter the conntrack/NAT - subsystem with all the consequences (no ICMP error tracking, - no protocol helpers for the selected packets). - - If you want to compile it as a module, say M here and read - . If unsure, say `N'. - config NETFILTER_XT_TARGET_RATEEST tristate '"RATEEST" target support' depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 403ea812588..98244d4c75f 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -85,7 +85,6 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o obj-$(CONFIG_NETFILTER_XT_TARGET_LOG) += xt_LOG.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o -obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o obj-$(CONFIG_NETFILTER_XT_TARGET_RATEEST) += xt_RATEEST.o obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_TPROXY) += xt_TPROXY.o diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c deleted file mode 100644 index 9d782181b6c..00000000000 --- a/net/netfilter/xt_NOTRACK.c +++ /dev/null @@ -1,53 +0,0 @@ -/* This is a module which is used for setting up fake conntracks - * on packets so that they are not seen by the conntrack/NAT code. - */ -#include -#include - -#include -#include - -MODULE_DESCRIPTION("Xtables: Disabling connection tracking for packets"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("ipt_NOTRACK"); -MODULE_ALIAS("ip6t_NOTRACK"); - -static unsigned int -notrack_tg(struct sk_buff *skb, const struct xt_action_param *par) -{ - /* Previously seen (loopback)? Ignore. */ - if (skb->nfct != NULL) - return XT_CONTINUE; - - /* Attach fake conntrack entry. - If there is a real ct entry correspondig to this packet, - it'll hang aroun till timing out. We don't deal with it - for performance reasons. JK */ - skb->nfct = &nf_ct_untracked_get()->ct_general; - skb->nfctinfo = IP_CT_NEW; - nf_conntrack_get(skb->nfct); - - return XT_CONTINUE; -} - -static struct xt_target notrack_tg_reg __read_mostly = { - .name = "NOTRACK", - .revision = 0, - .family = NFPROTO_UNSPEC, - .target = notrack_tg, - .table = "raw", - .me = THIS_MODULE, -}; - -static int __init notrack_tg_init(void) -{ - return xt_register_target(¬rack_tg_reg); -} - -static void __exit notrack_tg_exit(void) -{ - xt_unregister_target(¬rack_tg_reg); -} - -module_init(notrack_tg_init); -module_exit(notrack_tg_exit); -- cgit v1.2.3-70-g09d2 From 9baa0b0364103dd726384c71db30b74044754743 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 13 Sep 2012 05:56:36 +0000 Subject: IB/ipoib: Add rtnl_link_ops support Add rtnl_link_ops to IPoIB, with the first usage being child device create/delete through them. Childs devices are now either legacy ones, created/deleted through the ipoib sysfs entries, or RTNL ones. Adding support for RTNL childs involved refactoring of ipoib_vlan_add which is now used by both the sysfs and the link_ops code. Also, added ndo_uninit entry to support calling unregister_netdevice_queue from the rtnl dellink entry. This required removal of calls to ipoib_dev_cleanup from the driver in flows which use unregister_netdevice, since the networking core will invoke ipoib_uninit which does exactly that. Signed-off-by: Erez Shitrit Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- Documentation/infiniband/ipoib.txt | 3 + drivers/infiniband/ulp/ipoib/Makefile | 3 +- drivers/infiniband/ulp/ipoib/ipoib.h | 13 +++ drivers/infiniband/ulp/ipoib/ipoib_main.c | 25 ++++-- drivers/infiniband/ulp/ipoib/ipoib_netlink.c | 114 +++++++++++++++++++++++++++ drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 102 +++++++++++++----------- include/linux/if_link.h | 11 +++ 7 files changed, 220 insertions(+), 51 deletions(-) create mode 100644 drivers/infiniband/ulp/ipoib/ipoib_netlink.c (limited to 'Documentation') diff --git a/Documentation/infiniband/ipoib.txt b/Documentation/infiniband/ipoib.txt index 64eeb55d0c0..f2cfe265e83 100644 --- a/Documentation/infiniband/ipoib.txt +++ b/Documentation/infiniband/ipoib.txt @@ -24,6 +24,9 @@ Partitions and P_Keys The P_Key for any interface is given by the "pkey" file, and the main interface for a subinterface is in "parent." + Child interface create/delete can also be done using IPoIB's + rtnl_link_ops, where childs created using either way behave the same. + Datagram vs Connected modes The IPoIB driver supports two modes of operation: datagram and diff --git a/drivers/infiniband/ulp/ipoib/Makefile b/drivers/infiniband/ulp/ipoib/Makefile index 3090100f0de..e5430dd5076 100644 --- a/drivers/infiniband/ulp/ipoib/Makefile +++ b/drivers/infiniband/ulp/ipoib/Makefile @@ -5,7 +5,8 @@ ib_ipoib-y := ipoib_main.o \ ipoib_multicast.o \ ipoib_verbs.o \ ipoib_vlan.o \ - ipoib_ethtool.o + ipoib_ethtool.o \ + ipoib_netlink.o ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_CM) += ipoib_cm.o ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_DEBUG) += ipoib_fs.o diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index ca43901ed86..381f51b2ed6 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -104,6 +104,10 @@ enum { MAX_SEND_CQE = 16, IPOIB_CM_COPYBREAK = 256, + + IPOIB_NON_CHILD = 0, + IPOIB_LEGACY_CHILD = 1, + IPOIB_RTNL_CHILD = 2, }; #define IPOIB_OP_RECV (1ul << 31) @@ -350,6 +354,7 @@ struct ipoib_dev_priv { struct net_device *parent; struct list_head child_intfs; struct list_head list; + int child_type; #ifdef CONFIG_INFINIBAND_IPOIB_CM struct ipoib_cm_dev_priv cm; @@ -509,6 +514,14 @@ void ipoib_event(struct ib_event_handler *handler, int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey); int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey); +int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv, + u16 pkey, int child_type); + +int __init ipoib_netlink_init(void); +void __exit ipoib_netlink_fini(void); + +void ipoib_setup(struct net_device *dev); + void ipoib_pkey_poll(struct work_struct *work); int ipoib_pkey_dev_delay_open(struct net_device *dev); void ipoib_drain_cq(struct net_device *dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 3e2085a3ee4..b3e97096c44 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -173,6 +173,11 @@ static int ipoib_stop(struct net_device *dev) return 0; } +static void ipoib_uninit(struct net_device *dev) +{ + ipoib_dev_cleanup(dev); +} + static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_features_t features) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -1262,6 +1267,9 @@ out: void ipoib_dev_cleanup(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev), *cpriv, *tcpriv; + LIST_HEAD(head); + + ASSERT_RTNL(); ipoib_delete_debug_files(dev); @@ -1270,10 +1278,9 @@ void ipoib_dev_cleanup(struct net_device *dev) /* Stop GC on child */ set_bit(IPOIB_STOP_NEIGH_GC, &cpriv->flags); cancel_delayed_work(&cpriv->neigh_reap_task); - unregister_netdev(cpriv->dev); - ipoib_dev_cleanup(cpriv->dev); - free_netdev(cpriv->dev); + unregister_netdevice_queue(cpriv->dev, &head); } + unregister_netdevice_many(&head); ipoib_ib_dev_cleanup(dev); @@ -1291,6 +1298,7 @@ static const struct header_ops ipoib_header_ops = { }; static const struct net_device_ops ipoib_netdev_ops = { + .ndo_uninit = ipoib_uninit, .ndo_open = ipoib_open, .ndo_stop = ipoib_stop, .ndo_change_mtu = ipoib_change_mtu, @@ -1300,7 +1308,7 @@ static const struct net_device_ops ipoib_netdev_ops = { .ndo_set_rx_mode = ipoib_set_mcast_list, }; -static void ipoib_setup(struct net_device *dev) +void ipoib_setup(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -1662,7 +1670,6 @@ static void ipoib_remove_one(struct ib_device *device) flush_workqueue(ipoib_workqueue); unregister_netdev(priv->dev); - ipoib_dev_cleanup(priv->dev); free_netdev(priv->dev); } @@ -1714,8 +1721,15 @@ static int __init ipoib_init_module(void) if (ret) goto err_sa; + ret = ipoib_netlink_init(); + if (ret) + goto err_client; + return 0; +err_client: + ib_unregister_client(&ipoib_client); + err_sa: ib_sa_unregister_client(&ipoib_sa_client); destroy_workqueue(ipoib_workqueue); @@ -1728,6 +1742,7 @@ err_fs: static void __exit ipoib_cleanup_module(void) { + ipoib_netlink_fini(); ib_unregister_client(&ipoib_client); ib_sa_unregister_client(&ipoib_sa_client); ipoib_unregister_debugfs(); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c new file mode 100644 index 00000000000..a7dc5ea8370 --- /dev/null +++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2012 Mellanox Technologies. - All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include "ipoib.h" + +static const struct nla_policy ipoib_policy[IFLA_IPOIB_MAX + 1] = { + [IFLA_IPOIB_PKEY] = { .type = NLA_U16 }, +}; + +static int ipoib_new_child_link(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct net_device *pdev; + struct ipoib_dev_priv *ppriv; + u16 child_pkey; + int err; + + if (!tb[IFLA_LINK]) + return -EINVAL; + + pdev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); + if (!pdev) + return -ENODEV; + + ppriv = netdev_priv(pdev); + + if (test_bit(IPOIB_FLAG_SUBINTERFACE, &ppriv->flags)) { + ipoib_warn(ppriv, "child creation disallowed for child devices\n"); + return -EINVAL; + } + + if (!data || !data[IFLA_IPOIB_PKEY]) { + ipoib_dbg(ppriv, "no pkey specified, using parent pkey\n"); + child_pkey = ppriv->pkey; + } else + child_pkey = nla_get_u16(data[IFLA_IPOIB_PKEY]); + + err = __ipoib_vlan_add(ppriv, netdev_priv(dev), child_pkey, IPOIB_RTNL_CHILD); + + return err; +} + +static void ipoib_unregister_child_dev(struct net_device *dev, struct list_head *head) +{ + struct ipoib_dev_priv *priv, *ppriv; + + priv = netdev_priv(dev); + ppriv = netdev_priv(priv->parent); + + mutex_lock(&ppriv->vlan_mutex); + unregister_netdevice_queue(dev, head); + list_del(&priv->list); + mutex_unlock(&ppriv->vlan_mutex); +} + +static size_t ipoib_get_size(const struct net_device *dev) +{ + return nla_total_size(2); /* IFLA_IPOIB_PKEY */ +} + +static struct rtnl_link_ops ipoib_link_ops __read_mostly = { + .kind = "ipoib", + .maxtype = IFLA_IPOIB_MAX, + .policy = ipoib_policy, + .priv_size = sizeof(struct ipoib_dev_priv), + .setup = ipoib_setup, + .newlink = ipoib_new_child_link, + .dellink = ipoib_unregister_child_dev, + .get_size = ipoib_get_size, +}; + +int __init ipoib_netlink_init(void) +{ + return rtnl_link_register(&ipoib_link_ops); +} + +void __exit ipoib_netlink_fini(void) +{ + rtnl_link_unregister(&ipoib_link_ops); +} + +MODULE_ALIAS_RTNL_LINK("ipoib"); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index d7e9740c724..238bbf9b2be 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -49,47 +49,11 @@ static ssize_t show_parent(struct device *d, struct device_attribute *attr, } static DEVICE_ATTR(parent, S_IRUGO, show_parent, NULL); -int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) +int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv, + u16 pkey, int type) { - struct ipoib_dev_priv *ppriv, *priv; - char intf_name[IFNAMSIZ]; int result; - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - ppriv = netdev_priv(pdev); - - if (!rtnl_trylock()) - return restart_syscall(); - mutex_lock(&ppriv->vlan_mutex); - - /* - * First ensure this isn't a duplicate. We check the parent device and - * then all of the child interfaces to make sure the Pkey doesn't match. - */ - if (ppriv->pkey == pkey) { - result = -ENOTUNIQ; - priv = NULL; - goto err; - } - - list_for_each_entry(priv, &ppriv->child_intfs, list) { - if (priv->pkey == pkey) { - result = -ENOTUNIQ; - priv = NULL; - goto err; - } - } - - snprintf(intf_name, sizeof intf_name, "%s.%04x", - ppriv->dev->name, pkey); - priv = ipoib_intf_alloc(intf_name); - if (!priv) { - result = -ENOMEM; - goto err; - } - priv->max_ib_mtu = ppriv->max_ib_mtu; /* MTU will be reset when mcast join happens */ priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu); @@ -134,14 +98,13 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) if (device_create_file(&priv->dev->dev, &dev_attr_parent)) goto sysfs_failed; + priv->child_type = type; list_add_tail(&priv->list, &ppriv->child_intfs); - mutex_unlock(&ppriv->vlan_mutex); - rtnl_unlock(); - return 0; sysfs_failed: + result = -ENOMEM; ipoib_delete_debug_files(priv->dev); unregister_netdevice(priv->dev); @@ -149,11 +112,60 @@ register_failed: ipoib_dev_cleanup(priv->dev); err: + return result; +} + +int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) +{ + struct ipoib_dev_priv *ppriv, *priv; + char intf_name[IFNAMSIZ]; + struct ipoib_dev_priv *tpriv; + int result; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + ppriv = netdev_priv(pdev); + + snprintf(intf_name, sizeof intf_name, "%s.%04x", + ppriv->dev->name, pkey); + priv = ipoib_intf_alloc(intf_name); + if (!priv) + return -ENOMEM; + + if (!rtnl_trylock()) + return restart_syscall(); + + mutex_lock(&ppriv->vlan_mutex); + + /* + * First ensure this isn't a duplicate. We check the parent device and + * then all of the legacy child interfaces to make sure the Pkey + * doesn't match. + */ + if (ppriv->pkey == pkey) { + result = -ENOTUNIQ; + goto out; + } + + list_for_each_entry(tpriv, &ppriv->child_intfs, list) { + if (tpriv->pkey == pkey && + tpriv->child_type == IPOIB_LEGACY_CHILD) { + result = -ENOTUNIQ; + goto out; + } + } + + result = __ipoib_vlan_add(ppriv, priv, pkey, IPOIB_LEGACY_CHILD); + +out: mutex_unlock(&ppriv->vlan_mutex); - rtnl_unlock(); - if (priv) + + if (result) free_netdev(priv->dev); + rtnl_unlock(); + return result; } @@ -171,9 +183,9 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey) return restart_syscall(); mutex_lock(&ppriv->vlan_mutex); list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) { - if (priv->pkey == pkey) { + if (priv->pkey == pkey && + priv->child_type == IPOIB_LEGACY_CHILD) { unregister_netdevice(priv->dev); - ipoib_dev_cleanup(priv->dev); list_del(&priv->list); dev = priv->dev; break; diff --git a/include/linux/if_link.h b/include/linux/if_link.h index ac173bd2ab6..24c0dd09af5 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -398,4 +398,15 @@ struct ifla_port_vsi { __u8 pad[3]; }; + +/* IPoIB section */ + +enum { + IFLA_IPOIB_UNSPEC, + IFLA_IPOIB_PKEY, + __IFLA_IPOIB_MAX +}; + +#define IFLA_IPOIB_MAX (__IFLA_IPOIB_MAX - 1) + #endif /* _LINUX_IF_LINK_H */ -- cgit v1.2.3-70-g09d2 From 5e953778a2aab04929a5e7b69f53dc26e39b079e Mon Sep 17 00:00:00 2001 From: Christoph Fritz Date: Fri, 21 Sep 2012 08:31:19 +0000 Subject: ipconfig: add nameserver IPs to kernel-parameter ip= On small systems (e.g. embedded ones) IP addresses are often configured by bootloaders and get assigned to kernel via parameter "ip=". If set to "ip=dhcp", even nameserver entries from DHCP daemons are handled. These entries exported in /proc/net/pnp are commonly linked by /etc/resolv.conf. To configure nameservers for networks without DHCP, this patch adds option and to kernel-parameter 'ip='. Signed-off-by: Christoph Fritz Tested-by: Jan Weitzel Signed-off-by: David S. Miller --- Documentation/filesystems/nfs/nfsroot.txt | 10 +++++++- net/ipv4/ipconfig.c | 39 ++++++++++++++++++++++++++++--- 2 files changed, 45 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/filesystems/nfs/nfsroot.txt b/Documentation/filesystems/nfs/nfsroot.txt index ffdd9d866ad..2d66ed68812 100644 --- a/Documentation/filesystems/nfs/nfsroot.txt +++ b/Documentation/filesystems/nfs/nfsroot.txt @@ -78,7 +78,8 @@ nfsroot=[:][,] flags = hard, nointr, noposix, cto, ac -ip=:::::: +ip=::::::: + : This parameter tells the kernel how to configure IP addresses of devices and also how to set up the IP routing table. It was originally called @@ -158,6 +159,13 @@ ip=:::::: Default: any + IP address of first nameserver. + Value gets exported by /proc/net/pnp which is often linked + on embedded systems by /etc/resolv.conf. + + IP address of secound nameserver. + Same as above. + nfsrootdebug diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 67e8a6b086e..1c0e7e05104 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -743,14 +743,22 @@ static void __init ic_bootp_init_ext(u8 *e) /* - * Initialize the DHCP/BOOTP mechanism. + * Predefine Nameservers */ -static inline void __init ic_bootp_init(void) +static inline void __init ic_nameservers_predef(void) { int i; for (i = 0; i < CONF_NAMESERVERS_MAX; i++) ic_nameservers[i] = NONE; +} + +/* + * Initialize the DHCP/BOOTP mechanism. + */ +static inline void __init ic_bootp_init(void) +{ + ic_nameservers_predef(); dev_add_pack(&bootp_packet_type); } @@ -1379,6 +1387,7 @@ static int __init ip_auto_config(void) int retries = CONF_OPEN_RETRIES; #endif int err; + unsigned int i; #ifdef CONFIG_PROC_FS proc_net_fops_create(&init_net, "pnp", S_IRUGO, &pnp_seq_fops); @@ -1499,7 +1508,15 @@ static int __init ip_auto_config(void) &ic_servaddr, &root_server_addr, root_server_path); if (ic_dev_mtu) pr_cont(", mtu=%d", ic_dev_mtu); - pr_cont("\n"); + for (i = 0; i < CONF_NAMESERVERS_MAX; i++) + if (ic_nameservers[i] != NONE) { + pr_info(" nameserver%u=%pI4", + i, &ic_nameservers[i]); + break; + } + for (i++; i < CONF_NAMESERVERS_MAX; i++) + if (ic_nameservers[i] != NONE) + pr_cont(", nameserver%u=%pI4\n", i, &ic_nameservers[i]); #endif /* !SILENT */ return 0; @@ -1570,6 +1587,8 @@ static int __init ip_auto_config_setup(char *addrs) return 1; } + ic_nameservers_predef(); + /* Parse string for static IP assignment. */ ip = addrs; while (ip && *ip) { @@ -1613,6 +1632,20 @@ static int __init ip_auto_config_setup(char *addrs) ic_enable = 0; } break; + case 7: + if (CONF_NAMESERVERS_MAX >= 1) { + ic_nameservers[0] = in_aton(ip); + if (ic_nameservers[0] == ANY) + ic_nameservers[0] = NONE; + } + break; + case 8: + if (CONF_NAMESERVERS_MAX >= 2) { + ic_nameservers[1] = in_aton(ip); + if (ic_nameservers[1] == ANY) + ic_nameservers[1] = NONE; + } + break; } } ip = cp; -- cgit v1.2.3-70-g09d2 From 2469627d175c1d6d7812a1395dd3ef053a0e65b3 Mon Sep 17 00:00:00 2001 From: AnilKumar Ch Date: Thu, 2 Aug 2012 18:43:10 +0530 Subject: can: c_can: Add device tree support to Bosch C_CAN/D_CAN controller Add device tree support to C_CAN/D_CAN controller and usage details are added to device tree documentation. Driver was tested on AM335x EVM. Signed-off-by: AnilKumar Ch For the of binding doc: Reviewed-by: Stephen Warren Signed-off-by: Marc Kleine-Budde --- .../devicetree/bindings/net/can/c_can.txt | 49 +++++++++++++++++++ drivers/net/can/c_can/c_can_platform.c | 55 +++++++++++++++------- 2 files changed, 87 insertions(+), 17 deletions(-) create mode 100644 Documentation/devicetree/bindings/net/can/c_can.txt (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/can/c_can.txt b/Documentation/devicetree/bindings/net/can/c_can.txt new file mode 100644 index 00000000000..8f1ae81228e --- /dev/null +++ b/Documentation/devicetree/bindings/net/can/c_can.txt @@ -0,0 +1,49 @@ +Bosch C_CAN/D_CAN controller Device Tree Bindings +------------------------------------------------- + +Required properties: +- compatible : Should be "bosch,c_can" for C_CAN controllers and + "bosch,d_can" for D_CAN controllers. +- reg : physical base address and size of the C_CAN/D_CAN + registers map +- interrupts : property with a value describing the interrupt + number + +Optional properties: +- ti,hwmods : Must be "d_can" or "c_can", n being the + instance number + +Note: "ti,hwmods" field is used to fetch the base address and irq +resources from TI, omap hwmod data base during device registration. +Future plan is to migrate hwmod data base contents into device tree +blob so that, all the required data will be used from device tree dts +file. + +Example: + +Step1: SoC common .dtsi file + + dcan1: d_can@481d0000 { + compatible = "bosch,d_can"; + reg = <0x481d0000 0x2000>; + interrupts = <55>; + interrupt-parent = <&intc>; + status = "disabled"; + }; + +(or) + + dcan1: d_can@481d0000 { + compatible = "bosch,d_can"; + ti,hwmods = "d_can1"; + reg = <0x481d0000 0x2000>; + interrupts = <55>; + interrupt-parent = <&intc>; + status = "disabled"; + }; + +Step 2: board specific .dts file + + &dcan1 { + status = "okay"; + }; diff --git a/drivers/net/can/c_can/c_can_platform.c b/drivers/net/can/c_can/c_can_platform.c index 5c46d1cc5cf..d0a66cf298b 100644 --- a/drivers/net/can/c_can/c_can_platform.c +++ b/drivers/net/can/c_can/c_can_platform.c @@ -30,6 +30,8 @@ #include #include #include +#include +#include #include @@ -65,17 +67,52 @@ static void c_can_plat_write_reg_aligned_to_32bit(struct c_can_priv *priv, writew(val, priv->base + 2 * priv->regs[index]); } +static struct platform_device_id c_can_id_table[] = { + [BOSCH_C_CAN_PLATFORM] = { + .name = KBUILD_MODNAME, + .driver_data = BOSCH_C_CAN, + }, + [BOSCH_C_CAN] = { + .name = "c_can", + .driver_data = BOSCH_C_CAN, + }, + [BOSCH_D_CAN] = { + .name = "d_can", + .driver_data = BOSCH_D_CAN, + }, { + } +}; + +static const struct of_device_id c_can_of_table[] = { + { .compatible = "bosch,c_can", .data = &c_can_id_table[BOSCH_C_CAN] }, + { .compatible = "bosch,d_can", .data = &c_can_id_table[BOSCH_D_CAN] }, + { /* sentinel */ }, +}; + static int __devinit c_can_plat_probe(struct platform_device *pdev) { int ret; void __iomem *addr; struct net_device *dev; struct c_can_priv *priv; + const struct of_device_id *match; const struct platform_device_id *id; struct resource *mem; int irq; struct clk *clk; + if (pdev->dev.of_node) { + match = of_match_device(c_can_of_table, &pdev->dev); + if (!match) { + dev_err(&pdev->dev, "Failed to find matching dt id\n"); + ret = -EINVAL; + goto exit; + } + id = match->data; + } else { + id = platform_get_device_id(pdev); + } + /* get the appropriate clk */ clk = clk_get(&pdev->dev, NULL); if (IS_ERR(clk)) { @@ -114,7 +151,6 @@ static int __devinit c_can_plat_probe(struct platform_device *pdev) } priv = netdev_priv(dev); - id = platform_get_device_id(pdev); switch (id->driver_data) { case BOSCH_C_CAN: priv->regs = reg_map_c_can; @@ -195,26 +231,11 @@ static int __devexit c_can_plat_remove(struct platform_device *pdev) return 0; } -static const struct platform_device_id c_can_id_table[] = { - [BOSCH_C_CAN_PLATFORM] = { - .name = KBUILD_MODNAME, - .driver_data = BOSCH_C_CAN, - }, - [BOSCH_C_CAN] = { - .name = "c_can", - .driver_data = BOSCH_C_CAN, - }, - [BOSCH_D_CAN] = { - .name = "d_can", - .driver_data = BOSCH_D_CAN, - }, { - } -}; - static struct platform_driver c_can_plat_driver = { .driver = { .name = KBUILD_MODNAME, .owner = THIS_MODULE, + .of_match_table = of_match_ptr(c_can_of_table), }, .probe = c_can_plat_probe, .remove = __devexit_p(c_can_plat_remove), -- cgit v1.2.3-70-g09d2 From de46584675fad02b7f8255f31be0ea1be5cd185b Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Sat, 22 Sep 2012 07:02:04 +0000 Subject: ptp: clarify the clock_name sysfs attribute There has been some confusion among PHC driver authors about the intended purpose of the clock_name attribute. This patch expands the documation in order to clarify how the clock_name field should be understood. Signed-off-by: Richard Cochran Signed-off-by: David S. Miller --- Documentation/ABI/testing/sysfs-ptp | 6 +++++- include/linux/ptp_clock_kernel.h | 4 +++- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/ABI/testing/sysfs-ptp b/Documentation/ABI/testing/sysfs-ptp index d40d2b55050..05aeedf1779 100644 --- a/Documentation/ABI/testing/sysfs-ptp +++ b/Documentation/ABI/testing/sysfs-ptp @@ -19,7 +19,11 @@ Date: September 2010 Contact: Richard Cochran Description: This file contains the name of the PTP hardware clock - as a human readable string. + as a human readable string. The purpose of this + attribute is to provide the user with a "friendly + name" and to help distinguish PHY based devices from + MAC based ones. The string does not necessarily have + to be any kind of unique id. What: /sys/class/ptp/ptpN/max_adjustment Date: September 2010 diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 56c71b27112..f2dc6d8fc68 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -42,7 +42,9 @@ struct ptp_clock_request { * struct ptp_clock_info - decribes a PTP hardware clock * * @owner: The clock driver should set to THIS_MODULE. - * @name: A short name to identify the clock. + * @name: A short "friendly name" to identify the clock and to + * help distinguish PHY based devices from MAC based ones. + * The string is not meant to be a unique id. * @max_adj: The maximum possible frequency adjustment, in parts per billon. * @n_alarm: The number of programmable alarms. * @n_ext_ts: The number of external time stamp channels. -- cgit v1.2.3-70-g09d2 From 7b55279f6a454771d06e8ddf4a7114d17ae9a741 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 18 Sep 2012 19:31:58 +0200 Subject: NFC: Remove the pn544 raw driver This was scheduled for 3.6, we're late. Signed-off-by: Cong Wang Acked-by: Marcel Holtmann Signed-off-by: Samuel Ortiz --- Documentation/feature-removal-schedule.txt | 12 - drivers/nfc/Kconfig | 12 - drivers/nfc/Makefile | 1 - drivers/nfc/pn544.c | 893 ----------------------------- 4 files changed, 918 deletions(-) delete mode 100644 drivers/nfc/pn544.c (limited to 'Documentation') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index afaff312bf4..2cbbac8019c 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -516,18 +516,6 @@ Who: Kees Cook ---------------------------- -What: Removing the pn544 raw driver. -When: 3.6 -Why: With the introduction of the NFC HCI and SHDL kernel layers, pn544.c - is being replaced by pn544_hci.c which is accessible through the netlink - and socket NFC APIs. Moreover, pn544.c is outdated and does not seem to - work properly with the latest Android stacks. - Having 2 drivers for the same hardware is confusing and as such we - should only keep the one following the kernel NFC APIs. -Who: Samuel Ortiz - ----------------------------- - What: setitimer accepts user NULL pointer (value) When: 3.6 Why: setitimer is not returning -EFAULT if user pointer is NULL. This diff --git a/drivers/nfc/Kconfig b/drivers/nfc/Kconfig index 89c57d1a7ca..ec857676c39 100644 --- a/drivers/nfc/Kconfig +++ b/drivers/nfc/Kconfig @@ -5,18 +5,6 @@ menu "Near Field Communication (NFC) devices" depends on NFC -config PN544_NFC - tristate "PN544 NFC driver" - depends on I2C - select CRC_CCITT - default n - ---help--- - Say yes if you want PN544 Near Field Communication driver. - This is for i2c connected version. If unsure, say N here. - - To compile this driver as a module, choose m here. The module will - be called pn544. - config PN544_HCI_NFC tristate "HCI PN544 NFC driver" depends on I2C && NFC_HCI && NFC_SHDLC diff --git a/drivers/nfc/Makefile b/drivers/nfc/Makefile index 473e44cef61..bf05831fdf0 100644 --- a/drivers/nfc/Makefile +++ b/drivers/nfc/Makefile @@ -2,7 +2,6 @@ # Makefile for nfc devices # -obj-$(CONFIG_PN544_NFC) += pn544.o obj-$(CONFIG_PN544_HCI_NFC) += pn544_hci.o obj-$(CONFIG_NFC_PN533) += pn533.o obj-$(CONFIG_NFC_WILINK) += nfcwilink.o diff --git a/drivers/nfc/pn544.c b/drivers/nfc/pn544.c deleted file mode 100644 index 724f65d8f9e..00000000000 --- a/drivers/nfc/pn544.c +++ /dev/null @@ -1,893 +0,0 @@ -/* - * Driver for the PN544 NFC chip. - * - * Copyright (C) Nokia Corporation - * - * Author: Jari Vanhala - * Contact: Matti Aaltonen - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include /* for TCGETS */ -#include - -#define DRIVER_CARD "PN544 NFC" -#define DRIVER_DESC "NFC driver for PN544" - -static struct i2c_device_id pn544_id_table[] = { - { PN544_DRIVER_NAME, 0 }, - { } -}; -MODULE_DEVICE_TABLE(i2c, pn544_id_table); - -#define HCI_MODE 0 -#define FW_MODE 1 - -enum pn544_state { - PN544_ST_COLD, - PN544_ST_FW_READY, - PN544_ST_READY, -}; - -enum pn544_irq { - PN544_NONE, - PN544_INT, -}; - -struct pn544_info { - struct miscdevice miscdev; - struct i2c_client *i2c_dev; - struct regulator_bulk_data regs[3]; - - enum pn544_state state; - wait_queue_head_t read_wait; - loff_t read_offset; - enum pn544_irq read_irq; - struct mutex read_mutex; /* Serialize read_irq access */ - struct mutex mutex; /* Serialize info struct access */ - u8 *buf; - size_t buflen; -}; - -static const char reg_vdd_io[] = "Vdd_IO"; -static const char reg_vbat[] = "VBat"; -static const char reg_vsim[] = "VSim"; - -/* sysfs interface */ -static ssize_t pn544_test(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct pn544_info *info = dev_get_drvdata(dev); - struct i2c_client *client = info->i2c_dev; - struct pn544_nfc_platform_data *pdata = client->dev.platform_data; - - return snprintf(buf, PAGE_SIZE, "%d\n", pdata->test()); -} - -static int pn544_enable(struct pn544_info *info, int mode) -{ - struct pn544_nfc_platform_data *pdata; - struct i2c_client *client = info->i2c_dev; - - int r; - - r = regulator_bulk_enable(ARRAY_SIZE(info->regs), info->regs); - if (r < 0) - return r; - - pdata = client->dev.platform_data; - info->read_irq = PN544_NONE; - if (pdata->enable) - pdata->enable(mode); - - if (mode) { - info->state = PN544_ST_FW_READY; - dev_dbg(&client->dev, "now in FW-mode\n"); - } else { - info->state = PN544_ST_READY; - dev_dbg(&client->dev, "now in HCI-mode\n"); - } - - usleep_range(10000, 15000); - - return 0; -} - -static void pn544_disable(struct pn544_info *info) -{ - struct pn544_nfc_platform_data *pdata; - struct i2c_client *client = info->i2c_dev; - - pdata = client->dev.platform_data; - if (pdata->disable) - pdata->disable(); - - info->state = PN544_ST_COLD; - - dev_dbg(&client->dev, "Now in OFF-mode\n"); - - msleep(PN544_RESETVEN_TIME); - - info->read_irq = PN544_NONE; - regulator_bulk_disable(ARRAY_SIZE(info->regs), info->regs); -} - -static int check_crc(u8 *buf, int buflen) -{ - u8 len; - u16 crc; - - len = buf[0] + 1; - if (len < 4 || len != buflen || len > PN544_MSG_MAX_SIZE) { - pr_err(PN544_DRIVER_NAME - ": CRC; corrupt packet len %u (%d)\n", len, buflen); - print_hex_dump(KERN_DEBUG, "crc: ", DUMP_PREFIX_NONE, - 16, 2, buf, buflen, false); - return -EPERM; - } - crc = crc_ccitt(0xffff, buf, len - 2); - crc = ~crc; - - if (buf[len-2] != (crc & 0xff) || buf[len-1] != (crc >> 8)) { - pr_err(PN544_DRIVER_NAME ": CRC error 0x%x != 0x%x 0x%x\n", - crc, buf[len-1], buf[len-2]); - - print_hex_dump(KERN_DEBUG, "crc: ", DUMP_PREFIX_NONE, - 16, 2, buf, buflen, false); - return -EPERM; - } - return 0; -} - -static int pn544_i2c_write(struct i2c_client *client, u8 *buf, int len) -{ - int r; - - if (len < 4 || len != (buf[0] + 1)) { - dev_err(&client->dev, "%s: Illegal message length: %d\n", - __func__, len); - return -EINVAL; - } - - if (check_crc(buf, len)) - return -EINVAL; - - usleep_range(3000, 6000); - - r = i2c_master_send(client, buf, len); - dev_dbg(&client->dev, "send: %d\n", r); - - if (r == -EREMOTEIO) { /* Retry, chip was in standby */ - usleep_range(6000, 10000); - r = i2c_master_send(client, buf, len); - dev_dbg(&client->dev, "send2: %d\n", r); - } - - if (r != len) - return -EREMOTEIO; - - return r; -} - -static int pn544_i2c_read(struct i2c_client *client, u8 *buf, int buflen) -{ - int r; - u8 len; - - /* - * You could read a packet in one go, but then you'd need to read - * max size and rest would be 0xff fill, so we do split reads. - */ - r = i2c_master_recv(client, &len, 1); - dev_dbg(&client->dev, "recv1: %d\n", r); - - if (r != 1) - return -EREMOTEIO; - - if (len < PN544_LLC_HCI_OVERHEAD) - len = PN544_LLC_HCI_OVERHEAD; - else if (len > (PN544_MSG_MAX_SIZE - 1)) - len = PN544_MSG_MAX_SIZE - 1; - - if (1 + len > buflen) /* len+(data+crc16) */ - return -EMSGSIZE; - - buf[0] = len; - - r = i2c_master_recv(client, buf + 1, len); - dev_dbg(&client->dev, "recv2: %d\n", r); - - if (r != len) - return -EREMOTEIO; - - usleep_range(3000, 6000); - - return r + 1; -} - -static int pn544_fw_write(struct i2c_client *client, u8 *buf, int len) -{ - int r; - - dev_dbg(&client->dev, "%s\n", __func__); - - if (len < PN544_FW_HEADER_SIZE || - (PN544_FW_HEADER_SIZE + (buf[1] << 8) + buf[2]) != len) - return -EINVAL; - - r = i2c_master_send(client, buf, len); - dev_dbg(&client->dev, "fw send: %d\n", r); - - if (r == -EREMOTEIO) { /* Retry, chip was in standby */ - usleep_range(6000, 10000); - r = i2c_master_send(client, buf, len); - dev_dbg(&client->dev, "fw send2: %d\n", r); - } - - if (r != len) - return -EREMOTEIO; - - return r; -} - -static int pn544_fw_read(struct i2c_client *client, u8 *buf, int buflen) -{ - int r, len; - - if (buflen < PN544_FW_HEADER_SIZE) - return -EINVAL; - - r = i2c_master_recv(client, buf, PN544_FW_HEADER_SIZE); - dev_dbg(&client->dev, "FW recv1: %d\n", r); - - if (r < 0) - return r; - - if (r < PN544_FW_HEADER_SIZE) - return -EINVAL; - - len = (buf[1] << 8) + buf[2]; - if (len == 0) /* just header, no additional data */ - return r; - - if (len > buflen - PN544_FW_HEADER_SIZE) - return -EMSGSIZE; - - r = i2c_master_recv(client, buf + PN544_FW_HEADER_SIZE, len); - dev_dbg(&client->dev, "fw recv2: %d\n", r); - - if (r != len) - return -EINVAL; - - return r + PN544_FW_HEADER_SIZE; -} - -static irqreturn_t pn544_irq_thread_fn(int irq, void *dev_id) -{ - struct pn544_info *info = dev_id; - struct i2c_client *client = info->i2c_dev; - - BUG_ON(!info); - BUG_ON(irq != info->i2c_dev->irq); - - dev_dbg(&client->dev, "IRQ\n"); - - mutex_lock(&info->read_mutex); - info->read_irq = PN544_INT; - mutex_unlock(&info->read_mutex); - - wake_up_interruptible(&info->read_wait); - - return IRQ_HANDLED; -} - -static enum pn544_irq pn544_irq_state(struct pn544_info *info) -{ - enum pn544_irq irq; - - mutex_lock(&info->read_mutex); - irq = info->read_irq; - mutex_unlock(&info->read_mutex); - /* - * XXX: should we check GPIO-line status directly? - * return pdata->irq_status() ? PN544_INT : PN544_NONE; - */ - - return irq; -} - -static ssize_t pn544_read(struct file *file, char __user *buf, - size_t count, loff_t *offset) -{ - struct pn544_info *info = container_of(file->private_data, - struct pn544_info, miscdev); - struct i2c_client *client = info->i2c_dev; - enum pn544_irq irq; - size_t len; - int r = 0; - - dev_dbg(&client->dev, "%s: info: %p, count: %zu\n", __func__, - info, count); - - mutex_lock(&info->mutex); - - if (info->state == PN544_ST_COLD) { - r = -ENODEV; - goto out; - } - - irq = pn544_irq_state(info); - if (irq == PN544_NONE) { - if (file->f_flags & O_NONBLOCK) { - r = -EAGAIN; - goto out; - } - - if (wait_event_interruptible(info->read_wait, - (info->read_irq == PN544_INT))) { - r = -ERESTARTSYS; - goto out; - } - } - - if (info->state == PN544_ST_FW_READY) { - len = min(count, info->buflen); - - mutex_lock(&info->read_mutex); - r = pn544_fw_read(info->i2c_dev, info->buf, len); - info->read_irq = PN544_NONE; - mutex_unlock(&info->read_mutex); - - if (r < 0) { - dev_err(&info->i2c_dev->dev, "FW read failed: %d\n", r); - goto out; - } - - print_hex_dump(KERN_DEBUG, "FW read: ", DUMP_PREFIX_NONE, - 16, 2, info->buf, r, false); - - *offset += r; - if (copy_to_user(buf, info->buf, r)) { - r = -EFAULT; - goto out; - } - } else { - len = min(count, info->buflen); - - mutex_lock(&info->read_mutex); - r = pn544_i2c_read(info->i2c_dev, info->buf, len); - info->read_irq = PN544_NONE; - mutex_unlock(&info->read_mutex); - - if (r < 0) { - dev_err(&info->i2c_dev->dev, "read failed (%d)\n", r); - goto out; - } - print_hex_dump(KERN_DEBUG, "read: ", DUMP_PREFIX_NONE, - 16, 2, info->buf, r, false); - - *offset += r; - if (copy_to_user(buf, info->buf, r)) { - r = -EFAULT; - goto out; - } - } - -out: - mutex_unlock(&info->mutex); - - return r; -} - -static unsigned int pn544_poll(struct file *file, poll_table *wait) -{ - struct pn544_info *info = container_of(file->private_data, - struct pn544_info, miscdev); - struct i2c_client *client = info->i2c_dev; - int r = 0; - - dev_dbg(&client->dev, "%s: info: %p\n", __func__, info); - - mutex_lock(&info->mutex); - - if (info->state == PN544_ST_COLD) { - r = -ENODEV; - goto out; - } - - poll_wait(file, &info->read_wait, wait); - - if (pn544_irq_state(info) == PN544_INT) { - r = POLLIN | POLLRDNORM; - goto out; - } -out: - mutex_unlock(&info->mutex); - - return r; -} - -static ssize_t pn544_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct pn544_info *info = container_of(file->private_data, - struct pn544_info, miscdev); - struct i2c_client *client = info->i2c_dev; - ssize_t len; - int r; - - dev_dbg(&client->dev, "%s: info: %p, count %zu\n", __func__, - info, count); - - mutex_lock(&info->mutex); - - if (info->state == PN544_ST_COLD) { - r = -ENODEV; - goto out; - } - - /* - * XXX: should we detect rset-writes and clean possible - * read_irq state - */ - if (info->state == PN544_ST_FW_READY) { - size_t fw_len; - - if (count < PN544_FW_HEADER_SIZE) { - r = -EINVAL; - goto out; - } - - len = min(count, info->buflen); - if (copy_from_user(info->buf, buf, len)) { - r = -EFAULT; - goto out; - } - - print_hex_dump(KERN_DEBUG, "FW write: ", DUMP_PREFIX_NONE, - 16, 2, info->buf, len, false); - - fw_len = PN544_FW_HEADER_SIZE + (info->buf[1] << 8) + - info->buf[2]; - - if (len > fw_len) /* 1 msg at a time */ - len = fw_len; - - r = pn544_fw_write(info->i2c_dev, info->buf, len); - } else { - if (count < PN544_LLC_MIN_SIZE) { - r = -EINVAL; - goto out; - } - - len = min(count, info->buflen); - if (copy_from_user(info->buf, buf, len)) { - r = -EFAULT; - goto out; - } - - print_hex_dump(KERN_DEBUG, "write: ", DUMP_PREFIX_NONE, - 16, 2, info->buf, len, false); - - if (len > (info->buf[0] + 1)) /* 1 msg at a time */ - len = info->buf[0] + 1; - - r = pn544_i2c_write(info->i2c_dev, info->buf, len); - } -out: - mutex_unlock(&info->mutex); - - return r; - -} - -static long pn544_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - struct pn544_info *info = container_of(file->private_data, - struct pn544_info, miscdev); - struct i2c_client *client = info->i2c_dev; - struct pn544_nfc_platform_data *pdata; - unsigned int val; - int r = 0; - - dev_dbg(&client->dev, "%s: info: %p, cmd: 0x%x\n", __func__, info, cmd); - - mutex_lock(&info->mutex); - - if (info->state == PN544_ST_COLD) { - r = -ENODEV; - goto out; - } - - pdata = info->i2c_dev->dev.platform_data; - switch (cmd) { - case PN544_GET_FW_MODE: - dev_dbg(&client->dev, "%s: PN544_GET_FW_MODE\n", __func__); - - val = (info->state == PN544_ST_FW_READY); - if (copy_to_user((void __user *)arg, &val, sizeof(val))) { - r = -EFAULT; - goto out; - } - - break; - - case PN544_SET_FW_MODE: - dev_dbg(&client->dev, "%s: PN544_SET_FW_MODE\n", __func__); - - if (copy_from_user(&val, (void __user *)arg, sizeof(val))) { - r = -EFAULT; - goto out; - } - - if (val) { - if (info->state == PN544_ST_FW_READY) - break; - - pn544_disable(info); - r = pn544_enable(info, FW_MODE); - if (r < 0) - goto out; - } else { - if (info->state == PN544_ST_READY) - break; - pn544_disable(info); - r = pn544_enable(info, HCI_MODE); - if (r < 0) - goto out; - } - file->f_pos = info->read_offset; - break; - - case TCGETS: - dev_dbg(&client->dev, "%s: TCGETS\n", __func__); - - r = -ENOIOCTLCMD; - break; - - default: - dev_err(&client->dev, "Unknown ioctl 0x%x\n", cmd); - r = -ENOIOCTLCMD; - break; - } - -out: - mutex_unlock(&info->mutex); - - return r; -} - -static int pn544_open(struct inode *inode, struct file *file) -{ - struct pn544_info *info = container_of(file->private_data, - struct pn544_info, miscdev); - struct i2c_client *client = info->i2c_dev; - int r = 0; - - dev_dbg(&client->dev, "%s: info: %p, client %p\n", __func__, - info, info->i2c_dev); - - mutex_lock(&info->mutex); - - /* - * Only 1 at a time. - * XXX: maybe user (counter) would work better - */ - if (info->state != PN544_ST_COLD) { - r = -EBUSY; - goto out; - } - - file->f_pos = info->read_offset; - r = pn544_enable(info, HCI_MODE); - -out: - mutex_unlock(&info->mutex); - return r; -} - -static int pn544_close(struct inode *inode, struct file *file) -{ - struct pn544_info *info = container_of(file->private_data, - struct pn544_info, miscdev); - struct i2c_client *client = info->i2c_dev; - - dev_dbg(&client->dev, "%s: info: %p, client %p\n", - __func__, info, info->i2c_dev); - - mutex_lock(&info->mutex); - pn544_disable(info); - mutex_unlock(&info->mutex); - - return 0; -} - -static const struct file_operations pn544_fops = { - .owner = THIS_MODULE, - .llseek = no_llseek, - .read = pn544_read, - .write = pn544_write, - .poll = pn544_poll, - .open = pn544_open, - .release = pn544_close, - .unlocked_ioctl = pn544_ioctl, -}; - -#ifdef CONFIG_PM -static int pn544_suspend(struct device *dev) -{ - struct i2c_client *client = to_i2c_client(dev); - struct pn544_info *info; - int r = 0; - - dev_info(&client->dev, "***\n%s: client %p\n***\n", __func__, client); - - info = i2c_get_clientdata(client); - dev_info(&client->dev, "%s: info: %p, client %p\n", __func__, - info, client); - - mutex_lock(&info->mutex); - - switch (info->state) { - case PN544_ST_FW_READY: - /* Do not suspend while upgrading FW, please! */ - r = -EPERM; - break; - - case PN544_ST_READY: - /* - * CHECK: Device should be in standby-mode. No way to check? - * Allowing low power mode for the regulator is potentially - * dangerous if pn544 does not go to suspension. - */ - break; - - case PN544_ST_COLD: - break; - }; - - mutex_unlock(&info->mutex); - return r; -} - -static int pn544_resume(struct device *dev) -{ - struct i2c_client *client = to_i2c_client(dev); - struct pn544_info *info = i2c_get_clientdata(client); - int r = 0; - - dev_dbg(&client->dev, "%s: info: %p, client %p\n", __func__, - info, client); - - mutex_lock(&info->mutex); - - switch (info->state) { - case PN544_ST_READY: - /* - * CHECK: If regulator low power mode is allowed in - * pn544_suspend, we should go back to normal mode - * here. - */ - break; - - case PN544_ST_COLD: - break; - - case PN544_ST_FW_READY: - break; - }; - - mutex_unlock(&info->mutex); - - return r; -} - -static SIMPLE_DEV_PM_OPS(pn544_pm_ops, pn544_suspend, pn544_resume); -#endif - -static struct device_attribute pn544_attr = - __ATTR(nfc_test, S_IRUGO, pn544_test, NULL); - -static int __devinit pn544_probe(struct i2c_client *client, - const struct i2c_device_id *id) -{ - struct pn544_info *info; - struct pn544_nfc_platform_data *pdata; - int r = 0; - - dev_dbg(&client->dev, "%s\n", __func__); - dev_dbg(&client->dev, "IRQ: %d\n", client->irq); - - /* private data allocation */ - info = kzalloc(sizeof(struct pn544_info), GFP_KERNEL); - if (!info) { - dev_err(&client->dev, - "Cannot allocate memory for pn544_info.\n"); - r = -ENOMEM; - goto err_info_alloc; - } - - info->buflen = max(PN544_MSG_MAX_SIZE, PN544_MAX_I2C_TRANSFER); - info->buf = kzalloc(info->buflen, GFP_KERNEL); - if (!info->buf) { - dev_err(&client->dev, - "Cannot allocate memory for pn544_info->buf.\n"); - r = -ENOMEM; - goto err_buf_alloc; - } - - info->regs[0].supply = reg_vdd_io; - info->regs[1].supply = reg_vbat; - info->regs[2].supply = reg_vsim; - r = regulator_bulk_get(&client->dev, ARRAY_SIZE(info->regs), - info->regs); - if (r < 0) - goto err_kmalloc; - - info->i2c_dev = client; - info->state = PN544_ST_COLD; - info->read_irq = PN544_NONE; - mutex_init(&info->read_mutex); - mutex_init(&info->mutex); - init_waitqueue_head(&info->read_wait); - i2c_set_clientdata(client, info); - pdata = client->dev.platform_data; - if (!pdata) { - dev_err(&client->dev, "No platform data\n"); - r = -EINVAL; - goto err_reg; - } - - if (!pdata->request_resources) { - dev_err(&client->dev, "request_resources() missing\n"); - r = -EINVAL; - goto err_reg; - } - - r = pdata->request_resources(client); - if (r) { - dev_err(&client->dev, "Cannot get platform resources\n"); - goto err_reg; - } - - r = request_threaded_irq(client->irq, NULL, pn544_irq_thread_fn, - IRQF_TRIGGER_RISING, PN544_DRIVER_NAME, - info); - if (r < 0) { - dev_err(&client->dev, "Unable to register IRQ handler\n"); - goto err_res; - } - - /* If we don't have the test we don't need the sysfs file */ - if (pdata->test) { - r = device_create_file(&client->dev, &pn544_attr); - if (r) { - dev_err(&client->dev, - "sysfs registration failed, error %d\n", r); - goto err_irq; - } - } - - info->miscdev.minor = MISC_DYNAMIC_MINOR; - info->miscdev.name = PN544_DRIVER_NAME; - info->miscdev.fops = &pn544_fops; - info->miscdev.parent = &client->dev; - r = misc_register(&info->miscdev); - if (r < 0) { - dev_err(&client->dev, "Device registration failed\n"); - goto err_sysfs; - } - - dev_dbg(&client->dev, "%s: info: %p, pdata %p, client %p\n", - __func__, info, pdata, client); - - return 0; - -err_sysfs: - if (pdata->test) - device_remove_file(&client->dev, &pn544_attr); -err_irq: - free_irq(client->irq, info); -err_res: - if (pdata->free_resources) - pdata->free_resources(); -err_reg: - regulator_bulk_free(ARRAY_SIZE(info->regs), info->regs); -err_kmalloc: - kfree(info->buf); -err_buf_alloc: - kfree(info); -err_info_alloc: - return r; -} - -static __devexit int pn544_remove(struct i2c_client *client) -{ - struct pn544_info *info = i2c_get_clientdata(client); - struct pn544_nfc_platform_data *pdata = client->dev.platform_data; - - dev_dbg(&client->dev, "%s\n", __func__); - - misc_deregister(&info->miscdev); - if (pdata->test) - device_remove_file(&client->dev, &pn544_attr); - - if (info->state != PN544_ST_COLD) { - if (pdata->disable) - pdata->disable(); - - info->read_irq = PN544_NONE; - } - - free_irq(client->irq, info); - if (pdata->free_resources) - pdata->free_resources(); - - regulator_bulk_free(ARRAY_SIZE(info->regs), info->regs); - kfree(info->buf); - kfree(info); - - return 0; -} - -static struct i2c_driver pn544_driver = { - .driver = { - .name = PN544_DRIVER_NAME, -#ifdef CONFIG_PM - .pm = &pn544_pm_ops, -#endif - }, - .probe = pn544_probe, - .id_table = pn544_id_table, - .remove = __devexit_p(pn544_remove), -}; - -static int __init pn544_init(void) -{ - int r; - - pr_debug(DRIVER_DESC ": %s\n", __func__); - - r = i2c_add_driver(&pn544_driver); - if (r) { - pr_err(PN544_DRIVER_NAME ": driver registration failed\n"); - return r; - } - - return 0; -} - -static void __exit pn544_exit(void) -{ - i2c_del_driver(&pn544_driver); - pr_info(DRIVER_DESC ", Exiting.\n"); -} - -module_init(pn544_init); -module_exit(pn544_exit); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION(DRIVER_DESC); -- cgit v1.2.3-70-g09d2 From d7559982701ac500662b2e8e150ff34f7faf0281 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Wed, 26 Sep 2012 07:24:27 +0000 Subject: net: ti cpsw ethernet: allow reading phy interface mode from DT Allow users to specify the phy interface of the CPSW slaves. The new node parameter is called "phy_if_mode" and is optional. The original behaviour of the driver is preserved when not given. Signed-off-by: Daniel Mack Cc: Mugunthan V N Cc: Vaibhav Hiremath Cc: David S. Miller Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/cpsw.txt | 3 +++ drivers/net/ethernet/ti/cpsw.c | 3 +++ 2 files changed, 6 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/cpsw.txt b/Documentation/devicetree/bindings/net/cpsw.txt index dcaabe9fe86..d87f7d2b7b0 100644 --- a/Documentation/devicetree/bindings/net/cpsw.txt +++ b/Documentation/devicetree/bindings/net/cpsw.txt @@ -25,6 +25,8 @@ Required properties: - slave_reg_ofs : Specifies slave register offset - sliver_reg_ofs : Specifies slave sliver register offset - phy_id : Specifies slave phy id +- phy_if_mode : Specified slave phy interface mode (optional) + (one of the PHY_INTERFACE_MODE_* as numerical value) - mac-address : Specifies slave MAC address Optional properties: @@ -62,6 +64,7 @@ Examples: slave_reg_ofs = <0x208>; sliver_reg_ofs = <0xd80>; phy_id = "davinci_mdio.16:00"; + phy_if_mode = <6>; /* PHY_INTERFACE_MODE_RGMII */ /* Filled in by U-Boot */ mac-address = [ 00 00 00 00 00 00 ]; }; diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index df55e240374..3e12db6f9c0 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -855,6 +855,9 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, } slave_data->sliver_reg_ofs = prop; + if (!of_property_read_u32(slave_node, "phy_if_mode", &prop)) + slave_data->phy_if = prop; + mac_addr = of_get_mac_address(slave_node); if (mac_addr) memcpy(slave_data->mac_addr, mac_addr, ETH_ALEN); -- cgit v1.2.3-70-g09d2 From d4e62648274857a2b3a5bc62f910c062e71b2d9b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 1 Oct 2012 17:39:31 -0400 Subject: Revert "net: ti cpsw ethernet: allow reading phy interface mode from DT" This reverts commit d7559982701ac500662b2e8e150ff34f7faf0281. It wasn't meant to be applied, commit 342b7b741d76bc8aadeff844634348bb2a343d19 ("net: ti cpsw ethernet: set IFCTL_A bit in MACCONTROL") was redone in such a way to make this commit unnecessary. Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/cpsw.txt | 3 --- drivers/net/ethernet/ti/cpsw.c | 3 --- 2 files changed, 6 deletions(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/cpsw.txt b/Documentation/devicetree/bindings/net/cpsw.txt index d87f7d2b7b0..dcaabe9fe86 100644 --- a/Documentation/devicetree/bindings/net/cpsw.txt +++ b/Documentation/devicetree/bindings/net/cpsw.txt @@ -25,8 +25,6 @@ Required properties: - slave_reg_ofs : Specifies slave register offset - sliver_reg_ofs : Specifies slave sliver register offset - phy_id : Specifies slave phy id -- phy_if_mode : Specified slave phy interface mode (optional) - (one of the PHY_INTERFACE_MODE_* as numerical value) - mac-address : Specifies slave MAC address Optional properties: @@ -64,7 +62,6 @@ Examples: slave_reg_ofs = <0x208>; sliver_reg_ofs = <0xd80>; phy_id = "davinci_mdio.16:00"; - phy_if_mode = <6>; /* PHY_INTERFACE_MODE_RGMII */ /* Filled in by U-Boot */ mac-address = [ 00 00 00 00 00 00 ]; }; diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 3e12db6f9c0..df55e240374 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -855,9 +855,6 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, } slave_data->sliver_reg_ofs = prop; - if (!of_property_read_u32(slave_node, "phy_if_mode", &prop)) - slave_data->phy_if = prop; - mac_addr = of_get_mac_address(slave_node); if (mac_addr) memcpy(slave_data->mac_addr, mac_addr, ETH_ALEN); -- cgit v1.2.3-70-g09d2 From d342894c5d2f8c7df194c793ec4059656e09ca31 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 1 Oct 2012 12:32:35 +0000 Subject: vxlan: virtual extensible lan This is an implementation of Virtual eXtensible Local Area Network as described in draft RFC: http://tools.ietf.org/html/draft-mahalingam-dutt-dcops-vxlan-02 The driver integrates a Virtual Tunnel Endpoint (VTEP) functionality that learns MAC to IP address mapping. This implementation has not been tested only against the Linux userspace implementation using TAP, not against other vendor's equipment. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- Documentation/networking/vxlan.txt | 47 ++ drivers/net/Kconfig | 13 + drivers/net/Makefile | 1 + drivers/net/vxlan.c | 1217 ++++++++++++++++++++++++++++++++++++ include/linux/if_link.h | 16 + 5 files changed, 1294 insertions(+) create mode 100644 Documentation/networking/vxlan.txt create mode 100644 drivers/net/vxlan.c (limited to 'Documentation') diff --git a/Documentation/networking/vxlan.txt b/Documentation/networking/vxlan.txt new file mode 100644 index 00000000000..5b34b762d7d --- /dev/null +++ b/Documentation/networking/vxlan.txt @@ -0,0 +1,47 @@ +Virtual eXtensible Local Area Networking documentation +====================================================== + +The VXLAN protocol is a tunnelling protocol that is designed to +solve the problem of limited number of available VLAN's (4096). +With VXLAN identifier is expanded to 24 bits. + +It is a draft RFC standard, that is implemented by Cisco Nexus, +Vmware and Brocade. The protocol runs over UDP using a single +destination port (still not standardized by IANA). +This document describes the Linux kernel tunnel device, +there is also an implantation of VXLAN for Openvswitch. + +Unlike most tunnels, a VXLAN is a 1 to N network, not just point +to point. A VXLAN device can either dynamically learn the IP address +of the other end, in a manner similar to a learning bridge, or the +forwarding entries can be configured statically. + +The management of vxlan is done in a similar fashion to it's +too closest neighbors GRE and VLAN. Configuring VXLAN requires +the version of iproute2 that matches the kernel release +where VXLAN was first merged upstream. + +1. Create vxlan device + # ip li add vxlan0 type vxlan id 42 group 239.1.1.1 dev eth1 + +This creates a new device (vxlan0). The device uses the +the multicast group 239.1.1.1 over eth1 to handle packets where +no entry is in the forwarding table. + +2. Delete vxlan device + # ip link delete vxlan0 + +3. Show vxlan info + # ip -d show vxlan0 + +It is possible to create, destroy and display the vxlan +forwarding table using the new bridge command. + +1. Create forwarding table entry + # bridge fdb add to 00:17:42:8a:b4:05 dst 192.19.0.2 dev vxlan0 + +2. Delete forwarding table entry + # bridge fdb delete 00:17:42:8a:b4:05 + +3. Show forwarding table + # bridge fdb show dev vxlan0 diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 707ab7bd4ea..ed5041e96b2 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -149,6 +149,19 @@ config MACVTAP To compile this driver as a module, choose M here: the module will be called macvtap. +config VXLAN + tristate "Virtual eXtensible Local Area Network (VXLAN)" + depends on EXPERIMENTAL + ---help--- + This allows one to create vxlan virtual interfaces that provide + Layer 2 Networks over Layer 3 Networks. VXLAN is often used + to tunnel virtual network infrastructure in virtualized environments. + For more information see: + http://tools.ietf.org/html/draft-mahalingam-dutt-dcops-vxlan-02 + + To compile this driver as a module, choose M here: the module + will be called vxlan. + config NETCONSOLE tristate "Network console logging support" ---help--- diff --git a/drivers/net/Makefile b/drivers/net/Makefile index b682a1de7be..335db78fd98 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -21,6 +21,7 @@ obj-$(CONFIG_NET_TEAM) += team/ obj-$(CONFIG_TUN) += tun.o obj-$(CONFIG_VETH) += veth.o obj-$(CONFIG_VIRTIO_NET) += virtio_net.o +obj-$(CONFIG_VXLAN) += vxlan.o # # Networking Drivers diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c new file mode 100644 index 00000000000..f87a98f1aec --- /dev/null +++ b/drivers/net/vxlan.c @@ -0,0 +1,1217 @@ +/* + * VXLAN: Virtual eXtensiable Local Area Network + * + * Copyright (c) 2012 Vyatta Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * TODO + * - use IANA UDP port number (when defined) + * - IPv6 (not in RFC) + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define VXLAN_VERSION "0.1" + +#define VNI_HASH_BITS 10 +#define VNI_HASH_SIZE (1<vni_list[hash_32(id, VNI_HASH_BITS)]; +} + +/* Look up VNI in a per net namespace table */ +static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id) +{ + struct vxlan_dev *vxlan; + struct hlist_node *node; + + hlist_for_each_entry_rcu(vxlan, node, vni_head(net, id), hlist) { + if (vxlan->vni == id) + return vxlan; + } + + return NULL; +} + +/* Fill in neighbour message in skbuff. */ +static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, + const struct vxlan_fdb *fdb, + u32 portid, u32 seq, int type, unsigned int flags) +{ + unsigned long now = jiffies; + struct nda_cacheinfo ci; + struct nlmsghdr *nlh; + struct ndmsg *ndm; + + nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags); + if (nlh == NULL) + return -EMSGSIZE; + + ndm = nlmsg_data(nlh); + memset(ndm, 0, sizeof(*ndm)); + ndm->ndm_family = AF_BRIDGE; + ndm->ndm_state = fdb->state; + ndm->ndm_ifindex = vxlan->dev->ifindex; + ndm->ndm_flags = NTF_SELF; + ndm->ndm_type = NDA_DST; + + if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr)) + goto nla_put_failure; + + if (nla_put_be32(skb, NDA_DST, fdb->remote_ip)) + goto nla_put_failure; + + ci.ndm_used = jiffies_to_clock_t(now - fdb->used); + ci.ndm_confirmed = 0; + ci.ndm_updated = jiffies_to_clock_t(now - fdb->updated); + ci.ndm_refcnt = 0; + + if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci)) + goto nla_put_failure; + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static inline size_t vxlan_nlmsg_size(void) +{ + return NLMSG_ALIGN(sizeof(struct ndmsg)) + + nla_total_size(ETH_ALEN) /* NDA_LLADDR */ + + nla_total_size(sizeof(__be32)) /* NDA_DST */ + + nla_total_size(sizeof(struct nda_cacheinfo)); +} + +static void vxlan_fdb_notify(struct vxlan_dev *vxlan, + const struct vxlan_fdb *fdb, int type) +{ + struct net *net = dev_net(vxlan->dev); + struct sk_buff *skb; + int err = -ENOBUFS; + + skb = nlmsg_new(vxlan_nlmsg_size(), GFP_ATOMIC); + if (skb == NULL) + goto errout; + + err = vxlan_fdb_info(skb, vxlan, fdb, 0, 0, type, 0); + if (err < 0) { + /* -EMSGSIZE implies BUG in vxlan_nlmsg_size() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } + + rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); + return; +errout: + if (err < 0) + rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); +} + +/* Hash Ethernet address */ +static u32 eth_hash(const unsigned char *addr) +{ + u64 value = get_unaligned((u64 *)addr); + + /* only want 6 bytes */ +#ifdef __BIG_ENDIAN + value <<= 16; +#else + value >>= 16; +#endif + return hash_64(value, FDB_HASH_BITS); +} + +/* Hash chain to use given mac address */ +static inline struct hlist_head *vxlan_fdb_head(struct vxlan_dev *vxlan, + const u8 *mac) +{ + return &vxlan->fdb_head[eth_hash(mac)]; +} + +/* Look up Ethernet address in forwarding table */ +static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan, + const u8 *mac) + +{ + struct hlist_head *head = vxlan_fdb_head(vxlan, mac); + struct vxlan_fdb *f; + struct hlist_node *node; + + hlist_for_each_entry_rcu(f, node, head, hlist) { + if (compare_ether_addr(mac, f->eth_addr) == 0) + return f; + } + + return NULL; +} + +/* Add new entry to forwarding table -- assumes lock held */ +static int vxlan_fdb_create(struct vxlan_dev *vxlan, + const u8 *mac, __be32 ip, + __u16 state, __u16 flags) +{ + struct vxlan_fdb *f; + int notify = 0; + + f = vxlan_find_mac(vxlan, mac); + if (f) { + if (flags & NLM_F_EXCL) { + netdev_dbg(vxlan->dev, + "lost race to create %pM\n", mac); + return -EEXIST; + } + if (f->state != state) { + f->state = state; + f->updated = jiffies; + notify = 1; + } + } else { + if (!(flags & NLM_F_CREATE)) + return -ENOENT; + + if (vxlan->addrmax && vxlan->addrcnt >= vxlan->addrmax) + return -ENOSPC; + + netdev_dbg(vxlan->dev, "add %pM -> %pI4\n", mac, &ip); + f = kmalloc(sizeof(*f), GFP_ATOMIC); + if (!f) + return -ENOMEM; + + notify = 1; + f->remote_ip = ip; + f->state = state; + f->updated = f->used = jiffies; + memcpy(f->eth_addr, mac, ETH_ALEN); + + ++vxlan->addrcnt; + hlist_add_head_rcu(&f->hlist, + vxlan_fdb_head(vxlan, mac)); + } + + if (notify) + vxlan_fdb_notify(vxlan, f, RTM_NEWNEIGH); + + return 0; +} + +static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f) +{ + netdev_dbg(vxlan->dev, + "delete %pM\n", f->eth_addr); + + --vxlan->addrcnt; + vxlan_fdb_notify(vxlan, f, RTM_DELNEIGH); + + hlist_del_rcu(&f->hlist); + kfree_rcu(f, rcu); +} + +/* Add static entry (via netlink) */ +static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, u16 flags) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + __be32 ip; + int err; + + if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_REACHABLE))) { + pr_info("RTM_NEWNEIGH with invalid state %#x\n", + ndm->ndm_state); + return -EINVAL; + } + + if (tb[NDA_DST] == NULL) + return -EINVAL; + + if (nla_len(tb[NDA_DST]) != sizeof(__be32)) + return -EAFNOSUPPORT; + + ip = nla_get_be32(tb[NDA_DST]); + + spin_lock_bh(&vxlan->hash_lock); + err = vxlan_fdb_create(vxlan, addr, ip, ndm->ndm_state, flags); + spin_unlock_bh(&vxlan->hash_lock); + + return err; +} + +/* Delete entry (via netlink) */ +static int vxlan_fdb_delete(struct ndmsg *ndm, struct net_device *dev, + const unsigned char *addr) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_fdb *f; + int err = -ENOENT; + + spin_lock_bh(&vxlan->hash_lock); + f = vxlan_find_mac(vxlan, addr); + if (f) { + vxlan_fdb_destroy(vxlan, f); + err = 0; + } + spin_unlock_bh(&vxlan->hash_lock); + + return err; +} + +/* Dump forwarding table */ +static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, + struct net_device *dev, int idx) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + unsigned int h; + + for (h = 0; h < FDB_HASH_SIZE; ++h) { + struct vxlan_fdb *f; + struct hlist_node *n; + int err; + + hlist_for_each_entry_rcu(f, n, &vxlan->fdb_head[h], hlist) { + if (idx < cb->args[0]) + goto skip; + + err = vxlan_fdb_info(skb, vxlan, f, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWNEIGH, + NLM_F_MULTI); + if (err < 0) + break; +skip: + ++idx; + } + } + + return idx; +} + +/* Watch incoming packets to learn mapping between Ethernet address + * and Tunnel endpoint. + */ +static void vxlan_snoop(struct net_device *dev, + __be32 src_ip, const u8 *src_mac) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_fdb *f; + int err; + + f = vxlan_find_mac(vxlan, src_mac); + if (likely(f)) { + f->used = jiffies; + if (likely(f->remote_ip == src_ip)) + return; + + if (net_ratelimit()) + netdev_info(dev, + "%pM migrated from %pI4 to %pI4\n", + src_mac, &f->remote_ip, &src_ip); + + f->remote_ip = src_ip; + f->updated = jiffies; + } else { + /* learned new entry */ + spin_lock(&vxlan->hash_lock); + err = vxlan_fdb_create(vxlan, src_mac, src_ip, + NUD_REACHABLE, + NLM_F_EXCL|NLM_F_CREATE); + spin_unlock(&vxlan->hash_lock); + } +} + + +/* See if multicast group is already in use by other ID */ +static bool vxlan_group_used(struct vxlan_net *vn, + const struct vxlan_dev *this) +{ + const struct vxlan_dev *vxlan; + struct hlist_node *node; + unsigned h; + + for (h = 0; h < VNI_HASH_SIZE; ++h) + hlist_for_each_entry(vxlan, node, &vn->vni_list[h], hlist) { + if (vxlan == this) + continue; + + if (!netif_running(vxlan->dev)) + continue; + + if (vxlan->gaddr == this->gaddr) + return true; + } + + return false; +} + +/* kernel equivalent to IP_ADD_MEMBERSHIP */ +static int vxlan_join_group(struct net_device *dev) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id); + struct sock *sk = vn->sock->sk; + struct ip_mreqn mreq = { + .imr_multiaddr.s_addr = vxlan->gaddr, + }; + int err; + + /* Already a member of group */ + if (vxlan_group_used(vn, vxlan)) + return 0; + + /* Need to drop RTNL to call multicast join */ + rtnl_unlock(); + lock_sock(sk); + err = ip_mc_join_group(sk, &mreq); + release_sock(sk); + rtnl_lock(); + + return err; +} + + +/* kernel equivalent to IP_DROP_MEMBERSHIP */ +static int vxlan_leave_group(struct net_device *dev) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id); + int err = 0; + struct sock *sk = vn->sock->sk; + struct ip_mreqn mreq = { + .imr_multiaddr.s_addr = vxlan->gaddr, + }; + + /* Only leave group when last vxlan is done. */ + if (vxlan_group_used(vn, vxlan)) + return 0; + + /* Need to drop RTNL to call multicast leave */ + rtnl_unlock(); + lock_sock(sk); + err = ip_mc_leave_group(sk, &mreq); + release_sock(sk); + rtnl_lock(); + + return err; +} + +/* Callback from net/ipv4/udp.c to receive packets */ +static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) +{ + struct iphdr *oip; + struct vxlanhdr *vxh; + struct vxlan_dev *vxlan; + struct vxlan_stats *stats; + __u32 vni; + int err; + + /* pop off outer UDP header */ + __skb_pull(skb, sizeof(struct udphdr)); + + /* Need Vxlan and inner Ethernet header to be present */ + if (!pskb_may_pull(skb, sizeof(struct vxlanhdr))) + goto error; + + /* Drop packets with reserved bits set */ + vxh = (struct vxlanhdr *) skb->data; + if (vxh->vx_flags != htonl(VXLAN_FLAGS) || + (vxh->vx_vni & htonl(0xff))) { + netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#x\n", + ntohl(vxh->vx_flags), ntohl(vxh->vx_vni)); + goto error; + } + + __skb_pull(skb, sizeof(struct vxlanhdr)); + skb_postpull_rcsum(skb, eth_hdr(skb), sizeof(struct vxlanhdr)); + + /* Is this VNI defined? */ + vni = ntohl(vxh->vx_vni) >> 8; + vxlan = vxlan_find_vni(sock_net(sk), vni); + if (!vxlan) { + netdev_dbg(skb->dev, "unknown vni %d\n", vni); + goto drop; + } + + if (!pskb_may_pull(skb, ETH_HLEN)) { + vxlan->dev->stats.rx_length_errors++; + vxlan->dev->stats.rx_errors++; + goto drop; + } + + /* Re-examine inner Ethernet packet */ + oip = ip_hdr(skb); + skb->protocol = eth_type_trans(skb, vxlan->dev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); + + /* Ignore packet loops (and multicast echo) */ + if (compare_ether_addr(eth_hdr(skb)->h_source, + vxlan->dev->dev_addr) == 0) + goto drop; + + if (vxlan->learn) + vxlan_snoop(skb->dev, oip->saddr, eth_hdr(skb)->h_source); + + __skb_tunnel_rx(skb, vxlan->dev); + skb_reset_network_header(skb); + + err = IP_ECN_decapsulate(oip, skb); + if (unlikely(err)) { + if (log_ecn_error) + net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", + &oip->saddr, oip->tos); + if (err > 1) { + ++vxlan->dev->stats.rx_frame_errors; + ++vxlan->dev->stats.rx_errors; + goto drop; + } + } + + stats = this_cpu_ptr(vxlan->stats); + u64_stats_update_begin(&stats->syncp); + stats->rx_packets++; + stats->rx_bytes += skb->len; + u64_stats_update_end(&stats->syncp); + + netif_rx(skb); + + return 0; +error: + /* Put UDP header back */ + __skb_push(skb, sizeof(struct udphdr)); + + return 1; +drop: + /* Consume bad packet */ + kfree_skb(skb); + return 0; +} + +/* Extract dsfield from inner protocol */ +static inline u8 vxlan_get_dsfield(const struct iphdr *iph, + const struct sk_buff *skb) +{ + if (skb->protocol == htons(ETH_P_IP)) + return iph->tos; + else if (skb->protocol == htons(ETH_P_IPV6)) + return ipv6_get_dsfield((const struct ipv6hdr *)iph); + else + return 0; +} + +/* Propogate ECN bits out */ +static inline u8 vxlan_ecn_encap(u8 tos, + const struct iphdr *iph, + const struct sk_buff *skb) +{ + u8 inner = vxlan_get_dsfield(iph, skb); + + return INET_ECN_encapsulate(tos, inner); +} + +/* Transmit local packets over Vxlan + * + * Outer IP header inherits ECN and DF from inner header. + * Outer UDP destination is the VXLAN assigned port. + * source port is based on hash of flow if available + * otherwise use a random value + */ +static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + struct rtable *rt; + const struct ethhdr *eth; + const struct iphdr *old_iph; + struct iphdr *iph; + struct vxlanhdr *vxh; + struct udphdr *uh; + struct flowi4 fl4; + struct vxlan_fdb *f; + unsigned int pkt_len = skb->len; + u32 hash; + __be32 dst; + __be16 df = 0; + __u8 tos, ttl; + int err; + + /* Need space for new headers (invalidates iph ptr) */ + if (skb_cow_head(skb, VXLAN_HEADROOM)) + goto drop; + + eth = (void *)skb->data; + old_iph = ip_hdr(skb); + + if (!is_multicast_ether_addr(eth->h_dest) && + (f = vxlan_find_mac(vxlan, eth->h_dest))) + dst = f->remote_ip; + else if (vxlan->gaddr) { + dst = vxlan->gaddr; + } else + goto drop; + + ttl = vxlan->ttl; + if (!ttl && IN_MULTICAST(ntohl(dst))) + ttl = 1; + + tos = vxlan->tos; + if (tos == 1) + tos = vxlan_get_dsfield(old_iph, skb); + + hash = skb_get_rxhash(skb); + + rt = ip_route_output_gre(dev_net(dev), &fl4, dst, + vxlan->saddr, vxlan->vni, + RT_TOS(tos), vxlan->link); + if (IS_ERR(rt)) { + netdev_dbg(dev, "no route to %pI4\n", &dst); + dev->stats.tx_carrier_errors++; + goto tx_error; + } + + if (rt->dst.dev == dev) { + netdev_dbg(dev, "circular route to %pI4\n", &dst); + ip_rt_put(rt); + dev->stats.collisions++; + goto tx_error; + } + + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | + IPSKB_REROUTED); + skb_dst_drop(skb); + skb_dst_set(skb, &rt->dst); + + vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); + vxh->vx_flags = htonl(VXLAN_FLAGS); + vxh->vx_vni = htonl(vxlan->vni << 8); + + __skb_push(skb, sizeof(*uh)); + skb_reset_transport_header(skb); + uh = udp_hdr(skb); + + uh->dest = htons(vxlan_port); + uh->source = hash ? :random32(); + + uh->len = htons(skb->len); + uh->check = 0; + + __skb_push(skb, sizeof(*iph)); + skb_reset_network_header(skb); + iph = ip_hdr(skb); + iph->version = 4; + iph->ihl = sizeof(struct iphdr) >> 2; + iph->frag_off = df; + iph->protocol = IPPROTO_UDP; + iph->tos = vxlan_ecn_encap(tos, old_iph, skb); + iph->daddr = fl4.daddr; + iph->saddr = fl4.saddr; + iph->ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); + + /* See __IPTUNNEL_XMIT */ + skb->ip_summed = CHECKSUM_NONE; + ip_select_ident(iph, &rt->dst, NULL); + + err = ip_local_out(skb); + if (likely(net_xmit_eval(err) == 0)) { + struct vxlan_stats *stats = this_cpu_ptr(vxlan->stats); + + u64_stats_update_begin(&stats->syncp); + stats->tx_packets++; + stats->tx_bytes += pkt_len; + u64_stats_update_end(&stats->syncp); + } else { + dev->stats.tx_errors++; + dev->stats.tx_aborted_errors++; + } + return NETDEV_TX_OK; + +drop: + dev->stats.tx_dropped++; + goto tx_free; + +tx_error: + dev->stats.tx_errors++; +tx_free: + dev_kfree_skb(skb); + return NETDEV_TX_OK; +} + +/* Walk the forwarding table and purge stale entries */ +static void vxlan_cleanup(unsigned long arg) +{ + struct vxlan_dev *vxlan = (struct vxlan_dev *) arg; + unsigned long next_timer = jiffies + FDB_AGE_INTERVAL; + unsigned int h; + + if (!netif_running(vxlan->dev)) + return; + + spin_lock_bh(&vxlan->hash_lock); + for (h = 0; h < FDB_HASH_SIZE; ++h) { + struct hlist_node *p, *n; + hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) { + struct vxlan_fdb *f + = container_of(p, struct vxlan_fdb, hlist); + unsigned long timeout; + + if (f->state == NUD_PERMANENT) + continue; + + timeout = f->used + vxlan->age_interval * HZ; + if (time_before_eq(timeout, jiffies)) { + netdev_dbg(vxlan->dev, + "garbage collect %pM\n", + f->eth_addr); + f->state = NUD_STALE; + vxlan_fdb_destroy(vxlan, f); + } else if (time_before(timeout, next_timer)) + next_timer = timeout; + } + } + spin_unlock_bh(&vxlan->hash_lock); + + mod_timer(&vxlan->age_timer, next_timer); +} + +/* Setup stats when device is created */ +static int vxlan_init(struct net_device *dev) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + + vxlan->stats = alloc_percpu(struct vxlan_stats); + if (!vxlan->stats) + return -ENOMEM; + + return 0; +} + +/* Start ageing timer and join group when device is brought up */ +static int vxlan_open(struct net_device *dev) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + int err; + + if (vxlan->gaddr) { + err = vxlan_join_group(dev); + if (err) + return err; + } + + if (vxlan->age_interval) + mod_timer(&vxlan->age_timer, jiffies + FDB_AGE_INTERVAL); + + return 0; +} + +/* Purge the forwarding table */ +static void vxlan_flush(struct vxlan_dev *vxlan) +{ + unsigned h; + + spin_lock_bh(&vxlan->hash_lock); + for (h = 0; h < FDB_HASH_SIZE; ++h) { + struct hlist_node *p, *n; + hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) { + struct vxlan_fdb *f + = container_of(p, struct vxlan_fdb, hlist); + vxlan_fdb_destroy(vxlan, f); + } + } + spin_unlock_bh(&vxlan->hash_lock); +} + +/* Cleanup timer and forwarding table on shutdown */ +static int vxlan_stop(struct net_device *dev) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + + if (vxlan->gaddr) + vxlan_leave_group(dev); + + del_timer_sync(&vxlan->age_timer); + + vxlan_flush(vxlan); + + return 0; +} + +/* Merge per-cpu statistics */ +static struct rtnl_link_stats64 *vxlan_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_stats tmp, sum = { 0 }; + unsigned int cpu; + + for_each_possible_cpu(cpu) { + unsigned int start; + const struct vxlan_stats *stats + = per_cpu_ptr(vxlan->stats, cpu); + + do { + start = u64_stats_fetch_begin_bh(&stats->syncp); + memcpy(&tmp, stats, sizeof(tmp)); + } while (u64_stats_fetch_retry_bh(&stats->syncp, start)); + + sum.tx_bytes += tmp.tx_bytes; + sum.tx_packets += tmp.tx_packets; + sum.rx_bytes += tmp.rx_bytes; + sum.rx_packets += tmp.rx_packets; + } + + stats->tx_bytes = sum.tx_bytes; + stats->tx_packets = sum.tx_packets; + stats->rx_bytes = sum.rx_bytes; + stats->rx_packets = sum.rx_packets; + + stats->multicast = dev->stats.multicast; + stats->rx_length_errors = dev->stats.rx_length_errors; + stats->rx_frame_errors = dev->stats.rx_frame_errors; + stats->rx_errors = dev->stats.rx_errors; + + stats->tx_dropped = dev->stats.tx_dropped; + stats->tx_carrier_errors = dev->stats.tx_carrier_errors; + stats->tx_aborted_errors = dev->stats.tx_aborted_errors; + stats->collisions = dev->stats.collisions; + stats->tx_errors = dev->stats.tx_errors; + + return stats; +} + +/* Stub, nothing needs to be done. */ +static void vxlan_set_multicast_list(struct net_device *dev) +{ +} + +static const struct net_device_ops vxlan_netdev_ops = { + .ndo_init = vxlan_init, + .ndo_open = vxlan_open, + .ndo_stop = vxlan_stop, + .ndo_start_xmit = vxlan_xmit, + .ndo_get_stats64 = vxlan_stats64, + .ndo_set_rx_mode = vxlan_set_multicast_list, + .ndo_change_mtu = eth_change_mtu, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_mac_address = eth_mac_addr, + .ndo_fdb_add = vxlan_fdb_add, + .ndo_fdb_del = vxlan_fdb_delete, + .ndo_fdb_dump = vxlan_fdb_dump, +}; + +/* Info for udev, that this is a virtual tunnel endpoint */ +static struct device_type vxlan_type = { + .name = "vxlan", +}; + +static void vxlan_free(struct net_device *dev) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + + free_percpu(vxlan->stats); + free_netdev(dev); +} + +/* Initialize the device structure. */ +static void vxlan_setup(struct net_device *dev) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + unsigned h; + + eth_hw_addr_random(dev); + ether_setup(dev); + + dev->netdev_ops = &vxlan_netdev_ops; + dev->destructor = vxlan_free; + SET_NETDEV_DEVTYPE(dev, &vxlan_type); + + dev->tx_queue_len = 0; + dev->features |= NETIF_F_LLTX; + dev->features |= NETIF_F_NETNS_LOCAL; + dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; + + spin_lock_init(&vxlan->hash_lock); + + init_timer_deferrable(&vxlan->age_timer); + vxlan->age_timer.function = vxlan_cleanup; + vxlan->age_timer.data = (unsigned long) vxlan; + + vxlan->dev = dev; + + for (h = 0; h < FDB_HASH_SIZE; ++h) + INIT_HLIST_HEAD(&vxlan->fdb_head[h]); +} + +static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = { + [IFLA_VXLAN_ID] = { .type = NLA_U32 }, + [IFLA_VXLAN_GROUP] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, + [IFLA_VXLAN_LINK] = { .type = NLA_U32 }, + [IFLA_VXLAN_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) }, + [IFLA_VXLAN_TOS] = { .type = NLA_U8 }, + [IFLA_VXLAN_TTL] = { .type = NLA_U8 }, + [IFLA_VXLAN_LEARNING] = { .type = NLA_U8 }, + [IFLA_VXLAN_AGEING] = { .type = NLA_U32 }, + [IFLA_VXLAN_LIMIT] = { .type = NLA_U32 }, +}; + +static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + if (tb[IFLA_ADDRESS]) { + if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) { + pr_debug("invalid link address (not ethernet)\n"); + return -EINVAL; + } + + if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) { + pr_debug("invalid all zero ethernet address\n"); + return -EADDRNOTAVAIL; + } + } + + if (!data) + return -EINVAL; + + if (data[IFLA_VXLAN_ID]) { + __u32 id = nla_get_u32(data[IFLA_VXLAN_ID]); + if (id >= VXLAN_VID_MASK) + return -ERANGE; + } + + if (data[IFLA_VXLAN_GROUP]) { + __be32 gaddr = nla_get_be32(data[IFLA_VXLAN_GROUP]); + if (!IN_MULTICAST(ntohl(gaddr))) { + pr_debug("group address is not IPv4 multicast\n"); + return -EADDRNOTAVAIL; + } + } + return 0; +} + +static int vxlan_newlink(struct net *net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + __u32 vni; + int err; + + if (!data[IFLA_VXLAN_ID]) + return -EINVAL; + + vni = nla_get_u32(data[IFLA_VXLAN_ID]); + if (vxlan_find_vni(net, vni)) { + pr_info("duplicate VNI %u\n", vni); + return -EEXIST; + } + vxlan->vni = vni; + + if (data[IFLA_VXLAN_GROUP]) + vxlan->gaddr = nla_get_be32(data[IFLA_VXLAN_GROUP]); + + if (data[IFLA_VXLAN_LOCAL]) + vxlan->saddr = nla_get_be32(data[IFLA_VXLAN_LOCAL]); + + if (data[IFLA_VXLAN_LINK]) { + vxlan->link = nla_get_u32(data[IFLA_VXLAN_LINK]); + + if (!tb[IFLA_MTU]) { + struct net_device *lowerdev; + lowerdev = __dev_get_by_index(net, vxlan->link); + dev->mtu = lowerdev->mtu - VXLAN_HEADROOM; + } + } + + if (data[IFLA_VXLAN_TOS]) + vxlan->tos = nla_get_u8(data[IFLA_VXLAN_TOS]); + + if (!data[IFLA_VXLAN_LEARNING] || nla_get_u8(data[IFLA_VXLAN_LEARNING])) + vxlan->learn = true; + + if (data[IFLA_VXLAN_AGEING]) + vxlan->age_interval = nla_get_u32(data[IFLA_VXLAN_AGEING]); + else + vxlan->age_interval = FDB_AGE_DEFAULT; + + if (data[IFLA_VXLAN_LIMIT]) + vxlan->addrmax = nla_get_u32(data[IFLA_VXLAN_LIMIT]); + + err = register_netdevice(dev); + if (!err) + hlist_add_head_rcu(&vxlan->hlist, vni_head(net, vxlan->vni)); + + return err; +} + +static void vxlan_dellink(struct net_device *dev, struct list_head *head) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + + hlist_del_rcu(&vxlan->hlist); + + unregister_netdevice_queue(dev, head); +} + +static size_t vxlan_get_size(const struct net_device *dev) +{ + + return nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_ID */ + nla_total_size(sizeof(__be32)) +/* IFLA_VXLAN_GROUP */ + nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LINK */ + nla_total_size(sizeof(__be32))+ /* IFLA_VXLAN_LOCAL */ + nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TTL */ + nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TOS */ + nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LEARNING */ + nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_AGEING */ + nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LIMIT */ + 0; +} + +static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + const struct vxlan_dev *vxlan = netdev_priv(dev); + + if (nla_put_u32(skb, IFLA_VXLAN_ID, vxlan->vni)) + goto nla_put_failure; + + if (vxlan->gaddr && nla_put_u32(skb, IFLA_VXLAN_GROUP, vxlan->gaddr)) + goto nla_put_failure; + + if (vxlan->link && nla_put_u32(skb, IFLA_VXLAN_LINK, vxlan->link)) + goto nla_put_failure; + + if (vxlan->saddr && nla_put_u32(skb, IFLA_VXLAN_LOCAL, vxlan->saddr)) + goto nla_put_failure; + + if (nla_put_u8(skb, IFLA_VXLAN_TTL, vxlan->ttl) || + nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->tos) || + nla_put_u8(skb, IFLA_VXLAN_LEARNING, vxlan->learn) || + nla_put_u32(skb, IFLA_VXLAN_AGEING, vxlan->age_interval) || + nla_put_u32(skb, IFLA_VXLAN_LIMIT, vxlan->addrmax)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static struct rtnl_link_ops vxlan_link_ops __read_mostly = { + .kind = "vxlan", + .maxtype = IFLA_VXLAN_MAX, + .policy = vxlan_policy, + .priv_size = sizeof(struct vxlan_dev), + .setup = vxlan_setup, + .validate = vxlan_validate, + .newlink = vxlan_newlink, + .dellink = vxlan_dellink, + .get_size = vxlan_get_size, + .fill_info = vxlan_fill_info, +}; + +static __net_init int vxlan_init_net(struct net *net) +{ + struct vxlan_net *vn = net_generic(net, vxlan_net_id); + struct sock *sk; + struct sockaddr_in vxlan_addr = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl(INADDR_ANY), + }; + int rc; + unsigned h; + + /* Create UDP socket for encapsulation receive. */ + rc = sock_create_kern(AF_INET, SOCK_DGRAM, IPPROTO_UDP, &vn->sock); + if (rc < 0) { + pr_debug("UDP socket create failed\n"); + return rc; + } + + vxlan_addr.sin_port = htons(vxlan_port); + + rc = kernel_bind(vn->sock, (struct sockaddr *) &vxlan_addr, + sizeof(vxlan_addr)); + if (rc < 0) { + pr_debug("bind for UDP socket %pI4:%u (%d)\n", + &vxlan_addr.sin_addr, ntohs(vxlan_addr.sin_port), rc); + sock_release(vn->sock); + vn->sock = NULL; + return rc; + } + + /* Disable multicast loopback */ + sk = vn->sock->sk; + inet_sk(sk)->mc_loop = 0; + + /* Mark socket as an encapsulation socket. */ + udp_sk(sk)->encap_type = 1; + udp_sk(sk)->encap_rcv = vxlan_udp_encap_recv; + udp_encap_enable(); + + for (h = 0; h < VNI_HASH_SIZE; ++h) + INIT_HLIST_HEAD(&vn->vni_list[h]); + + return 0; +} + +static __net_exit void vxlan_exit_net(struct net *net) +{ + struct vxlan_net *vn = net_generic(net, vxlan_net_id); + + if (vn->sock) { + sock_release(vn->sock); + vn->sock = NULL; + } +} + +static struct pernet_operations vxlan_net_ops = { + .init = vxlan_init_net, + .exit = vxlan_exit_net, + .id = &vxlan_net_id, + .size = sizeof(struct vxlan_net), +}; + +static int __init vxlan_init_module(void) +{ + int rc; + + get_random_bytes(&vxlan_salt, sizeof(vxlan_salt)); + + rc = register_pernet_device(&vxlan_net_ops); + if (rc) + goto out1; + + rc = rtnl_link_register(&vxlan_link_ops); + if (rc) + goto out2; + + return 0; + +out2: + unregister_pernet_device(&vxlan_net_ops); +out1: + return rc; +} +module_init(vxlan_init_module); + +static void __exit vxlan_cleanup_module(void) +{ + rtnl_link_unregister(&vxlan_link_ops); + unregister_pernet_device(&vxlan_net_ops); +} +module_exit(vxlan_cleanup_module); + +MODULE_LICENSE("GPL"); +MODULE_VERSION(VXLAN_VERSION); +MODULE_AUTHOR("Stephen Hemminger "); +MODULE_ALIAS_RTNL_LINK("vxlan"); diff --git a/include/linux/if_link.h b/include/linux/if_link.h index 4491177e984..e4dad4ddf08 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -272,6 +272,22 @@ enum macvlan_mode { #define MACVLAN_FLAG_NOPROMISC 1 +/* VXLAN section */ +enum { + IFLA_VXLAN_UNSPEC, + IFLA_VXLAN_ID, + IFLA_VXLAN_GROUP, + IFLA_VXLAN_LINK, + IFLA_VXLAN_LOCAL, + IFLA_VXLAN_TTL, + IFLA_VXLAN_TOS, + IFLA_VXLAN_LEARNING, + IFLA_VXLAN_AGEING, + IFLA_VXLAN_LIMIT, + __IFLA_VXLAN_MAX +}; +#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) + /* SR-IOV virtual function management section */ enum { -- cgit v1.2.3-70-g09d2