diff options
66 files changed, 2600 insertions, 459 deletions
diff --git a/Documentation/networking/ipvlan.txt b/Documentation/networking/ipvlan.txt new file mode 100644 index 00000000000..cf996394e46 --- /dev/null +++ b/Documentation/networking/ipvlan.txt @@ -0,0 +1,107 @@ + + IPVLAN Driver HOWTO + +Initial Release: + Mahesh Bandewar <maheshb AT google.com> + +1. Introduction: + This is conceptually very similar to the macvlan driver with one major +exception of using L3 for mux-ing /demux-ing among slaves. This property makes +the master device share the L2 with it's slave devices. I have developed this +driver in conjuntion with network namespaces and not sure if there is use case +outside of it. + + +2. Building and Installation: + In order to build the driver, please select the config item CONFIG_IPVLAN. +The driver can be built into the kernel (CONFIG_IPVLAN=y) or as a module +(CONFIG_IPVLAN=m). + + +3. Configuration: + There are no module parameters for this driver and it can be configured +using IProute2/ip utility. + + ip link add link <master-dev> <slave-dev> type ipvlan mode { l2 | L3 } + + e.g. ip link add link ipvl0 eth0 type ipvlan mode l2 + + +4. Operating modes: + IPvlan has two modes of operation - L2 and L3. For a given master device, +you can select one of these two modes and all slaves on that master will +operate in the same (selected) mode. The RX mode is almost identical except +that in L3 mode the slaves wont receive any multicast / broadcast traffic. +L3 mode is more restrictive since routing is controlled from the other (mostly) +default namespace. + +4.1 L2 mode: + In this mode TX processing happens on the stack instance attached to the +slave device and packets are switched and queued to the master device to send +out. In this mode the slaves will RX/TX multicast and broadcast (if applicable) +as well. + +4.2 L3 mode: + In this mode TX processing upto L3 happens on the stack instance attached +to the slave device and packets are switched to the stack instance of the +master device for the L2 processing and routing from that instance will be +used before packets are queued on the outbound device. In this mode the slaves +will not receive nor can send multicast / broadcast traffic. + + +5. What to choose (macvlan vs. ipvlan)? + These two devices are very similar in many regards and the specific use +case could very well define which device to choose. if one of the following +situations defines your use case then you can choose to use ipvlan - + (a) The Linux host that is connected to the external switch / router has +policy configured that allows only one mac per port. + (b) No of virtual devices created on a master exceed the mac capacity and +puts the NIC in promiscous mode and degraded performance is a concern. + (c) If the slave device is to be put into the hostile / untrusted network +namespace where L2 on the slave could be changed / misused. + + +6. Example configuration: + + +=============================================================+ + | Host: host1 | + | | + | +----------------------+ +----------------------+ | + | | NS:ns0 | | NS:ns1 | | + | | | | | | + | | | | | | + | | ipvl0 | | ipvl1 | | + | +----------#-----------+ +-----------#----------+ | + | # # | + | ################################ | + | # eth0 | + +==============================#==============================+ + + + (a) Create two network namespaces - ns0, ns1 + ip netns add ns0 + ip netns add ns1 + + (b) Create two ipvlan slaves on eth0 (master device) + ip link add link eth0 ipvl0 type ipvlan mode l2 + ip link add link eth0 ipvl1 type ipvlan mode l2 + + (c) Assign slaves to the respective network namespaces + ip link set dev ipvl0 netns ns0 + ip link set dev ipvl1 netns ns1 + + (d) Now switch to the namespace (ns0 or ns1) to configure the slave devices + - For ns0 + (1) ip netns exec ns0 bash + (2) ip link set dev ipvl0 up + (3) ip link set dev lo up + (4) ip -4 addr add 127.0.0.1 dev lo + (5) ip -4 addr add $IPADDR dev ipvl0 + (6) ip -4 route add default via $ROUTER dev ipvl0 + - For ns1 + (1) ip netns exec ns1 bash + (2) ip link set dev ipvl1 up + (3) ip link set dev lo up + (4) ip -4 addr add 127.0.0.1 dev lo + (5) ip -4 addr add $IPADDR dev ipvl1 + (6) ip -4 route add default via $ROUTER dev ipvl1 diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index f9009be3f30..b6d64f54657 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -145,6 +145,24 @@ config MACVTAP To compile this driver as a module, choose M here: the module will be called macvtap. + +config IPVLAN + tristate "IP-VLAN support" + ---help--- + This allows one to create virtual devices off of a main interface + and packets will be delivered based on the dest L3 (IPv6/IPv4 addr) + on packets. All interfaces (including the main interface) share L2 + making it transparent to the connected L2 switch. + + Ipvlan devices can be added using the "ip" command from the + iproute2 package starting with the iproute2-X.Y.ZZ release: + + "ip link add link <main-dev> [ NAME ] type ipvlan" + + To compile this driver as a module, choose M here: the module + will be called ipvlan. + + config VXLAN tristate "Virtual eXtensible Local Area Network (VXLAN)" depends on INET diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 61aefdd1e17..e25fdd7d905 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -6,6 +6,7 @@ # Networking Core Drivers # obj-$(CONFIG_BONDING) += bonding/ +obj-$(CONFIG_IPVLAN) += ipvlan/ obj-$(CONFIG_DUMMY) += dummy.o obj-$(CONFIG_EQUALIZER) += eql.o obj-$(CONFIG_IFB) += ifb.o diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index a576da1eedf..3aea82bb903 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -210,114 +210,25 @@ struct filter_entry { NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\ NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR) -#define CH_DEVICE(devid, data) { PCI_VDEVICE(CHELSIO, devid), (data) } - -static const struct pci_device_id cxgb4_pci_tbl[] = { - CH_DEVICE(0xa000, 0), /* PE10K */ - CH_DEVICE(0x4001, -1), - CH_DEVICE(0x4002, -1), - CH_DEVICE(0x4003, -1), - CH_DEVICE(0x4004, -1), - CH_DEVICE(0x4005, -1), - CH_DEVICE(0x4006, -1), - CH_DEVICE(0x4007, -1), - CH_DEVICE(0x4008, -1), - CH_DEVICE(0x4009, -1), - CH_DEVICE(0x400a, -1), - CH_DEVICE(0x400d, -1), - CH_DEVICE(0x400e, -1), - CH_DEVICE(0x4080, -1), - CH_DEVICE(0x4081, -1), - CH_DEVICE(0x4082, -1), - CH_DEVICE(0x4083, -1), - CH_DEVICE(0x4084, -1), - CH_DEVICE(0x4085, -1), - CH_DEVICE(0x4086, -1), - CH_DEVICE(0x4087, -1), - CH_DEVICE(0x4088, -1), - CH_DEVICE(0x4401, 4), - CH_DEVICE(0x4402, 4), - CH_DEVICE(0x4403, 4), - CH_DEVICE(0x4404, 4), - CH_DEVICE(0x4405, 4), - CH_DEVICE(0x4406, 4), - CH_DEVICE(0x4407, 4), - CH_DEVICE(0x4408, 4), - CH_DEVICE(0x4409, 4), - CH_DEVICE(0x440a, 4), - CH_DEVICE(0x440d, 4), - CH_DEVICE(0x440e, 4), - CH_DEVICE(0x4480, 4), - CH_DEVICE(0x4481, 4), - CH_DEVICE(0x4482, 4), - CH_DEVICE(0x4483, 4), - CH_DEVICE(0x4484, 4), - CH_DEVICE(0x4485, 4), - CH_DEVICE(0x4486, 4), - CH_DEVICE(0x4487, 4), - CH_DEVICE(0x4488, 4), - CH_DEVICE(0x5001, 4), - CH_DEVICE(0x5002, 4), - CH_DEVICE(0x5003, 4), - CH_DEVICE(0x5004, 4), - CH_DEVICE(0x5005, 4), - CH_DEVICE(0x5006, 4), - CH_DEVICE(0x5007, 4), - CH_DEVICE(0x5008, 4), - CH_DEVICE(0x5009, 4), - CH_DEVICE(0x500A, 4), - CH_DEVICE(0x500B, 4), - CH_DEVICE(0x500C, 4), - CH_DEVICE(0x500D, 4), - CH_DEVICE(0x500E, 4), - CH_DEVICE(0x500F, 4), - CH_DEVICE(0x5010, 4), - CH_DEVICE(0x5011, 4), - CH_DEVICE(0x5012, 4), - CH_DEVICE(0x5013, 4), - CH_DEVICE(0x5014, 4), - CH_DEVICE(0x5015, 4), - CH_DEVICE(0x5080, 4), - CH_DEVICE(0x5081, 4), - CH_DEVICE(0x5082, 4), - CH_DEVICE(0x5083, 4), - CH_DEVICE(0x5084, 4), - CH_DEVICE(0x5085, 4), - CH_DEVICE(0x5086, 4), - CH_DEVICE(0x5087, 4), - CH_DEVICE(0x5088, 4), - CH_DEVICE(0x5401, 4), - CH_DEVICE(0x5402, 4), - CH_DEVICE(0x5403, 4), - CH_DEVICE(0x5404, 4), - CH_DEVICE(0x5405, 4), - CH_DEVICE(0x5406, 4), - CH_DEVICE(0x5407, 4), - CH_DEVICE(0x5408, 4), - CH_DEVICE(0x5409, 4), - CH_DEVICE(0x540A, 4), - CH_DEVICE(0x540B, 4), - CH_DEVICE(0x540C, 4), - CH_DEVICE(0x540D, 4), - CH_DEVICE(0x540E, 4), - CH_DEVICE(0x540F, 4), - CH_DEVICE(0x5410, 4), - CH_DEVICE(0x5411, 4), - CH_DEVICE(0x5412, 4), - CH_DEVICE(0x5413, 4), - CH_DEVICE(0x5414, 4), - CH_DEVICE(0x5415, 4), - CH_DEVICE(0x5480, 4), - CH_DEVICE(0x5481, 4), - CH_DEVICE(0x5482, 4), - CH_DEVICE(0x5483, 4), - CH_DEVICE(0x5484, 4), - CH_DEVICE(0x5485, 4), - CH_DEVICE(0x5486, 4), - CH_DEVICE(0x5487, 4), - CH_DEVICE(0x5488, 4), - { 0, } -}; +/* Macros needed to support the PCI Device ID Table ... + */ +#define CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN \ + static struct pci_device_id cxgb4_pci_tbl[] = { +#define CH_PCI_DEVICE_ID_FUNCTION 0x4 + +/* Include PCI Device IDs for both PF4 and PF0-3 so our PCI probe() routine is + * called for both. + */ +#define CH_PCI_DEVICE_ID_FUNCTION2 0x0 + +#define CH_PCI_ID_TABLE_ENTRY(devid) \ + {PCI_VDEVICE(CHELSIO, (devid)), 4} + +#define CH_PCI_DEVICE_ID_TABLE_DEFINE_END \ + { 0, } \ + } + +#include "t4_pci_id_tbl.h" #define FW4_FNAME "cxgb4/t4fw.bin" #define FW5_FNAME "cxgb4/t5fw.bin" diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h new file mode 100644 index 00000000000..9e4f95a91fb --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h @@ -0,0 +1,160 @@ +/* + * This file is part of the Chelsio T4/T5 Ethernet driver for Linux. + * + * Copyright (c) 2003-2014 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __T4_PCI_ID_TBL_H__ +#define __T4_PCI_ID_TBL_H__ + +/* The code can defined cpp macros for creating a PCI Device ID Table. This is + * useful because it allows the PCI ID Table to be maintained in a single place. + * + * The macros are: + * + * CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN + * -- Used to start the definition of the PCI ID Table. + * + * CH_PCI_DEVICE_ID_FUNCTION + * -- The PCI Function Number to use in the PCI Device ID Table. "0" + * -- for drivers attaching to PF0-3, "4" for drivers attaching to PF4, + * -- "8" for drivers attaching to SR-IOV Virtual Functions, etc. + * + * CH_PCI_DEVICE_ID_FUNCTION2 [optional] + * -- If defined, create a PCI Device ID Table with both + * -- CH_PCI_DEVICE_ID_FUNCTION and CH_PCI_DEVICE_ID_FUNCTION2 populated. + * + * CH_PCI_ID_TABLE_ENTRY(DeviceID) + * -- Used for the individual PCI Device ID entries. Note that we will + * -- be adding a trailing comma (",") after all of the entries (and + * -- between the pairs of entries if CH_PCI_DEVICE_ID_FUNCTION2 is defined). + * + * CH_PCI_DEVICE_ID_TABLE_DEFINE_END + * -- Used to finish the definition of the PCI ID Table. Note that we + * -- will be adding a trailing semi-colon (";") here. + */ +#ifdef CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN + +#ifndef CH_PCI_DEVICE_ID_FUNCTION +#error CH_PCI_DEVICE_ID_FUNCTION not defined! +#endif +#ifndef CH_PCI_ID_TABLE_ENTRY +#error CH_PCI_ID_TABLE_ENTRY not defined! +#endif +#ifndef CH_PCI_DEVICE_ID_TABLE_DEFINE_END +#error CH_PCI_DEVICE_ID_TABLE_DEFINE_END not defined! +#endif + +/* T4 and later ASICs use a PCI Device ID scheme of 0xVFPP where: + * + * V = "4" for T4; "5" for T5, etc. + * F = "0" for PF 0..3; "4".."7" for PF4..7; and "8" for VFs + * PP = adapter product designation + * + * We use this consistency in order to create the proper PCI Device IDs + * for the specified CH_PCI_DEVICE_ID_FUNCTION. + */ +#ifndef CH_PCI_DEVICE_ID_FUNCTION2 +#define CH_PCI_ID_TABLE_FENTRY(devid) \ + CH_PCI_ID_TABLE_ENTRY((devid) | \ + ((CH_PCI_DEVICE_ID_FUNCTION) << 8)) +#else +#define CH_PCI_ID_TABLE_FENTRY(devid) \ + CH_PCI_ID_TABLE_ENTRY((devid) | \ + ((CH_PCI_DEVICE_ID_FUNCTION) << 8)), \ + CH_PCI_ID_TABLE_ENTRY((devid) | \ + ((CH_PCI_DEVICE_ID_FUNCTION2) << 8)) +#endif + +CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN + /* T4 adapters: + */ + CH_PCI_ID_TABLE_FENTRY(0x4000), /* T440-dbg */ + CH_PCI_ID_TABLE_FENTRY(0x4001), /* T420-cr */ + CH_PCI_ID_TABLE_FENTRY(0x4002), /* T422-cr */ + CH_PCI_ID_TABLE_FENTRY(0x4003), /* T440-cr */ + CH_PCI_ID_TABLE_FENTRY(0x4004), /* T420-bch */ + CH_PCI_ID_TABLE_FENTRY(0x4005), /* T440-bch */ + CH_PCI_ID_TABLE_FENTRY(0x4006), /* T440-ch */ + CH_PCI_ID_TABLE_FENTRY(0x4007), /* T420-so */ + CH_PCI_ID_TABLE_FENTRY(0x4008), /* T420-cx */ + CH_PCI_ID_TABLE_FENTRY(0x4009), /* T420-bt */ + CH_PCI_ID_TABLE_FENTRY(0x400a), /* T404-bt */ + CH_PCI_ID_TABLE_FENTRY(0x400b), /* B420-sr */ + CH_PCI_ID_TABLE_FENTRY(0x400c), /* B404-bt */ + CH_PCI_ID_TABLE_FENTRY(0x400d), /* T480-cr */ + CH_PCI_ID_TABLE_FENTRY(0x400e), /* T440-LP-cr */ + CH_PCI_ID_TABLE_FENTRY(0x4080), /* Custom T480-cr */ + CH_PCI_ID_TABLE_FENTRY(0x4081), /* Custom T440-cr */ + CH_PCI_ID_TABLE_FENTRY(0x4082), /* Custom T420-cr */ + CH_PCI_ID_TABLE_FENTRY(0x4083), /* Custom T420-xaui */ + CH_PCI_ID_TABLE_FENTRY(0x4084), /* Custom T440-cr */ + CH_PCI_ID_TABLE_FENTRY(0x4085), /* Custom T420-cr */ + CH_PCI_ID_TABLE_FENTRY(0x4086), /* Custom T440-bt */ + CH_PCI_ID_TABLE_FENTRY(0x4087), /* Custom T440-cr */ + CH_PCI_ID_TABLE_FENTRY(0x4088), /* Custom T440 2-xaui, 2-xfi */ + + /* T5 adapters: + */ + CH_PCI_ID_TABLE_FENTRY(0x5000), /* T580-dbg */ + CH_PCI_ID_TABLE_FENTRY(0x5001), /* T520-cr */ + CH_PCI_ID_TABLE_FENTRY(0x5002), /* T522-cr */ + CH_PCI_ID_TABLE_FENTRY(0x5003), /* T540-cr */ + CH_PCI_ID_TABLE_FENTRY(0x5004), /* T520-bch */ + CH_PCI_ID_TABLE_FENTRY(0x5005), /* T540-bch */ + CH_PCI_ID_TABLE_FENTRY(0x5006), /* T540-ch */ + CH_PCI_ID_TABLE_FENTRY(0x5007), /* T520-so */ + CH_PCI_ID_TABLE_FENTRY(0x5008), /* T520-cx */ + CH_PCI_ID_TABLE_FENTRY(0x5009), /* T520-bt */ + CH_PCI_ID_TABLE_FENTRY(0x500a), /* T504-bt */ + CH_PCI_ID_TABLE_FENTRY(0x500b), /* B520-sr */ + CH_PCI_ID_TABLE_FENTRY(0x500c), /* B504-bt */ + CH_PCI_ID_TABLE_FENTRY(0x500d), /* T580-cr */ + CH_PCI_ID_TABLE_FENTRY(0x500e), /* T540-LP-cr */ + CH_PCI_ID_TABLE_FENTRY(0x5010), /* T580-LP-cr */ + CH_PCI_ID_TABLE_FENTRY(0x5011), /* T520-LL-cr */ + CH_PCI_ID_TABLE_FENTRY(0x5012), /* T560-cr */ + CH_PCI_ID_TABLE_FENTRY(0x5013), /* T580-chr */ + CH_PCI_ID_TABLE_FENTRY(0x5014), /* T580-so */ + CH_PCI_ID_TABLE_FENTRY(0x5015), /* T502-bt */ + CH_PCI_ID_TABLE_FENTRY(0x5080), /* Custom T540-cr */ + CH_PCI_ID_TABLE_FENTRY(0x5081), /* Custom T540-LL-cr */ + CH_PCI_ID_TABLE_FENTRY(0x5082), /* Custom T504-cr */ + CH_PCI_ID_TABLE_FENTRY(0x5083), /* Custom T540-LP-CR */ + CH_PCI_ID_TABLE_FENTRY(0x5084), /* Custom T580-cr */ + CH_PCI_ID_TABLE_FENTRY(0x5085), /* Custom 3x T580-CR */ + CH_PCI_ID_TABLE_FENTRY(0x5086), /* Custom 2x T580-CR */ + CH_PCI_ID_TABLE_FENTRY(0x5087), /* Custom T580-CR */ + CH_PCI_ID_TABLE_FENTRY(0x5088), /* Custom T570-CR */ +CH_PCI_DEVICE_ID_TABLE_DEFINE_END; + +#endif /* CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN */ + +#endif /* __T4_PCI_ID_TBL_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index 099f7ce056f..ad88246a428 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -2908,67 +2908,18 @@ static void cxgb4vf_pci_shutdown(struct pci_dev *pdev) pci_set_drvdata(pdev, NULL); } -/* - * PCI Device registration data structures. - */ -#define CH_DEVICE(devid) \ - { PCI_VENDOR_ID_CHELSIO, devid, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 } - -static const struct pci_device_id cxgb4vf_pci_tbl[] = { - CH_DEVICE(0xb000), /* PE10K FPGA */ - CH_DEVICE(0x4801), /* T420-cr */ - CH_DEVICE(0x4802), /* T422-cr */ - CH_DEVICE(0x4803), /* T440-cr */ - CH_DEVICE(0x4804), /* T420-bch */ - CH_DEVICE(0x4805), /* T440-bch */ - CH_DEVICE(0x4806), /* T460-ch */ - CH_DEVICE(0x4807), /* T420-so */ - CH_DEVICE(0x4808), /* T420-cx */ - CH_DEVICE(0x4809), /* T420-bt */ - CH_DEVICE(0x480a), /* T404-bt */ - CH_DEVICE(0x480d), /* T480-cr */ - CH_DEVICE(0x480e), /* T440-lp-cr */ - CH_DEVICE(0x4880), - CH_DEVICE(0x4881), - CH_DEVICE(0x4882), - CH_DEVICE(0x4883), - CH_DEVICE(0x4884), - CH_DEVICE(0x4885), - CH_DEVICE(0x4886), - CH_DEVICE(0x4887), - CH_DEVICE(0x4888), - CH_DEVICE(0x5801), /* T520-cr */ - CH_DEVICE(0x5802), /* T522-cr */ - CH_DEVICE(0x5803), /* T540-cr */ - CH_DEVICE(0x5804), /* T520-bch */ - CH_DEVICE(0x5805), /* T540-bch */ - CH_DEVICE(0x5806), /* T540-ch */ - CH_DEVICE(0x5807), /* T520-so */ - CH_DEVICE(0x5808), /* T520-cx */ - CH_DEVICE(0x5809), /* T520-bt */ - CH_DEVICE(0x580a), /* T504-bt */ - CH_DEVICE(0x580b), /* T520-sr */ - CH_DEVICE(0x580c), /* T504-bt */ - CH_DEVICE(0x580d), /* T580-cr */ - CH_DEVICE(0x580e), /* T540-lp-cr */ - CH_DEVICE(0x580f), /* Amsterdam */ - CH_DEVICE(0x5810), /* T580-lp-cr */ - CH_DEVICE(0x5811), /* T520-lp-cr */ - CH_DEVICE(0x5812), /* T560-cr */ - CH_DEVICE(0x5813), /* T580-cr */ - CH_DEVICE(0x5814), /* T580-so-cr */ - CH_DEVICE(0x5815), /* T502-bt */ - CH_DEVICE(0x5880), - CH_DEVICE(0x5881), - CH_DEVICE(0x5882), - CH_DEVICE(0x5883), - CH_DEVICE(0x5884), - CH_DEVICE(0x5885), - CH_DEVICE(0x5886), - CH_DEVICE(0x5887), - CH_DEVICE(0x5888), - { 0, } -}; +/* Macros needed to support the PCI Device ID Table ... + */ +#define CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN \ + static struct pci_device_id cxgb4vf_pci_tbl[] = { +#define CH_PCI_DEVICE_ID_FUNCTION 0x8 + +#define CH_PCI_ID_TABLE_ENTRY(devid) \ + { PCI_VDEVICE(CHELSIO, (devid)), 0 } + +#define CH_PCI_DEVICE_ID_TABLE_DEFINE_END { 0, } } + +#include "../cxgb4/t4_pci_id_tbl.h" MODULE_DESCRIPTION(DRV_DESC); MODULE_AUTHOR("Chelsio Communications"); diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index 7aa9388a3aa..469691ad4a1 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -53,11 +53,13 @@ #define FEC_R_FSTART 0x150 /* FIFO receive start reg */ #define FEC_R_DES_START_1 0x160 /* Receive descriptor ring 1 */ #define FEC_X_DES_START_1 0x164 /* Transmit descriptor ring 1 */ +#define FEC_R_BUFF_SIZE_1 0x168 /* Maximum receive buff ring1 size */ #define FEC_R_DES_START_2 0x16c /* Receive descriptor ring 2 */ #define FEC_X_DES_START_2 0x170 /* Transmit descriptor ring 2 */ +#define FEC_R_BUFF_SIZE_2 0x174 /* Maximum receive buff ring2 size */ #define FEC_R_DES_START_0 0x180 /* Receive descriptor ring */ #define FEC_X_DES_START_0 0x184 /* Transmit descriptor ring */ -#define FEC_R_BUFF_SIZE 0x188 /* Maximum receive buff size */ +#define FEC_R_BUFF_SIZE_0 0x188 /* Maximum receive buff size */ #define FEC_R_FIFO_RSFL 0x190 /* Receive FIFO section full threshold */ #define FEC_R_FIFO_RSEM 0x194 /* Receive FIFO section empty threshold */ #define FEC_R_FIFO_RAEM 0x198 /* Receive FIFO almost empty threshold */ @@ -165,7 +167,9 @@ #define FEC_X_DES_START_0 0x3d4 /* Transmit descriptor ring */ #define FEC_X_DES_START_1 FEC_X_DES_START_0 #define FEC_X_DES_START_2 FEC_X_DES_START_0 -#define FEC_R_BUFF_SIZE 0x3d8 /* Maximum receive buff size */ +#define FEC_R_BUFF_SIZE_0 0x3d8 /* Maximum receive buff size */ +#define FEC_R_BUFF_SIZE_1 FEC_R_BUFF_SIZE_0 +#define FEC_R_BUFF_SIZE_2 FEC_R_BUFF_SIZE_0 #define FEC_FIFO_RAM 0x400 /* FIFO RAM buffer */ /* Not existed in real chip * Just for pass build. @@ -285,6 +289,9 @@ struct bufdesc_ex { #define FEC_X_DES_START(X) (((X) == 1) ? FEC_X_DES_START_1 : \ (((X) == 2) ? \ FEC_X_DES_START_2 : FEC_X_DES_START_0)) +#define FEC_R_BUFF_SIZE(X) (((X) == 1) ? FEC_R_BUFF_SIZE_1 : \ + (((X) == 2) ? \ + FEC_R_BUFF_SIZE_2 : FEC_R_BUFF_SIZE_0)) #define FEC_R_DES_ACTIVE(X) (((X) == 1) ? FEC_R_DES_ACTIVE_1 : \ (((X) == 2) ? \ FEC_R_DES_ACTIVE_2 : FEC_R_DES_ACTIVE_0)) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 1b6d26b0082..d2955ce24d0 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -867,6 +867,7 @@ static void fec_enet_enable_ring(struct net_device *ndev) for (i = 0; i < fep->num_rx_queues; i++) { rxq = fep->rx_queue[i]; writel(rxq->bd_dma, fep->hwp + FEC_R_DES_START(i)); + writel(PKT_MAXBLR_SIZE, fep->hwp + FEC_R_BUFF_SIZE(i)); /* enable DMA1/2 */ if (i) @@ -941,9 +942,6 @@ fec_restart(struct net_device *ndev) /* Clear any outstanding interrupt. */ writel(0xffc00000, fep->hwp + FEC_IEVENT); - /* Set maximum receive buffer size. */ - writel(PKT_MAXBLR_SIZE, fep->hwp + FEC_R_BUFF_SIZE); - fec_enet_bd_init(ndev); fec_enet_enable_ring(ndev); diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c index e157541b842..63dc0f95d05 100644 --- a/drivers/net/ethernet/realtek/8139too.c +++ b/drivers/net/ethernet/realtek/8139too.c @@ -112,6 +112,7 @@ #include <linux/io.h> #include <linux/uaccess.h> #include <linux/gfp.h> +#include <linux/if_vlan.h> #include <asm/irq.h> #define RTL8139_DRIVER_NAME DRV_NAME " Fast Ethernet driver " DRV_VERSION @@ -182,13 +183,13 @@ static int debug = -1; /* Number of Tx descriptor registers. */ #define NUM_TX_DESC 4 -/* max supported ethernet frame size -- must be at least (dev->mtu+14+4).*/ +/* max supported ethernet frame size -- must be at least (dev->mtu+18+4).*/ #define MAX_ETH_FRAME_SIZE 1792 /* max supported payload size */ -#define MAX_ETH_DATA_SIZE (MAX_ETH_FRAME_SIZE - ETH_HLEN - ETH_FCS_LEN) +#define MAX_ETH_DATA_SIZE (MAX_ETH_FRAME_SIZE - VLAN_ETH_HLEN - ETH_FCS_LEN) -/* Size of the Tx bounce buffers -- must be at least (dev->mtu+14+4). */ +/* Size of the Tx bounce buffers -- must be at least (dev->mtu+18+4). */ #define TX_BUF_SIZE MAX_ETH_FRAME_SIZE #define TX_BUF_TOT_LEN (TX_BUF_SIZE * NUM_TX_DESC) diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index 77ed74561e5..f9c87624a0a 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -59,6 +59,8 @@ #include <linux/of_device.h> #include <linux/of_gpio.h> #include <linux/of_net.h> +#include <linux/pm_runtime.h> + #include "smsc911x.h" #define SMSC_CHIPNAME "smsc911x" @@ -2338,6 +2340,9 @@ static int smsc911x_drv_remove(struct platform_device *pdev) free_netdev(dev); + pm_runtime_put(&pdev->dev); + pm_runtime_disable(&pdev->dev); + return 0; } @@ -2491,6 +2496,9 @@ static int smsc911x_drv_probe(struct platform_device *pdev) if (pdata->config.shift) pdata->ops = &shifted_smsc911x_ops; + pm_runtime_enable(&pdev->dev); + pm_runtime_get_sync(&pdev->dev); + retval = smsc911x_init(dev); if (retval < 0) goto out_disable_resources; @@ -2572,6 +2580,8 @@ out_unregister_netdev_5: out_free_irq: free_irq(dev->irq, dev); out_disable_resources: + pm_runtime_put(&pdev->dev); + pm_runtime_disable(&pdev->dev); (void)smsc911x_disable_resources(pdev); out_enable_resources_fail: smsc911x_free_resources(pdev); diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 6b463117dca..6fc834e4306 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -561,9 +561,7 @@ int netvsc_device_remove(struct hv_device *device) vmbus_close(device->channel); /* Release all resources */ - if (net_device->sub_cb_buf) - vfree(net_device->sub_cb_buf); - + vfree(net_device->sub_cb_buf); free_netvsc_device(net_device); return 0; } diff --git a/drivers/net/ipvlan/Makefile b/drivers/net/ipvlan/Makefile new file mode 100644 index 00000000000..df79910192d --- /dev/null +++ b/drivers/net/ipvlan/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for the Ethernet Ipvlan driver +# + +obj-$(CONFIG_IPVLAN) += ipvlan.o + +ipvlan-objs := ipvlan_core.o ipvlan_main.o diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h new file mode 100644 index 00000000000..ab3e7614ed7 --- /dev/null +++ b/drivers/net/ipvlan/ipvlan.h @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + */ +#ifndef __IPVLAN_H +#define __IPVLAN_H + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/rculist.h> +#include <linux/notifier.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/if_arp.h> +#include <linux/if_link.h> +#include <linux/if_vlan.h> +#include <linux/ip.h> +#include <linux/inetdevice.h> +#include <net/rtnetlink.h> +#include <net/gre.h> +#include <net/route.h> +#include <net/addrconf.h> + +#define IPVLAN_DRV "ipvlan" +#define IPV_DRV_VER "0.1" + +#define IPVLAN_HASH_SIZE (1 << BITS_PER_BYTE) +#define IPVLAN_HASH_MASK (IPVLAN_HASH_SIZE - 1) + +#define IPVLAN_MAC_FILTER_BITS 8 +#define IPVLAN_MAC_FILTER_SIZE (1 << IPVLAN_MAC_FILTER_BITS) +#define IPVLAN_MAC_FILTER_MASK (IPVLAN_MAC_FILTER_SIZE - 1) + +typedef enum { + IPVL_IPV6 = 0, + IPVL_ICMPV6, + IPVL_IPV4, + IPVL_ARP, +} ipvl_hdr_type; + +struct ipvl_pcpu_stats { + u64 rx_pkts; + u64 rx_bytes; + u64 rx_mcast; + u64 tx_pkts; + u64 tx_bytes; + struct u64_stats_sync syncp; + u32 rx_errs; + u32 tx_drps; +}; + +struct ipvl_port; + +struct ipvl_dev { + struct net_device *dev; + struct list_head pnode; + struct ipvl_port *port; + struct net_device *phy_dev; + struct list_head addrs; + int ipv4cnt; + int ipv6cnt; + struct ipvl_pcpu_stats *pcpu_stats; + DECLARE_BITMAP(mac_filters, IPVLAN_MAC_FILTER_SIZE); + netdev_features_t sfeatures; + u32 msg_enable; + u16 mtu_adj; +}; + +struct ipvl_addr { + struct ipvl_dev *master; /* Back pointer to master */ + union { + struct in6_addr ip6; /* IPv6 address on logical interface */ + struct in_addr ip4; /* IPv4 address on logical interface */ + } ipu; +#define ip6addr ipu.ip6 +#define ip4addr ipu.ip4 + struct hlist_node hlnode; /* Hash-table linkage */ + struct list_head anode; /* logical-interface linkage */ + struct rcu_head rcu; + ipvl_hdr_type atype; +}; + +struct ipvl_port { + struct net_device *dev; + struct hlist_head hlhead[IPVLAN_HASH_SIZE]; + struct list_head ipvlans; + struct rcu_head rcu; + int count; + u16 mode; +}; + +static inline struct ipvl_port *ipvlan_port_get_rcu(const struct net_device *d) +{ + return rcu_dereference(d->rx_handler_data); +} + +static inline struct ipvl_port *ipvlan_port_get_rtnl(const struct net_device *d) +{ + return rtnl_dereference(d->rx_handler_data); +} + +static inline bool ipvlan_dev_master(struct net_device *d) +{ + return d->priv_flags & IFF_IPVLAN_MASTER; +} + +static inline bool ipvlan_dev_slave(struct net_device *d) +{ + return d->priv_flags & IFF_IPVLAN_SLAVE; +} + +void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev); +void ipvlan_set_port_mode(struct ipvl_port *port, u32 nval); +void ipvlan_init_secret(void); +unsigned int ipvlan_mac_hash(const unsigned char *addr); +rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb); +int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev); +void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr); +bool ipvlan_addr_busy(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6); +struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port, + const void *iaddr, bool is_v6); +void ipvlan_ht_addr_del(struct ipvl_addr *addr, bool sync); +#endif /* __IPVLAN_H */ diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c new file mode 100644 index 00000000000..a14d8778324 --- /dev/null +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -0,0 +1,607 @@ +/* Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + */ + +#include "ipvlan.h" + +static u32 ipvlan_jhash_secret; + +void ipvlan_init_secret(void) +{ + net_get_random_once(&ipvlan_jhash_secret, sizeof(ipvlan_jhash_secret)); +} + +static void ipvlan_count_rx(const struct ipvl_dev *ipvlan, + unsigned int len, bool success, bool mcast) +{ + if (!ipvlan) + return; + + if (likely(success)) { + struct ipvl_pcpu_stats *pcptr; + + pcptr = this_cpu_ptr(ipvlan->pcpu_stats); + u64_stats_update_begin(&pcptr->syncp); + pcptr->rx_pkts++; + pcptr->rx_bytes += len; + if (mcast) + pcptr->rx_mcast++; + u64_stats_update_end(&pcptr->syncp); + } else { + this_cpu_inc(ipvlan->pcpu_stats->rx_errs); + } +} + +static u8 ipvlan_get_v6_hash(const void *iaddr) +{ + const struct in6_addr *ip6_addr = iaddr; + + return __ipv6_addr_jhash(ip6_addr, ipvlan_jhash_secret) & + IPVLAN_HASH_MASK; +} + +static u8 ipvlan_get_v4_hash(const void *iaddr) +{ + const struct in_addr *ip4_addr = iaddr; + + return jhash_1word(ip4_addr->s_addr, ipvlan_jhash_secret) & + IPVLAN_HASH_MASK; +} + +struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port, + const void *iaddr, bool is_v6) +{ + struct ipvl_addr *addr; + u8 hash; + + hash = is_v6 ? ipvlan_get_v6_hash(iaddr) : + ipvlan_get_v4_hash(iaddr); + hlist_for_each_entry_rcu(addr, &port->hlhead[hash], hlnode) { + if (is_v6 && addr->atype == IPVL_IPV6 && + ipv6_addr_equal(&addr->ip6addr, iaddr)) + return addr; + else if (!is_v6 && addr->atype == IPVL_IPV4 && + addr->ip4addr.s_addr == + ((struct in_addr *)iaddr)->s_addr) + return addr; + } + return NULL; +} + +void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr) +{ + struct ipvl_port *port = ipvlan->port; + u8 hash; + + hash = (addr->atype == IPVL_IPV6) ? + ipvlan_get_v6_hash(&addr->ip6addr) : + ipvlan_get_v4_hash(&addr->ip4addr); + hlist_add_head_rcu(&addr->hlnode, &port->hlhead[hash]); +} + +void ipvlan_ht_addr_del(struct ipvl_addr *addr, bool sync) +{ + hlist_del_rcu(&addr->hlnode); + if (sync) + synchronize_rcu(); +} + +bool ipvlan_addr_busy(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6) +{ + struct ipvl_port *port = ipvlan->port; + struct ipvl_addr *addr; + + list_for_each_entry(addr, &ipvlan->addrs, anode) { + if ((is_v6 && addr->atype == IPVL_IPV6 && + ipv6_addr_equal(&addr->ip6addr, iaddr)) || + (!is_v6 && addr->atype == IPVL_IPV4 && + addr->ip4addr.s_addr == ((struct in_addr *)iaddr)->s_addr)) + return true; + } + + if (ipvlan_ht_addr_lookup(port, iaddr, is_v6)) + return true; + + return false; +} + +static void *ipvlan_get_L3_hdr(struct sk_buff *skb, int *type) +{ + void *lyr3h = NULL; + + switch (skb->protocol) { + case htons(ETH_P_ARP): { + struct arphdr *arph; + + if (unlikely(!pskb_may_pull(skb, sizeof(*arph)))) + return NULL; + + arph = arp_hdr(skb); + *type = IPVL_ARP; + lyr3h = arph; + break; + } + case htons(ETH_P_IP): { + u32 pktlen; + struct iphdr *ip4h; + + if (unlikely(!pskb_may_pull(skb, sizeof(*ip4h)))) + return NULL; + + ip4h = ip_hdr(skb); + pktlen = ntohs(ip4h->tot_len); + if (ip4h->ihl < 5 || ip4h->version != 4) + return NULL; + if (skb->len < pktlen || pktlen < (ip4h->ihl * 4)) + return NULL; + + *type = IPVL_IPV4; + lyr3h = ip4h; + break; + } + case htons(ETH_P_IPV6): { + struct ipv6hdr *ip6h; + + if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h)))) + return NULL; + + ip6h = ipv6_hdr(skb); + if (ip6h->version != 6) + return NULL; + + *type = IPVL_IPV6; + lyr3h = ip6h; + /* Only Neighbour Solicitation pkts need different treatment */ + if (ipv6_addr_any(&ip6h->saddr) && + ip6h->nexthdr == NEXTHDR_ICMP) { + *type = IPVL_ICMPV6; + lyr3h = ip6h + 1; + } + break; + } + default: + return NULL; + } + + return lyr3h; +} + +unsigned int ipvlan_mac_hash(const unsigned char *addr) +{ + u32 hash = jhash_1word(__get_unaligned_cpu32(addr+2), + ipvlan_jhash_secret); + + return hash & IPVLAN_MAC_FILTER_MASK; +} + +static void ipvlan_multicast_frame(struct ipvl_port *port, struct sk_buff *skb, + const struct ipvl_dev *in_dev, bool local) +{ + struct ethhdr *eth = eth_hdr(skb); + struct ipvl_dev *ipvlan; + struct sk_buff *nskb; + unsigned int len; + unsigned int mac_hash; + int ret; + + if (skb->protocol == htons(ETH_P_PAUSE)) + return; + + list_for_each_entry(ipvlan, &port->ipvlans, pnode) { + if (local && (ipvlan == in_dev)) + continue; + + mac_hash = ipvlan_mac_hash(eth->h_dest); + if (!test_bit(mac_hash, ipvlan->mac_filters)) + continue; + + ret = NET_RX_DROP; + len = skb->len + ETH_HLEN; + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) + goto mcast_acct; + + if (ether_addr_equal(eth->h_dest, ipvlan->phy_dev->broadcast)) + nskb->pkt_type = PACKET_BROADCAST; + else + nskb->pkt_type = PACKET_MULTICAST; + + nskb->dev = ipvlan->dev; + if (local) + ret = dev_forward_skb(ipvlan->dev, nskb); + else + ret = netif_rx(nskb); +mcast_acct: + ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true); + } + + /* Locally generated? ...Forward a copy to the main-device as + * well. On the RX side we'll ignore it (wont give it to any + * of the virtual devices. + */ + if (local) { + nskb = skb_clone(skb, GFP_ATOMIC); + if (nskb) { + if (ether_addr_equal(eth->h_dest, port->dev->broadcast)) + nskb->pkt_type = PACKET_BROADCAST; + else + nskb->pkt_type = PACKET_MULTICAST; + + dev_forward_skb(port->dev, nskb); + } + } +} + +static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff *skb, + bool local) +{ + struct ipvl_dev *ipvlan = addr->master; + struct net_device *dev = ipvlan->dev; + unsigned int len; + rx_handler_result_t ret = RX_HANDLER_CONSUMED; + bool success = false; + + len = skb->len + ETH_HLEN; + if (unlikely(!(dev->flags & IFF_UP))) { + kfree_skb(skb); + goto out; + } + + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + goto out; + + skb->dev = dev; + skb->pkt_type = PACKET_HOST; + + if (local) { + if (dev_forward_skb(ipvlan->dev, skb) == NET_RX_SUCCESS) + success = true; + } else { + ret = RX_HANDLER_ANOTHER; + success = true; + } + +out: + ipvlan_count_rx(ipvlan, len, success, false); + return ret; +} + +static struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port, + void *lyr3h, int addr_type, + bool use_dest) +{ + struct ipvl_addr *addr = NULL; + + if (addr_type == IPVL_IPV6) { + struct ipv6hdr *ip6h; + struct in6_addr *i6addr; + + ip6h = (struct ipv6hdr *)lyr3h; + i6addr = use_dest ? &ip6h->daddr : &ip6h->saddr; + addr = ipvlan_ht_addr_lookup(port, i6addr, true); + } else if (addr_type == IPVL_ICMPV6) { + struct nd_msg *ndmh; + struct in6_addr *i6addr; + + /* Make sure that the NeighborSolicitation ICMPv6 packets + * are handled to avoid DAD issue. + */ + ndmh = (struct nd_msg *)lyr3h; + if (ndmh->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) { + i6addr = &ndmh->target; + addr = ipvlan_ht_addr_lookup(port, i6addr, true); + } + } else if (addr_type == IPVL_IPV4) { + struct iphdr *ip4h; + __be32 *i4addr; + + ip4h = (struct iphdr *)lyr3h; + i4addr = use_dest ? &ip4h->daddr : &ip4h->saddr; + addr = ipvlan_ht_addr_lookup(port, i4addr, false); + } else if (addr_type == IPVL_ARP) { + struct arphdr *arph; + unsigned char *arp_ptr; + __be32 dip; + + arph = (struct arphdr *)lyr3h; + arp_ptr = (unsigned char *)(arph + 1); + if (use_dest) + arp_ptr += (2 * port->dev->addr_len) + 4; + else + arp_ptr += port->dev->addr_len; + + memcpy(&dip, arp_ptr, 4); + addr = ipvlan_ht_addr_lookup(port, &dip, false); + } + + return addr; +} + +static int ipvlan_process_v4_outbound(struct sk_buff *skb) +{ + const struct iphdr *ip4h = ip_hdr(skb); + struct net_device *dev = skb->dev; + struct rtable *rt; + int err, ret = NET_XMIT_DROP; + struct flowi4 fl4 = { + .flowi4_oif = dev->iflink, + .flowi4_tos = RT_TOS(ip4h->tos), + .flowi4_flags = FLOWI_FLAG_ANYSRC, + .daddr = ip4h->daddr, + .saddr = ip4h->saddr, + }; + + rt = ip_route_output_flow(dev_net(dev), &fl4, NULL); + if (IS_ERR(rt)) + goto err; + + if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) { + ip_rt_put(rt); + goto err; + } + skb_dst_drop(skb); + skb_dst_set(skb, &rt->dst); + err = ip_local_out(skb); + if (unlikely(net_xmit_eval(err))) + dev->stats.tx_errors++; + else + ret = NET_XMIT_SUCCESS; + goto out; +err: + dev->stats.tx_errors++; + kfree_skb(skb); +out: + return ret; +} + +static int ipvlan_process_v6_outbound(struct sk_buff *skb) +{ + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + struct net_device *dev = skb->dev; + struct dst_entry *dst; + int err, ret = NET_XMIT_DROP; + struct flowi6 fl6 = { + .flowi6_iif = skb->dev->ifindex, + .daddr = ip6h->daddr, + .saddr = ip6h->saddr, + .flowi6_flags = FLOWI_FLAG_ANYSRC, + .flowlabel = ip6_flowinfo(ip6h), + .flowi6_mark = skb->mark, + .flowi6_proto = ip6h->nexthdr, + }; + + dst = ip6_route_output(dev_net(dev), NULL, &fl6); + if (IS_ERR(dst)) + goto err; + + skb_dst_drop(skb); + skb_dst_set(skb, dst); + err = ip6_local_out(skb); + if (unlikely(net_xmit_eval(err))) + dev->stats.tx_errors++; + else + ret = NET_XMIT_SUCCESS; + goto out; +err: + dev->stats.tx_errors++; + kfree_skb(skb); +out: + return ret; +} + +static int ipvlan_process_outbound(struct sk_buff *skb, + const struct ipvl_dev *ipvlan) +{ + struct ethhdr *ethh = eth_hdr(skb); + int ret = NET_XMIT_DROP; + + /* In this mode we dont care about multicast and broadcast traffic */ + if (is_multicast_ether_addr(ethh->h_dest)) { + pr_warn_ratelimited("Dropped {multi|broad}cast of type= [%x]\n", + ntohs(skb->protocol)); + kfree_skb(skb); + goto out; + } + + /* The ipvlan is a pseudo-L2 device, so the packets that we receive + * will have L2; which need to discarded and processed further + * in the net-ns of the main-device. + */ + if (skb_mac_header_was_set(skb)) { + skb_pull(skb, sizeof(*ethh)); + skb->mac_header = (typeof(skb->mac_header))~0U; + skb_reset_network_header(skb); + } + + if (skb->protocol == htons(ETH_P_IPV6)) + ret = ipvlan_process_v6_outbound(skb); + else if (skb->protocol == htons(ETH_P_IP)) + ret = ipvlan_process_v4_outbound(skb); + else { + pr_warn_ratelimited("Dropped outbound packet type=%x\n", + ntohs(skb->protocol)); + kfree_skb(skb); + } +out: + return ret; +} + +static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev) +{ + const struct ipvl_dev *ipvlan = netdev_priv(dev); + void *lyr3h; + struct ipvl_addr *addr; + int addr_type; + + lyr3h = ipvlan_get_L3_hdr(skb, &addr_type); + if (!lyr3h) + goto out; + + addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true); + if (addr) + return ipvlan_rcv_frame(addr, skb, true); + +out: + skb->dev = ipvlan->phy_dev; + return ipvlan_process_outbound(skb, ipvlan); +} + +static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev) +{ + const struct ipvl_dev *ipvlan = netdev_priv(dev); + struct ethhdr *eth = eth_hdr(skb); + struct ipvl_addr *addr; + void *lyr3h; + int addr_type; + + if (ether_addr_equal(eth->h_dest, eth->h_source)) { + lyr3h = ipvlan_get_L3_hdr(skb, &addr_type); + if (lyr3h) { + addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true); + if (addr) + return ipvlan_rcv_frame(addr, skb, true); + } + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + return NET_XMIT_DROP; + + /* Packet definitely does not belong to any of the + * virtual devices, but the dest is local. So forward + * the skb for the main-dev. At the RX side we just return + * RX_PASS for it to be processed further on the stack. + */ + return dev_forward_skb(ipvlan->phy_dev, skb); + + } else if (is_multicast_ether_addr(eth->h_dest)) { + u8 ip_summed = skb->ip_summed; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + ipvlan_multicast_frame(ipvlan->port, skb, ipvlan, true); + skb->ip_summed = ip_summed; + } + + skb->dev = ipvlan->phy_dev; + return dev_queue_xmit(skb); +} + +int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct ipvl_dev *ipvlan = netdev_priv(dev); + struct ipvl_port *port = ipvlan_port_get_rcu(ipvlan->phy_dev); + + if (!port) + goto out; + + if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr)))) + goto out; + + switch(port->mode) { + case IPVLAN_MODE_L2: + return ipvlan_xmit_mode_l2(skb, dev); + case IPVLAN_MODE_L3: + return ipvlan_xmit_mode_l3(skb, dev); + } + + /* Should not reach here */ + WARN_ONCE(true, "ipvlan_queue_xmit() called for mode = [%hx]\n", + port->mode); +out: + kfree_skb(skb); + return NET_XMIT_DROP; +} + +static bool ipvlan_external_frame(struct sk_buff *skb, struct ipvl_port *port) +{ + struct ethhdr *eth = eth_hdr(skb); + struct ipvl_addr *addr; + void *lyr3h; + int addr_type; + + if (ether_addr_equal(eth->h_source, skb->dev->dev_addr)) { + lyr3h = ipvlan_get_L3_hdr(skb, &addr_type); + if (!lyr3h) + return true; + + addr = ipvlan_addr_lookup(port, lyr3h, addr_type, false); + if (addr) + return false; + } + + return true; +} + +static rx_handler_result_t ipvlan_handle_mode_l3(struct sk_buff **pskb, + struct ipvl_port *port) +{ + void *lyr3h; + int addr_type; + struct ipvl_addr *addr; + struct sk_buff *skb = *pskb; + rx_handler_result_t ret = RX_HANDLER_PASS; + + lyr3h = ipvlan_get_L3_hdr(skb, &addr_type); + if (!lyr3h) + goto out; + + addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true); + if (addr) + ret = ipvlan_rcv_frame(addr, skb, false); + +out: + return ret; +} + +static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb, + struct ipvl_port *port) +{ + struct sk_buff *skb = *pskb; + struct ethhdr *eth = eth_hdr(skb); + rx_handler_result_t ret = RX_HANDLER_PASS; + void *lyr3h; + int addr_type; + + if (is_multicast_ether_addr(eth->h_dest)) { + if (ipvlan_external_frame(skb, port)) + ipvlan_multicast_frame(port, skb, NULL, false); + } else { + struct ipvl_addr *addr; + + lyr3h = ipvlan_get_L3_hdr(skb, &addr_type); + if (!lyr3h) + return ret; + + addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true); + if (addr) + ret = ipvlan_rcv_frame(addr, skb, false); + } + + return ret; +} + +rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb) +{ + struct sk_buff *skb = *pskb; + struct ipvl_port *port = ipvlan_port_get_rcu(skb->dev); + + if (!port) + return RX_HANDLER_PASS; + + switch (port->mode) { + case IPVLAN_MODE_L2: + return ipvlan_handle_mode_l2(pskb, port); + case IPVLAN_MODE_L3: + return ipvlan_handle_mode_l3(pskb, port); + } + + /* Should not reach here */ + WARN_ONCE(true, "ipvlan_handle_frame() called for mode = [%hx]\n", + port->mode); + kfree_skb(skb); + return NET_RX_DROP; +} diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c new file mode 100644 index 00000000000..c3df84bd285 --- /dev/null +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -0,0 +1,789 @@ +/* Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + */ + +#include "ipvlan.h" + +void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev) +{ + ipvlan->dev->mtu = dev->mtu - ipvlan->mtu_adj; +} + +void ipvlan_set_port_mode(struct ipvl_port *port, u32 nval) +{ + struct ipvl_dev *ipvlan; + + if (port->mode != nval) { + list_for_each_entry(ipvlan, &port->ipvlans, pnode) { + if (nval == IPVLAN_MODE_L3) + ipvlan->dev->flags |= IFF_NOARP; + else + ipvlan->dev->flags &= ~IFF_NOARP; + } + port->mode = nval; + } +} + +static int ipvlan_port_create(struct net_device *dev) +{ + struct ipvl_port *port; + int err, idx; + + if (dev->type != ARPHRD_ETHER || dev->flags & IFF_LOOPBACK) { + netdev_err(dev, "Master is either lo or non-ether device\n"); + return -EINVAL; + } + port = kzalloc(sizeof(struct ipvl_port), GFP_KERNEL); + if (!port) + return -ENOMEM; + + port->dev = dev; + port->mode = IPVLAN_MODE_L3; + INIT_LIST_HEAD(&port->ipvlans); + for (idx = 0; idx < IPVLAN_HASH_SIZE; idx++) + INIT_HLIST_HEAD(&port->hlhead[idx]); + + err = netdev_rx_handler_register(dev, ipvlan_handle_frame, port); + if (err) + goto err; + + dev->priv_flags |= IFF_IPVLAN_MASTER; + return 0; + +err: + kfree_rcu(port, rcu); + return err; +} + +static void ipvlan_port_destroy(struct net_device *dev) +{ + struct ipvl_port *port = ipvlan_port_get_rtnl(dev); + + dev->priv_flags &= ~IFF_IPVLAN_MASTER; + netdev_rx_handler_unregister(dev); + kfree_rcu(port, rcu); +} + +/* ipvlan network devices have devices nesting below it and are a special + * "super class" of normal network devices; split their locks off into a + * separate class since they always nest. + */ +static struct lock_class_key ipvlan_netdev_xmit_lock_key; +static struct lock_class_key ipvlan_netdev_addr_lock_key; + +#define IPVLAN_FEATURES \ + (NETIF_F_SG | NETIF_F_ALL_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ + NETIF_F_GSO | NETIF_F_TSO | NETIF_F_UFO | NETIF_F_GSO_ROBUST | \ + NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \ + NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER) + +#define IPVLAN_STATE_MASK \ + ((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT)) + +static void ipvlan_set_lockdep_class_one(struct net_device *dev, + struct netdev_queue *txq, + void *_unused) +{ + lockdep_set_class(&txq->_xmit_lock, &ipvlan_netdev_xmit_lock_key); +} + +static void ipvlan_set_lockdep_class(struct net_device *dev) +{ + lockdep_set_class(&dev->addr_list_lock, &ipvlan_netdev_addr_lock_key); + netdev_for_each_tx_queue(dev, ipvlan_set_lockdep_class_one, NULL); +} + +static int ipvlan_init(struct net_device *dev) +{ + struct ipvl_dev *ipvlan = netdev_priv(dev); + const struct net_device *phy_dev = ipvlan->phy_dev; + + dev->state = (dev->state & ~IPVLAN_STATE_MASK) | + (phy_dev->state & IPVLAN_STATE_MASK); + dev->features = phy_dev->features & IPVLAN_FEATURES; + dev->features |= NETIF_F_LLTX; + dev->gso_max_size = phy_dev->gso_max_size; + dev->iflink = phy_dev->ifindex; + dev->hard_header_len = phy_dev->hard_header_len; + + ipvlan_set_lockdep_class(dev); + + ipvlan->pcpu_stats = alloc_percpu(struct ipvl_pcpu_stats); + if (!ipvlan->pcpu_stats) + return -ENOMEM; + + return 0; +} + +static void ipvlan_uninit(struct net_device *dev) +{ + struct ipvl_dev *ipvlan = netdev_priv(dev); + struct ipvl_port *port = ipvlan->port; + + if (ipvlan->pcpu_stats) + free_percpu(ipvlan->pcpu_stats); + + port->count -= 1; + if (!port->count) + ipvlan_port_destroy(port->dev); +} + +static int ipvlan_open(struct net_device *dev) +{ + struct ipvl_dev *ipvlan = netdev_priv(dev); + struct net_device *phy_dev = ipvlan->phy_dev; + struct ipvl_addr *addr; + + if (ipvlan->port->mode == IPVLAN_MODE_L3) + dev->flags |= IFF_NOARP; + else + dev->flags &= ~IFF_NOARP; + + if (ipvlan->ipv6cnt > 0 || ipvlan->ipv4cnt > 0) { + list_for_each_entry(addr, &ipvlan->addrs, anode) + ipvlan_ht_addr_add(ipvlan, addr); + } + return dev_uc_add(phy_dev, phy_dev->dev_addr); +} + +static int ipvlan_stop(struct net_device *dev) +{ + struct ipvl_dev *ipvlan = netdev_priv(dev); + struct net_device *phy_dev = ipvlan->phy_dev; + struct ipvl_addr *addr; + + dev_uc_unsync(phy_dev, dev); + dev_mc_unsync(phy_dev, dev); + + dev_uc_del(phy_dev, phy_dev->dev_addr); + + if (ipvlan->ipv6cnt > 0 || ipvlan->ipv4cnt > 0) { + list_for_each_entry(addr, &ipvlan->addrs, anode) + ipvlan_ht_addr_del(addr, !dev->dismantle); + } + return 0; +} + +netdev_tx_t ipvlan_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + const struct ipvl_dev *ipvlan = netdev_priv(dev); + int skblen = skb->len; + int ret; + + ret = ipvlan_queue_xmit(skb, dev); + if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { + struct ipvl_pcpu_stats *pcptr; + + pcptr = this_cpu_ptr(ipvlan->pcpu_stats); + + u64_stats_update_begin(&pcptr->syncp); + pcptr->tx_pkts++; + pcptr->tx_bytes += skblen; + u64_stats_update_end(&pcptr->syncp); + } else { + this_cpu_inc(ipvlan->pcpu_stats->tx_drps); + } + return ret; +} + +static netdev_features_t ipvlan_fix_features(struct net_device *dev, + netdev_features_t features) +{ + struct ipvl_dev *ipvlan = netdev_priv(dev); + + return features & (ipvlan->sfeatures | ~IPVLAN_FEATURES); +} + +static void ipvlan_change_rx_flags(struct net_device *dev, int change) +{ + struct ipvl_dev *ipvlan = netdev_priv(dev); + struct net_device *phy_dev = ipvlan->phy_dev; + + if (change & IFF_ALLMULTI) + dev_set_allmulti(phy_dev, dev->flags & IFF_ALLMULTI? 1 : -1); +} + +static void ipvlan_set_broadcast_mac_filter(struct ipvl_dev *ipvlan, bool set) +{ + struct net_device *dev = ipvlan->dev; + unsigned int hashbit = ipvlan_mac_hash(dev->broadcast); + + if (set && !test_bit(hashbit, ipvlan->mac_filters)) + __set_bit(hashbit, ipvlan->mac_filters); + else if (!set && test_bit(hashbit, ipvlan->mac_filters)) + __clear_bit(hashbit, ipvlan->mac_filters); +} + +static void ipvlan_set_multicast_mac_filter(struct net_device *dev) +{ + struct ipvl_dev *ipvlan = netdev_priv(dev); + + if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) { + bitmap_fill(ipvlan->mac_filters, IPVLAN_MAC_FILTER_SIZE); + } else { + struct netdev_hw_addr *ha; + DECLARE_BITMAP(mc_filters, IPVLAN_MAC_FILTER_SIZE); + + bitmap_zero(mc_filters, IPVLAN_MAC_FILTER_SIZE); + netdev_for_each_mc_addr(ha, dev) + __set_bit(ipvlan_mac_hash(ha->addr), mc_filters); + + bitmap_copy(ipvlan->mac_filters, mc_filters, + IPVLAN_MAC_FILTER_SIZE); + } + dev_uc_sync(ipvlan->phy_dev, dev); + dev_mc_sync(ipvlan->phy_dev, dev); +} + +static struct rtnl_link_stats64 *ipvlan_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *s) +{ + struct ipvl_dev *ipvlan = netdev_priv(dev); + + if (ipvlan->pcpu_stats) { + struct ipvl_pcpu_stats *pcptr; + u64 rx_pkts, rx_bytes, rx_mcast, tx_pkts, tx_bytes; + u32 rx_errs = 0, tx_drps = 0; + u32 strt; + int idx; + + for_each_possible_cpu(idx) { + pcptr = per_cpu_ptr(ipvlan->pcpu_stats, idx); + do { + strt= u64_stats_fetch_begin_irq(&pcptr->syncp); + rx_pkts = pcptr->rx_pkts; + rx_bytes = pcptr->rx_bytes; + rx_mcast = pcptr->rx_mcast; + tx_pkts = pcptr->tx_pkts; + tx_bytes = pcptr->tx_bytes; + } while (u64_stats_fetch_retry_irq(&pcptr->syncp, + strt)); + + s->rx_packets += rx_pkts; + s->rx_bytes += rx_bytes; + s->multicast += rx_mcast; + s->tx_packets += tx_pkts; + s->tx_bytes += tx_bytes; + + /* u32 values are updated without syncp protection. */ + rx_errs += pcptr->rx_errs; + tx_drps += pcptr->tx_drps; + } + s->rx_errors = rx_errs; + s->rx_dropped = rx_errs; + s->tx_dropped = tx_drps; + } + return s; +} + +static int ipvlan_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) +{ + struct ipvl_dev *ipvlan = netdev_priv(dev); + struct net_device *phy_dev = ipvlan->phy_dev; + + return vlan_vid_add(phy_dev, proto, vid); +} + +static int ipvlan_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, + u16 vid) +{ + struct ipvl_dev *ipvlan = netdev_priv(dev); + struct net_device *phy_dev = ipvlan->phy_dev; + + vlan_vid_del(phy_dev, proto, vid); + return 0; +} + +static const struct net_device_ops ipvlan_netdev_ops = { + .ndo_init = ipvlan_init, + .ndo_uninit = ipvlan_uninit, + .ndo_open = ipvlan_open, + .ndo_stop = ipvlan_stop, + .ndo_start_xmit = ipvlan_start_xmit, + .ndo_fix_features = ipvlan_fix_features, + .ndo_change_rx_flags = ipvlan_change_rx_flags, + .ndo_set_rx_mode = ipvlan_set_multicast_mac_filter, + .ndo_get_stats64 = ipvlan_get_stats64, + .ndo_vlan_rx_add_vid = ipvlan_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = ipvlan_vlan_rx_kill_vid, +}; + +static int ipvlan_hard_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, const void *daddr, + const void *saddr, unsigned len) +{ + const struct ipvl_dev *ipvlan = netdev_priv(dev); + struct net_device *phy_dev = ipvlan->phy_dev; + + /* TODO Probably use a different field than dev_addr so that the + * mac-address on the virtual device is portable and can be carried + * while the packets use the mac-addr on the physical device. + */ + return dev_hard_header(skb, phy_dev, type, daddr, + saddr ? : dev->dev_addr, len); +} + +static const struct header_ops ipvlan_header_ops = { + .create = ipvlan_hard_header, + .rebuild = eth_rebuild_header, + .parse = eth_header_parse, + .cache = eth_header_cache, + .cache_update = eth_header_cache_update, +}; + +static int ipvlan_ethtool_get_settings(struct net_device *dev, + struct ethtool_cmd *cmd) +{ + const struct ipvl_dev *ipvlan = netdev_priv(dev); + + return __ethtool_get_settings(ipvlan->phy_dev, cmd); +} + +static void ipvlan_ethtool_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + strlcpy(drvinfo->driver, IPVLAN_DRV, sizeof(drvinfo->driver)); + strlcpy(drvinfo->version, IPV_DRV_VER, sizeof(drvinfo->version)); +} + +static u32 ipvlan_ethtool_get_msglevel(struct net_device *dev) +{ + const struct ipvl_dev *ipvlan = netdev_priv(dev); + + return ipvlan->msg_enable; +} + +static void ipvlan_ethtool_set_msglevel(struct net_device *dev, u32 value) +{ + struct ipvl_dev *ipvlan = netdev_priv(dev); + + ipvlan->msg_enable = value; +} + +static const struct ethtool_ops ipvlan_ethtool_ops = { + .get_link = ethtool_op_get_link, + .get_settings = ipvlan_ethtool_get_settings, + .get_drvinfo = ipvlan_ethtool_get_drvinfo, + .get_msglevel = ipvlan_ethtool_get_msglevel, + .set_msglevel = ipvlan_ethtool_set_msglevel, +}; + +static int ipvlan_nl_changelink(struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct ipvl_dev *ipvlan = netdev_priv(dev); + struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev); + + if (data && data[IFLA_IPVLAN_MODE]) { + u16 nmode = nla_get_u16(data[IFLA_IPVLAN_MODE]); + + ipvlan_set_port_mode(port, nmode); + } + return 0; +} + +static size_t ipvlan_nl_getsize(const struct net_device *dev) +{ + return (0 + + nla_total_size(2) /* IFLA_IPVLAN_MODE */ + ); +} + +static int ipvlan_nl_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + if (data && data[IFLA_IPVLAN_MODE]) { + u16 mode = nla_get_u16(data[IFLA_IPVLAN_MODE]); + + if (mode < IPVLAN_MODE_L2 || mode >= IPVLAN_MODE_MAX) + return -EINVAL; + } + return 0; +} + +static int ipvlan_nl_fillinfo(struct sk_buff *skb, + const struct net_device *dev) +{ + struct ipvl_dev *ipvlan = netdev_priv(dev); + struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev); + int ret = -EINVAL; + + if (!port) + goto err; + + ret = -EMSGSIZE; + if (nla_put_u16(skb, IFLA_IPVLAN_MODE, port->mode)) + goto err; + + return 0; + +err: + return ret; +} + +static int ipvlan_link_new(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct ipvl_dev *ipvlan = netdev_priv(dev); + struct ipvl_port *port; + struct net_device *phy_dev; + int err; + + if (!tb[IFLA_LINK]) + return -EINVAL; + + phy_dev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); + if (!phy_dev) + return -ENODEV; + + if (ipvlan_dev_slave(phy_dev)) { + struct ipvl_dev *tmp = netdev_priv(phy_dev); + + phy_dev = tmp->phy_dev; + } else if (!ipvlan_dev_master(phy_dev)) { + err = ipvlan_port_create(phy_dev); + if (err < 0) + return err; + } + + port = ipvlan_port_get_rtnl(phy_dev); + if (data && data[IFLA_IPVLAN_MODE]) + port->mode = nla_get_u16(data[IFLA_IPVLAN_MODE]); + + ipvlan->phy_dev = phy_dev; + ipvlan->dev = dev; + ipvlan->port = port; + ipvlan->sfeatures = IPVLAN_FEATURES; + INIT_LIST_HEAD(&ipvlan->addrs); + ipvlan->ipv4cnt = 0; + ipvlan->ipv6cnt = 0; + + /* TODO Probably put random address here to be presented to the + * world but keep using the physical-dev address for the outgoing + * packets. + */ + memcpy(dev->dev_addr, phy_dev->dev_addr, ETH_ALEN); + + dev->priv_flags |= IFF_IPVLAN_SLAVE; + + port->count += 1; + err = register_netdevice(dev); + if (err < 0) + goto ipvlan_destroy_port; + + err = netdev_upper_dev_link(phy_dev, dev); + if (err) + goto ipvlan_destroy_port; + + list_add_tail_rcu(&ipvlan->pnode, &port->ipvlans); + netif_stacked_transfer_operstate(phy_dev, dev); + return 0; + +ipvlan_destroy_port: + port->count -= 1; + if (!port->count) + ipvlan_port_destroy(phy_dev); + + return err; +} + +static void ipvlan_link_delete(struct net_device *dev, struct list_head *head) +{ + struct ipvl_dev *ipvlan = netdev_priv(dev); + struct ipvl_addr *addr, *next; + + if (ipvlan->ipv6cnt > 0 || ipvlan->ipv4cnt > 0) { + list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) { + ipvlan_ht_addr_del(addr, !dev->dismantle); + list_del_rcu(&addr->anode); + } + } + list_del_rcu(&ipvlan->pnode); + unregister_netdevice_queue(dev, head); + netdev_upper_dev_unlink(ipvlan->phy_dev, dev); +} + +static void ipvlan_link_setup(struct net_device *dev) +{ + ether_setup(dev); + + dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING); + dev->priv_flags |= IFF_UNICAST_FLT; + dev->netdev_ops = &ipvlan_netdev_ops; + dev->destructor = free_netdev; + dev->header_ops = &ipvlan_header_ops; + dev->ethtool_ops = &ipvlan_ethtool_ops; + dev->tx_queue_len = 0; +} + +static const struct nla_policy ipvlan_nl_policy[IFLA_IPVLAN_MAX + 1] = +{ + [IFLA_IPVLAN_MODE] = { .type = NLA_U16 }, +}; + +static struct rtnl_link_ops ipvlan_link_ops = { + .kind = "ipvlan", + .priv_size = sizeof(struct ipvl_dev), + + .get_size = ipvlan_nl_getsize, + .policy = ipvlan_nl_policy, + .validate = ipvlan_nl_validate, + .fill_info = ipvlan_nl_fillinfo, + .changelink = ipvlan_nl_changelink, + .maxtype = IFLA_IPVLAN_MAX, + + .setup = ipvlan_link_setup, + .newlink = ipvlan_link_new, + .dellink = ipvlan_link_delete, +}; + +int ipvlan_link_register(struct rtnl_link_ops *ops) +{ + return rtnl_link_register(ops); +} + +static int ipvlan_device_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct ipvl_dev *ipvlan, *next; + struct ipvl_port *port; + LIST_HEAD(lst_kill); + + if (!ipvlan_dev_master(dev)) + return NOTIFY_DONE; + + port = ipvlan_port_get_rtnl(dev); + + switch (event) { + case NETDEV_CHANGE: + list_for_each_entry(ipvlan, &port->ipvlans, pnode) + netif_stacked_transfer_operstate(ipvlan->phy_dev, + ipvlan->dev); + break; + + case NETDEV_UNREGISTER: + if (dev->reg_state != NETREG_UNREGISTERING) + break; + + list_for_each_entry_safe(ipvlan, next, &port->ipvlans, + pnode) + ipvlan->dev->rtnl_link_ops->dellink(ipvlan->dev, + &lst_kill); + unregister_netdevice_many(&lst_kill); + break; + + case NETDEV_FEAT_CHANGE: + list_for_each_entry(ipvlan, &port->ipvlans, pnode) { + ipvlan->dev->features = dev->features & IPVLAN_FEATURES; + ipvlan->dev->gso_max_size = dev->gso_max_size; + netdev_features_change(ipvlan->dev); + } + break; + + case NETDEV_CHANGEMTU: + list_for_each_entry(ipvlan, &port->ipvlans, pnode) + ipvlan_adjust_mtu(ipvlan, dev); + break; + + case NETDEV_PRE_TYPE_CHANGE: + /* Forbid underlying device to change its type. */ + return NOTIFY_BAD; + } + return NOTIFY_DONE; +} + +static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr) +{ + struct ipvl_addr *addr; + + if (ipvlan_addr_busy(ipvlan, ip6_addr, true)) { + netif_err(ipvlan, ifup, ipvlan->dev, + "Failed to add IPv6=%pI6c addr for %s intf\n", + ip6_addr, ipvlan->dev->name); + return -EINVAL; + } + addr = kzalloc(sizeof(struct ipvl_addr), GFP_ATOMIC); + if (!addr) + return -ENOMEM; + + addr->master = ipvlan; + memcpy(&addr->ip6addr, ip6_addr, sizeof(struct in6_addr)); + addr->atype = IPVL_IPV6; + list_add_tail_rcu(&addr->anode, &ipvlan->addrs); + ipvlan->ipv6cnt++; + ipvlan_ht_addr_add(ipvlan, addr); + + return 0; +} + +static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr) +{ + struct ipvl_addr *addr; + + addr = ipvlan_ht_addr_lookup(ipvlan->port, ip6_addr, true); + if (!addr) + return; + + ipvlan_ht_addr_del(addr, true); + list_del_rcu(&addr->anode); + ipvlan->ipv6cnt--; + WARN_ON(ipvlan->ipv6cnt < 0); + kfree_rcu(addr, rcu); + + return; +} + +static int ipvlan_addr6_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct inet6_ifaddr *if6 = (struct inet6_ifaddr *)ptr; + struct net_device *dev = (struct net_device *)if6->idev->dev; + struct ipvl_dev *ipvlan = netdev_priv(dev); + + if (!ipvlan_dev_slave(dev)) + return NOTIFY_DONE; + + if (!ipvlan || !ipvlan->port) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_UP: + if (ipvlan_add_addr6(ipvlan, &if6->addr)) + return NOTIFY_BAD; + break; + + case NETDEV_DOWN: + ipvlan_del_addr6(ipvlan, &if6->addr); + break; + } + + return NOTIFY_OK; +} + +static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr) +{ + struct ipvl_addr *addr; + + if (ipvlan_addr_busy(ipvlan, ip4_addr, false)) { + netif_err(ipvlan, ifup, ipvlan->dev, + "Failed to add IPv4=%pI4 on %s intf.\n", + ip4_addr, ipvlan->dev->name); + return -EINVAL; + } + addr = kzalloc(sizeof(struct ipvl_addr), GFP_KERNEL); + if (!addr) + return -ENOMEM; + + addr->master = ipvlan; + memcpy(&addr->ip4addr, ip4_addr, sizeof(struct in_addr)); + addr->atype = IPVL_IPV4; + list_add_tail_rcu(&addr->anode, &ipvlan->addrs); + ipvlan->ipv4cnt++; + ipvlan_ht_addr_add(ipvlan, addr); + ipvlan_set_broadcast_mac_filter(ipvlan, true); + + return 0; +} + +static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr) +{ + struct ipvl_addr *addr; + + addr = ipvlan_ht_addr_lookup(ipvlan->port, ip4_addr, false); + if (!addr) + return; + + ipvlan_ht_addr_del(addr, true); + list_del_rcu(&addr->anode); + ipvlan->ipv4cnt--; + WARN_ON(ipvlan->ipv4cnt < 0); + if (!ipvlan->ipv4cnt) + ipvlan_set_broadcast_mac_filter(ipvlan, false); + kfree_rcu(addr, rcu); + + return; +} + +static int ipvlan_addr4_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct in_ifaddr *if4 = (struct in_ifaddr *)ptr; + struct net_device *dev = (struct net_device *)if4->ifa_dev->dev; + struct ipvl_dev *ipvlan = netdev_priv(dev); + struct in_addr ip4_addr; + + if (!ipvlan_dev_slave(dev)) + return NOTIFY_DONE; + + if (!ipvlan || !ipvlan->port) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_UP: + ip4_addr.s_addr = if4->ifa_address; + if (ipvlan_add_addr4(ipvlan, &ip4_addr)) + return NOTIFY_BAD; + break; + + case NETDEV_DOWN: + ip4_addr.s_addr = if4->ifa_address; + ipvlan_del_addr4(ipvlan, &ip4_addr); + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block ipvlan_addr4_notifier_block __read_mostly = { + .notifier_call = ipvlan_addr4_event, +}; + +static struct notifier_block ipvlan_notifier_block __read_mostly = { + .notifier_call = ipvlan_device_event, +}; + +static struct notifier_block ipvlan_addr6_notifier_block __read_mostly = { + .notifier_call = ipvlan_addr6_event, +}; + +static int __init ipvlan_init_module(void) +{ + int err; + + ipvlan_init_secret(); + register_netdevice_notifier(&ipvlan_notifier_block); + register_inet6addr_notifier(&ipvlan_addr6_notifier_block); + register_inetaddr_notifier(&ipvlan_addr4_notifier_block); + + err = ipvlan_link_register(&ipvlan_link_ops); + if (err < 0) + goto error; + + return 0; +error: + unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block); + unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block); + unregister_netdevice_notifier(&ipvlan_notifier_block); + return err; +} + +static void __exit ipvlan_cleanup_module(void) +{ + rtnl_link_unregister(&ipvlan_link_ops); + unregister_netdevice_notifier(&ipvlan_notifier_block); + unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block); + unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block); +} + +module_init(ipvlan_init_module); +module_exit(ipvlan_cleanup_module); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Mahesh Bandewar <maheshb@google.com>"); +MODULE_DESCRIPTION("Driver for L3 (IPv6/IPv4) based VLANs"); +MODULE_ALIAS_RTNL_LINK("ipvlan"); diff --git a/drivers/scsi/csiostor/csio_hw_chip.h b/drivers/scsi/csiostor/csio_hw_chip.h index bca0de61ae8..4752fed476d 100644 --- a/drivers/scsi/csiostor/csio_hw_chip.h +++ b/drivers/scsi/csiostor/csio_hw_chip.h @@ -36,60 +36,13 @@ #include "csio_defs.h" -/* FCoE device IDs for T4 */ -#define CSIO_DEVID_T440DBG_FCOE 0x4600 -#define CSIO_DEVID_T420CR_FCOE 0x4601 -#define CSIO_DEVID_T422CR_FCOE 0x4602 -#define CSIO_DEVID_T440CR_FCOE 0x4603 -#define CSIO_DEVID_T420BCH_FCOE 0x4604 -#define CSIO_DEVID_T440BCH_FCOE 0x4605 -#define CSIO_DEVID_T440CH_FCOE 0x4606 -#define CSIO_DEVID_T420SO_FCOE 0x4607 -#define CSIO_DEVID_T420CX_FCOE 0x4608 -#define CSIO_DEVID_T420BT_FCOE 0x4609 -#define CSIO_DEVID_T404BT_FCOE 0x460A -#define CSIO_DEVID_B420_FCOE 0x460B -#define CSIO_DEVID_B404_FCOE 0x460C -#define CSIO_DEVID_T480CR_FCOE 0x460D -#define CSIO_DEVID_T440LPCR_FCOE 0x460E -#define CSIO_DEVID_AMSTERDAM_T4_FCOE 0x460F -#define CSIO_DEVID_HUAWEI_T480_FCOE 0x4680 -#define CSIO_DEVID_HUAWEI_T440_FCOE 0x4681 -#define CSIO_DEVID_HUAWEI_STG310_FCOE 0x4682 -#define CSIO_DEVID_ACROMAG_XMC_XAUI 0x4683 -#define CSIO_DEVID_ACROMAG_XMC_SFP_FCOE 0x4684 -#define CSIO_DEVID_QUANTA_MEZZ_SFP_FCOE 0x4685 -#define CSIO_DEVID_HUAWEI_10GT_FCOE 0x4686 -#define CSIO_DEVID_HUAWEI_T440_TOE_FCOE 0x4687 - -/* FCoE device IDs for T5 */ -#define CSIO_DEVID_T580DBG_FCOE 0x5600 -#define CSIO_DEVID_T520CR_FCOE 0x5601 -#define CSIO_DEVID_T522CR_FCOE 0x5602 -#define CSIO_DEVID_T540CR_FCOE 0x5603 -#define CSIO_DEVID_T520BCH_FCOE 0x5604 -#define CSIO_DEVID_T540BCH_FCOE 0x5605 -#define CSIO_DEVID_T540CH_FCOE 0x5606 -#define CSIO_DEVID_T520SO_FCOE 0x5607 -#define CSIO_DEVID_T520CX_FCOE 0x5608 -#define CSIO_DEVID_T520BT_FCOE 0x5609 -#define CSIO_DEVID_T504BT_FCOE 0x560A -#define CSIO_DEVID_B520_FCOE 0x560B -#define CSIO_DEVID_B504_FCOE 0x560C -#define CSIO_DEVID_T580CR2_FCOE 0x560D -#define CSIO_DEVID_T540LPCR_FCOE 0x560E -#define CSIO_DEVID_AMSTERDAM_T5_FCOE 0x560F -#define CSIO_DEVID_T580LPCR_FCOE 0x5610 -#define CSIO_DEVID_T520LLCR_FCOE 0x5611 -#define CSIO_DEVID_T560CR_FCOE 0x5612 -#define CSIO_DEVID_T580CR_FCOE 0x5613 - /* Define MACRO values */ #define CSIO_HW_T4 0x4000 #define CSIO_T4_FCOE_ASIC 0x4600 #define CSIO_HW_T5 0x5000 #define CSIO_T5_FCOE_ASIC 0x5600 #define CSIO_HW_CHIP_MASK 0xF000 + #define T4_REGMAP_SIZE (160 * 1024) #define T5_REGMAP_SIZE (332 * 1024) #define FW_FNAME_T4 "cxgb4/t4fw.bin" diff --git a/drivers/scsi/csiostor/csio_init.c b/drivers/scsi/csiostor/csio_init.c index 1ed5b21c0dd..34d20cc3e11 100644 --- a/drivers/scsi/csiostor/csio_init.c +++ b/drivers/scsi/csiostor/csio_init.c @@ -955,6 +955,10 @@ static int csio_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) struct csio_hw *hw; struct csio_lnode *ln; + /* probe only T5 cards */ + if (!csio_is_t5((pdev->device & CSIO_HW_CHIP_MASK))) + return -ENODEV; + rv = csio_pci_init(pdev, &bars); if (rv) goto err; @@ -1167,53 +1171,21 @@ static struct pci_error_handlers csio_err_handler = { .resume = csio_pci_resume, }; -static const struct pci_device_id csio_pci_tbl[] = { - CSIO_DEVICE(CSIO_DEVID_T440DBG_FCOE, 0), /* T4 DEBUG FCOE */ - CSIO_DEVICE(CSIO_DEVID_T420CR_FCOE, 0), /* T420CR FCOE */ - CSIO_DEVICE(CSIO_DEVID_T422CR_FCOE, 0), /* T422CR FCOE */ - CSIO_DEVICE(CSIO_DEVID_T440CR_FCOE, 0), /* T440CR FCOE */ - CSIO_DEVICE(CSIO_DEVID_T420BCH_FCOE, 0), /* T420BCH FCOE */ - CSIO_DEVICE(CSIO_DEVID_T440BCH_FCOE, 0), /* T440BCH FCOE */ - CSIO_DEVICE(CSIO_DEVID_T440CH_FCOE, 0), /* T440CH FCOE */ - CSIO_DEVICE(CSIO_DEVID_T420SO_FCOE, 0), /* T420SO FCOE */ - CSIO_DEVICE(CSIO_DEVID_T420CX_FCOE, 0), /* T420CX FCOE */ - CSIO_DEVICE(CSIO_DEVID_T420BT_FCOE, 0), /* T420BT FCOE */ - CSIO_DEVICE(CSIO_DEVID_T404BT_FCOE, 0), /* T404BT FCOE */ - CSIO_DEVICE(CSIO_DEVID_B420_FCOE, 0), /* B420 FCOE */ - CSIO_DEVICE(CSIO_DEVID_B404_FCOE, 0), /* B404 FCOE */ - CSIO_DEVICE(CSIO_DEVID_T480CR_FCOE, 0), /* T480 CR FCOE */ - CSIO_DEVICE(CSIO_DEVID_T440LPCR_FCOE, 0), /* T440 LP-CR FCOE */ - CSIO_DEVICE(CSIO_DEVID_AMSTERDAM_T4_FCOE, 0), /* AMSTERDAM T4 FCOE */ - CSIO_DEVICE(CSIO_DEVID_HUAWEI_T480_FCOE, 0), /* HUAWEI T480 FCOE */ - CSIO_DEVICE(CSIO_DEVID_HUAWEI_T440_FCOE, 0), /* HUAWEI T440 FCOE */ - CSIO_DEVICE(CSIO_DEVID_HUAWEI_STG310_FCOE, 0), /* HUAWEI STG FCOE */ - CSIO_DEVICE(CSIO_DEVID_ACROMAG_XMC_XAUI, 0), /* ACROMAG XAUI FCOE */ - CSIO_DEVICE(CSIO_DEVID_QUANTA_MEZZ_SFP_FCOE, 0),/* QUANTA MEZZ FCOE */ - CSIO_DEVICE(CSIO_DEVID_HUAWEI_10GT_FCOE, 0), /* HUAWEI 10GT FCOE */ - CSIO_DEVICE(CSIO_DEVID_HUAWEI_T440_TOE_FCOE, 0),/* HUAWEI T4 TOE FCOE */ - CSIO_DEVICE(CSIO_DEVID_T580DBG_FCOE, 0), /* T5 DEBUG FCOE */ - CSIO_DEVICE(CSIO_DEVID_T520CR_FCOE, 0), /* T520CR FCOE */ - CSIO_DEVICE(CSIO_DEVID_T522CR_FCOE, 0), /* T522CR FCOE */ - CSIO_DEVICE(CSIO_DEVID_T540CR_FCOE, 0), /* T540CR FCOE */ - CSIO_DEVICE(CSIO_DEVID_T520BCH_FCOE, 0), /* T520BCH FCOE */ - CSIO_DEVICE(CSIO_DEVID_T540BCH_FCOE, 0), /* T540BCH FCOE */ - CSIO_DEVICE(CSIO_DEVID_T540CH_FCOE, 0), /* T540CH FCOE */ - CSIO_DEVICE(CSIO_DEVID_T520SO_FCOE, 0), /* T520SO FCOE */ - CSIO_DEVICE(CSIO_DEVID_T520CX_FCOE, 0), /* T520CX FCOE */ - CSIO_DEVICE(CSIO_DEVID_T520BT_FCOE, 0), /* T520BT FCOE */ - CSIO_DEVICE(CSIO_DEVID_T504BT_FCOE, 0), /* T504BT FCOE */ - CSIO_DEVICE(CSIO_DEVID_B520_FCOE, 0), /* B520 FCOE */ - CSIO_DEVICE(CSIO_DEVID_B504_FCOE, 0), /* B504 FCOE */ - CSIO_DEVICE(CSIO_DEVID_T580CR2_FCOE, 0), /* T580 CR FCOE */ - CSIO_DEVICE(CSIO_DEVID_T540LPCR_FCOE, 0), /* T540 LP-CR FCOE */ - CSIO_DEVICE(CSIO_DEVID_AMSTERDAM_T5_FCOE, 0), /* AMSTERDAM T5 FCOE */ - CSIO_DEVICE(CSIO_DEVID_T580LPCR_FCOE, 0), /* T580 LP-CR FCOE */ - CSIO_DEVICE(CSIO_DEVID_T520LLCR_FCOE, 0), /* T520 LL-CR FCOE */ - CSIO_DEVICE(CSIO_DEVID_T560CR_FCOE, 0), /* T560 CR FCOE */ - CSIO_DEVICE(CSIO_DEVID_T580CR_FCOE, 0), /* T580 CR FCOE */ - { 0, 0, 0, 0, 0, 0, 0 } -}; +/* + * Macros needed to support the PCI Device ID Table ... + */ +#define CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN \ + static struct pci_device_id csio_pci_tbl[] = { +/* Define for iSCSI uses PF5, FCoE uses PF6 */ +#define CH_PCI_DEVICE_ID_FUNCTION 0x5 +#define CH_PCI_DEVICE_ID_FUNCTION2 0x6 + +#define CH_PCI_ID_TABLE_ENTRY(devid) \ + { PCI_VDEVICE(CHELSIO, (devid)), 0 } + +#define CH_PCI_DEVICE_ID_TABLE_DEFINE_END { 0, } } +#include "t4_pci_id_tbl.h" static struct pci_driver csio_pci_driver = { .name = KBUILD_MODNAME, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5cd50878757..2cb772495f7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1230,6 +1230,8 @@ enum netdev_priv_flags { IFF_LIVE_ADDR_CHANGE = 1<<20, IFF_MACVLAN = 1<<21, IFF_XMIT_DST_RELEASE_PERM = 1<<22, + IFF_IPVLAN_MASTER = 1<<23, + IFF_IPVLAN_SLAVE = 1<<24, }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -1255,6 +1257,8 @@ enum netdev_priv_flags { #define IFF_LIVE_ADDR_CHANGE IFF_LIVE_ADDR_CHANGE #define IFF_MACVLAN IFF_MACVLAN #define IFF_XMIT_DST_RELEASE_PERM IFF_XMIT_DST_RELEASE_PERM +#define IFF_IPVLAN_MASTER IFF_IPVLAN_MASTER +#define IFF_IPVLAN_SLAVE IFF_IPVLAN_SLAVE /** * struct net_device - The DEVICE structure. diff --git a/include/net/netfilter/ipv4/nf_nat_redirect.h b/include/net/netfilter/ipv4/nf_nat_redirect.h new file mode 100644 index 00000000000..19e1df3a0a4 --- /dev/null +++ b/include/net/netfilter/ipv4/nf_nat_redirect.h @@ -0,0 +1,9 @@ +#ifndef _NF_NAT_REDIRECT_IPV4_H_ +#define _NF_NAT_REDIRECT_IPV4_H_ + +unsigned int +nf_nat_redirect_ipv4(struct sk_buff *skb, + const struct nf_nat_ipv4_multi_range_compat *mr, + unsigned int hooknum); + +#endif /* _NF_NAT_REDIRECT_IPV4_H_ */ diff --git a/include/net/netfilter/ipv6/nf_nat_redirect.h b/include/net/netfilter/ipv6/nf_nat_redirect.h new file mode 100644 index 00000000000..1ebdffc461c --- /dev/null +++ b/include/net/netfilter/ipv6/nf_nat_redirect.h @@ -0,0 +1,8 @@ +#ifndef _NF_NAT_REDIRECT_IPV6_H_ +#define _NF_NAT_REDIRECT_IPV6_H_ + +unsigned int +nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range *range, + unsigned int hooknum); + +#endif /* _NF_NAT_REDIRECT_IPV6_H_ */ diff --git a/include/net/netfilter/nft_redir.h b/include/net/netfilter/nft_redir.h new file mode 100644 index 00000000000..a2d67546afa --- /dev/null +++ b/include/net/netfilter/nft_redir.h @@ -0,0 +1,21 @@ +#ifndef _NFT_REDIR_H_ +#define _NFT_REDIR_H_ + +struct nft_redir { + enum nft_registers sreg_proto_min:8; + enum nft_registers sreg_proto_max:8; + u16 flags; +}; + +extern const struct nla_policy nft_redir_policy[]; + +int nft_redir_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]); + +int nft_redir_dump(struct sk_buff *skb, const struct nft_expr *expr); + +int nft_redir_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nft_data **data); + +#endif /* _NFT_REDIR_H_ */ diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h index c809c1d2cea..93b70ade1ff 100644 --- a/include/net/tc_act/tc_vlan.h +++ b/include/net/tc_act/tc_vlan.h @@ -18,7 +18,7 @@ struct tcf_vlan { struct tcf_common common; int tcfv_action; - __be16 tcfv_push_vid; + u16 tcfv_push_vid; __be16 tcfv_push_proto; }; #define to_vlan(a) \ diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 72298b6887a..a1e8175cc48 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -389,6 +389,7 @@ header-y += times.h header-y += timex.h header-y += tiocl.h header-y += tipc_config.h +header-y += tipc_netlink.h header-y += tipc.h header-y += toshiba.h header-y += tty_flags.h diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 7072d832501..36bddc23363 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -330,6 +330,21 @@ enum macvlan_macaddr_mode { #define MACVLAN_FLAG_NOPROMISC 1 +/* IPVLAN section */ +enum { + IFLA_IPVLAN_UNSPEC, + IFLA_IPVLAN_MODE, + __IFLA_IPVLAN_MAX +}; + +#define IFLA_IPVLAN_MAX (__IFLA_IPVLAN_MAX - 1) + +enum ipvlan_mode { + IPVLAN_MODE_L2 = 0, + IPVLAN_MODE_L3, + IPVLAN_MODE_MAX +}; + /* VXLAN section */ enum { IFLA_VXLAN_UNSPEC, diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index f31fe7b660a..832bc46db78 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -579,6 +579,7 @@ enum nft_exthdr_attributes { * @NFT_META_CPU: cpu id through smp_processor_id() * @NFT_META_IIFGROUP: packet input interface group * @NFT_META_OIFGROUP: packet output interface group + * @NFT_META_CGROUP: socket control group (skb->sk->sk_classid) */ enum nft_meta_keys { NFT_META_LEN, @@ -604,6 +605,7 @@ enum nft_meta_keys { NFT_META_CPU, NFT_META_IIFGROUP, NFT_META_OIFGROUP, + NFT_META_CGROUP, }; /** @@ -838,6 +840,22 @@ enum nft_masq_attributes { #define NFTA_MASQ_MAX (__NFTA_MASQ_MAX - 1) /** + * enum nft_redir_attributes - nf_tables redirect expression netlink attributes + * + * @NFTA_REDIR_REG_PROTO_MIN: source register of proto range start (NLA_U32: nft_registers) + * @NFTA_REDIR_REG_PROTO_MAX: source register of proto range end (NLA_U32: nft_registers) + * @NFTA_REDIR_FLAGS: NAT flags (see NF_NAT_RANGE_* in linux/netfilter/nf_nat.h) (NLA_U32) + */ +enum nft_redir_attributes { + NFTA_REDIR_UNSPEC, + NFTA_REDIR_REG_PROTO_MIN, + NFTA_REDIR_REG_PROTO_MAX, + NFTA_REDIR_FLAGS, + __NFTA_REDIR_MAX +}; +#define NFTA_REDIR_MAX (__NFTA_REDIR_MAX - 1) + +/** * enum nft_gen_attributes - nf_tables ruleset generation attributes * * @NFTA_GEN_ID: Ruleset generation ID (NLA_U32) diff --git a/lib/rhashtable.c b/lib/rhashtable.c index e5f5e69c7a7..c7e987ab336 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -653,15 +653,15 @@ static int __init test_rht_lookup(struct rhashtable *ht) return 0; } -static void test_bucket_stats(struct rhashtable *ht, - struct bucket_table *tbl, - bool quiet) +static void test_bucket_stats(struct rhashtable *ht, bool quiet) { - unsigned int cnt, i, total = 0; + unsigned int cnt, rcu_cnt, i, total = 0; struct test_obj *obj; + struct bucket_table *tbl; + tbl = rht_dereference_rcu(ht->tbl, ht); for (i = 0; i < tbl->size; i++) { - cnt = 0; + rcu_cnt = cnt = 0; if (!quiet) pr_info(" [%#4x/%zu]", i, tbl->size); @@ -673,6 +673,13 @@ static void test_bucket_stats(struct rhashtable *ht, pr_cont(" [%p],", obj); } + rht_for_each_entry_rcu(obj, tbl->buckets[i], node) + rcu_cnt++; + + if (rcu_cnt != cnt) + pr_warn("Test failed: Chain count mismach %d != %d", + cnt, rcu_cnt); + if (!quiet) pr_cont("\n [%#x] first element: %p, chain length: %u\n", i, tbl->buckets[i], cnt); @@ -680,6 +687,9 @@ static void test_bucket_stats(struct rhashtable *ht, pr_info(" Traversal complete: counted=%u, nelems=%zu, entries=%d\n", total, ht->nelems, TEST_ENTRIES); + + if (total != ht->nelems || total != TEST_ENTRIES) + pr_warn("Test failed: Total count mismatch ^^^"); } static int __init test_rhashtable(struct rhashtable *ht) @@ -710,8 +720,7 @@ static int __init test_rhashtable(struct rhashtable *ht) } rcu_read_lock(); - tbl = rht_dereference_rcu(ht->tbl, ht); - test_bucket_stats(ht, tbl, true); + test_bucket_stats(ht, true); test_rht_lookup(ht); rcu_read_unlock(); @@ -735,6 +744,10 @@ static int __init test_rhashtable(struct rhashtable *ht) rcu_read_unlock(); } + rcu_read_lock(); + test_bucket_stats(ht, true); + rcu_read_unlock(); + pr_info(" Deleting %d keys\n", TEST_ENTRIES); for (i = 0; i < TEST_ENTRIES; i++) { u32 key = i * 2; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 1a4f32c09ad..c190d22b6b3 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -35,6 +35,7 @@ #include <net/ip.h> #include <net/ipv6.h> #include <net/route.h> +#include <net/netfilter/br_netfilter.h> #include <asm/uaccess.h> #include "br_private.h" diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 4c019d5c3f5..8358b2da154 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -104,6 +104,12 @@ config NF_NAT_MASQUERADE_IPV4 This is the kernel functionality to provide NAT in the masquerade flavour (automatic source address selection). +config NF_NAT_REDIRECT_IPV4 + tristate "IPv4 redirect support" + help + This is the kernel functionality to provide NAT in the redirect + flavour (redirect packets to local machine). + config NFT_MASQ_IPV4 tristate "IPv4 masquerading support for nf_tables" depends on NF_TABLES_IPV4 @@ -113,6 +119,15 @@ config NFT_MASQ_IPV4 This is the expression that provides IPv4 masquerading support for nf_tables. +config NFT_REDIR_IPV4 + tristate "IPv4 redirect support for nf_tables" + depends on NF_TABLES_IPV4 + depends on NFT_REDIR + select NF_NAT_REDIRECT_IPV4 + help + This is the expression that provides IPv4 redirect support for + nf_tables. + config NF_NAT_SNMP_BASIC tristate "Basic SNMP-ALG support" depends on NF_CONNTRACK_SNMP diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index f4cef5af096..902bcd1597b 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -31,6 +31,7 @@ obj-$(CONFIG_NF_NAT_H323) += nf_nat_h323.o obj-$(CONFIG_NF_NAT_PPTP) += nf_nat_pptp.o obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o obj-$(CONFIG_NF_NAT_MASQUERADE_IPV4) += nf_nat_masquerade_ipv4.o +obj-$(CONFIG_NF_NAT_REDIRECT_IPV4) += nf_nat_redirect_ipv4.o # NAT protocols (nf_nat) obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o @@ -40,6 +41,7 @@ obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o obj-$(CONFIG_NFT_MASQ_IPV4) += nft_masq_ipv4.o +obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redir_ipv4.o obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o # generic IP tables diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c index ccfc78db12e..d059182c146 100644 --- a/net/ipv4/netfilter/nf_log_arp.c +++ b/net/ipv4/netfilter/nf_log_arp.c @@ -10,6 +10,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/spinlock.h> @@ -74,12 +75,12 @@ static void dump_arp_packet(struct nf_log_buf *m, ap->mac_src, ap->ip_src, ap->mac_dst, ap->ip_dst); } -void nf_log_arp_packet(struct net *net, u_int8_t pf, - unsigned int hooknum, const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct nf_loginfo *loginfo, - const char *prefix) +static void nf_log_arp_packet(struct net *net, u_int8_t pf, + unsigned int hooknum, const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, + const char *prefix) { struct nf_log_buf *m; @@ -130,8 +131,17 @@ static int __init nf_log_arp_init(void) if (ret < 0) return ret; - nf_log_register(NFPROTO_ARP, &nf_arp_logger); + ret = nf_log_register(NFPROTO_ARP, &nf_arp_logger); + if (ret < 0) { + pr_err("failed to register logger\n"); + goto err1; + } + return 0; + +err1: + unregister_pernet_subsys(&nf_log_arp_net_ops); + return ret; } static void __exit nf_log_arp_exit(void) diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c index 078bdca1b60..75101980eee 100644 --- a/net/ipv4/netfilter/nf_log_ipv4.c +++ b/net/ipv4/netfilter/nf_log_ipv4.c @@ -5,6 +5,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/spinlock.h> @@ -366,8 +367,17 @@ static int __init nf_log_ipv4_init(void) if (ret < 0) return ret; - nf_log_register(NFPROTO_IPV4, &nf_ip_logger); + ret = nf_log_register(NFPROTO_IPV4, &nf_ip_logger); + if (ret < 0) { + pr_err("failed to register logger\n"); + goto err1; + } + return 0; + +err1: + unregister_pernet_subsys(&nf_log_ipv4_net_ops); + return ret; } static void __exit nf_log_ipv4_exit(void) diff --git a/net/ipv4/netfilter/nf_nat_redirect_ipv4.c b/net/ipv4/netfilter/nf_nat_redirect_ipv4.c new file mode 100644 index 00000000000..a220552fc53 --- /dev/null +++ b/net/ipv4/netfilter/nf_nat_redirect_ipv4.c @@ -0,0 +1,82 @@ +/* + * (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> + * Copyright (c) 2011 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on Rusty Russell's IPv4 REDIRECT target. Development of IPv6 + * NAT funded by Astaro. + */ + +#include <linux/if.h> +#include <linux/inetdevice.h> +#include <linux/ip.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/netfilter.h> +#include <linux/types.h> +#include <linux/netfilter_ipv4.h> +#include <linux/netfilter/x_tables.h> +#include <net/addrconf.h> +#include <net/checksum.h> +#include <net/protocol.h> +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/ipv4/nf_nat_redirect.h> + +unsigned int +nf_nat_redirect_ipv4(struct sk_buff *skb, + const struct nf_nat_ipv4_multi_range_compat *mr, + unsigned int hooknum) +{ + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + __be32 newdst; + struct nf_nat_range newrange; + + NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING || + hooknum == NF_INET_LOCAL_OUT); + + ct = nf_ct_get(skb, &ctinfo); + NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); + + /* Local packets: make them go to loopback */ + if (hooknum == NF_INET_LOCAL_OUT) { + newdst = htonl(0x7F000001); + } else { + struct in_device *indev; + struct in_ifaddr *ifa; + + newdst = 0; + + rcu_read_lock(); + indev = __in_dev_get_rcu(skb->dev); + if (indev != NULL) { + ifa = indev->ifa_list; + newdst = ifa->ifa_local; + } + rcu_read_unlock(); + + if (!newdst) + return NF_DROP; + } + + /* Transfer from original range. */ + memset(&newrange.min_addr, 0, sizeof(newrange.min_addr)); + memset(&newrange.max_addr, 0, sizeof(newrange.max_addr)); + newrange.flags = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS; + newrange.min_addr.ip = newdst; + newrange.max_addr.ip = newdst; + newrange.min_proto = mr->range[0].min; + newrange.max_proto = mr->range[0].max; + + /* Hand modified range to generic setup. */ + return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); +} +EXPORT_SYMBOL_GPL(nf_nat_redirect_ipv4); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index 1baaa83dfe5..536da7bc598 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -11,6 +11,7 @@ #include <net/tcp.h> #include <net/route.h> #include <net/dst.h> +#include <net/netfilter/ipv4/nf_reject.h> #include <linux/netfilter_ipv4.h> #include <net/netfilter/ipv4/nf_reject.h> diff --git a/net/ipv4/netfilter/nft_redir_ipv4.c b/net/ipv4/netfilter/nft_redir_ipv4.c new file mode 100644 index 00000000000..643c5967aa2 --- /dev/null +++ b/net/ipv4/netfilter/nft_redir_ipv4.c @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/ipv4/nf_nat_redirect.h> +#include <net/netfilter/nft_redir.h> + +static void nft_redir_ipv4_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_redir *priv = nft_expr_priv(expr); + struct nf_nat_ipv4_multi_range_compat mr; + unsigned int verdict; + + memset(&mr, 0, sizeof(mr)); + if (priv->sreg_proto_min) { + mr.range[0].min.all = (__force __be16) + data[priv->sreg_proto_min].data[0]; + mr.range[0].max.all = (__force __be16) + data[priv->sreg_proto_max].data[0]; + mr.range[0].flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + } + + mr.range[0].flags |= priv->flags; + + verdict = nf_nat_redirect_ipv4(pkt->skb, &mr, pkt->ops->hooknum); + data[NFT_REG_VERDICT].verdict = verdict; +} + +static struct nft_expr_type nft_redir_ipv4_type; +static const struct nft_expr_ops nft_redir_ipv4_ops = { + .type = &nft_redir_ipv4_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), + .eval = nft_redir_ipv4_eval, + .init = nft_redir_init, + .dump = nft_redir_dump, + .validate = nft_redir_validate, +}; + +static struct nft_expr_type nft_redir_ipv4_type __read_mostly = { + .family = NFPROTO_IPV4, + .name = "redir", + .ops = &nft_redir_ipv4_ops, + .policy = nft_redir_policy, + .maxattr = NFTA_REDIR_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_redir_ipv4_module_init(void) +{ + return nft_register_expr(&nft_redir_ipv4_type); +} + +static void __exit nft_redir_ipv4_module_exit(void) +{ + nft_unregister_expr(&nft_redir_ipv4_type); +} + +module_init(nft_redir_ipv4_module_init); +module_exit(nft_redir_ipv4_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>"); +MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "redir"); diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c index ed33299c56d..d729542bd1b 100644 --- a/net/ipv4/netfilter/nft_reject_ipv4.c +++ b/net/ipv4/netfilter/nft_reject_ipv4.c @@ -19,9 +19,9 @@ #include <net/netfilter/ipv4/nf_reject.h> #include <net/netfilter/nft_reject.h> -void nft_reject_ipv4_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], - const struct nft_pktinfo *pkt) +static void nft_reject_ipv4_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) { struct nft_reject *priv = nft_expr_priv(expr); @@ -36,7 +36,6 @@ void nft_reject_ipv4_eval(const struct nft_expr *expr, data[NFT_REG_VERDICT].verdict = NF_DROP; } -EXPORT_SYMBOL_GPL(nft_reject_ipv4_eval); static struct nft_expr_type nft_reject_ipv4_type; static const struct nft_expr_ops nft_reject_ipv4_ops = { diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 6af874fc187..0dbe5c7953e 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -82,6 +82,12 @@ config NF_NAT_MASQUERADE_IPV6 This is the kernel functionality to provide NAT in the masquerade flavour (automatic source address selection) for IPv6. +config NF_NAT_REDIRECT_IPV6 + tristate "IPv6 redirect support" + help + This is the kernel functionality to provide NAT in the redirect + flavour (redirect packet to local machine) for IPv6. + config NFT_MASQ_IPV6 tristate "IPv6 masquerade support for nf_tables" depends on NF_TABLES_IPV6 @@ -91,6 +97,15 @@ config NFT_MASQ_IPV6 This is the expression that provides IPv4 masquerading support for nf_tables. +config NFT_REDIR_IPV6 + tristate "IPv6 redirect support for nf_tables" + depends on NF_TABLES_IPV6 + depends on NFT_REDIR + select NF_NAT_REDIRECT_IPV6 + help + This is the expression that provides IPv4 redirect support for + nf_tables. + endif # NF_NAT_IPV6 config IP6_NF_IPTABLES diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index fbb25f01143..d2ac9f5f212 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -19,6 +19,7 @@ obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o obj-$(CONFIG_NF_NAT_MASQUERADE_IPV6) += nf_nat_masquerade_ipv6.o +obj-$(CONFIG_NF_NAT_REDIRECT_IPV6) += nf_nat_redirect_ipv6.o # defrag nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o @@ -36,6 +37,7 @@ obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o obj-$(CONFIG_NFT_MASQ_IPV6) += nft_masq_ipv6.o +obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o # matches obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c index 7b17a0be93e..7fc34d1681a 100644 --- a/net/ipv6/netfilter/nf_log_ipv6.c +++ b/net/ipv6/netfilter/nf_log_ipv6.c @@ -5,6 +5,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/spinlock.h> @@ -398,8 +399,17 @@ static int __init nf_log_ipv6_init(void) if (ret < 0) return ret; - nf_log_register(NFPROTO_IPV6, &nf_ip6_logger); + ret = nf_log_register(NFPROTO_IPV6, &nf_ip6_logger); + if (ret < 0) { + pr_err("failed to register logger\n"); + goto err1; + } + return 0; + +err1: + unregister_pernet_subsys(&nf_log_ipv6_net_ops); + return ret; } static void __exit nf_log_ipv6_exit(void) diff --git a/net/ipv6/netfilter/nf_nat_redirect_ipv6.c b/net/ipv6/netfilter/nf_nat_redirect_ipv6.c new file mode 100644 index 00000000000..ea1308aeb04 --- /dev/null +++ b/net/ipv6/netfilter/nf_nat_redirect_ipv6.c @@ -0,0 +1,75 @@ +/* + * (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> + * Copyright (c) 2011 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on Rusty Russell's IPv4 REDIRECT target. Development of IPv6 + * NAT funded by Astaro. + */ + +#include <linux/if.h> +#include <linux/inetdevice.h> +#include <linux/ip.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/netfilter.h> +#include <linux/types.h> +#include <linux/netfilter_ipv6.h> +#include <linux/netfilter/x_tables.h> +#include <net/addrconf.h> +#include <net/checksum.h> +#include <net/protocol.h> +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/ipv6/nf_nat_redirect.h> + +static const struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT; + +unsigned int +nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range *range, + unsigned int hooknum) +{ + struct nf_nat_range newrange; + struct in6_addr newdst; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + ct = nf_ct_get(skb, &ctinfo); + if (hooknum == NF_INET_LOCAL_OUT) { + newdst = loopback_addr; + } else { + struct inet6_dev *idev; + struct inet6_ifaddr *ifa; + bool addr = false; + + rcu_read_lock(); + idev = __in6_dev_get(skb->dev); + if (idev != NULL) { + list_for_each_entry(ifa, &idev->addr_list, if_list) { + newdst = ifa->addr; + addr = true; + break; + } + } + rcu_read_unlock(); + + if (!addr) + return NF_DROP; + } + + newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; + newrange.min_addr.in6 = newdst; + newrange.max_addr.in6 = newdst; + newrange.min_proto = range->min_proto; + newrange.max_proto = range->max_proto; + + return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); +} +EXPORT_SYMBOL_GPL(nf_nat_redirect_ipv6); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 015eb8a8076..d05b36440e8 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -11,6 +11,7 @@ #include <net/ip6_route.h> #include <net/ip6_fib.h> #include <net/ip6_checksum.h> +#include <net/netfilter/ipv6/nf_reject.h> #include <linux/netfilter_ipv6.h> #include <net/netfilter/ipv6/nf_reject.h> diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c new file mode 100644 index 00000000000..83420eeaad1 --- /dev/null +++ b/net/ipv6/netfilter/nft_redir_ipv6.c @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nft_redir.h> +#include <net/netfilter/ipv6/nf_nat_redirect.h> + +static void nft_redir_ipv6_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_redir *priv = nft_expr_priv(expr); + struct nf_nat_range range; + unsigned int verdict; + + memset(&range, 0, sizeof(range)); + if (priv->sreg_proto_min) { + range.min_proto.all = (__force __be16) + data[priv->sreg_proto_min].data[0]; + range.max_proto.all = (__force __be16) + data[priv->sreg_proto_max].data[0]; + range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + } + + range.flags |= priv->flags; + + verdict = nf_nat_redirect_ipv6(pkt->skb, &range, pkt->ops->hooknum); + data[NFT_REG_VERDICT].verdict = verdict; +} + +static struct nft_expr_type nft_redir_ipv6_type; +static const struct nft_expr_ops nft_redir_ipv6_ops = { + .type = &nft_redir_ipv6_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), + .eval = nft_redir_ipv6_eval, + .init = nft_redir_init, + .dump = nft_redir_dump, + .validate = nft_redir_validate, +}; + +static struct nft_expr_type nft_redir_ipv6_type __read_mostly = { + .family = NFPROTO_IPV6, + .name = "redir", + .ops = &nft_redir_ipv6_ops, + .policy = nft_redir_policy, + .maxattr = NFTA_REDIR_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_redir_ipv6_module_init(void) +{ + return nft_register_expr(&nft_redir_ipv6_type); +} + +static void __exit nft_redir_ipv6_module_exit(void) +{ + nft_unregister_expr(&nft_redir_ipv6_type); +} + +module_init(nft_redir_ipv6_module_init); +module_exit(nft_redir_ipv6_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>"); +MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "redir"); diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c index 0bc19fa8782..f7328592414 100644 --- a/net/ipv6/netfilter/nft_reject_ipv6.c +++ b/net/ipv6/netfilter/nft_reject_ipv6.c @@ -19,9 +19,9 @@ #include <net/netfilter/nft_reject.h> #include <net/netfilter/ipv6/nf_reject.h> -void nft_reject_ipv6_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], - const struct nft_pktinfo *pkt) +static void nft_reject_ipv6_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) { struct nft_reject *priv = nft_expr_priv(expr); struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out); @@ -38,7 +38,6 @@ void nft_reject_ipv6_eval(const struct nft_expr *expr, data[NFT_REG_VERDICT].verdict = NF_DROP; } -EXPORT_SYMBOL_GPL(nft_reject_ipv6_eval); static struct nft_expr_type nft_reject_ipv6_type; static const struct nft_expr_ops nft_reject_ipv6_ops = { diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index ae5096ab65e..57f15a9aa48 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -505,6 +505,15 @@ config NFT_MASQ This option adds the "masquerade" expression that you can use to perform NAT in the masquerade flavour. +config NFT_REDIR + depends on NF_TABLES + depends on NF_CONNTRACK + depends on NF_NAT + tristate "Netfilter nf_tables redirect support" + help + This options adds the "redirect" expression that you can use + to perform NAT in the redirect flavour. + config NFT_NAT depends on NF_TABLES depends on NF_CONNTRACK @@ -835,6 +844,8 @@ config NETFILTER_XT_TARGET_RATEEST config NETFILTER_XT_TARGET_REDIRECT tristate "REDIRECT target support" depends on NF_NAT + select NF_NAT_REDIRECT_IPV4 if NF_NAT_IPV4 + select NF_NAT_REDIRECT_IPV6 if NF_NAT_IPV6 ---help--- REDIRECT is a special case of NAT: all incoming connections are mapped onto the incoming interface's address, causing the packets to diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index a9571be3f79..f3eb4680f2e 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -88,6 +88,7 @@ obj-$(CONFIG_NFT_HASH) += nft_hash.o obj-$(CONFIG_NFT_COUNTER) += nft_counter.o obj-$(CONFIG_NFT_LOG) += nft_log.o obj-$(CONFIG_NFT_MASQ) += nft_masq.o +obj-$(CONFIG_NFT_REDIR) += nft_redir.o # generic X tables obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 024a2e25c8a..fea9ef56642 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -17,6 +17,7 @@ #include <linux/interrupt.h> #include <linux/if.h> #include <linux/netdevice.h> +#include <linux/netfilter_ipv6.h> #include <linux/inetdevice.h> #include <linux/proc_fs.h> #include <linux/mutex.h> diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index ac7ba689efe..b8295a430a5 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -465,8 +465,7 @@ __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc) static void ip_vs_service_free(struct ip_vs_service *svc) { - if (svc->stats.cpustats) - free_percpu(svc->stats.cpustats); + free_percpu(svc->stats.cpustats); kfree(svc); } diff --git a/net/netfilter/ipvs/ip_vs_pe.c b/net/netfilter/ipvs/ip_vs_pe.c index 1a82b29ce8e..0df17caa8af 100644 --- a/net/netfilter/ipvs/ip_vs_pe.c +++ b/net/netfilter/ipvs/ip_vs_pe.c @@ -37,8 +37,7 @@ struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name) rcu_read_unlock(); return pe; } - if (pe->module) - module_put(pe->module); + module_put(pe->module); } rcu_read_unlock(); diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c index 4dbcda6258b..199760c71f3 100644 --- a/net/netfilter/ipvs/ip_vs_sched.c +++ b/net/netfilter/ipvs/ip_vs_sched.c @@ -104,8 +104,7 @@ static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name) mutex_unlock(&ip_vs_sched_mutex); return sched; } - if (sched->module) - module_put(sched->module); + module_put(sched->module); } mutex_unlock(&ip_vs_sched_mutex); diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 7162c86fd50..c47ffd7a0a7 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -820,8 +820,7 @@ ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc, p->pe_data = kmemdup(pe_data, pe_data_len, GFP_ATOMIC); if (!p->pe_data) { - if (p->pe->module) - module_put(p->pe->module); + module_put(p->pe->module); return -ENOMEM; } p->pe_data_len = pe_data_len; diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index bd90bf8107d..1f933136155 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -293,7 +293,6 @@ __ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, &dest->addr.ip, &dest_dst->dst_saddr.ip, atomic_read(&rt->dst.__refcnt)); } - daddr = dest->addr.ip; if (ret_saddr) *ret_saddr = dest_dst->dst_saddr.ip; } else { diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 3a3a60b126e..1d69f5b9748 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -728,7 +728,8 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct, /* If the calling party is on the same side of the forward-to party, * we don't need to track the second call */ -static int callforward_do_filter(const union nf_inet_addr *src, +static int callforward_do_filter(struct net *net, + const union nf_inet_addr *src, const union nf_inet_addr *dst, u_int8_t family) { @@ -750,9 +751,9 @@ static int callforward_do_filter(const union nf_inet_addr *src, memset(&fl2, 0, sizeof(fl2)); fl2.daddr = dst->ip; - if (!afinfo->route(&init_net, (struct dst_entry **)&rt1, + if (!afinfo->route(net, (struct dst_entry **)&rt1, flowi4_to_flowi(&fl1), false)) { - if (!afinfo->route(&init_net, (struct dst_entry **)&rt2, + if (!afinfo->route(net, (struct dst_entry **)&rt2, flowi4_to_flowi(&fl2), false)) { if (rt_nexthop(rt1, fl1.daddr) == rt_nexthop(rt2, fl2.daddr) && @@ -774,9 +775,9 @@ static int callforward_do_filter(const union nf_inet_addr *src, memset(&fl2, 0, sizeof(fl2)); fl2.daddr = dst->in6; - if (!afinfo->route(&init_net, (struct dst_entry **)&rt1, + if (!afinfo->route(net, (struct dst_entry **)&rt1, flowi6_to_flowi(&fl1), false)) { - if (!afinfo->route(&init_net, (struct dst_entry **)&rt2, + if (!afinfo->route(net, (struct dst_entry **)&rt2, flowi6_to_flowi(&fl2), false)) { if (ipv6_addr_equal(rt6_nexthop(rt1), rt6_nexthop(rt2)) && @@ -807,6 +808,7 @@ static int expect_callforwarding(struct sk_buff *skb, __be16 port; union nf_inet_addr addr; struct nf_conntrack_expect *exp; + struct net *net = nf_ct_net(ct); typeof(nat_callforwarding_hook) nat_callforwarding; /* Read alternativeAddress */ @@ -816,7 +818,7 @@ static int expect_callforwarding(struct sk_buff *skb, /* If the calling party is on the same side of the forward-to party, * we don't need to track the second call */ if (callforward_filter && - callforward_do_filter(&addr, &ct->tuplehash[!dir].tuple.src.u3, + callforward_do_filter(net, &addr, &ct->tuplehash[!dir].tuple.src.u3, nf_ct_l3num(ct))) { pr_debug("nf_ct_q931: Call Forwarding not tracked\n"); return 0; diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 5b3eae7d4c9..bd9d3153790 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -250,7 +250,7 @@ out: } EXPORT_SYMBOL_GPL(__nf_ct_try_assign_helper); -/* appropiate ct lock protecting must be taken by caller */ +/* appropriate ct lock protecting must be taken by caller */ static inline int unhelp(struct nf_conntrack_tuple_hash *i, const struct nf_conntrack_helper *me) { diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index d7197649dba..49a64174f3f 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -19,6 +19,9 @@ static struct nf_logger __rcu *loggers[NFPROTO_NUMPROTO][NF_LOG_TYPE_MAX] __read_mostly; static DEFINE_MUTEX(nf_log_mutex); +#define nft_log_dereference(logger) \ + rcu_dereference_protected(logger, lockdep_is_held(&nf_log_mutex)) + static struct nf_logger *__find_logger(int pf, const char *str_logger) { struct nf_logger *log; @@ -28,8 +31,7 @@ static struct nf_logger *__find_logger(int pf, const char *str_logger) if (loggers[pf][i] == NULL) continue; - log = rcu_dereference_protected(loggers[pf][i], - lockdep_is_held(&nf_log_mutex)); + log = nft_log_dereference(loggers[pf][i]); if (!strncasecmp(str_logger, log->name, strlen(log->name))) return log; } @@ -45,8 +47,7 @@ void nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger) return; mutex_lock(&nf_log_mutex); - log = rcu_dereference_protected(net->nf.nf_loggers[pf], - lockdep_is_held(&nf_log_mutex)); + log = nft_log_dereference(net->nf.nf_loggers[pf]); if (log == NULL) rcu_assign_pointer(net->nf.nf_loggers[pf], logger); @@ -61,8 +62,7 @@ void nf_log_unset(struct net *net, const struct nf_logger *logger) mutex_lock(&nf_log_mutex); for (i = 0; i < NFPROTO_NUMPROTO; i++) { - log = rcu_dereference_protected(net->nf.nf_loggers[i], - lockdep_is_held(&nf_log_mutex)); + log = nft_log_dereference(net->nf.nf_loggers[i]); if (log == logger) RCU_INIT_POINTER(net->nf.nf_loggers[i], NULL); } @@ -75,6 +75,7 @@ EXPORT_SYMBOL(nf_log_unset); int nf_log_register(u_int8_t pf, struct nf_logger *logger) { int i; + int ret = 0; if (pf >= ARRAY_SIZE(init_net.nf.nf_loggers)) return -EINVAL; @@ -82,16 +83,25 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger) mutex_lock(&nf_log_mutex); if (pf == NFPROTO_UNSPEC) { + for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) { + if (rcu_access_pointer(loggers[i][logger->type])) { + ret = -EEXIST; + goto unlock; + } + } for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) rcu_assign_pointer(loggers[i][logger->type], logger); } else { - /* register at end of list to honor first register win */ + if (rcu_access_pointer(loggers[pf][logger->type])) { + ret = -EEXIST; + goto unlock; + } rcu_assign_pointer(loggers[pf][logger->type], logger); } +unlock: mutex_unlock(&nf_log_mutex); - - return 0; + return ret; } EXPORT_SYMBOL(nf_log_register); @@ -144,8 +154,7 @@ int nf_logger_find_get(int pf, enum nf_log_type type) struct nf_logger *logger; int ret = -ENOENT; - logger = loggers[pf][type]; - if (logger == NULL) + if (rcu_access_pointer(loggers[pf][type]) == NULL) request_module("nf-logger-%u-%u", pf, type); rcu_read_lock(); @@ -297,8 +306,7 @@ static int seq_show(struct seq_file *s, void *v) int i, ret; struct net *net = seq_file_net(s); - logger = rcu_dereference_protected(net->nf.nf_loggers[*pos], - lockdep_is_held(&nf_log_mutex)); + logger = nft_log_dereference(net->nf.nf_loggers[*pos]); if (!logger) ret = seq_printf(s, "%2lld NONE (", *pos); @@ -312,8 +320,7 @@ static int seq_show(struct seq_file *s, void *v) if (loggers[*pos][i] == NULL) continue; - logger = rcu_dereference_protected(loggers[*pos][i], - lockdep_is_held(&nf_log_mutex)); + logger = nft_log_dereference(loggers[*pos][i]); ret = seq_printf(s, "%s", logger->name); if (ret < 0) return ret; @@ -385,8 +392,7 @@ static int nf_log_proc_dostring(struct ctl_table *table, int write, mutex_unlock(&nf_log_mutex); } else { mutex_lock(&nf_log_mutex); - logger = rcu_dereference_protected(net->nf.nf_loggers[tindex], - lockdep_is_held(&nf_log_mutex)); + logger = nft_log_dereference(net->nf.nf_loggers[tindex]); if (!logger) table->data = "NONE"; else diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 66e8425dbfe..129a8daa4ab 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2477,7 +2477,7 @@ static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb, const struct nfgenmsg *nfmsg = nlmsg_data(nlh); int err; - /* Verify existance before starting dump */ + /* Verify existence before starting dump */ err = nft_ctx_init_from_setattr(&ctx, skb, nlh, nla); if (err < 0) return err; @@ -3665,8 +3665,7 @@ static int nf_tables_abort(struct sk_buff *skb) break; case NFT_MSG_NEWCHAIN: if (nft_trans_chain_update(trans)) { - if (nft_trans_chain_stats(trans)) - free_percpu(nft_trans_chain_stats(trans)); + free_percpu(nft_trans_chain_stats(trans)); nft_trans_destroy(trans); } else { diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 5f1be5ba355..11d85b3813f 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -12,6 +12,9 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/skbuff.h> #include <linux/if_arp.h> @@ -337,9 +340,6 @@ nfulnl_alloc_skb(struct net *net, u32 peer_portid, unsigned int inst_size, skb = nfnetlink_alloc_skb(net, pkt_size, peer_portid, GFP_ATOMIC); - if (!skb) - pr_err("nfnetlink_log: can't even alloc %u bytes\n", - pkt_size); } } @@ -570,10 +570,8 @@ __build_packet_message(struct nfnl_log_net *log, struct nlattr *nla; int size = nla_attr_size(data_len); - if (skb_tailroom(inst->skb) < nla_total_size(data_len)) { - printk(KERN_WARNING "nfnetlink_log: no tailroom!\n"); - return -1; - } + if (skb_tailroom(inst->skb) < nla_total_size(data_len)) + goto nla_put_failure; nla = (struct nlattr *)skb_put(inst->skb, nla_total_size(data_len)); nla->nla_type = NFULA_PAYLOAD; @@ -1069,19 +1067,19 @@ static int __init nfnetlink_log_init(void) netlink_register_notifier(&nfulnl_rtnl_notifier); status = nfnetlink_subsys_register(&nfulnl_subsys); if (status < 0) { - pr_err("log: failed to create netlink socket\n"); + pr_err("failed to create netlink socket\n"); goto cleanup_netlink_notifier; } status = nf_log_register(NFPROTO_UNSPEC, &nfulnl_logger); if (status < 0) { - pr_err("log: failed to register logger\n"); + pr_err("failed to register logger\n"); goto cleanup_subsys; } status = register_pernet_subsys(&nfnl_log_net_ops); if (status < 0) { - pr_err("log: failed to register pernet ops\n"); + pr_err("failed to register pernet ops\n"); goto cleanup_logger; } return status; diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 1e7c076ca63..e99911eda91 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -165,6 +165,12 @@ void nft_meta_get_eval(const struct nft_expr *expr, goto err; dest->data[0] = out->group; break; + case NFT_META_CGROUP: + if (skb->sk == NULL) + break; + + dest->data[0] = skb->sk->sk_classid; + break; default: WARN_ON(1); goto err; @@ -240,6 +246,7 @@ int nft_meta_get_init(const struct nft_ctx *ctx, case NFT_META_CPU: case NFT_META_IIFGROUP: case NFT_META_OIFGROUP: + case NFT_META_CGROUP: break; default: return -EOPNOTSUPP; diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c new file mode 100644 index 00000000000..9e8093f2831 --- /dev/null +++ b/net/netfilter/nft_redir.c @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nft_redir.h> + +const struct nla_policy nft_redir_policy[NFTA_REDIR_MAX + 1] = { + [NFTA_REDIR_REG_PROTO_MIN] = { .type = NLA_U32 }, + [NFTA_REDIR_REG_PROTO_MAX] = { .type = NLA_U32 }, + [NFTA_REDIR_FLAGS] = { .type = NLA_U32 }, +}; +EXPORT_SYMBOL_GPL(nft_redir_policy); + +int nft_redir_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_redir *priv = nft_expr_priv(expr); + int err; + + err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); + if (err < 0) + return err; + + if (tb[NFTA_REDIR_REG_PROTO_MIN]) { + priv->sreg_proto_min = + ntohl(nla_get_be32(tb[NFTA_REDIR_REG_PROTO_MIN])); + + err = nft_validate_input_register(priv->sreg_proto_min); + if (err < 0) + return err; + + if (tb[NFTA_REDIR_REG_PROTO_MAX]) { + priv->sreg_proto_max = + ntohl(nla_get_be32(tb[NFTA_REDIR_REG_PROTO_MAX])); + + err = nft_validate_input_register(priv->sreg_proto_max); + if (err < 0) + return err; + } else { + priv->sreg_proto_max = priv->sreg_proto_min; + } + } + + if (tb[NFTA_REDIR_FLAGS]) { + priv->flags = ntohl(nla_get_be32(tb[NFTA_REDIR_FLAGS])); + if (priv->flags & ~NF_NAT_RANGE_MASK) + return -EINVAL; + } + + return 0; +} +EXPORT_SYMBOL_GPL(nft_redir_init); + +int nft_redir_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_redir *priv = nft_expr_priv(expr); + + if (priv->sreg_proto_min) { + if (nla_put_be32(skb, NFTA_REDIR_REG_PROTO_MIN, + htonl(priv->sreg_proto_min))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_REDIR_REG_PROTO_MAX, + htonl(priv->sreg_proto_max))) + goto nla_put_failure; + } + + if (priv->flags != 0 && + nla_put_be32(skb, NFTA_REDIR_FLAGS, htonl(priv->flags))) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} +EXPORT_SYMBOL_GPL(nft_redir_dump); + +int nft_redir_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nft_data **data) +{ + return nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); +} +EXPORT_SYMBOL_GPL(nft_redir_validate); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>"); diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c index ae8271652ef..3f83d38c4e5 100644 --- a/net/netfilter/xt_DSCP.c +++ b/net/netfilter/xt_DSCP.c @@ -37,7 +37,8 @@ dscp_tg(struct sk_buff *skb, const struct xt_action_param *par) if (!skb_make_writable(skb, sizeof(struct iphdr))) return NF_DROP; - ipv4_change_dsfield(ip_hdr(skb), (__u8)(~XT_DSCP_MASK), + ipv4_change_dsfield(ip_hdr(skb), + (__force __u8)(~XT_DSCP_MASK), dinfo->dscp << XT_DSCP_SHIFT); } @@ -54,7 +55,8 @@ dscp_tg6(struct sk_buff *skb, const struct xt_action_param *par) if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) return NF_DROP; - ipv6_change_dsfield(ipv6_hdr(skb), (__u8)(~XT_DSCP_MASK), + ipv6_change_dsfield(ipv6_hdr(skb), + (__force __u8)(~XT_DSCP_MASK), dinfo->dscp << XT_DSCP_SHIFT); } return XT_CONTINUE; diff --git a/net/netfilter/xt_REDIRECT.c b/net/netfilter/xt_REDIRECT.c index 22a10309297..b6ec67efd90 100644 --- a/net/netfilter/xt_REDIRECT.c +++ b/net/netfilter/xt_REDIRECT.c @@ -26,48 +26,13 @@ #include <net/checksum.h> #include <net/protocol.h> #include <net/netfilter/nf_nat.h> - -static const struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT; +#include <net/netfilter/ipv4/nf_nat_redirect.h> +#include <net/netfilter/ipv6/nf_nat_redirect.h> static unsigned int redirect_tg6(struct sk_buff *skb, const struct xt_action_param *par) { - const struct nf_nat_range *range = par->targinfo; - struct nf_nat_range newrange; - struct in6_addr newdst; - enum ip_conntrack_info ctinfo; - struct nf_conn *ct; - - ct = nf_ct_get(skb, &ctinfo); - if (par->hooknum == NF_INET_LOCAL_OUT) - newdst = loopback_addr; - else { - struct inet6_dev *idev; - struct inet6_ifaddr *ifa; - bool addr = false; - - rcu_read_lock(); - idev = __in6_dev_get(skb->dev); - if (idev != NULL) { - list_for_each_entry(ifa, &idev->addr_list, if_list) { - newdst = ifa->addr; - addr = true; - break; - } - } - rcu_read_unlock(); - - if (!addr) - return NF_DROP; - } - - newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; - newrange.min_addr.in6 = newdst; - newrange.max_addr.in6 = newdst; - newrange.min_proto = range->min_proto; - newrange.max_proto = range->max_proto; - - return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); + return nf_nat_redirect_ipv6(skb, par->targinfo, par->hooknum); } static int redirect_tg6_checkentry(const struct xt_tgchk_param *par) @@ -98,48 +63,7 @@ static int redirect_tg4_check(const struct xt_tgchk_param *par) static unsigned int redirect_tg4(struct sk_buff *skb, const struct xt_action_param *par) { - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - __be32 newdst; - const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; - struct nf_nat_range newrange; - - NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || - par->hooknum == NF_INET_LOCAL_OUT); - - ct = nf_ct_get(skb, &ctinfo); - NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); - - /* Local packets: make them go to loopback */ - if (par->hooknum == NF_INET_LOCAL_OUT) - newdst = htonl(0x7F000001); - else { - struct in_device *indev; - struct in_ifaddr *ifa; - - newdst = 0; - - rcu_read_lock(); - indev = __in_dev_get_rcu(skb->dev); - if (indev && (ifa = indev->ifa_list)) - newdst = ifa->ifa_local; - rcu_read_unlock(); - - if (!newdst) - return NF_DROP; - } - - /* Transfer from original range. */ - memset(&newrange.min_addr, 0, sizeof(newrange.min_addr)); - memset(&newrange.max_addr, 0, sizeof(newrange.max_addr)); - newrange.flags = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS; - newrange.min_addr.ip = newdst; - newrange.max_addr.ip = newdst; - newrange.min_proto = mr->range[0].min; - newrange.max_proto = mr->range[0].max; - - /* Hand modified range to generic setup. */ - return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); + return nf_nat_redirect_ipv4(skb, par->targinfo, par->hooknum); } static struct xt_target redirect_tg_reg[] __read_mostly = { diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index fbc66bb250d..29ba6218a82 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -134,6 +134,7 @@ static bool add_hlist(struct hlist_head *head, static unsigned int check_hlist(struct net *net, struct hlist_head *head, const struct nf_conntrack_tuple *tuple, + u16 zone, bool *addit) { const struct nf_conntrack_tuple_hash *found; @@ -147,8 +148,7 @@ static unsigned int check_hlist(struct net *net, /* check the saved connections */ hlist_for_each_entry_safe(conn, n, head, node) { - found = nf_conntrack_find_get(net, NF_CT_DEFAULT_ZONE, - &conn->tuple); + found = nf_conntrack_find_get(net, zone, &conn->tuple); if (found == NULL) { hlist_del(&conn->node); kmem_cache_free(connlimit_conn_cachep, conn); @@ -201,7 +201,7 @@ static unsigned int count_tree(struct net *net, struct rb_root *root, const struct nf_conntrack_tuple *tuple, const union nf_inet_addr *addr, const union nf_inet_addr *mask, - u8 family) + u8 family, u16 zone) { struct xt_connlimit_rb *gc_nodes[CONNLIMIT_GC_MAX_NODES]; struct rb_node **rbnode, *parent; @@ -229,7 +229,7 @@ count_tree(struct net *net, struct rb_root *root, } else { /* same source network -> be counted! */ unsigned int count; - count = check_hlist(net, &rbconn->hhead, tuple, &addit); + count = check_hlist(net, &rbconn->hhead, tuple, zone, &addit); tree_nodes_free(root, gc_nodes, gc_count); if (!addit) @@ -245,7 +245,7 @@ count_tree(struct net *net, struct rb_root *root, continue; /* only used for GC on hhead, retval and 'addit' ignored */ - check_hlist(net, &rbconn->hhead, tuple, &addit); + check_hlist(net, &rbconn->hhead, tuple, zone, &addit); if (hlist_empty(&rbconn->hhead)) gc_nodes[gc_count++] = rbconn; } @@ -290,7 +290,7 @@ static int count_them(struct net *net, const struct nf_conntrack_tuple *tuple, const union nf_inet_addr *addr, const union nf_inet_addr *mask, - u_int8_t family) + u_int8_t family, u16 zone) { struct rb_root *root; int count; @@ -306,7 +306,7 @@ static int count_them(struct net *net, spin_lock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]); - count = count_tree(net, root, tuple, addr, mask, family); + count = count_tree(net, root, tuple, addr, mask, family, zone); spin_unlock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]); @@ -324,13 +324,16 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) enum ip_conntrack_info ctinfo; const struct nf_conn *ct; unsigned int connections; + u16 zone = NF_CT_DEFAULT_ZONE; ct = nf_ct_get(skb, &ctinfo); - if (ct != NULL) + if (ct != NULL) { tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; - else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), - par->family, &tuple)) + zone = nf_ct_zone(ct); + } else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), + par->family, &tuple)) { goto hotdrop; + } if (par->family == NFPROTO_IPV6) { const struct ipv6hdr *iph = ipv6_hdr(skb); @@ -343,7 +346,7 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) } connections = count_them(net, info->data, tuple_ptr, &addr, - &info->mask, par->family); + &info->mask, par->family, zone); if (connections == 0) /* kmalloc failed, drop it entirely */ goto hotdrop; diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index dcf3589e3cc..556b26ad4b1 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -767,7 +767,8 @@ void tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action) tipc_bclink_unlock(); } -int __tipc_nl_add_bc_link_stat(struct sk_buff *skb, struct tipc_stats *stats) +static int __tipc_nl_add_bc_link_stat(struct sk_buff *skb, + struct tipc_stats *stats) { int i; struct nlattr *nest; diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 5f6f32369c1..463db5b15b8 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -647,7 +647,8 @@ void tipc_bearer_stop(void) } /* Caller should hold rtnl_lock to protect the bearer */ -int __tipc_nl_add_bearer(struct tipc_nl_msg *msg, struct tipc_bearer *bearer) +static int __tipc_nl_add_bearer(struct tipc_nl_msg *msg, + struct tipc_bearer *bearer) { void *hdr; struct nlattr *attrs; @@ -905,7 +906,8 @@ int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) return 0; } -int __tipc_nl_add_media(struct tipc_nl_msg *msg, struct tipc_media *media) +static int __tipc_nl_add_media(struct tipc_nl_msg *msg, + struct tipc_media *media) { void *hdr; struct nlattr *attrs; diff --git a/net/tipc/link.c b/net/tipc/link.c index 629e8cf393b..4738cb1bf7c 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2514,7 +2514,8 @@ out: return res; } -int __tipc_nl_add_stats(struct sk_buff *skb, struct tipc_stats *s) + +static int __tipc_nl_add_stats(struct sk_buff *skb, struct tipc_stats *s) { int i; struct nlattr *stats; @@ -2580,7 +2581,7 @@ msg_full: } /* Caller should hold appropriate locks to protect the link */ -int __tipc_nl_add_link(struct tipc_nl_msg *msg, struct tipc_link *link) +static int __tipc_nl_add_link(struct tipc_nl_msg *msg, struct tipc_link *link) { int err; void *hdr; @@ -2649,8 +2650,9 @@ msg_full: } /* Caller should hold node lock */ -int __tipc_nl_add_node_links(struct tipc_nl_msg *msg, struct tipc_node *node, - u32 *prev_link) +static int __tipc_nl_add_node_links(struct tipc_nl_msg *msg, + struct tipc_node *node, + u32 *prev_link) { u32 i; int err; diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 30ca8e0e0f8..7cfb7a4aa58 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -1002,8 +1002,9 @@ void tipc_nametbl_stop(void) write_unlock_bh(&tipc_nametbl_lock); } -int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg, struct name_seq *seq, - struct sub_seq *sseq, u32 *last_publ) +static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg, + struct name_seq *seq, + struct sub_seq *sseq, u32 *last_publ) { void *hdr; struct nlattr *attrs; @@ -1071,8 +1072,8 @@ msg_full: return -EMSGSIZE; } -int __tipc_nl_subseq_list(struct tipc_nl_msg *msg, struct name_seq *seq, - u32 *last_lower, u32 *last_publ) +static int __tipc_nl_subseq_list(struct tipc_nl_msg *msg, struct name_seq *seq, + u32 *last_lower, u32 *last_publ) { struct sub_seq *sseq; struct sub_seq *sseq_start; @@ -1098,8 +1099,8 @@ int __tipc_nl_subseq_list(struct tipc_nl_msg *msg, struct name_seq *seq, return 0; } -int __tipc_nl_seq_list(struct tipc_nl_msg *msg, u32 *last_type, u32 *last_lower, - u32 *last_publ) +static int __tipc_nl_seq_list(struct tipc_nl_msg *msg, u32 *last_type, + u32 *last_lower, u32 *last_publ) { struct hlist_head *seq_head; struct name_seq *seq; diff --git a/net/tipc/node.c b/net/tipc/node.c index 72a75d4838b..82e5edddc37 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -609,7 +609,7 @@ void tipc_node_unlock(struct tipc_node *node) } /* Caller should hold node lock for the passed node */ -int __tipc_nl_add_node(struct tipc_nl_msg *msg, struct tipc_node *node) +static int __tipc_nl_add_node(struct tipc_nl_msg *msg, struct tipc_node *node) { void *hdr; struct nlattr *attrs; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index b3415a78da2..341fbd1b5f7 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2810,7 +2810,7 @@ void tipc_socket_stop(void) } /* Caller should hold socket lock for the passed tipc socket. */ -int __tipc_nl_add_sk_con(struct sk_buff *skb, struct tipc_sock *tsk) +static int __tipc_nl_add_sk_con(struct sk_buff *skb, struct tipc_sock *tsk) { u32 peer_node; u32 peer_port; @@ -2845,8 +2845,8 @@ msg_full: } /* Caller should hold socket lock for the passed tipc socket. */ -int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb, - struct tipc_sock *tsk) +static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb, + struct tipc_sock *tsk) { int err; void *hdr; @@ -2911,8 +2911,9 @@ int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb) } /* Caller should hold socket lock for the passed tipc socket. */ -int __tipc_nl_add_sk_publ(struct sk_buff *skb, struct netlink_callback *cb, - struct publication *publ) +static int __tipc_nl_add_sk_publ(struct sk_buff *skb, + struct netlink_callback *cb, + struct publication *publ) { void *hdr; struct nlattr *attrs; @@ -2949,8 +2950,9 @@ msg_cancel: } /* Caller should hold socket lock for the passed tipc socket. */ -int __tipc_nl_list_sk_publ(struct sk_buff *skb, struct netlink_callback *cb, - struct tipc_sock *tsk, u32 *last_publ) +static int __tipc_nl_list_sk_publ(struct sk_buff *skb, + struct netlink_callback *cb, + struct tipc_sock *tsk, u32 *last_publ) { int err; struct publication *p; |