summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/devicetree/bindings/net/broadcom-bcmgenet.txt2
-rw-r--r--Documentation/devicetree/bindings/net/broadcom-systemport.txt2
-rw-r--r--Documentation/devicetree/bindings/net/fixed-link.txt12
-rw-r--r--Documentation/devicetree/bindings/net/fsl-tsec-phy.txt5
-rw-r--r--MAINTAINERS3
-rw-r--r--arch/powerpc/sysdev/fsl_soc.c32
-rw-r--r--drivers/atm/idt77252.c8
-rw-r--r--drivers/net/bonding/bond_main.c1
-rw-r--r--drivers/net/ethernet/broadcom/bcmsysport.c17
-rw-r--r--drivers/net/ethernet/broadcom/bcmsysport.h1
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmmii.c21
-rw-r--r--drivers/net/ethernet/cisco/enic/enic.h30
-rw-r--r--drivers/net/ethernet/cisco/enic/enic_ethtool.c61
-rw-r--r--drivers/net/ethernet/cisco/enic/enic_main.c150
-rw-r--r--drivers/net/ethernet/cisco/enic/vnic_cq.h9
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c122
-rw-r--r--drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c17
-rw-r--r--drivers/net/ethernet/freescale/gianfar.c14
-rw-r--r--drivers/net/ethernet/freescale/ucc_geth.c14
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e.h4
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_ethtool.c63
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c8
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_ptp.c1
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.c1
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c11
-rw-r--r--drivers/net/ethernet/intel/i40evf/Makefile5
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_adminq.c5
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_adminq.h5
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h27
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_alloc.h5
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_common.c5
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_hmc.h5
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_lan_hmc.h5
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_osdep.h5
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_prototype.h5
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_register.h5
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_status.h5
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_txrx.c3
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_txrx.h5
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_type.h12
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h5
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40evf.h3
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c84
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40evf_main.c40
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c3
-rw-r--r--drivers/net/ethernet/marvell/mv643xx_eth.c304
-rw-r--r--drivers/net/ethernet/marvell/mvneta.c172
-rw-r--r--drivers/net/ethernet/neterion/vxge/vxge-main.c4
-rw-r--r--drivers/net/ethernet/ti/davinci_emac.c6
-rw-r--r--drivers/net/ieee802154/fakelb.c6
-rw-r--r--drivers/net/team/team.c2
-rw-r--r--drivers/net/tun.c54
-rw-r--r--drivers/net/wimax/i2400m/driver.c7
-rw-r--r--drivers/of/of_mdio.c38
-rw-r--r--include/linux/filter.h6
-rw-r--r--include/linux/netdevice.h14
-rw-r--r--include/linux/of_mdio.h10
-rw-r--r--include/linux/tcp.h6
-rw-r--r--include/net/netfilter/nf_tables.h130
-rw-r--r--include/net/netfilter/nft_meta.h36
-rw-r--r--include/net/tcp.h11
-rw-r--r--include/net/tso.h20
-rw-r--r--include/uapi/linux/netfilter/nf_tables.h37
-rw-r--r--kernel/seccomp.c6
-rw-r--r--lib/test_bpf.c4
-rw-r--r--net/8021q/vlan_dev.c4
-rw-r--r--net/bridge/Makefile4
-rw-r--r--net/bridge/br.c98
-rw-r--r--net/bridge/br_notify.c118
-rw-r--r--net/bridge/br_private.h2
-rw-r--r--net/bridge/netfilter/Kconfig14
-rw-r--r--net/bridge/netfilter/Makefile1
-rw-r--r--net/bridge/netfilter/nft_meta_bridge.c139
-rw-r--r--net/core/Makefile2
-rw-r--r--net/core/filter.c44
-rw-r--r--net/core/tso.c72
-rw-r--r--net/dccp/timer.c2
-rw-r--r--net/ipv4/ipip.c1
-rw-r--r--net/ipv4/tcp_output.c37
-rw-r--r--net/ipv6/ip6_tunnel.c1
-rw-r--r--net/ipv6/route.c2
-rw-r--r--net/ipv6/sit.c1
-rw-r--r--net/mac802154/llsec.c6
-rw-r--r--net/netfilter/nf_tables_api.c1236
-rw-r--r--net/netfilter/nft_ct.c96
-rw-r--r--net/netfilter/nft_hash.c54
-rw-r--r--net/netfilter/nft_lookup.c10
-rw-r--r--net/netfilter/nft_meta.c103
-rw-r--r--net/netfilter/nft_rbtree.c21
89 files changed, 2680 insertions, 1107 deletions
diff --git a/Documentation/devicetree/bindings/net/broadcom-bcmgenet.txt b/Documentation/devicetree/bindings/net/broadcom-bcmgenet.txt
index f2febb94550..451fef26b4d 100644
--- a/Documentation/devicetree/bindings/net/broadcom-bcmgenet.txt
+++ b/Documentation/devicetree/bindings/net/broadcom-bcmgenet.txt
@@ -24,7 +24,7 @@ Optional properties:
- fixed-link: When the GENET interface is connected to a MoCA hardware block or
when operating in a RGMII to RGMII type of connection, or when the MDIO bus is
voluntarily disabled, this property should be used to describe the "fixed link".
- See Documentation/devicetree/bindings/net/fsl-tsec-phy.txt for information on
+ See Documentation/devicetree/bindings/net/fixed-link.txt for information on
the property specifics
Required child nodes:
diff --git a/Documentation/devicetree/bindings/net/broadcom-systemport.txt b/Documentation/devicetree/bindings/net/broadcom-systemport.txt
index 1b7600e022d..c183ea90d9b 100644
--- a/Documentation/devicetree/bindings/net/broadcom-systemport.txt
+++ b/Documentation/devicetree/bindings/net/broadcom-systemport.txt
@@ -8,7 +8,7 @@ Required properties:
- local-mac-address: Ethernet MAC address (48 bits) of this adapter
- phy-mode: Should be a string describing the PHY interface to the
Ethernet switch/PHY, see Documentation/devicetree/bindings/net/ethernet.txt
-- fixed-link: see Documentation/devicetree/bindings/net/fsl-tsec-phy.txt for
+- fixed-link: see Documentation/devicetree/bindings/net/fixed-link.txt for
the property specific details
Optional properties:
diff --git a/Documentation/devicetree/bindings/net/fixed-link.txt b/Documentation/devicetree/bindings/net/fixed-link.txt
index e956de1be93..82bf7e0f47b 100644
--- a/Documentation/devicetree/bindings/net/fixed-link.txt
+++ b/Documentation/devicetree/bindings/net/fixed-link.txt
@@ -18,6 +18,18 @@ properties:
* 'asym-pause' (boolean, optional), to indicate that asym_pause should
be enabled.
+Old, deprecated 'fixed-link' binding:
+
+* A 'fixed-link' property in the Ethernet MAC node, with 5 cells, of the
+ form <a b c d e> with the following accepted values:
+ - a: emulated PHY ID, choose any but but unique to the all specified
+ fixed-links, from 0 to 31
+ - b: duplex configuration: 0 for half duplex, 1 for full duplex
+ - c: link speed in Mbits/sec, accepted values are: 10, 100 and 1000
+ - d: pause configuration: 0 for no pause, 1 for pause
+ - e: asymmetric pause configuration: 0 for no asymmetric pause, 1 for
+ asymmetric pause
+
Example:
ethernet@0 {
diff --git a/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt b/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt
index 737cdef4f90..be6ea8960f2 100644
--- a/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt
+++ b/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt
@@ -42,10 +42,7 @@ Properties:
interrupt. For TSEC and eTSEC devices, the first interrupt is
transmit, the second is receive, and the third is error.
- phy-handle : See ethernet.txt file in the same directory.
- - fixed-link : <a b c d e> where a is emulated phy id - choose any,
- but unique to the all specified fixed-links, b is duplex - 0 half,
- 1 full, c is link speed - d#10/d#100/d#1000, d is pause - 0 no
- pause, 1 pause, e is asym_pause - 0 no asym_pause, 1 asym_pause.
+ - fixed-link : See fixed-link.txt in the same directory.
- phy-connection-type : See ethernet.txt file in the same directory.
This property is only really needed if the connection is of type
"rgmii-id", as all other connection types are detected by hardware.
diff --git a/MAINTAINERS b/MAINTAINERS
index bde15ffcccf..f5de16eb195 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2228,9 +2228,8 @@ F: drivers/platform/chrome/
CISCO VIC ETHERNET NIC DRIVER
M: Christian Benvenuti <benve@cisco.com>
M: Sujith Sankar <ssujith@cisco.com>
-M: Govindarajulu Varadarajan <govindarajulu90@gmail.com>
+M: Govindarajulu Varadarajan <_govind@gmx.com>
M: Neel Patel <neepatel@cisco.com>
-M: Nishank Trivedi <nistrive@cisco.com>
S: Supported
F: drivers/net/ethernet/cisco/enic/
diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c
index 228cf91b91c..ffd1169ebaa 100644
--- a/arch/powerpc/sysdev/fsl_soc.c
+++ b/arch/powerpc/sysdev/fsl_soc.c
@@ -25,7 +25,6 @@
#include <linux/of.h>
#include <linux/of_platform.h>
#include <linux/phy.h>
-#include <linux/phy_fixed.h>
#include <linux/spi/spi.h>
#include <linux/fsl_devices.h>
#include <linux/fs_enet_pd.h>
@@ -178,37 +177,6 @@ u32 get_baudrate(void)
EXPORT_SYMBOL(get_baudrate);
#endif /* CONFIG_CPM2 */
-#ifdef CONFIG_FIXED_PHY
-static int __init of_add_fixed_phys(void)
-{
- int ret;
- struct device_node *np;
- u32 *fixed_link;
- struct fixed_phy_status status = {};
-
- for_each_node_by_name(np, "ethernet") {
- fixed_link = (u32 *)of_get_property(np, "fixed-link", NULL);
- if (!fixed_link)
- continue;
-
- status.link = 1;
- status.duplex = fixed_link[1];
- status.speed = fixed_link[2];
- status.pause = fixed_link[3];
- status.asym_pause = fixed_link[4];
-
- ret = fixed_phy_add(PHY_POLL, fixed_link[0], &status);
- if (ret) {
- of_node_put(np);
- return ret;
- }
- }
-
- return 0;
-}
-arch_initcall(of_add_fixed_phys);
-#endif /* CONFIG_FIXED_PHY */
-
#if defined(CONFIG_FSL_SOC_BOOKE) || defined(CONFIG_PPC_86xx)
static __be32 __iomem *rstcr;
diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c
index 1bdf104e90b..b621f56a36b 100644
--- a/drivers/atm/idt77252.c
+++ b/drivers/atm/idt77252.c
@@ -2551,12 +2551,12 @@ done:
timeout = 5 * 1000;
while (atomic_read(&vc->scq->used) > 0) {
timeout = msleep_interruptible(timeout);
- if (!timeout)
+ if (!timeout) {
+ pr_warn("%s: SCQ drain timeout: %u used\n",
+ card->name, atomic_read(&vc->scq->used));
break;
+ }
}
- if (!timeout)
- printk("%s: SCQ drain timeout: %u used\n",
- card->name, atomic_read(&vc->scq->used));
writel(TCMDQ_HALT | vc->index, SAR_REG_TCMDQ);
clear_scd(card, vc->scq, vc->class);
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 71232053210..9071139d287 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1038,6 +1038,7 @@ static void bond_compute_features(struct bonding *bond)
if (!bond_has_slaves(bond))
goto done;
+ vlan_features &= NETIF_F_ALL_FOR_ALL;
bond_for_each_slave(bond, slave, iter) {
vlan_features = netdev_increment_features(vlan_features,
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index d40c5b969e9..dc708a888f8 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -1327,8 +1327,8 @@ static int bcm_sysport_open(struct net_device *dev)
/* Read CRC forward */
priv->crc_fwd = !!(umac_readl(priv, UMAC_CMD) & CMD_CRC_FWD);
- priv->phydev = of_phy_connect_fixed_link(dev, bcm_sysport_adj_link,
- priv->phy_interface);
+ priv->phydev = of_phy_connect(dev, priv->phy_dn, bcm_sysport_adj_link,
+ 0, priv->phy_interface);
if (!priv->phydev) {
netdev_err(dev, "could not attach to PHY\n");
return -ENODEV;
@@ -1551,6 +1551,19 @@ static int bcm_sysport_probe(struct platform_device *pdev)
if (priv->phy_interface < 0)
priv->phy_interface = PHY_INTERFACE_MODE_GMII;
+ /* In the case of a fixed PHY, the DT node associated
+ * to the PHY is the Ethernet MAC DT node.
+ */
+ if (of_phy_is_fixed_link(dn)) {
+ ret = of_phy_register_fixed_link(dn);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to register fixed PHY\n");
+ goto err;
+ }
+
+ priv->phy_dn = dn;
+ }
+
/* Initialize netdevice members */
macaddr = of_get_mac_address(dn);
if (!macaddr || !is_valid_ether_addr(macaddr)) {
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h
index abdeb62616d..73fd04a9479 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.h
+++ b/drivers/net/ethernet/broadcom/bcmsysport.h
@@ -656,6 +656,7 @@ struct bcm_sysport_priv {
unsigned int rx_c_index;
/* PHY device */
+ struct device_node *phy_dn;
struct phy_device *phydev;
phy_interface_t phy_interface;
int old_pause;
diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c
index 4608673beaf..add8d859608 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
@@ -298,6 +298,7 @@ int bcmgenet_mii_config(struct net_device *dev)
static int bcmgenet_mii_probe(struct net_device *dev)
{
struct bcmgenet_priv *priv = netdev_priv(dev);
+ struct device_node *dn = priv->pdev->dev.of_node;
struct phy_device *phydev;
unsigned int phy_flags;
int ret;
@@ -307,15 +308,19 @@ static int bcmgenet_mii_probe(struct net_device *dev)
return 0;
}
- if (priv->phy_dn)
- phydev = of_phy_connect(dev, priv->phy_dn,
- bcmgenet_mii_setup, 0,
- priv->phy_interface);
- else
- phydev = of_phy_connect_fixed_link(dev,
- bcmgenet_mii_setup,
- priv->phy_interface);
+ /* In the case of a fixed PHY, the DT node associated
+ * to the PHY is the Ethernet MAC DT node.
+ */
+ if (of_phy_is_fixed_link(dn)) {
+ ret = of_phy_register_fixed_link(dn);
+ if (ret)
+ return ret;
+
+ priv->phy_dn = dn;
+ }
+ phydev = of_phy_connect(dev, priv->phy_dn, bcmgenet_mii_setup, 0,
+ priv->phy_interface);
if (!phydev) {
pr_err("could not attach to PHY\n");
return -ENODEV;
diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h
index e35c8e0202a..f23ef321606 100644
--- a/drivers/net/ethernet/cisco/enic/enic.h
+++ b/drivers/net/ethernet/cisco/enic/enic.h
@@ -43,6 +43,8 @@
#define ENIC_CQ_MAX (ENIC_WQ_MAX + ENIC_RQ_MAX)
#define ENIC_INTR_MAX (ENIC_CQ_MAX + 2)
+#define ENIC_AIC_LARGE_PKT_DIFF 3
+
struct enic_msix_entry {
int requested;
char devname[IFNAMSIZ];
@@ -50,6 +52,33 @@ struct enic_msix_entry {
void *devid;
};
+/* Store only the lower range. Higher range is given by fw. */
+struct enic_intr_mod_range {
+ u32 small_pkt_range_start;
+ u32 large_pkt_range_start;
+};
+
+struct enic_intr_mod_table {
+ u32 rx_rate;
+ u32 range_percent;
+};
+
+#define ENIC_MAX_LINK_SPEEDS 3
+#define ENIC_LINK_SPEED_10G 10000
+#define ENIC_LINK_SPEED_4G 4000
+#define ENIC_LINK_40G_INDEX 2
+#define ENIC_LINK_10G_INDEX 1
+#define ENIC_LINK_4G_INDEX 0
+#define ENIC_RX_COALESCE_RANGE_END 125
+#define ENIC_AIC_TS_BREAK 100
+
+struct enic_rx_coal {
+ u32 small_pkt_range_start;
+ u32 large_pkt_range_start;
+ u32 range_end;
+ u32 use_adaptive_rx_coalesce;
+};
+
/* priv_flags */
#define ENIC_SRIOV_ENABLED (1 << 0)
@@ -92,6 +121,7 @@ struct enic {
unsigned int mc_count;
unsigned int uc_count;
u32 port_mtu;
+ struct enic_rx_coal rx_coalesce_setting;
u32 rx_coalesce_usecs;
u32 tx_coalesce_usecs;
#ifdef CONFIG_PCI_IOV
diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c
index 58a8c67638e..1882db230e1 100644
--- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c
+++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c
@@ -79,6 +79,17 @@ static const struct enic_stat enic_rx_stats[] = {
static const unsigned int enic_n_tx_stats = ARRAY_SIZE(enic_tx_stats);
static const unsigned int enic_n_rx_stats = ARRAY_SIZE(enic_rx_stats);
+void enic_intr_coal_set_rx(struct enic *enic, u32 timer)
+{
+ int i;
+ int intr;
+
+ for (i = 0; i < enic->rq_count; i++) {
+ intr = enic_msix_rq_intr(enic, i);
+ vnic_intr_coalescing_timer_set(&enic->intr[intr], timer);
+ }
+}
+
static int enic_get_settings(struct net_device *netdev,
struct ethtool_cmd *ecmd)
{
@@ -178,9 +189,14 @@ static int enic_get_coalesce(struct net_device *netdev,
struct ethtool_coalesce *ecmd)
{
struct enic *enic = netdev_priv(netdev);
+ struct enic_rx_coal *rxcoal = &enic->rx_coalesce_setting;
ecmd->tx_coalesce_usecs = enic->tx_coalesce_usecs;
ecmd->rx_coalesce_usecs = enic->rx_coalesce_usecs;
+ if (rxcoal->use_adaptive_rx_coalesce)
+ ecmd->use_adaptive_rx_coalesce = 1;
+ ecmd->rx_coalesce_usecs_low = rxcoal->small_pkt_range_start;
+ ecmd->rx_coalesce_usecs_high = rxcoal->range_end;
return 0;
}
@@ -191,17 +207,31 @@ static int enic_set_coalesce(struct net_device *netdev,
struct enic *enic = netdev_priv(netdev);
u32 tx_coalesce_usecs;
u32 rx_coalesce_usecs;
+ u32 rx_coalesce_usecs_low;
+ u32 rx_coalesce_usecs_high;
+ u32 coalesce_usecs_max;
unsigned int i, intr;
+ struct enic_rx_coal *rxcoal = &enic->rx_coalesce_setting;
+ coalesce_usecs_max = vnic_dev_get_intr_coal_timer_max(enic->vdev);
tx_coalesce_usecs = min_t(u32, ecmd->tx_coalesce_usecs,
- vnic_dev_get_intr_coal_timer_max(enic->vdev));
+ coalesce_usecs_max);
rx_coalesce_usecs = min_t(u32, ecmd->rx_coalesce_usecs,
- vnic_dev_get_intr_coal_timer_max(enic->vdev));
+ coalesce_usecs_max);
+
+ rx_coalesce_usecs_low = min_t(u32, ecmd->rx_coalesce_usecs_low,
+ coalesce_usecs_max);
+ rx_coalesce_usecs_high = min_t(u32, ecmd->rx_coalesce_usecs_high,
+ coalesce_usecs_max);
switch (vnic_dev_get_intr_mode(enic->vdev)) {
case VNIC_DEV_INTR_MODE_INTX:
if (tx_coalesce_usecs != rx_coalesce_usecs)
return -EINVAL;
+ if (ecmd->use_adaptive_rx_coalesce ||
+ ecmd->rx_coalesce_usecs_low ||
+ ecmd->rx_coalesce_usecs_high)
+ return -EOPNOTSUPP;
intr = enic_legacy_io_intr();
vnic_intr_coalescing_timer_set(&enic->intr[intr],
@@ -210,6 +240,10 @@ static int enic_set_coalesce(struct net_device *netdev,
case VNIC_DEV_INTR_MODE_MSI:
if (tx_coalesce_usecs != rx_coalesce_usecs)
return -EINVAL;
+ if (ecmd->use_adaptive_rx_coalesce ||
+ ecmd->rx_coalesce_usecs_low ||
+ ecmd->rx_coalesce_usecs_high)
+ return -EOPNOTSUPP;
vnic_intr_coalescing_timer_set(&enic->intr[0],
tx_coalesce_usecs);
@@ -221,12 +255,27 @@ static int enic_set_coalesce(struct net_device *netdev,
tx_coalesce_usecs);
}
- for (i = 0; i < enic->rq_count; i++) {
- intr = enic_msix_rq_intr(enic, i);
- vnic_intr_coalescing_timer_set(&enic->intr[intr],
- rx_coalesce_usecs);
+ if (rxcoal->use_adaptive_rx_coalesce) {
+ if (!ecmd->use_adaptive_rx_coalesce) {
+ rxcoal->use_adaptive_rx_coalesce = 0;
+ enic_intr_coal_set_rx(enic, rx_coalesce_usecs);
+ }
+ } else {
+ if (ecmd->use_adaptive_rx_coalesce)
+ rxcoal->use_adaptive_rx_coalesce = 1;
+ else
+ enic_intr_coal_set_rx(enic, rx_coalesce_usecs);
}
+ if (ecmd->rx_coalesce_usecs_high) {
+ if (rx_coalesce_usecs_high <
+ (rx_coalesce_usecs_low + ENIC_AIC_LARGE_PKT_DIFF))
+ return -EINVAL;
+ rxcoal->range_end = rx_coalesce_usecs_high;
+ rxcoal->small_pkt_range_start = rx_coalesce_usecs_low;
+ rxcoal->large_pkt_range_start = rx_coalesce_usecs_low +
+ ENIC_AIC_LARGE_PKT_DIFF;
+ }
break;
default:
break;
diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index 2945718ce80..0d8995cc92e 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -38,6 +38,7 @@
#include <linux/rtnetlink.h>
#include <linux/prefetch.h>
#include <net/ip6_checksum.h>
+#include <linux/ktime.h>
#include "cq_enet_desc.h"
#include "vnic_dev.h"
@@ -72,6 +73,35 @@ MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);
MODULE_DEVICE_TABLE(pci, enic_id_table);
+#define ENIC_LARGE_PKT_THRESHOLD 1000
+#define ENIC_MAX_COALESCE_TIMERS 10
+/* Interrupt moderation table, which will be used to decide the
+ * coalescing timer values
+ * {rx_rate in Mbps, mapping percentage of the range}
+ */
+struct enic_intr_mod_table mod_table[ENIC_MAX_COALESCE_TIMERS + 1] = {
+ {4000, 0},
+ {4400, 10},
+ {5060, 20},
+ {5230, 30},
+ {5540, 40},
+ {5820, 50},
+ {6120, 60},
+ {6435, 70},
+ {6745, 80},
+ {7000, 90},
+ {0xFFFFFFFF, 100}
+};
+
+/* This table helps the driver to pick different ranges for rx coalescing
+ * timer depending on the link speed.
+ */
+struct enic_intr_mod_range mod_range[ENIC_MAX_LINK_SPEEDS] = {
+ {0, 0}, /* 0 - 4 Gbps */
+ {0, 3}, /* 4 - 10 Gbps */
+ {3, 6}, /* 10 - 40 Gbps */
+};
+
int enic_is_dynamic(struct enic *enic)
{
return enic->pdev->device == PCI_DEVICE_ID_CISCO_VIC_ENET_DYN;
@@ -979,6 +1009,15 @@ static int enic_rq_alloc_buf(struct vnic_rq *rq)
return 0;
}
+static void enic_intr_update_pkt_size(struct vnic_rx_bytes_counter *pkt_size,
+ u32 pkt_len)
+{
+ if (ENIC_LARGE_PKT_THRESHOLD <= pkt_len)
+ pkt_size->large_pkt_bytes_cnt += pkt_len;
+ else
+ pkt_size->small_pkt_bytes_cnt += pkt_len;
+}
+
static void enic_rq_indicate_buf(struct vnic_rq *rq,
struct cq_desc *cq_desc, struct vnic_rq_buf *buf,
int skipped, void *opaque)
@@ -986,6 +1025,7 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq,
struct enic *enic = vnic_dev_priv(rq->vdev);
struct net_device *netdev = enic->netdev;
struct sk_buff *skb;
+ struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)];
u8 type, color, eop, sop, ingress_port, vlan_stripped;
u8 fcoe, fcoe_sof, fcoe_fc_crc_ok, fcoe_enc_error, fcoe_eof;
@@ -1056,6 +1096,9 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq,
napi_gro_receive(&enic->napi[q_number], skb);
else
netif_receive_skb(skb);
+ if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
+ enic_intr_update_pkt_size(&cq->pkt_size_counter,
+ bytes_written);
} else {
/* Buffer overflow
@@ -1134,6 +1177,64 @@ static int enic_poll(struct napi_struct *napi, int budget)
return rq_work_done;
}
+static void enic_set_int_moderation(struct enic *enic, struct vnic_rq *rq)
+{
+ unsigned int intr = enic_msix_rq_intr(enic, rq->index);
+ struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)];
+ u32 timer = cq->tobe_rx_coal_timeval;
+
+ if (cq->tobe_rx_coal_timeval != cq->cur_rx_coal_timeval) {
+ vnic_intr_coalescing_timer_set(&enic->intr[intr], timer);
+ cq->cur_rx_coal_timeval = cq->tobe_rx_coal_timeval;
+ }
+}
+
+static void enic_calc_int_moderation(struct enic *enic, struct vnic_rq *rq)
+{
+ struct enic_rx_coal *rx_coal = &enic->rx_coalesce_setting;
+ struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)];
+ struct vnic_rx_bytes_counter *pkt_size_counter = &cq->pkt_size_counter;
+ int index;
+ u32 timer;
+ u32 range_start;
+ u32 traffic;
+ u64 delta;
+ ktime_t now = ktime_get();
+
+ delta = ktime_us_delta(now, cq->prev_ts);
+ if (delta < ENIC_AIC_TS_BREAK)
+ return;
+ cq->prev_ts = now;
+
+ traffic = pkt_size_counter->large_pkt_bytes_cnt +
+ pkt_size_counter->small_pkt_bytes_cnt;
+ /* The table takes Mbps
+ * traffic *= 8 => bits
+ * traffic *= (10^6 / delta) => bps
+ * traffic /= 10^6 => Mbps
+ *
+ * Combining, traffic *= (8 / delta)
+ */
+
+ traffic <<= 3;
+ traffic /= delta;
+
+ for (index = 0; index < ENIC_MAX_COALESCE_TIMERS; index++)
+ if (traffic < mod_table[index].rx_rate)
+ break;
+ range_start = (pkt_size_counter->small_pkt_bytes_cnt >
+ pkt_size_counter->large_pkt_bytes_cnt << 1) ?
+ rx_coal->small_pkt_range_start :
+ rx_coal->large_pkt_range_start;
+ timer = range_start + ((rx_coal->range_end - range_start) *
+ mod_table[index].range_percent / 100);
+ /* Damping */
+ cq->tobe_rx_coal_timeval = (timer + cq->tobe_rx_coal_timeval) >> 1;
+
+ pkt_size_counter->large_pkt_bytes_cnt = 0;
+ pkt_size_counter->small_pkt_bytes_cnt = 0;
+}
+
static int enic_poll_msix(struct napi_struct *napi, int budget)
{
struct net_device *netdev = napi->dev;
@@ -1171,6 +1272,13 @@ static int enic_poll_msix(struct napi_struct *napi, int budget)
if (err)
work_done = work_to_do;
+ if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
+ /* Call the function which refreshes
+ * the intr coalescing timer value based on
+ * the traffic. This is supported only in
+ * the case of MSI-x mode
+ */
+ enic_calc_int_moderation(enic, &enic->rq[rq]);
if (work_done < work_to_do) {
@@ -1179,6 +1287,8 @@ static int enic_poll_msix(struct napi_struct *napi, int budget)
*/
napi_complete(napi);
+ if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
+ enic_set_int_moderation(enic, &enic->rq[rq]);
vnic_intr_unmask(&enic->intr[intr]);
}
@@ -1314,6 +1424,42 @@ static void enic_synchronize_irqs(struct enic *enic)
}
}
+static void enic_set_rx_coal_setting(struct enic *enic)
+{
+ unsigned int speed;
+ int index = -1;
+ struct enic_rx_coal *rx_coal = &enic->rx_coalesce_setting;
+
+ /* If intr mode is not MSIX, do not do adaptive coalescing */
+ if (VNIC_DEV_INTR_MODE_MSIX != vnic_dev_get_intr_mode(enic->vdev)) {
+ netdev_info(enic->netdev, "INTR mode is not MSIX, Not initializing adaptive coalescing");
+ return;
+ }
+
+ /* 1. Read the link speed from fw
+ * 2. Pick the default range for the speed
+ * 3. Update it in enic->rx_coalesce_setting
+ */
+ speed = vnic_dev_port_speed(enic->vdev);
+ if (ENIC_LINK_SPEED_10G < speed)
+ index = ENIC_LINK_40G_INDEX;
+ else if (ENIC_LINK_SPEED_4G < speed)
+ index = ENIC_LINK_10G_INDEX;
+ else
+ index = ENIC_LINK_4G_INDEX;
+
+ rx_coal->small_pkt_range_start = mod_range[index].small_pkt_range_start;
+ rx_coal->large_pkt_range_start = mod_range[index].large_pkt_range_start;
+ rx_coal->range_end = ENIC_RX_COALESCE_RANGE_END;
+
+ /* Start with the value provided by UCSM */
+ for (index = 0; index < enic->rq_count; index++)
+ enic->cq[index].cur_rx_coal_timeval =
+ enic->config.intr_timer_usec;
+
+ rx_coal->use_adaptive_rx_coalesce = 1;
+}
+
static int enic_dev_notify_set(struct enic *enic)
{
int err;
@@ -2231,6 +2377,7 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
enic->notify_timer.function = enic_notify_timer;
enic->notify_timer.data = (unsigned long)enic;
+ enic_set_rx_coal_setting(enic);
INIT_WORK(&enic->reset, enic_reset);
INIT_WORK(&enic->change_mtu_work, enic_change_mtu_work);
@@ -2250,6 +2397,9 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
}
enic->tx_coalesce_usecs = enic->config.intr_timer_usec;
+ /* rx coalesce time already got initialized. This gets used
+ * if adaptive coal is turned off
+ */
enic->rx_coalesce_usecs = enic->tx_coalesce_usecs;
if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic))
diff --git a/drivers/net/ethernet/cisco/enic/vnic_cq.h b/drivers/net/ethernet/cisco/enic/vnic_cq.h
index 579315cbe80..4e6aa65857f 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_cq.h
+++ b/drivers/net/ethernet/cisco/enic/vnic_cq.h
@@ -50,6 +50,11 @@ struct vnic_cq_ctrl {
u32 pad10;
};
+struct vnic_rx_bytes_counter {
+ unsigned int small_pkt_bytes_cnt;
+ unsigned int large_pkt_bytes_cnt;
+};
+
struct vnic_cq {
unsigned int index;
struct vnic_dev *vdev;
@@ -58,6 +63,10 @@ struct vnic_cq {
unsigned int to_clean;
unsigned int last_color;
unsigned int interrupt_offset;
+ struct vnic_rx_bytes_counter pkt_size_counter;
+ unsigned int cur_rx_coal_timeval;
+ unsigned int tobe_rx_coal_timeval;
+ ktime_t prev_ts;
};
static inline unsigned int vnic_cq_service(struct vnic_cq *cq,
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 8d69e439f0c..cb5c987bee3 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1255,6 +1255,49 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
return 0;
}
+static int fec_enet_clk_enable(struct net_device *ndev, bool enable)
+{
+ struct fec_enet_private *fep = netdev_priv(ndev);
+ int ret;
+
+ if (enable) {
+ ret = clk_prepare_enable(fep->clk_ahb);
+ if (ret)
+ return ret;
+ ret = clk_prepare_enable(fep->clk_ipg);
+ if (ret)
+ goto failed_clk_ipg;
+ if (fep->clk_enet_out) {
+ ret = clk_prepare_enable(fep->clk_enet_out);
+ if (ret)
+ goto failed_clk_enet_out;
+ }
+ if (fep->clk_ptp) {
+ ret = clk_prepare_enable(fep->clk_ptp);
+ if (ret)
+ goto failed_clk_ptp;
+ }
+ } else {
+ clk_disable_unprepare(fep->clk_ahb);
+ clk_disable_unprepare(fep->clk_ipg);
+ if (fep->clk_enet_out)
+ clk_disable_unprepare(fep->clk_enet_out);
+ if (fep->clk_ptp)
+ clk_disable_unprepare(fep->clk_ptp);
+ }
+
+ return 0;
+failed_clk_ptp:
+ if (fep->clk_enet_out)
+ clk_disable_unprepare(fep->clk_enet_out);
+failed_clk_enet_out:
+ clk_disable_unprepare(fep->clk_ipg);
+failed_clk_ipg:
+ clk_disable_unprepare(fep->clk_ahb);
+
+ return ret;
+}
+
static int fec_enet_mii_probe(struct net_device *ndev)
{
struct fec_enet_private *fep = netdev_priv(ndev);
@@ -1364,7 +1407,7 @@ static int fec_enet_mii_init(struct platform_device *pdev)
* Reference Manual has an error on this, and gets fixed on i.MX6Q
* document.
*/
- fep->phy_speed = DIV_ROUND_UP(clk_get_rate(fep->clk_ahb), 5000000);
+ fep->phy_speed = DIV_ROUND_UP(clk_get_rate(fep->clk_ipg), 5000000);
if (id_entry->driver_data & FEC_QUIRK_ENET_MAC)
fep->phy_speed--;
fep->phy_speed <<= 1;
@@ -1773,6 +1816,10 @@ fec_enet_open(struct net_device *ndev)
struct fec_enet_private *fep = netdev_priv(ndev);
int ret;
+ ret = fec_enet_clk_enable(ndev, true);
+ if (ret)
+ return ret;
+
/* I should reset the ring buffers here, but I don't yet know
* a simple way to do that.
*/
@@ -1811,6 +1858,7 @@ fec_enet_close(struct net_device *ndev)
phy_disconnect(fep->phy_dev);
}
+ fec_enet_clk_enable(ndev, false);
fec_enet_free_buffers(ndev);
return 0;
@@ -2164,26 +2212,10 @@ fec_probe(struct platform_device *pdev)
fep->bufdesc_ex = 0;
}
- ret = clk_prepare_enable(fep->clk_ahb);
+ ret = fec_enet_clk_enable(ndev, true);
if (ret)
goto failed_clk;
- ret = clk_prepare_enable(fep->clk_ipg);
- if (ret)
- goto failed_clk_ipg;
-
- if (fep->clk_enet_out) {
- ret = clk_prepare_enable(fep->clk_enet_out);
- if (ret)
- goto failed_clk_enet_out;
- }
-
- if (fep->clk_ptp) {
- ret = clk_prepare_enable(fep->clk_ptp);
- if (ret)
- goto failed_clk_ptp;
- }
-
fep->reg_phy = devm_regulator_get(&pdev->dev, "phy");
if (!IS_ERR(fep->reg_phy)) {
ret = regulator_enable(fep->reg_phy);
@@ -2225,6 +2257,7 @@ fec_probe(struct platform_device *pdev)
/* Carrier starts down, phylib will bring it up */
netif_carrier_off(ndev);
+ fec_enet_clk_enable(ndev, false);
ret = register_netdev(ndev);
if (ret)
@@ -2244,15 +2277,7 @@ failed_init:
if (fep->reg_phy)
regulator_disable(fep->reg_phy);
failed_regulator:
- if (fep->clk_ptp)
- clk_disable_unprepare(fep->clk_ptp);
-failed_clk_ptp:
- if (fep->clk_enet_out)
- clk_disable_unprepare(fep->clk_enet_out);
-failed_clk_enet_out:
- clk_disable_unprepare(fep->clk_ipg);
-failed_clk_ipg:
- clk_disable_unprepare(fep->clk_ahb);
+ fec_enet_clk_enable(ndev, false);
failed_clk:
failed_ioremap:
free_netdev(ndev);
@@ -2272,14 +2297,9 @@ fec_drv_remove(struct platform_device *pdev)
del_timer_sync(&fep->time_keep);
if (fep->reg_phy)
regulator_disable(fep->reg_phy);
- if (fep->clk_ptp)
- clk_disable_unprepare(fep->clk_ptp);
if (fep->ptp_clock)
ptp_clock_unregister(fep->ptp_clock);
- if (fep->clk_enet_out)
- clk_disable_unprepare(fep->clk_enet_out);
- clk_disable_unprepare(fep->clk_ipg);
- clk_disable_unprepare(fep->clk_ahb);
+ fec_enet_clk_enable(ndev, false);
free_netdev(ndev);
return 0;
@@ -2296,12 +2316,7 @@ fec_suspend(struct device *dev)
fec_stop(ndev);
netif_device_detach(ndev);
}
- if (fep->clk_ptp)
- clk_disable_unprepare(fep->clk_ptp);
- if (fep->clk_enet_out)
- clk_disable_unprepare(fep->clk_enet_out);
- clk_disable_unprepare(fep->clk_ipg);
- clk_disable_unprepare(fep->clk_ahb);
+ fec_enet_clk_enable(ndev, false);
if (fep->reg_phy)
regulator_disable(fep->reg_phy);
@@ -2322,25 +2337,9 @@ fec_resume(struct device *dev)
return ret;
}
- ret = clk_prepare_enable(fep->clk_ahb);
- if (ret)
- goto failed_clk_ahb;
-
- ret = clk_prepare_enable(fep->clk_ipg);
+ ret = fec_enet_clk_enable(ndev, true);
if (ret)
- goto failed_clk_ipg;
-
- if (fep->clk_enet_out) {
- ret = clk_prepare_enable(fep->clk_enet_out);
- if (ret)
- goto failed_clk_enet_out;
- }
-
- if (fep->clk_ptp) {
- ret = clk_prepare_enable(fep->clk_ptp);
- if (ret)
- goto failed_clk_ptp;
- }
+ goto failed_clk;
if (netif_running(ndev)) {
fec_restart(ndev, fep->full_duplex);
@@ -2349,14 +2348,7 @@ fec_resume(struct device *dev)
return 0;
-failed_clk_ptp:
- if (fep->clk_enet_out)
- clk_disable_unprepare(fep->clk_enet_out);
-failed_clk_enet_out:
- clk_disable_unprepare(fep->clk_ipg);
-failed_clk_ipg:
- clk_disable_unprepare(fep->clk_ahb);
-failed_clk_ahb:
+failed_clk:
if (fep->reg_phy)
regulator_disable(fep->reg_phy);
return ret;
diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
index dc80db41d6b..cfaf17b70f3 100644
--- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
+++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
@@ -792,10 +792,6 @@ static int fs_init_phy(struct net_device *dev)
phydev = of_phy_connect(dev, fep->fpi->phy_node, &fs_adjust_link, 0,
iface);
if (!phydev) {
- phydev = of_phy_connect_fixed_link(dev, &fs_adjust_link,
- iface);
- }
- if (!phydev) {
dev_err(&dev->dev, "Could not attach to PHY\n");
return -ENODEV;
}
@@ -1029,9 +1025,16 @@ static int fs_enet_probe(struct platform_device *ofdev)
fpi->use_napi = 1;
fpi->napi_weight = 17;
fpi->phy_node = of_parse_phandle(ofdev->dev.of_node, "phy-handle", 0);
- if ((!fpi->phy_node) && (!of_get_property(ofdev->dev.of_node, "fixed-link",
- NULL)))
- goto out_free_fpi;
+ if (!fpi->phy_node && of_phy_is_fixed_link(ofdev->dev.of_node)) {
+ err = of_phy_register_fixed_link(ofdev->dev.of_node);
+ if (err)
+ goto out_free_fpi;
+
+ /* In the case of a fixed PHY, the DT node associated
+ * to the PHY is the Ethernet MAC DT node.
+ */
+ fpi->phy_node = ofdev->dev.of_node;
+ }
if (of_device_is_compatible(ofdev->dev.of_node, "fsl,mpc5125-fec")) {
phy_connection_type = of_get_property(ofdev->dev.of_node,
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index e2d42475b00..282674027c9 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -889,6 +889,17 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev)
priv->phy_node = of_parse_phandle(np, "phy-handle", 0);
+ /* In the case of a fixed PHY, the DT node associated
+ * to the PHY is the Ethernet MAC DT node.
+ */
+ if (of_phy_is_fixed_link(np)) {
+ err = of_phy_register_fixed_link(np);
+ if (err)
+ goto err_grp_init;
+
+ priv->phy_node = np;
+ }
+
/* Find the TBI PHY. If it's not there, we don't support SGMII */
priv->tbi_node = of_parse_phandle(np, "tbi-handle", 0);
@@ -1660,9 +1671,6 @@ static int init_phy(struct net_device *dev)
priv->phydev = of_phy_connect(dev, priv->phy_node, &adjust_link, 0,
interface);
- if (!priv->phydev)
- priv->phydev = of_phy_connect_fixed_link(dev, &adjust_link,
- interface);
if (!priv->phydev) {
dev_err(&dev->dev, "could not attach to PHY\n");
return -ENODEV;
diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
index c8299c31b21..fab39e29544 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -1728,9 +1728,6 @@ static int init_phy(struct net_device *dev)
phydev = of_phy_connect(dev, ug_info->phy_node, &adjust_link, 0,
priv->phy_interface);
- if (!phydev)
- phydev = of_phy_connect_fixed_link(dev, &adjust_link,
- priv->phy_interface);
if (!phydev) {
dev_err(&dev->dev, "Could not attach to PHY\n");
return -ENODEV;
@@ -3790,6 +3787,17 @@ static int ucc_geth_probe(struct platform_device* ofdev)
ug_info->uf_info.irq = irq_of_parse_and_map(np, 0);
ug_info->phy_node = of_parse_phandle(np, "phy-handle", 0);
+ if (!ug_info->phy_node) {
+ /* In the case of a fixed PHY, the DT node associated
+ * to the PHY is the Ethernet MAC DT node.
+ */
+ if (of_phy_is_fixed_link(np)) {
+ err = of_phy_register_fixed_link(np);
+ if (err)
+ return err;
+ }
+ ug_info->phy_node = np;
+ }
/* Find the TBI PHY node. If it's not there, we don't support SGMII */
ug_info->tbi_node = of_parse_phandle(np, "tbi-handle", 0);
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index a46571c1e9f..ef5bb11557e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -97,10 +97,6 @@
#define STRINGIFY(foo) #foo
#define XSTRINGIFY(bar) STRINGIFY(bar)
-#ifndef ARCH_HAS_PREFETCH
-#define prefetch(X)
-#endif
-
#define I40E_RX_DESC(R, i) \
((ring_is_16byte_desc_enabled(R)) \
? (union i40e_32byte_rx_desc *) \
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index f62929419a0..861e1db47a7 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -112,7 +112,6 @@ static struct i40e_stats i40e_gstrings_stats[] = {
I40E_PF_STAT("rx_oversize", stats.rx_oversize),
I40E_PF_STAT("rx_jabber", stats.rx_jabber),
I40E_PF_STAT("VF_admin_queue_requests", vf_aq_requests),
- I40E_PF_STAT("tx_hwtstamp_timeouts", tx_hwtstamp_timeouts),
I40E_PF_STAT("rx_hwtstamp_cleared", rx_hwtstamp_cleared),
/* LPI stats */
I40E_PF_STAT("tx_lpi_status", stats.tx_lpi_status),
@@ -122,8 +121,9 @@ static struct i40e_stats i40e_gstrings_stats[] = {
};
#define I40E_QUEUE_STATS_LEN(n) \
- ((((struct i40e_netdev_priv *)netdev_priv((n)))->vsi->num_queue_pairs + \
- ((struct i40e_netdev_priv *)netdev_priv((n)))->vsi->num_queue_pairs) * 2)
+ (((struct i40e_netdev_priv *)netdev_priv((n)))->vsi->num_queue_pairs \
+ * 2 /* Tx and Rx together */ \
+ * (sizeof(struct i40e_queue_stats) / sizeof(u64)))
#define I40E_GLOBAL_STATS_LEN ARRAY_SIZE(i40e_gstrings_stats)
#define I40E_NETDEV_STATS_LEN ARRAY_SIZE(i40e_gstrings_net_stats)
#define I40E_VSI_STATS_LEN(n) (I40E_NETDEV_STATS_LEN + \
@@ -1009,14 +1009,13 @@ static int i40e_get_coalesce(struct net_device *netdev,
ec->rx_max_coalesced_frames_irq = vsi->work_limit;
if (ITR_IS_DYNAMIC(vsi->rx_itr_setting))
- ec->rx_coalesce_usecs = 1;
- else
- ec->rx_coalesce_usecs = vsi->rx_itr_setting;
+ ec->use_adaptive_rx_coalesce = 1;
if (ITR_IS_DYNAMIC(vsi->tx_itr_setting))
- ec->tx_coalesce_usecs = 1;
- else
- ec->tx_coalesce_usecs = vsi->tx_itr_setting;
+ ec->use_adaptive_tx_coalesce = 1;
+
+ ec->rx_coalesce_usecs = vsi->rx_itr_setting & ~I40E_ITR_DYNAMIC;
+ ec->tx_coalesce_usecs = vsi->tx_itr_setting & ~I40E_ITR_DYNAMIC;
return 0;
}
@@ -1035,37 +1034,27 @@ static int i40e_set_coalesce(struct net_device *netdev,
if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq)
vsi->work_limit = ec->tx_max_coalesced_frames_irq;
- switch (ec->rx_coalesce_usecs) {
- case 0:
- vsi->rx_itr_setting = 0;
- break;
- case 1:
- vsi->rx_itr_setting = (I40E_ITR_DYNAMIC |
- ITR_REG_TO_USEC(I40E_ITR_RX_DEF));
- break;
- default:
- if ((ec->rx_coalesce_usecs < (I40E_MIN_ITR << 1)) ||
- (ec->rx_coalesce_usecs > (I40E_MAX_ITR << 1)))
- return -EINVAL;
+ if ((ec->rx_coalesce_usecs >= (I40E_MIN_ITR << 1)) &&
+ (ec->rx_coalesce_usecs <= (I40E_MAX_ITR << 1)))
vsi->rx_itr_setting = ec->rx_coalesce_usecs;
- break;
- }
+ else
+ return -EINVAL;
- switch (ec->tx_coalesce_usecs) {
- case 0:
- vsi->tx_itr_setting = 0;
- break;
- case 1:
- vsi->tx_itr_setting = (I40E_ITR_DYNAMIC |
- ITR_REG_TO_USEC(I40E_ITR_TX_DEF));
- break;
- default:
- if ((ec->tx_coalesce_usecs < (I40E_MIN_ITR << 1)) ||
- (ec->tx_coalesce_usecs > (I40E_MAX_ITR << 1)))
- return -EINVAL;
+ if ((ec->tx_coalesce_usecs >= (I40E_MIN_ITR << 1)) &&
+ (ec->tx_coalesce_usecs <= (I40E_MAX_ITR << 1)))
vsi->tx_itr_setting = ec->tx_coalesce_usecs;
- break;
- }
+ else
+ return -EINVAL;
+
+ if (ec->use_adaptive_rx_coalesce)
+ vsi->rx_itr_setting |= I40E_ITR_DYNAMIC;
+ else
+ vsi->rx_itr_setting &= ~I40E_ITR_DYNAMIC;
+
+ if (ec->use_adaptive_tx_coalesce)
+ vsi->tx_itr_setting |= I40E_ITR_DYNAMIC;
+ else
+ vsi->tx_itr_setting &= ~I40E_ITR_DYNAMIC;
vector = vsi->base_vector;
for (i = 0; i < vsi->num_q_vectors; i++, vector++) {
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index e399f9b7077..e0e5c6a867b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -4333,7 +4333,7 @@ int i40e_vsi_open(struct i40e_vsi *vsi)
dev_driver_string(&pf->pdev->dev));
err = i40e_vsi_request_irq(vsi, int_name);
} else {
- err = EINVAL;
+ err = -EINVAL;
goto err_setup_rx;
}
@@ -6368,6 +6368,10 @@ static int i40e_sw_init(struct i40e_pf *pf)
I40E_FLAG_MSIX_ENABLED |
I40E_FLAG_RX_1BUF_ENABLED;
+ /* Set default ITR */
+ pf->rx_itr_default = I40E_ITR_DYNAMIC | I40E_ITR_RX_DEF;
+ pf->tx_itr_default = I40E_ITR_DYNAMIC | I40E_ITR_TX_DEF;
+
/* Depending on PF configurations, it is possible that the RSS
* maximum might end up larger than the available queues
*/
@@ -8364,6 +8368,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
}
}
+#ifdef CONFIG_PCI_IOV
/* prep for VF support */
if ((pf->flags & I40E_FLAG_SRIOV_ENABLED) &&
(pf->flags & I40E_FLAG_MSIX_ENABLED) &&
@@ -8386,6 +8391,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
err);
}
}
+#endif /* CONFIG_PCI_IOV */
pfs_found++;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
index 1fedc7a1589..101f439acda 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
@@ -48,7 +48,6 @@
I40E_PRTTSYN_CTL1_TSYNTYPE_SHIFT)
#define I40E_PRTTSYN_CTL1_TSYNTYPE_V2 (0x2 << \
I40E_PRTTSYN_CTL1_TSYNTYPE_SHIFT)
-#define I40E_PTP_TX_TIMEOUT (HZ * 15)
/**
* i40e_ptp_read - Read the PHC time from the device
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index ece7ae99b03..8d0ef445fa4 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -24,6 +24,7 @@
*
******************************************************************************/
+#include <linux/prefetch.h>
#include "i40e.h"
#include "i40e_prototype.h"
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 82e7abf6430..4d219566a04 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -842,6 +842,10 @@ void i40e_free_vfs(struct i40e_pf *pf)
kfree(pf->vf);
pf->vf = NULL;
+ /* This check is for when the driver is unloaded while VFs are
+ * assigned. Setting the number of VFs to 0 through sysfs is caught
+ * before this function ever gets called.
+ */
if (!i40e_vfs_are_assigned(pf)) {
pci_disable_sriov(pf->pdev);
/* Acknowledge VFLR for all VFS. Without this, VFs will fail to
@@ -978,7 +982,12 @@ int i40e_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
if (num_vfs)
return i40e_pci_sriov_enable(pdev, num_vfs);
- i40e_free_vfs(pf);
+ if (!i40e_vfs_are_assigned(pf)) {
+ i40e_free_vfs(pf);
+ } else {
+ dev_warn(&pdev->dev, "Unable to free VFs because some are assigned to VMs.\n");
+ return -EINVAL;
+ }
return 0;
}
diff --git a/drivers/net/ethernet/intel/i40evf/Makefile b/drivers/net/ethernet/intel/i40evf/Makefile
index e09be37a07a..3a423836a56 100644
--- a/drivers/net/ethernet/intel/i40evf/Makefile
+++ b/drivers/net/ethernet/intel/i40evf/Makefile
@@ -1,7 +1,7 @@
################################################################################
#
# Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
-# Copyright(c) 2013 Intel Corporation.
+# Copyright(c) 2013 - 2014 Intel Corporation.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms and conditions of the GNU General Public License,
@@ -12,6 +12,9 @@
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
+# You should have received a copy of the GNU General Public License along
+# with this program. If not, see <http://www.gnu.org/licenses/>.
+#
# The full GNU General Public License is included in this distribution in
# the file called "COPYING".
#
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c b/drivers/net/ethernet/intel/i40evf/i40e_adminq.c
index c79df257f83..68b4aacd43f 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq.c
@@ -1,7 +1,7 @@
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2013 - 2014 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -12,6 +12,9 @@
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
+ * You should have received a copy of the GNU General Public License along
+ * with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
* The full GNU General Public License is included in this distribution in
* the file called "COPYING".
*
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq.h
index 7d24be52860..e3472c62e15 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_adminq.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq.h
@@ -1,7 +1,7 @@
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2013 - 2014 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -12,6 +12,9 @@
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
+ * You should have received a copy of the GNU General Public License along
+ * with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
* The full GNU General Public License is included in this distribution in
* the file called "COPYING".
*
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
index 6e617669c32..89d9209ff2b 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
@@ -1,7 +1,7 @@
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2013 - 2014 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -12,6 +12,9 @@
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
+ * You should have received a copy of the GNU General Public License along
+ * with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
* The full GNU General Public License is included in this distribution in
* the file called "COPYING".
*
@@ -676,7 +679,6 @@ struct i40e_aqc_add_get_update_vsi {
#define I40E_AQ_VSI_TYPE_PF 0x2
#define I40E_AQ_VSI_TYPE_EMP_MNG 0x3
#define I40E_AQ_VSI_FLAG_CASCADED_PV 0x4
-#define I40E_AQ_VSI_FLAG_CLOUD_VSI 0x8
__le32 addr_high;
__le32 addr_low;
};
@@ -1038,7 +1040,9 @@ struct i40e_aqc_set_vsi_promiscuous_modes {
#define I40E_AQC_SET_VSI_PROMISC_VLAN 0x10
__le16 seid;
#define I40E_AQC_VSI_PROM_CMD_SEID_MASK 0x3FF
- u8 reserved[10];
+ __le16 vlan_tag;
+#define I40E_AQC_SET_VSI_VLAN_VALID 0x8000
+ u8 reserved[8];
};
I40E_CHECK_CMD_LENGTH(i40e_aqc_set_vsi_promiscuous_modes);
@@ -1931,19 +1935,12 @@ I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_start);
/* Add Udp Tunnel command and completion (direct 0x0B00) */
struct i40e_aqc_add_udp_tunnel {
__le16 udp_port;
- u8 header_len; /* in DWords, 1 to 15 */
+ u8 reserved0[3];
u8 protocol_type;
-#define I40E_AQC_TUNNEL_TYPE_TEREDO 0x0
-#define I40E_AQC_TUNNEL_TYPE_VXLAN 0x2
-#define I40E_AQC_TUNNEL_TYPE_NGE 0x3
- u8 variable_udp_length;
-#define I40E_AQC_TUNNEL_FIXED_UDP_LENGTH 0x0
-#define I40E_AQC_TUNNEL_VARIABLE_UDP_LENGTH 0x1
- u8 udp_key_index;
-#define I40E_AQC_TUNNEL_KEY_INDEX_VXLAN 0x0
-#define I40E_AQC_TUNNEL_KEY_INDEX_NGE 0x1
-#define I40E_AQC_TUNNEL_KEY_INDEX_PROPRIETARY_UDP 0x2
- u8 reserved[10];
+#define I40E_AQC_TUNNEL_TYPE_VXLAN 0x00
+#define I40E_AQC_TUNNEL_TYPE_NGE 0x01
+#define I40E_AQC_TUNNEL_TYPE_TEREDO 0x10
+ u8 reserved1[10];
};
I40E_CHECK_CMD_LENGTH(i40e_aqc_add_udp_tunnel);
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_alloc.h b/drivers/net/ethernet/intel/i40evf/i40e_alloc.h
index d8654fb9e52..8e6a6dd9212 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_alloc.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_alloc.h
@@ -1,7 +1,7 @@
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2013 - 2014 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -12,6 +12,9 @@
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
+ * You should have received a copy of the GNU General Public License along
+ * with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
* The full GNU General Public License is included in this distribution in
* the file called "COPYING".
*
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_common.c b/drivers/net/ethernet/intel/i40evf/i40e_common.c
index ae084378faa..ac660963b8b 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_common.c
@@ -1,7 +1,7 @@
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2013 - 2014 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -12,6 +12,9 @@
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
+ * You should have received a copy of the GNU General Public License along
+ * with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
* The full GNU General Public License is included in this distribution in
* the file called "COPYING".
*
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_hmc.h b/drivers/net/ethernet/intel/i40evf/i40e_hmc.h
index cb97b3eed44..9d906514fc3 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_hmc.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_hmc.h
@@ -1,7 +1,7 @@
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2013 - 2014 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -12,6 +12,9 @@
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
+ * You should have received a copy of the GNU General Public License along
+ * with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
* The full GNU General Public License is included in this distribution in
* the file called "COPYING".
*
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_lan_hmc.h b/drivers/net/ethernet/intel/i40evf/i40e_lan_hmc.h
index 775fcb2463d..d6f76224153 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_lan_hmc.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_lan_hmc.h
@@ -1,7 +1,7 @@
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2013 - 2014 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -12,6 +12,9 @@
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
+ * You should have received a copy of the GNU General Public License along
+ * with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
* The full GNU General Public License is included in this distribution in
* the file called "COPYING".
*
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_osdep.h b/drivers/net/ethernet/intel/i40evf/i40e_osdep.h
index 622f373b745..21a91b14bf8 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_osdep.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_osdep.h
@@ -1,7 +1,7 @@
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2013 - 2014 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -12,6 +12,9 @@
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
+ * You should have received a copy of the GNU General Public License along
+ * with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
* The full GNU General Public License is included in this distribution in
* the file called "COPYING".
*
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_prototype.h b/drivers/net/ethernet/intel/i40evf/i40e_prototype.h
index 97ab8c2b76f..849edcc2e39 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_prototype.h
@@ -1,7 +1,7 @@
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2013 - 2014 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -12,6 +12,9 @@
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
+ * You should have received a copy of the GNU General Public License along
+ * with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
* The full GNU General Public License is included in this distribution in
* the file called "COPYING".
*
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_register.h b/drivers/net/ethernet/intel/i40evf/i40e_register.h
index 30af953cf10..aa4a92e3b12 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_register.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_register.h
@@ -1,7 +1,7 @@
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2013 - 2014 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -12,6 +12,9 @@
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
+ * You should have received a copy of the GNU General Public License along
+ * with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
* The full GNU General Public License is included in this distribution in
* the file called "COPYING".
*
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_status.h b/drivers/net/ethernet/intel/i40evf/i40e_status.h
index 7c08cc2e339..7fa7a41915c 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_status.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_status.h
@@ -1,7 +1,7 @@
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2013 - 2014 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -12,6 +12,9 @@
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
+ * You should have received a copy of the GNU General Public License along
+ * with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
* The full GNU General Public License is included in this distribution in
* the file called "COPYING".
*
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index b9f50f40abe..82d6844245b 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -12,6 +12,9 @@
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
+ * You should have received a copy of the GNU General Public License along
+ * with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
* The full GNU General Public License is included in this distribution in
* the file called "COPYING".
*
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
index 10bf49e18d7..d0119d0a9fc 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
@@ -1,7 +1,7 @@
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2013 - 2014 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -12,6 +12,9 @@
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
+ * You should have received a copy of the GNU General Public License along
+ * with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
* The full GNU General Public License is included in this distribution in
* the file called "COPYING".
*
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_type.h b/drivers/net/ethernet/intel/i40evf/i40e_type.h
index 51a6dee3c7b..fb5371ad1cb 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_type.h
@@ -12,6 +12,9 @@
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
+ * You should have received a copy of the GNU General Public License along
+ * with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
* The full GNU General Public License is included in this distribution in
* the file called "COPYING".
*
@@ -101,15 +104,6 @@ enum i40e_debug_mask {
I40E_DEBUG_ALL = 0xFFFFFFFF
};
-/* PCI Bus Info */
-#define I40E_PCI_LINK_WIDTH_1 0x10
-#define I40E_PCI_LINK_WIDTH_2 0x20
-#define I40E_PCI_LINK_WIDTH_4 0x40
-#define I40E_PCI_LINK_WIDTH_8 0x80
-#define I40E_PCI_LINK_SPEED_2500 0x1
-#define I40E_PCI_LINK_SPEED_5000 0x2
-#define I40E_PCI_LINK_SPEED_8000 0x3
-
/* These are structs for managing the hardware information and the operations.
* The structures of function pointers are filled out at init time when we
* know for sure exactly which hardware we're working with. This gives us the
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h b/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h
index ccf45d04b7e..1ef5b31ece9 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h
@@ -1,7 +1,7 @@
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2013 - 2014 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -12,6 +12,9 @@
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
+ * You should have received a copy of the GNU General Public License along
+ * with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
* The full GNU General Public License is included in this distribution in
* the file called "COPYING".
*
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h
index 807807d6238..2913bc3332a 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf.h
+++ b/drivers/net/ethernet/intel/i40evf/i40evf.h
@@ -12,6 +12,9 @@
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
+ * You should have received a copy of the GNU General Public License along
+ * with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
* The full GNU General Public License is included in this distribution in
* the file called "COPYING".
*
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
index 77e786d2d0e..df4dcfd364d 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
@@ -12,6 +12,9 @@
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
+ * You should have received a copy of the GNU General Public License along
+ * with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
* The full GNU General Public License is included in this distribution in
* the file called "COPYING".
*
@@ -56,10 +59,12 @@ static const struct i40evf_stats i40evf_gstrings_stats[] = {
};
#define I40EVF_GLOBAL_STATS_LEN ARRAY_SIZE(i40evf_gstrings_stats)
-#define I40EVF_QUEUE_STATS_LEN \
+#define I40EVF_QUEUE_STATS_LEN(_dev) \
(((struct i40evf_adapter *) \
- netdev_priv(netdev))->vsi_res->num_queue_pairs * 4)
-#define I40EVF_STATS_LEN (I40EVF_GLOBAL_STATS_LEN + I40EVF_QUEUE_STATS_LEN)
+ netdev_priv(_dev))->vsi_res->num_queue_pairs \
+ * 2 * (sizeof(struct i40e_queue_stats) / sizeof(u64)))
+#define I40EVF_STATS_LEN(_dev) \
+ (I40EVF_GLOBAL_STATS_LEN + I40EVF_QUEUE_STATS_LEN(_dev))
/**
* i40evf_get_settings - Get Link Speed and Duplex settings
@@ -75,7 +80,7 @@ static int i40evf_get_settings(struct net_device *netdev,
/* In the future the VF will be able to query the PF for
* some information - for now use a dummy value
*/
- ecmd->supported = SUPPORTED_10000baseT_Full;
+ ecmd->supported = 0;
ecmd->autoneg = AUTONEG_DISABLE;
ecmd->transceiver = XCVR_DUMMY1;
ecmd->port = PORT_NONE;
@@ -94,9 +99,9 @@ static int i40evf_get_settings(struct net_device *netdev,
static int i40evf_get_sset_count(struct net_device *netdev, int sset)
{
if (sset == ETH_SS_STATS)
- return I40EVF_STATS_LEN;
+ return I40EVF_STATS_LEN(netdev);
else
- return -ENOTSUPP;
+ return -EINVAL;
}
/**
@@ -290,14 +295,13 @@ static int i40evf_get_coalesce(struct net_device *netdev,
ec->rx_max_coalesced_frames = vsi->work_limit;
if (ITR_IS_DYNAMIC(vsi->rx_itr_setting))
- ec->rx_coalesce_usecs = 1;
- else
- ec->rx_coalesce_usecs = vsi->rx_itr_setting;
+ ec->use_adaptive_rx_coalesce = 1;
if (ITR_IS_DYNAMIC(vsi->tx_itr_setting))
- ec->tx_coalesce_usecs = 1;
- else
- ec->tx_coalesce_usecs = vsi->tx_itr_setting;
+ ec->use_adaptive_tx_coalesce = 1;
+
+ ec->rx_coalesce_usecs = vsi->rx_itr_setting & ~I40E_ITR_DYNAMIC;
+ ec->tx_coalesce_usecs = vsi->tx_itr_setting & ~I40E_ITR_DYNAMIC;
return 0;
}
@@ -318,40 +322,34 @@ static int i40evf_set_coalesce(struct net_device *netdev,
struct i40e_q_vector *q_vector;
int i;
- if (ec->tx_max_coalesced_frames || ec->rx_max_coalesced_frames)
- vsi->work_limit = ec->tx_max_coalesced_frames;
+ if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq)
+ vsi->work_limit = ec->tx_max_coalesced_frames_irq;
- switch (ec->rx_coalesce_usecs) {
- case 0:
- vsi->rx_itr_setting = 0;
- break;
- case 1:
- vsi->rx_itr_setting = (I40E_ITR_DYNAMIC
- | ITR_REG_TO_USEC(I40E_ITR_RX_DEF));
- break;
- default:
- if ((ec->rx_coalesce_usecs < (I40E_MIN_ITR << 1)) ||
- (ec->rx_coalesce_usecs > (I40E_MAX_ITR << 1)))
- return -EINVAL;
+ if ((ec->rx_coalesce_usecs >= (I40E_MIN_ITR << 1)) &&
+ (ec->rx_coalesce_usecs <= (I40E_MAX_ITR << 1)))
vsi->rx_itr_setting = ec->rx_coalesce_usecs;
- break;
- }
- switch (ec->tx_coalesce_usecs) {
- case 0:
- vsi->tx_itr_setting = 0;
- break;
- case 1:
- vsi->tx_itr_setting = (I40E_ITR_DYNAMIC
- | ITR_REG_TO_USEC(I40E_ITR_TX_DEF));
- break;
- default:
- if ((ec->tx_coalesce_usecs < (I40E_MIN_ITR << 1)) ||
- (ec->tx_coalesce_usecs > (I40E_MAX_ITR << 1)))
- return -EINVAL;
+ else
+ return -EINVAL;
+
+ if ((ec->tx_coalesce_usecs >= (I40E_MIN_ITR << 1)) &&
+ (ec->tx_coalesce_usecs <= (I40E_MAX_ITR << 1)))
vsi->tx_itr_setting = ec->tx_coalesce_usecs;
- break;
- }
+ else if (ec->use_adaptive_tx_coalesce)
+ vsi->tx_itr_setting = (I40E_ITR_DYNAMIC |
+ ITR_REG_TO_USEC(I40E_ITR_RX_DEF));
+ else
+ return -EINVAL;
+
+ if (ec->use_adaptive_rx_coalesce)
+ vsi->rx_itr_setting |= I40E_ITR_DYNAMIC;
+ else
+ vsi->rx_itr_setting &= ~I40E_ITR_DYNAMIC;
+
+ if (ec->use_adaptive_tx_coalesce)
+ vsi->tx_itr_setting |= I40E_ITR_DYNAMIC;
+ else
+ vsi->tx_itr_setting &= ~I40E_ITR_DYNAMIC;
for (i = 0; i < adapter->num_msix_vectors - NONQ_VECS; i++) {
q_vector = adapter->q_vector[i];
@@ -675,7 +673,7 @@ static int i40evf_set_rxfh_indir(struct net_device *netdev, const u32 *indir)
return 0;
}
-static struct ethtool_ops i40evf_ethtool_ops = {
+static const struct ethtool_ops i40evf_ethtool_ops = {
.get_settings = i40evf_get_settings,
.get_drvinfo = i40evf_get_drvinfo,
.get_link = ethtool_op_get_link,
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index 6edd581dffa..6f6bd3f0180 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -12,6 +12,9 @@
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
+ * You should have received a copy of the GNU General Public License along
+ * with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
* The full GNU General Public License is included in this distribution in
* the file called "COPYING".
*
@@ -1029,30 +1032,21 @@ i40evf_acquire_msix_vectors(struct i40evf_adapter *adapter, int vectors)
* Right now, we simply care about how many we'll get; we'll
* set them up later while requesting irq's.
*/
- while (vectors >= vector_threshold) {
- err = pci_enable_msix(adapter->pdev, adapter->msix_entries,
- vectors);
- if (!err) /* Success in acquiring all requested vectors. */
- break;
- else if (err < 0)
- vectors = 0; /* Nasty failure, quit now */
- else /* err == number of vectors we should try again with */
- vectors = err;
- }
-
- if (vectors < vector_threshold) {
+ err = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
+ vector_threshold, vectors);
+ if (err < 0) {
dev_err(&adapter->pdev->dev, "Unable to allocate MSI-X interrupts.\n");
kfree(adapter->msix_entries);
adapter->msix_entries = NULL;
- err = -EIO;
- } else {
- /* Adjust for only the vectors we'll use, which is minimum
- * of max_msix_q_vectors + NONQ_VECS, or the number of
- * vectors we were allocated.
- */
- adapter->num_msix_vectors = vectors;
+ return err;
}
- return err;
+
+ /* Adjust for only the vectors we'll use, which is minimum
+ * of max_msix_q_vectors + NONQ_VECS, or the number of
+ * vectors we were allocated.
+ */
+ adapter->num_msix_vectors = err;
+ return 0;
}
/**
@@ -2119,8 +2113,10 @@ static void i40evf_init_task(struct work_struct *work)
adapter->vsi.back = adapter;
adapter->vsi.base_vector = 1;
adapter->vsi.work_limit = I40E_DEFAULT_IRQ_WORK;
- adapter->vsi.rx_itr_setting = I40E_ITR_DYNAMIC;
- adapter->vsi.tx_itr_setting = I40E_ITR_DYNAMIC;
+ adapter->vsi.rx_itr_setting = (I40E_ITR_DYNAMIC |
+ ITR_REG_TO_USEC(I40E_ITR_RX_DEF));
+ adapter->vsi.tx_itr_setting = (I40E_ITR_DYNAMIC |
+ ITR_REG_TO_USEC(I40E_ITR_TX_DEF));
adapter->vsi.netdev = adapter->netdev;
if (!adapter->netdev_registered) {
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
index e294f012647..7f80bb41772 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
@@ -12,6 +12,9 @@
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
+ * You should have received a copy of the GNU General Public License along
+ * with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
* The full GNU General Public License is included in this distribution in
* the file called "COPYING".
*
diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index df1d1b97187..3b0f818a4f5 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -42,6 +42,7 @@
#include <linux/dma-mapping.h>
#include <linux/in.h>
#include <linux/ip.h>
+#include <net/tso.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/etherdevice.h>
@@ -179,9 +180,10 @@ static char mv643xx_eth_driver_version[] = "1.4";
* Misc definitions.
*/
#define DEFAULT_RX_QUEUE_SIZE 128
-#define DEFAULT_TX_QUEUE_SIZE 256
+#define DEFAULT_TX_QUEUE_SIZE 512
#define SKB_DMA_REALIGN ((PAGE_SIZE - NET_SKB_PAD) % SMP_CACHE_BYTES)
+#define TSO_HEADER_SIZE 128
/*
* RX/TX descriptors.
@@ -250,6 +252,7 @@ struct tx_desc {
#define GEN_TCP_UDP_CHECKSUM 0x00020000
#define UDP_FRAME 0x00010000
#define MAC_HDR_EXTRA_4_BYTES 0x00008000
+#define GEN_TCP_UDP_CHK_FULL 0x00000400
#define MAC_HDR_EXTRA_8_BYTES 0x00000200
#define TX_IHL_SHIFT 11
@@ -345,6 +348,9 @@ struct tx_queue {
int tx_curr_desc;
int tx_used_desc;
+ char *tso_hdrs;
+ dma_addr_t tso_hdrs_dma;
+
struct tx_desc *tx_desc_area;
dma_addr_t tx_desc_dma;
int tx_desc_area_size;
@@ -661,6 +667,198 @@ static inline unsigned int has_tiny_unaligned_frags(struct sk_buff *skb)
return 0;
}
+static inline __be16 sum16_as_be(__sum16 sum)
+{
+ return (__force __be16)sum;
+}
+
+static int skb_tx_csum(struct mv643xx_eth_private *mp, struct sk_buff *skb,
+ u16 *l4i_chk, u32 *command, int length)
+{
+ int ret;
+ u32 cmd = 0;
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ int hdr_len;
+ int tag_bytes;
+
+ BUG_ON(skb->protocol != htons(ETH_P_IP) &&
+ skb->protocol != htons(ETH_P_8021Q));
+
+ hdr_len = (void *)ip_hdr(skb) - (void *)skb->data;
+ tag_bytes = hdr_len - ETH_HLEN;
+
+ if (length - hdr_len > mp->shared->tx_csum_limit ||
+ unlikely(tag_bytes & ~12)) {
+ ret = skb_checksum_help(skb);
+ if (!ret)
+ goto no_csum;
+ return ret;
+ }
+
+ if (tag_bytes & 4)
+ cmd |= MAC_HDR_EXTRA_4_BYTES;
+ if (tag_bytes & 8)
+ cmd |= MAC_HDR_EXTRA_8_BYTES;
+
+ cmd |= GEN_TCP_UDP_CHECKSUM | GEN_TCP_UDP_CHK_FULL |
+ GEN_IP_V4_CHECKSUM |
+ ip_hdr(skb)->ihl << TX_IHL_SHIFT;
+
+ /* TODO: Revisit this. With the usage of GEN_TCP_UDP_CHK_FULL
+ * it seems we don't need to pass the initial checksum. */
+ switch (ip_hdr(skb)->protocol) {
+ case IPPROTO_UDP:
+ cmd |= UDP_FRAME;
+ *l4i_chk = 0;
+ break;
+ case IPPROTO_TCP:
+ *l4i_chk = 0;
+ break;
+ default:
+ WARN(1, "protocol not supported");
+ }
+ } else {
+no_csum:
+ /* Errata BTS #50, IHL must be 5 if no HW checksum */
+ cmd |= 5 << TX_IHL_SHIFT;
+ }
+ *command = cmd;
+ return 0;
+}
+
+static inline int
+txq_put_data_tso(struct net_device *dev, struct tx_queue *txq,
+ struct sk_buff *skb, char *data, int length,
+ bool last_tcp, bool is_last)
+{
+ int tx_index;
+ u32 cmd_sts;
+ struct tx_desc *desc;
+
+ tx_index = txq->tx_curr_desc++;
+ if (txq->tx_curr_desc == txq->tx_ring_size)
+ txq->tx_curr_desc = 0;
+ desc = &txq->tx_desc_area[tx_index];
+
+ desc->l4i_chk = 0;
+ desc->byte_cnt = length;
+ desc->buf_ptr = dma_map_single(dev->dev.parent, data,
+ length, DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(dev->dev.parent, desc->buf_ptr))) {
+ WARN(1, "dma_map_single failed!\n");
+ return -ENOMEM;
+ }
+
+ cmd_sts = BUFFER_OWNED_BY_DMA;
+ if (last_tcp) {
+ /* last descriptor in the TCP packet */
+ cmd_sts |= ZERO_PADDING | TX_LAST_DESC;
+ /* last descriptor in SKB */
+ if (is_last)
+ cmd_sts |= TX_ENABLE_INTERRUPT;
+ }
+ desc->cmd_sts = cmd_sts;
+ return 0;
+}
+
+static inline void
+txq_put_hdr_tso(struct sk_buff *skb, struct tx_queue *txq, int length)
+{
+ struct mv643xx_eth_private *mp = txq_to_mp(txq);
+ int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ int tx_index;
+ struct tx_desc *desc;
+ int ret;
+ u32 cmd_csum = 0;
+ u16 l4i_chk = 0;
+
+ tx_index = txq->tx_curr_desc;
+ desc = &txq->tx_desc_area[tx_index];
+
+ ret = skb_tx_csum(mp, skb, &l4i_chk, &cmd_csum, length);
+ if (ret)
+ WARN(1, "failed to prepare checksum!");
+
+ /* Should we set this? Can't use the value from skb_tx_csum()
+ * as it's not the correct initial L4 checksum to use. */
+ desc->l4i_chk = 0;
+
+ desc->byte_cnt = hdr_len;
+ desc->buf_ptr = txq->tso_hdrs_dma +
+ txq->tx_curr_desc * TSO_HEADER_SIZE;
+ desc->cmd_sts = cmd_csum | BUFFER_OWNED_BY_DMA | TX_FIRST_DESC |
+ GEN_CRC;
+
+ txq->tx_curr_desc++;
+ if (txq->tx_curr_desc == txq->tx_ring_size)
+ txq->tx_curr_desc = 0;
+}
+
+static int txq_submit_tso(struct tx_queue *txq, struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct mv643xx_eth_private *mp = txq_to_mp(txq);
+ int total_len, data_left, ret;
+ int desc_count = 0;
+ struct tso_t tso;
+ int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+
+ /* Count needed descriptors */
+ if ((txq->tx_desc_count + tso_count_descs(skb)) >= txq->tx_ring_size) {
+ netdev_dbg(dev, "not enough descriptors for TSO!\n");
+ return -EBUSY;
+ }
+
+ /* Initialize the TSO handler, and prepare the first payload */
+ tso_start(skb, &tso);
+
+ total_len = skb->len - hdr_len;
+ while (total_len > 0) {
+ char *hdr;
+
+ data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len);
+ total_len -= data_left;
+ desc_count++;
+
+ /* prepare packet headers: MAC + IP + TCP */
+ hdr = txq->tso_hdrs + txq->tx_curr_desc * TSO_HEADER_SIZE;
+ tso_build_hdr(skb, hdr, &tso, data_left, total_len == 0);
+ txq_put_hdr_tso(skb, txq, data_left);
+
+ while (data_left > 0) {
+ int size;
+ desc_count++;
+
+ size = min_t(int, tso.size, data_left);
+ ret = txq_put_data_tso(dev, txq, skb, tso.data, size,
+ size == data_left,
+ total_len == 0);
+ if (ret)
+ goto err_release;
+ data_left -= size;
+ tso_build_data(skb, &tso, size);
+ }
+ }
+
+ __skb_queue_tail(&txq->tx_skb, skb);
+ skb_tx_timestamp(skb);
+
+ /* clear TX_END status */
+ mp->work_tx_end &= ~(1 << txq->index);
+
+ /* ensure all descriptors are written before poking hardware */
+ wmb();
+ txq_enable(txq);
+ txq->tx_desc_count += desc_count;
+ return 0;
+err_release:
+ /* TODO: Release all used data descriptors; header descriptors must not
+ * be DMA-unmapped.
+ */
+ return ret;
+}
+
static void txq_submit_frag_skb(struct tx_queue *txq, struct sk_buff *skb)
{
struct mv643xx_eth_private *mp = txq_to_mp(txq);
@@ -671,8 +869,10 @@ static void txq_submit_frag_skb(struct tx_queue *txq, struct sk_buff *skb)
skb_frag_t *this_frag;
int tx_index;
struct tx_desc *desc;
+ void *addr;
this_frag = &skb_shinfo(skb)->frags[frag];
+ addr = page_address(this_frag->page.p) + this_frag->page_offset;
tx_index = txq->tx_curr_desc++;
if (txq->tx_curr_desc == txq->tx_ring_size)
txq->tx_curr_desc = 0;
@@ -692,18 +892,11 @@ static void txq_submit_frag_skb(struct tx_queue *txq, struct sk_buff *skb)
desc->l4i_chk = 0;
desc->byte_cnt = skb_frag_size(this_frag);
- desc->buf_ptr = skb_frag_dma_map(mp->dev->dev.parent,
- this_frag, 0,
- skb_frag_size(this_frag),
- DMA_TO_DEVICE);
+ desc->buf_ptr = dma_map_single(mp->dev->dev.parent, addr,
+ desc->byte_cnt, DMA_TO_DEVICE);
}
}
-static inline __be16 sum16_as_be(__sum16 sum)
-{
- return (__force __be16)sum;
-}
-
static int txq_submit_skb(struct tx_queue *txq, struct sk_buff *skb)
{
struct mv643xx_eth_private *mp = txq_to_mp(txq);
@@ -712,53 +905,17 @@ static int txq_submit_skb(struct tx_queue *txq, struct sk_buff *skb)
struct tx_desc *desc;
u32 cmd_sts;
u16 l4i_chk;
- int length;
+ int length, ret;
- cmd_sts = TX_FIRST_DESC | GEN_CRC | BUFFER_OWNED_BY_DMA;
+ cmd_sts = 0;
l4i_chk = 0;
- if (skb->ip_summed == CHECKSUM_PARTIAL) {
- int hdr_len;
- int tag_bytes;
-
- BUG_ON(skb->protocol != htons(ETH_P_IP) &&
- skb->protocol != htons(ETH_P_8021Q));
-
- hdr_len = (void *)ip_hdr(skb) - (void *)skb->data;
- tag_bytes = hdr_len - ETH_HLEN;
- if (skb->len - hdr_len > mp->shared->tx_csum_limit ||
- unlikely(tag_bytes & ~12)) {
- if (skb_checksum_help(skb) == 0)
- goto no_csum;
- dev_kfree_skb_any(skb);
- return 1;
- }
-
- if (tag_bytes & 4)
- cmd_sts |= MAC_HDR_EXTRA_4_BYTES;
- if (tag_bytes & 8)
- cmd_sts |= MAC_HDR_EXTRA_8_BYTES;
-
- cmd_sts |= GEN_TCP_UDP_CHECKSUM |
- GEN_IP_V4_CHECKSUM |
- ip_hdr(skb)->ihl << TX_IHL_SHIFT;
-
- switch (ip_hdr(skb)->protocol) {
- case IPPROTO_UDP:
- cmd_sts |= UDP_FRAME;
- l4i_chk = ntohs(sum16_as_be(udp_hdr(skb)->check));
- break;
- case IPPROTO_TCP:
- l4i_chk = ntohs(sum16_as_be(tcp_hdr(skb)->check));
- break;
- default:
- BUG();
- }
- } else {
-no_csum:
- /* Errata BTS #50, IHL must be 5 if no HW checksum */
- cmd_sts |= 5 << TX_IHL_SHIFT;
+ ret = skb_tx_csum(mp, skb, &l4i_chk, &cmd_sts, skb->len);
+ if (ret) {
+ dev_kfree_skb_any(skb);
+ return ret;
}
+ cmd_sts |= TX_FIRST_DESC | GEN_CRC | BUFFER_OWNED_BY_DMA;
tx_index = txq->tx_curr_desc++;
if (txq->tx_curr_desc == txq->tx_ring_size)
@@ -801,7 +958,7 @@ no_csum:
static netdev_tx_t mv643xx_eth_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct mv643xx_eth_private *mp = netdev_priv(dev);
- int length, queue;
+ int length, queue, ret;
struct tx_queue *txq;
struct netdev_queue *nq;
@@ -825,7 +982,11 @@ static netdev_tx_t mv643xx_eth_xmit(struct sk_buff *skb, struct net_device *dev)
length = skb->len;
- if (!txq_submit_skb(txq, skb)) {
+ if (skb_is_gso(skb))
+ ret = txq_submit_tso(txq, skb, dev);
+ else
+ ret = txq_submit_skb(txq, skb);
+ if (!ret) {
int entries_left;
txq->tx_bytes += length;
@@ -834,6 +995,8 @@ static netdev_tx_t mv643xx_eth_xmit(struct sk_buff *skb, struct net_device *dev)
entries_left = txq->tx_ring_size - txq->tx_desc_count;
if (entries_left < MAX_SKB_FRAGS + 1)
netif_tx_stop_queue(nq);
+ } else if (ret == -EBUSY) {
+ return NETDEV_TX_BUSY;
}
return NETDEV_TX_OK;
@@ -907,14 +1070,8 @@ static int txq_reclaim(struct tx_queue *txq, int budget, int force)
mp->dev->stats.tx_errors++;
}
- if (cmd_sts & TX_FIRST_DESC) {
- dma_unmap_single(mp->dev->dev.parent, desc->buf_ptr,
- desc->byte_cnt, DMA_TO_DEVICE);
- } else {
- dma_unmap_page(mp->dev->dev.parent, desc->buf_ptr,
- desc->byte_cnt, DMA_TO_DEVICE);
- }
-
+ dma_unmap_single(mp->dev->dev.parent, desc->buf_ptr,
+ desc->byte_cnt, DMA_TO_DEVICE);
dev_kfree_skb(skb);
}
@@ -1871,6 +2028,15 @@ static int txq_init(struct mv643xx_eth_private *mp, int index)
nexti * sizeof(struct tx_desc);
}
+ /* Allocate DMA buffers for TSO MAC/IP/TCP headers */
+ txq->tso_hdrs = dma_alloc_coherent(mp->dev->dev.parent,
+ txq->tx_ring_size * TSO_HEADER_SIZE,
+ &txq->tso_hdrs_dma, GFP_KERNEL);
+ if (txq->tso_hdrs == NULL) {
+ dma_free_coherent(mp->dev->dev.parent, txq->tx_desc_area_size,
+ txq->tx_desc_area, txq->tx_desc_dma);
+ return -ENOMEM;
+ }
skb_queue_head_init(&txq->tx_skb);
return 0;
@@ -1891,6 +2057,10 @@ static void txq_deinit(struct tx_queue *txq)
else
dma_free_coherent(mp->dev->dev.parent, txq->tx_desc_area_size,
txq->tx_desc_area, txq->tx_desc_dma);
+ if (txq->tso_hdrs)
+ dma_free_coherent(mp->dev->dev.parent,
+ txq->tx_ring_size * TSO_HEADER_SIZE,
+ txq->tso_hdrs, txq->tso_hdrs_dma);
}
@@ -2921,9 +3091,11 @@ static int mv643xx_eth_probe(struct platform_device *pdev)
dev->watchdog_timeo = 2 * HZ;
dev->base_addr = 0;
- dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
- dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
- dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM;
+ dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
+ dev->vlan_features = dev->features;
+
+ dev->features |= NETIF_F_RXCSUM;
+ dev->hw_features = dev->features;
dev->priv_flags |= IFF_UNICAST_FLT;
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index c7cbaf3b4af..18c698d9ef9 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -23,6 +23,7 @@
#include <net/ip.h>
#include <net/ipv6.h>
#include <linux/io.h>
+#include <net/tso.h>
#include <linux/of.h>
#include <linux/of_irq.h>
#include <linux/of_mdio.h>
@@ -244,6 +245,9 @@
#define MVNETA_TX_MTU_MAX 0x3ffff
+/* TSO header size */
+#define TSO_HEADER_SIZE 128
+
/* Max number of Rx descriptors */
#define MVNETA_MAX_RXD 128
@@ -413,6 +417,12 @@ struct mvneta_tx_queue {
/* Index of the next TX DMA descriptor to process */
int next_desc_to_proc;
+
+ /* DMA buffers for TSO headers */
+ char *tso_hdrs;
+
+ /* DMA address of TSO headers */
+ dma_addr_t tso_hdrs_phys;
};
struct mvneta_rx_queue {
@@ -1519,6 +1529,126 @@ static int mvneta_rx(struct mvneta_port *pp, int rx_todo,
return rx_done;
}
+static inline void
+mvneta_tso_put_hdr(struct sk_buff *skb,
+ struct mvneta_port *pp, struct mvneta_tx_queue *txq)
+{
+ struct mvneta_tx_desc *tx_desc;
+ int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+
+ txq->tx_skb[txq->txq_put_index] = NULL;
+ tx_desc = mvneta_txq_next_desc_get(txq);
+ tx_desc->data_size = hdr_len;
+ tx_desc->command = mvneta_skb_tx_csum(pp, skb);
+ tx_desc->command |= MVNETA_TXD_F_DESC;
+ tx_desc->buf_phys_addr = txq->tso_hdrs_phys +
+ txq->txq_put_index * TSO_HEADER_SIZE;
+ mvneta_txq_inc_put(txq);
+}
+
+static inline int
+mvneta_tso_put_data(struct net_device *dev, struct mvneta_tx_queue *txq,
+ struct sk_buff *skb, char *data, int size,
+ bool last_tcp, bool is_last)
+{
+ struct mvneta_tx_desc *tx_desc;
+
+ tx_desc = mvneta_txq_next_desc_get(txq);
+ tx_desc->data_size = size;
+ tx_desc->buf_phys_addr = dma_map_single(dev->dev.parent, data,
+ size, DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(dev->dev.parent,
+ tx_desc->buf_phys_addr))) {
+ mvneta_txq_desc_put(txq);
+ return -ENOMEM;
+ }
+
+ tx_desc->command = 0;
+ txq->tx_skb[txq->txq_put_index] = NULL;
+
+ if (last_tcp) {
+ /* last descriptor in the TCP packet */
+ tx_desc->command = MVNETA_TXD_L_DESC;
+
+ /* last descriptor in SKB */
+ if (is_last)
+ txq->tx_skb[txq->txq_put_index] = skb;
+ }
+ mvneta_txq_inc_put(txq);
+ return 0;
+}
+
+static int mvneta_tx_tso(struct sk_buff *skb, struct net_device *dev,
+ struct mvneta_tx_queue *txq)
+{
+ int total_len, data_left;
+ int desc_count = 0;
+ struct mvneta_port *pp = netdev_priv(dev);
+ struct tso_t tso;
+ int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ int i;
+
+ /* Count needed descriptors */
+ if ((txq->count + tso_count_descs(skb)) >= txq->size)
+ return 0;
+
+ if (skb_headlen(skb) < (skb_transport_offset(skb) + tcp_hdrlen(skb))) {
+ pr_info("*** Is this even possible???!?!?\n");
+ return 0;
+ }
+
+ /* Initialize the TSO handler, and prepare the first payload */
+ tso_start(skb, &tso);
+
+ total_len = skb->len - hdr_len;
+ while (total_len > 0) {
+ char *hdr;
+
+ data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len);
+ total_len -= data_left;
+ desc_count++;
+
+ /* prepare packet headers: MAC + IP + TCP */
+ hdr = txq->tso_hdrs + txq->txq_put_index * TSO_HEADER_SIZE;
+ tso_build_hdr(skb, hdr, &tso, data_left, total_len == 0);
+
+ mvneta_tso_put_hdr(skb, pp, txq);
+
+ while (data_left > 0) {
+ int size;
+ desc_count++;
+
+ size = min_t(int, tso.size, data_left);
+
+ if (mvneta_tso_put_data(dev, txq, skb,
+ tso.data, size,
+ size == data_left,
+ total_len == 0))
+ goto err_release;
+ data_left -= size;
+
+ tso_build_data(skb, &tso, size);
+ }
+ }
+
+ return desc_count;
+
+err_release:
+ /* Release all used data descriptors; header descriptors must not
+ * be DMA-unmapped.
+ */
+ for (i = desc_count - 1; i >= 0; i--) {
+ struct mvneta_tx_desc *tx_desc = txq->descs + i;
+ if (!(tx_desc->command & MVNETA_TXD_F_DESC))
+ dma_unmap_single(pp->dev->dev.parent,
+ tx_desc->buf_phys_addr,
+ tx_desc->data_size,
+ DMA_TO_DEVICE);
+ mvneta_txq_desc_put(txq);
+ }
+ return 0;
+}
+
/* Handle tx fragmentation processing */
static int mvneta_tx_frag_process(struct mvneta_port *pp, struct sk_buff *skb,
struct mvneta_tx_queue *txq)
@@ -1584,15 +1714,18 @@ static int mvneta_tx(struct sk_buff *skb, struct net_device *dev)
u16 txq_id = skb_get_queue_mapping(skb);
struct mvneta_tx_queue *txq = &pp->txqs[txq_id];
struct mvneta_tx_desc *tx_desc;
- struct netdev_queue *nq;
int frags = 0;
u32 tx_cmd;
if (!netif_running(dev))
goto out;
+ if (skb_is_gso(skb)) {
+ frags = mvneta_tx_tso(skb, dev, txq);
+ goto out;
+ }
+
frags = skb_shinfo(skb)->nr_frags + 1;
- nq = netdev_get_tx_queue(dev, txq_id);
/* Get a descriptor for the first part of the packet */
tx_desc = mvneta_txq_next_desc_get(txq);
@@ -1635,15 +1768,16 @@ static int mvneta_tx(struct sk_buff *skb, struct net_device *dev)
}
}
- txq->count += frags;
- mvneta_txq_pend_desc_add(pp, txq, frags);
-
- if (txq->size - txq->count < MAX_SKB_FRAGS + 1)
- netif_tx_stop_queue(nq);
-
out:
if (frags > 0) {
struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
+ struct netdev_queue *nq = netdev_get_tx_queue(dev, txq_id);
+
+ txq->count += frags;
+ mvneta_txq_pend_desc_add(pp, txq, frags);
+
+ if (txq->size - txq->count < MAX_SKB_FRAGS + 1)
+ netif_tx_stop_queue(nq);
u64_stats_update_begin(&stats->syncp);
stats->tx_packets++;
@@ -2109,6 +2243,18 @@ static int mvneta_txq_init(struct mvneta_port *pp,
txq->descs, txq->descs_phys);
return -ENOMEM;
}
+
+ /* Allocate DMA buffers for TSO MAC/IP/TCP headers */
+ txq->tso_hdrs = dma_alloc_coherent(pp->dev->dev.parent,
+ txq->size * TSO_HEADER_SIZE,
+ &txq->tso_hdrs_phys, GFP_KERNEL);
+ if (txq->tso_hdrs == NULL) {
+ kfree(txq->tx_skb);
+ dma_free_coherent(pp->dev->dev.parent,
+ txq->size * MVNETA_DESC_ALIGNED_SIZE,
+ txq->descs, txq->descs_phys);
+ return -ENOMEM;
+ }
mvneta_tx_done_pkts_coal_set(pp, txq, txq->done_pkts_coal);
return 0;
@@ -2120,6 +2266,10 @@ static void mvneta_txq_deinit(struct mvneta_port *pp,
{
kfree(txq->tx_skb);
+ if (txq->tso_hdrs)
+ dma_free_coherent(pp->dev->dev.parent,
+ txq->size * TSO_HEADER_SIZE,
+ txq->tso_hdrs, txq->tso_hdrs_phys);
if (txq->descs)
dma_free_coherent(pp->dev->dev.parent,
txq->size * MVNETA_DESC_ALIGNED_SIZE,
@@ -2895,9 +3045,9 @@ static int mvneta_probe(struct platform_device *pdev)
netif_napi_add(dev, &pp->napi, mvneta_poll, pp->weight);
- dev->features = NETIF_F_SG | NETIF_F_IP_CSUM;
- dev->hw_features |= NETIF_F_SG | NETIF_F_IP_CSUM;
- dev->vlan_features |= NETIF_F_SG | NETIF_F_IP_CSUM;
+ dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
+ dev->hw_features |= dev->features;
+ dev->vlan_features |= dev->features;
dev->priv_flags |= IFF_UNICAST_FLT;
err = register_netdev(dev);
diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c
index d107bcbb854..7a0deadd53b 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-main.c
+++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c
@@ -2122,7 +2122,7 @@ static int vxge_open_vpaths(struct vxgedev *vdev)
static void adaptive_coalesce_tx_interrupts(struct vxge_fifo *fifo)
{
fifo->interrupt_count++;
- if (jiffies > fifo->jiffies + HZ / 100) {
+ if (time_before(fifo->jiffies + HZ / 100, jiffies)) {
struct __vxge_hw_fifo *hw_fifo = fifo->handle;
fifo->jiffies = jiffies;
@@ -2150,7 +2150,7 @@ static void adaptive_coalesce_tx_interrupts(struct vxge_fifo *fifo)
static void adaptive_coalesce_rx_interrupts(struct vxge_ring *ring)
{
ring->interrupt_count++;
- if (jiffies > ring->jiffies + HZ / 100) {
+ if (time_before(ring->jiffies + HZ / 100, jiffies)) {
struct __vxge_hw_ring *hw_ring = ring->handle;
ring->jiffies = jiffies;
diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c
index e76eae54115..f32d730f55c 100644
--- a/drivers/net/ethernet/ti/davinci_emac.c
+++ b/drivers/net/ethernet/ti/davinci_emac.c
@@ -1865,7 +1865,6 @@ static int davinci_emac_probe(struct platform_device *pdev)
struct emac_priv *priv;
unsigned long hw_ram_addr;
struct emac_platform_data *pdata;
- struct device *emac_dev;
struct cpdma_params dma_params;
struct clk *emac_clk;
unsigned long emac_bus_frequency;
@@ -1911,7 +1910,6 @@ static int davinci_emac_probe(struct platform_device *pdev)
priv->coal_intvl = 0;
priv->bus_freq_mhz = (u32)(emac_bus_frequency / 1000000);
- emac_dev = &ndev->dev;
/* Get EMAC platform data */
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
priv->emac_base_phys = res->start + pdata->ctrl_reg_offset;
@@ -1930,7 +1928,7 @@ static int davinci_emac_probe(struct platform_device *pdev)
hw_ram_addr = (u32 __force)res->start + pdata->ctrl_ram_offset;
memset(&dma_params, 0, sizeof(dma_params));
- dma_params.dev = emac_dev;
+ dma_params.dev = &pdev->dev;
dma_params.dmaregs = priv->emac_base;
dma_params.rxthresh = priv->emac_base + 0x120;
dma_params.rxfree = priv->emac_base + 0x140;
@@ -1994,7 +1992,7 @@ static int davinci_emac_probe(struct platform_device *pdev)
if (netif_msg_probe(priv)) {
- dev_notice(emac_dev, "DaVinci EMAC Probe found device "\
+ dev_notice(&pdev->dev, "DaVinci EMAC Probe found device "
"(regs: %p, irq: %d)\n",
(void *)priv->emac_base_phys, ndev->irq);
}
diff --git a/drivers/net/ieee802154/fakelb.c b/drivers/net/ieee802154/fakelb.c
index b8d22173925..27d83207d24 100644
--- a/drivers/net/ieee802154/fakelb.c
+++ b/drivers/net/ieee802154/fakelb.c
@@ -26,6 +26,7 @@
#include <linux/timer.h>
#include <linux/platform_device.h>
#include <linux/netdevice.h>
+#include <linux/device.h>
#include <linux/spinlock.h>
#include <net/mac802154.h>
#include <net/wpan-phy.h>
@@ -228,7 +229,8 @@ static int fakelb_probe(struct platform_device *pdev)
int err = -ENOMEM;
int i;
- priv = kzalloc(sizeof(struct fakelb_priv), GFP_KERNEL);
+ priv = devm_kzalloc(&pdev->dev, sizeof(struct fakelb_priv),
+ GFP_KERNEL);
if (!priv)
goto err_alloc;
@@ -248,7 +250,6 @@ static int fakelb_probe(struct platform_device *pdev)
err_slave:
list_for_each_entry(dp, &priv->list, list)
fakelb_del(dp);
- kfree(priv);
err_alloc:
return err;
}
@@ -260,7 +261,6 @@ static int fakelb_remove(struct platform_device *pdev)
list_for_each_entry_safe(dp, temp, &priv->list, list)
fakelb_del(dp);
- kfree(priv);
return 0;
}
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 767fe61b5ac..9a9ce8debef 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -968,7 +968,7 @@ static void team_port_disable(struct team *team,
static void __team_compute_features(struct team *team)
{
struct team_port *port;
- u32 vlan_features = TEAM_VLAN_FEATURES;
+ u32 vlan_features = TEAM_VLAN_FEATURES & NETIF_F_ALL_FOR_ALL;
unsigned short max_hard_header_len = ETH_HLEN;
unsigned int flags, dst_release_flag = IFF_XMIT_DST_RELEASE;
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index ee328ba101e..98bad1fb1bf 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -498,12 +498,12 @@ static void tun_detach_all(struct net_device *dev)
for (i = 0; i < n; i++) {
tfile = rtnl_dereference(tun->tfiles[i]);
BUG_ON(!tfile);
- wake_up_all(&tfile->wq.wait);
+ tfile->socket.sk->sk_data_ready(tfile->socket.sk);
RCU_INIT_POINTER(tfile->tun, NULL);
--tun->numqueues;
}
list_for_each_entry(tfile, &tun->disabled, next) {
- wake_up_all(&tfile->wq.wait);
+ tfile->socket.sk->sk_data_ready(tfile->socket.sk);
RCU_INIT_POINTER(tfile->tun, NULL);
}
BUG_ON(tun->numqueues != 0);
@@ -807,8 +807,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
/* Notify and wake up reader process */
if (tfile->flags & TUN_FASYNC)
kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
- wake_up_interruptible_poll(&tfile->wq.wait, POLLIN |
- POLLRDNORM | POLLRDBAND);
+ tfile->socket.sk->sk_data_ready(tfile->socket.sk);
rcu_read_unlock();
return NETDEV_TX_OK;
@@ -965,7 +964,7 @@ static unsigned int tun_chr_poll(struct file *file, poll_table *wait)
tun_debug(KERN_INFO, tun, "tun_chr_poll\n");
- poll_wait(file, &tfile->wq.wait, wait);
+ poll_wait(file, sk_sleep(sk), wait);
if (!skb_queue_empty(&sk->sk_receive_queue))
mask |= POLLIN | POLLRDNORM;
@@ -1330,47 +1329,26 @@ done:
static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
const struct iovec *iv, ssize_t len, int noblock)
{
- DECLARE_WAITQUEUE(wait, current);
struct sk_buff *skb;
ssize_t ret = 0;
+ int peeked, err, off = 0;
tun_debug(KERN_INFO, tun, "tun_do_read\n");
- if (unlikely(!noblock))
- add_wait_queue(&tfile->wq.wait, &wait);
- while (len) {
- if (unlikely(!noblock))
- current->state = TASK_INTERRUPTIBLE;
+ if (!len)
+ return ret;
- /* Read frames from the queue */
- if (!(skb = skb_dequeue(&tfile->socket.sk->sk_receive_queue))) {
- if (noblock) {
- ret = -EAGAIN;
- break;
- }
- if (signal_pending(current)) {
- ret = -ERESTARTSYS;
- break;
- }
- if (tun->dev->reg_state != NETREG_REGISTERED) {
- ret = -EIO;
- break;
- }
-
- /* Nothing to read, let's sleep */
- schedule();
- continue;
- }
+ if (tun->dev->reg_state != NETREG_REGISTERED)
+ return -EIO;
+ /* Read frames from queue */
+ skb = __skb_recv_datagram(tfile->socket.sk, noblock ? MSG_DONTWAIT : 0,
+ &peeked, &off, &err);
+ if (skb) {
ret = tun_put_user(tun, tfile, skb, iv, len);
kfree_skb(skb);
- break;
- }
-
- if (unlikely(!noblock)) {
- current->state = TASK_RUNNING;
- remove_wait_queue(&tfile->wq.wait, &wait);
- }
+ } else
+ ret = err;
return ret;
}
@@ -2199,8 +2177,8 @@ static int tun_chr_open(struct inode *inode, struct file * file)
tfile->flags = 0;
tfile->ifindex = 0;
- rcu_assign_pointer(tfile->socket.wq, &tfile->wq);
init_waitqueue_head(&tfile->wq.wait);
+ RCU_INIT_POINTER(tfile->socket.wq, &tfile->wq);
tfile->socket.file = file;
tfile->socket.ops = &tun_socket_ops;
diff --git a/drivers/net/wimax/i2400m/driver.c b/drivers/net/wimax/i2400m/driver.c
index 9c34d2fccfa..9c78090e72f 100644
--- a/drivers/net/wimax/i2400m/driver.c
+++ b/drivers/net/wimax/i2400m/driver.c
@@ -500,26 +500,23 @@ int i2400m_pm_notifier(struct notifier_block *notifier,
*/
int i2400m_pre_reset(struct i2400m *i2400m)
{
- int result;
struct device *dev = i2400m_dev(i2400m);
d_fnstart(3, dev, "(i2400m %p)\n", i2400m);
d_printf(1, dev, "pre-reset shut down\n");
- result = 0;
mutex_lock(&i2400m->init_mutex);
if (i2400m->updown) {
netif_tx_disable(i2400m->wimax_dev.net_dev);
__i2400m_dev_stop(i2400m);
- result = 0;
/* down't set updown to zero -- this way
* post_reset can restore properly */
}
mutex_unlock(&i2400m->init_mutex);
if (i2400m->bus_release)
i2400m->bus_release(i2400m);
- d_fnend(3, dev, "(i2400m %p) = %d\n", i2400m, result);
- return result;
+ d_fnend(3, dev, "(i2400m %p) = 0\n", i2400m);
+ return 0;
}
EXPORT_SYMBOL_GPL(i2400m_pre_reset);
diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
index 1def0bb5cb3..4c1e01ed16d 100644
--- a/drivers/of/of_mdio.c
+++ b/drivers/of/of_mdio.c
@@ -246,44 +246,6 @@ struct phy_device *of_phy_connect(struct net_device *dev,
EXPORT_SYMBOL(of_phy_connect);
/**
- * of_phy_connect_fixed_link - Parse fixed-link property and return a dummy phy
- * @dev: pointer to net_device claiming the phy
- * @hndlr: Link state callback for the network device
- * @iface: PHY data interface type
- *
- * This function is a temporary stop-gap and will be removed soon. It is
- * only to support the fs_enet, ucc_geth and gianfar Ethernet drivers. Do
- * not call this function from new drivers.
- */
-struct phy_device *of_phy_connect_fixed_link(struct net_device *dev,
- void (*hndlr)(struct net_device *),
- phy_interface_t iface)
-{
- struct device_node *net_np;
- char bus_id[MII_BUS_ID_SIZE + 3];
- struct phy_device *phy;
- const __be32 *phy_id;
- int sz;
-
- if (!dev->dev.parent)
- return NULL;
-
- net_np = dev->dev.parent->of_node;
- if (!net_np)
- return NULL;
-
- phy_id = of_get_property(net_np, "fixed-link", &sz);
- if (!phy_id || sz < sizeof(*phy_id))
- return NULL;
-
- sprintf(bus_id, PHY_ID_FMT, "fixed-0", be32_to_cpu(phy_id[0]));
-
- phy = phy_connect(dev, bus_id, hndlr, iface);
- return IS_ERR(phy) ? NULL : phy;
-}
-EXPORT_SYMBOL(of_phy_connect_fixed_link);
-
-/**
* of_phy_attach - Attach to a PHY without starting the state machine
* @dev: pointer to net_device claiming the phy
* @phy_np: Node pointer for the PHY
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 9d5ae0a2c95..7977b3958e2 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -184,10 +184,8 @@ static inline unsigned int sk_filter_size(unsigned int proglen)
int sk_filter(struct sock *sk, struct sk_buff *skb);
-u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx,
- const struct sock_filter_int *insni);
-u32 sk_run_filter_int_skb(const struct sk_buff *ctx,
- const struct sock_filter_int *insni);
+void sk_filter_select_runtime(struct sk_filter *fp);
+void sk_filter_free(struct sk_filter *fp);
int sk_convert_filter(struct sock_filter *prog, int len,
struct sock_filter_int *new_prog, int *new_len);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2dea98cbbdb..f4ad247fd32 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3153,6 +3153,20 @@ const char *netdev_drivername(const struct net_device *dev);
void linkwatch_run_queue(void);
+static inline netdev_features_t netdev_intersect_features(netdev_features_t f1,
+ netdev_features_t f2)
+{
+ if (f1 & NETIF_F_GEN_CSUM)
+ f1 |= (NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);
+ if (f2 & NETIF_F_GEN_CSUM)
+ f2 |= (NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);
+ f1 &= f2;
+ if (f1 & NETIF_F_GEN_CSUM)
+ f1 &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);
+
+ return f1;
+}
+
static inline netdev_features_t netdev_get_wanted_features(
struct net_device *dev)
{
diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h
index 0aa367e316c..d449018d072 100644
--- a/include/linux/of_mdio.h
+++ b/include/linux/of_mdio.h
@@ -22,9 +22,6 @@ extern struct phy_device *of_phy_connect(struct net_device *dev,
struct phy_device *of_phy_attach(struct net_device *dev,
struct device_node *phy_np, u32 flags,
phy_interface_t iface);
-extern struct phy_device *of_phy_connect_fixed_link(struct net_device *dev,
- void (*hndlr)(struct net_device *),
- phy_interface_t iface);
extern struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np);
@@ -59,13 +56,6 @@ static inline struct phy_device *of_phy_attach(struct net_device *dev,
return NULL;
}
-static inline struct phy_device *of_phy_connect_fixed_link(struct net_device *dev,
- void (*hndlr)(struct net_device *),
- phy_interface_t iface)
-{
- return NULL;
-}
-
static inline struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np)
{
return NULL;
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index bc35e4709e8..a0513210798 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -197,7 +197,8 @@ struct tcp_sock {
u8 do_early_retrans:1,/* Enable RFC5827 early-retransmit */
syn_data:1, /* SYN includes data */
syn_fastopen:1, /* SYN includes Fast Open option */
- syn_data_acked:1;/* data in SYN is acked by SYN-ACK */
+ syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
+ is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */
u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */
/* RTT measurement */
@@ -209,6 +210,8 @@ struct tcp_sock {
u32 packets_out; /* Packets which are "in flight" */
u32 retrans_out; /* Retransmitted packets out */
+ u32 max_packets_out; /* max packets_out in last window */
+ u32 max_packets_seq; /* right edge of max_packets_out flight */
u16 urg_data; /* Saved octet of OOB data and control flags */
u8 ecn_flags; /* ECN status bits. */
@@ -230,7 +233,6 @@ struct tcp_sock {
u32 snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */
u32 snd_cwnd_used;
u32 snd_cwnd_stamp;
- u32 lsnd_pending; /* packets inflight or unsent since last xmit */
u32 prior_cwnd; /* Congestion window at start of Recovery. */
u32 prr_delivered; /* Number of newly delivered packets to
* receiver in Recovery. */
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index e6bc14d8fa9..7ee6ce6564a 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -72,21 +72,23 @@ static inline void nft_data_debug(const struct nft_data *data)
* struct nft_ctx - nf_tables rule/set context
*
* @net: net namespace
- * @skb: netlink skb
- * @nlh: netlink message header
* @afi: address family info
* @table: the table the chain is contained in
* @chain: the chain the rule is contained in
* @nla: netlink attributes
+ * @portid: netlink portID of the original message
+ * @seq: netlink sequence number
+ * @report: notify via unicast netlink message
*/
struct nft_ctx {
struct net *net;
- const struct sk_buff *skb;
- const struct nlmsghdr *nlh;
- const struct nft_af_info *afi;
- const struct nft_table *table;
- const struct nft_chain *chain;
+ struct nft_af_info *afi;
+ struct nft_table *table;
+ struct nft_chain *chain;
const struct nlattr * const *nla;
+ u32 portid;
+ u32 seq;
+ bool report;
};
struct nft_data_desc {
@@ -146,6 +148,44 @@ struct nft_set_iter {
};
/**
+ * struct nft_set_desc - description of set elements
+ *
+ * @klen: key length
+ * @dlen: data length
+ * @size: number of set elements
+ */
+struct nft_set_desc {
+ unsigned int klen;
+ unsigned int dlen;
+ unsigned int size;
+};
+
+/**
+ * enum nft_set_class - performance class
+ *
+ * @NFT_LOOKUP_O_1: constant, O(1)
+ * @NFT_LOOKUP_O_LOG_N: logarithmic, O(log N)
+ * @NFT_LOOKUP_O_N: linear, O(N)
+ */
+enum nft_set_class {
+ NFT_SET_CLASS_O_1,
+ NFT_SET_CLASS_O_LOG_N,
+ NFT_SET_CLASS_O_N,
+};
+
+/**
+ * struct nft_set_estimate - estimation of memory and performance
+ * characteristics
+ *
+ * @size: required memory
+ * @class: lookup performance class
+ */
+struct nft_set_estimate {
+ unsigned int size;
+ enum nft_set_class class;
+};
+
+/**
* struct nft_set_ops - nf_tables set operations
*
* @lookup: look up an element within the set
@@ -174,7 +214,11 @@ struct nft_set_ops {
struct nft_set_iter *iter);
unsigned int (*privsize)(const struct nlattr * const nla[]);
+ bool (*estimate)(const struct nft_set_desc *desc,
+ u32 features,
+ struct nft_set_estimate *est);
int (*init)(const struct nft_set *set,
+ const struct nft_set_desc *desc,
const struct nlattr * const nla[]);
void (*destroy)(const struct nft_set *set);
@@ -194,6 +238,8 @@ void nft_unregister_set(struct nft_set_ops *ops);
* @name: name of the set
* @ktype: key type (numeric type defined by userspace, not used in the kernel)
* @dtype: data type (verdict or numeric type defined by userspace)
+ * @size: maximum set size
+ * @nelems: number of elements
* @ops: set ops
* @flags: set flags
* @klen: key length
@@ -206,6 +252,8 @@ struct nft_set {
char name[IFNAMSIZ];
u32 ktype;
u32 dtype;
+ u32 size;
+ u32 nelems;
/* runtime data below here */
const struct nft_set_ops *ops ____cacheline_aligned;
u16 flags;
@@ -222,6 +270,8 @@ static inline void *nft_set_priv(const struct nft_set *set)
struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
const struct nlattr *nla);
+struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
+ const struct nlattr *nla);
/**
* struct nft_set_binding - nf_tables set binding
@@ -341,18 +391,75 @@ struct nft_rule {
};
/**
- * struct nft_rule_trans - nf_tables rule update in transaction
+ * struct nft_trans - nf_tables object update in transaction
*
+ * @rcu_head: rcu head to defer release of transaction data
* @list: used internally
- * @ctx: rule context
- * @rule: rule that needs to be updated
+ * @msg_type: message type
+ * @ctx: transaction context
+ * @data: internal information related to the transaction
*/
-struct nft_rule_trans {
+struct nft_trans {
+ struct rcu_head rcu_head;
struct list_head list;
+ int msg_type;
struct nft_ctx ctx;
+ char data[0];
+};
+
+struct nft_trans_rule {
struct nft_rule *rule;
};
+#define nft_trans_rule(trans) \
+ (((struct nft_trans_rule *)trans->data)->rule)
+
+struct nft_trans_set {
+ struct nft_set *set;
+ u32 set_id;
+};
+
+#define nft_trans_set(trans) \
+ (((struct nft_trans_set *)trans->data)->set)
+#define nft_trans_set_id(trans) \
+ (((struct nft_trans_set *)trans->data)->set_id)
+
+struct nft_trans_chain {
+ bool update;
+ char name[NFT_CHAIN_MAXNAMELEN];
+ struct nft_stats __percpu *stats;
+ u8 policy;
+};
+
+#define nft_trans_chain_update(trans) \
+ (((struct nft_trans_chain *)trans->data)->update)
+#define nft_trans_chain_name(trans) \
+ (((struct nft_trans_chain *)trans->data)->name)
+#define nft_trans_chain_stats(trans) \
+ (((struct nft_trans_chain *)trans->data)->stats)
+#define nft_trans_chain_policy(trans) \
+ (((struct nft_trans_chain *)trans->data)->policy)
+
+struct nft_trans_table {
+ bool update;
+ bool enable;
+};
+
+#define nft_trans_table_update(trans) \
+ (((struct nft_trans_table *)trans->data)->update)
+#define nft_trans_table_enable(trans) \
+ (((struct nft_trans_table *)trans->data)->enable)
+
+struct nft_trans_elem {
+ struct nft_set *set;
+ struct nft_set_elem elem;
+};
+
+#define nft_trans_elem_set(trans) \
+ (((struct nft_trans_elem *)trans->data)->set)
+#define nft_trans_elem(trans) \
+ (((struct nft_trans_elem *)trans->data)->elem)
+
static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule)
{
return (struct nft_expr *)&rule->data[0];
@@ -385,6 +492,7 @@ static inline void *nft_userdata(const struct nft_rule *rule)
enum nft_chain_flags {
NFT_BASE_CHAIN = 0x1,
+ NFT_CHAIN_INACTIVE = 0x2,
};
/**
diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h
new file mode 100644
index 00000000000..0ee47c3e2e3
--- /dev/null
+++ b/include/net/netfilter/nft_meta.h
@@ -0,0 +1,36 @@
+#ifndef _NFT_META_H_
+#define _NFT_META_H_
+
+struct nft_meta {
+ enum nft_meta_keys key:8;
+ union {
+ enum nft_registers dreg:8;
+ enum nft_registers sreg:8;
+ };
+};
+
+extern const struct nla_policy nft_meta_policy[];
+
+int nft_meta_get_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[]);
+
+int nft_meta_set_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[]);
+
+int nft_meta_get_dump(struct sk_buff *skb,
+ const struct nft_expr *expr);
+
+int nft_meta_set_dump(struct sk_buff *skb,
+ const struct nft_expr *expr);
+
+void nft_meta_get_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt);
+
+void nft_meta_set_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt);
+
+#endif
diff --git a/include/net/tcp.h b/include/net/tcp.h
index f5d6ca4a9d2..e80abe4486c 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -971,8 +971,9 @@ static inline u32 tcp_wnd_end(const struct tcp_sock *tp)
/* We follow the spirit of RFC2861 to validate cwnd but implement a more
* flexible approach. The RFC suggests cwnd should not be raised unless
- * it was fully used previously. But we allow cwnd to grow as long as the
- * application has used half the cwnd.
+ * it was fully used previously. And that's exactly what we do in
+ * congestion avoidance mode. But in slow start we allow cwnd to grow
+ * as long as the application has used half the cwnd.
* Example :
* cwnd is 10 (IW10), but application sends 9 frames.
* We allow cwnd to reach 18 when all frames are ACKed.
@@ -985,7 +986,11 @@ static inline bool tcp_is_cwnd_limited(const struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
- return tp->snd_cwnd < 2 * tp->lsnd_pending;
+ /* If in slow start, ensure cwnd grows to twice what was ACKed. */
+ if (tp->snd_cwnd <= tp->snd_ssthresh)
+ return tp->snd_cwnd < 2 * tp->max_packets_out;
+
+ return tp->is_cwnd_limited;
}
static inline void tcp_check_probe_timer(struct sock *sk)
diff --git a/include/net/tso.h b/include/net/tso.h
new file mode 100644
index 00000000000..47e5444f7d1
--- /dev/null
+++ b/include/net/tso.h
@@ -0,0 +1,20 @@
+#ifndef _TSO_H
+#define _TSO_H
+
+#include <net/ip.h>
+
+struct tso_t {
+ int next_frag_idx;
+ void *data;
+ size_t size;
+ u16 ip_id;
+ u32 tcp_seq;
+};
+
+int tso_count_descs(struct sk_buff *skb);
+void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso,
+ int size, bool is_last);
+void tso_build_data(struct sk_buff *skb, struct tso_t *tso, int size);
+void tso_start(struct sk_buff *skb, struct tso_t *tso);
+
+#endif /* _TSO_H */
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index c88ccbfda5f..2a88f645a5d 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -212,6 +212,29 @@ enum nft_set_flags {
};
/**
+ * enum nft_set_policies - set selection policy
+ *
+ * @NFT_SET_POL_PERFORMANCE: prefer high performance over low memory use
+ * @NFT_SET_POL_MEMORY: prefer low memory use over high performance
+ */
+enum nft_set_policies {
+ NFT_SET_POL_PERFORMANCE,
+ NFT_SET_POL_MEMORY,
+};
+
+/**
+ * enum nft_set_desc_attributes - set element description
+ *
+ * @NFTA_SET_DESC_SIZE: number of elements in set (NLA_U32)
+ */
+enum nft_set_desc_attributes {
+ NFTA_SET_DESC_UNSPEC,
+ NFTA_SET_DESC_SIZE,
+ __NFTA_SET_DESC_MAX
+};
+#define NFTA_SET_DESC_MAX (__NFTA_SET_DESC_MAX - 1)
+
+/**
* enum nft_set_attributes - nf_tables set netlink attributes
*
* @NFTA_SET_TABLE: table name (NLA_STRING)
@@ -221,6 +244,9 @@ enum nft_set_flags {
* @NFTA_SET_KEY_LEN: key data length (NLA_U32)
* @NFTA_SET_DATA_TYPE: mapping data type (NLA_U32)
* @NFTA_SET_DATA_LEN: mapping data length (NLA_U32)
+ * @NFTA_SET_POLICY: selection policy (NLA_U32)
+ * @NFTA_SET_DESC: set description (NLA_NESTED)
+ * @NFTA_SET_ID: uniquely identifies a set in a transaction (NLA_U32)
*/
enum nft_set_attributes {
NFTA_SET_UNSPEC,
@@ -231,6 +257,9 @@ enum nft_set_attributes {
NFTA_SET_KEY_LEN,
NFTA_SET_DATA_TYPE,
NFTA_SET_DATA_LEN,
+ NFTA_SET_POLICY,
+ NFTA_SET_DESC,
+ NFTA_SET_ID,
__NFTA_SET_MAX
};
#define NFTA_SET_MAX (__NFTA_SET_MAX - 1)
@@ -266,12 +295,14 @@ enum nft_set_elem_attributes {
* @NFTA_SET_ELEM_LIST_TABLE: table of the set to be changed (NLA_STRING)
* @NFTA_SET_ELEM_LIST_SET: name of the set to be changed (NLA_STRING)
* @NFTA_SET_ELEM_LIST_ELEMENTS: list of set elements (NLA_NESTED: nft_set_elem_attributes)
+ * @NFTA_SET_ELEM_LIST_SET_ID: uniquely identifies a set in a transaction (NLA_U32)
*/
enum nft_set_elem_list_attributes {
NFTA_SET_ELEM_LIST_UNSPEC,
NFTA_SET_ELEM_LIST_TABLE,
NFTA_SET_ELEM_LIST_SET,
NFTA_SET_ELEM_LIST_ELEMENTS,
+ NFTA_SET_ELEM_LIST_SET_ID,
__NFTA_SET_ELEM_LIST_MAX
};
#define NFTA_SET_ELEM_LIST_MAX (__NFTA_SET_ELEM_LIST_MAX - 1)
@@ -457,12 +488,14 @@ enum nft_cmp_attributes {
* @NFTA_LOOKUP_SET: name of the set where to look for (NLA_STRING)
* @NFTA_LOOKUP_SREG: source register of the data to look for (NLA_U32: nft_registers)
* @NFTA_LOOKUP_DREG: destination register (NLA_U32: nft_registers)
+ * @NFTA_LOOKUP_SET_ID: uniquely identifies a set in a transaction (NLA_U32)
*/
enum nft_lookup_attributes {
NFTA_LOOKUP_UNSPEC,
NFTA_LOOKUP_SET,
NFTA_LOOKUP_SREG,
NFTA_LOOKUP_DREG,
+ NFTA_LOOKUP_SET_ID,
__NFTA_LOOKUP_MAX
};
#define NFTA_LOOKUP_MAX (__NFTA_LOOKUP_MAX - 1)
@@ -536,6 +569,8 @@ enum nft_exthdr_attributes {
* @NFT_META_SECMARK: packet secmark (skb->secmark)
* @NFT_META_NFPROTO: netfilter protocol
* @NFT_META_L4PROTO: layer 4 protocol number
+ * @NFT_META_BRI_IIFNAME: packet input bridge interface name
+ * @NFT_META_BRI_OIFNAME: packet output bridge interface name
*/
enum nft_meta_keys {
NFT_META_LEN,
@@ -555,6 +590,8 @@ enum nft_meta_keys {
NFT_META_SECMARK,
NFT_META_NFPROTO,
NFT_META_L4PROTO,
+ NFT_META_BRI_IIFNAME,
+ NFT_META_BRI_OIFNAME,
};
/**
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 7e02d624cc5..1036b6f2fde 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -273,10 +273,8 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
atomic_set(&filter->usage, 1);
filter->prog->len = new_len;
- filter->prog->bpf_func = (void *)sk_run_filter_int_seccomp;
- /* JIT internal BPF into native HW instructions */
- bpf_int_jit_compile(filter->prog);
+ sk_filter_select_runtime(filter->prog);
/*
* If there is an existing filter, make it the prev and don't drop its
@@ -340,7 +338,7 @@ void put_seccomp_filter(struct task_struct *tsk)
while (orig && atomic_dec_and_test(&orig->usage)) {
struct seccomp_filter *freeme = orig;
orig = orig->prev;
- bpf_jit_free(freeme->prog);
+ sk_filter_free(freeme->prog);
kfree(freeme);
}
}
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 3603ebcd5d6..e160934430e 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -1489,7 +1489,7 @@ static __init int test_bpf(void)
memcpy(fp_ext->insns, tests[i].insns_int,
fprog.len * 8);
fp->len = fprog.len;
- fp->bpf_func = sk_run_filter_int_skb;
+ sk_filter_select_runtime(fp);
} else {
err = sk_unattached_filter_create(&fp, &fprog);
if (tests[i].data_type == EXPECTED_FAIL) {
@@ -1516,7 +1516,7 @@ static __init int test_bpf(void)
if (tests[i].data_type != SKB_INT)
sk_unattached_filter_destroy(fp);
else
- kfree(fp);
+ sk_filter_free(fp);
if (err) {
pr_cont("FAIL %d\n", err);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 8f025afa29f..4181fb71ba7 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -678,9 +678,9 @@ static netdev_features_t vlan_dev_fix_features(struct net_device *dev,
struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
netdev_features_t old_features = features;
- features &= real_dev->vlan_features;
+ features = netdev_intersect_features(features, real_dev->vlan_features);
features |= NETIF_F_RXCSUM;
- features &= real_dev->features;
+ features = netdev_intersect_features(features, real_dev->features);
features |= old_features & NETIF_F_SOFT_FEATURES;
features |= NETIF_F_LLTX;
diff --git a/net/bridge/Makefile b/net/bridge/Makefile
index e85498b2f16..29b6e2a8ca9 100644
--- a/net/bridge/Makefile
+++ b/net/bridge/Makefile
@@ -5,7 +5,7 @@
obj-$(CONFIG_BRIDGE) += bridge.o
bridge-y := br.o br_device.o br_fdb.o br_forward.o br_if.o br_input.o \
- br_ioctl.o br_notify.o br_stp.o br_stp_bpdu.o \
+ br_ioctl.o br_stp.o br_stp_bpdu.o \
br_stp_if.o br_stp_timer.o br_netlink.o
bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o
@@ -16,4 +16,4 @@ bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o
bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o
-obj-$(CONFIG_BRIDGE_NF_EBTABLES) += netfilter/
+obj-$(CONFIG_BRIDGE_NETFILTER) += netfilter/
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 19311aafcf5..1a755a1e541 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -22,6 +22,104 @@
#include "br_private.h"
+/*
+ * Handle changes in state of network devices enslaved to a bridge.
+ *
+ * Note: don't care about up/down if bridge itself is down, because
+ * port state is checked when bridge is brought up.
+ */
+static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct net_bridge_port *p;
+ struct net_bridge *br;
+ bool changed_addr;
+ int err;
+
+ /* register of bridge completed, add sysfs entries */
+ if ((dev->priv_flags & IFF_EBRIDGE) && event == NETDEV_REGISTER) {
+ br_sysfs_addbr(dev);
+ return NOTIFY_DONE;
+ }
+
+ /* not a port of a bridge */
+ p = br_port_get_rtnl(dev);
+ if (!p)
+ return NOTIFY_DONE;
+
+ br = p->br;
+
+ switch (event) {
+ case NETDEV_CHANGEMTU:
+ dev_set_mtu(br->dev, br_min_mtu(br));
+ break;
+
+ case NETDEV_CHANGEADDR:
+ spin_lock_bh(&br->lock);
+ br_fdb_changeaddr(p, dev->dev_addr);
+ changed_addr = br_stp_recalculate_bridge_id(br);
+ spin_unlock_bh(&br->lock);
+
+ if (changed_addr)
+ call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
+
+ break;
+
+ case NETDEV_CHANGE:
+ br_port_carrier_check(p);
+ break;
+
+ case NETDEV_FEAT_CHANGE:
+ netdev_update_features(br->dev);
+ break;
+
+ case NETDEV_DOWN:
+ spin_lock_bh(&br->lock);
+ if (br->dev->flags & IFF_UP)
+ br_stp_disable_port(p);
+ spin_unlock_bh(&br->lock);
+ break;
+
+ case NETDEV_UP:
+ if (netif_running(br->dev) && netif_oper_up(dev)) {
+ spin_lock_bh(&br->lock);
+ br_stp_enable_port(p);
+ spin_unlock_bh(&br->lock);
+ }
+ break;
+
+ case NETDEV_UNREGISTER:
+ br_del_if(br, dev);
+ break;
+
+ case NETDEV_CHANGENAME:
+ err = br_sysfs_renameif(p);
+ if (err)
+ return notifier_from_errno(err);
+ break;
+
+ case NETDEV_PRE_TYPE_CHANGE:
+ /* Forbid underlaying device to change its type. */
+ return NOTIFY_BAD;
+
+ case NETDEV_RESEND_IGMP:
+ /* Propagate to master device */
+ call_netdevice_notifiers(event, br->dev);
+ break;
+ }
+
+ /* Events that may cause spanning tree to refresh */
+ if (event == NETDEV_CHANGEADDR || event == NETDEV_UP ||
+ event == NETDEV_CHANGE || event == NETDEV_DOWN)
+ br_ifinfo_notify(RTM_NEWLINK, p);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block br_device_notifier = {
+ .notifier_call = br_device_event
+};
+
static void __net_exit br_net_exit(struct net *net)
{
struct net_device *dev;
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
deleted file mode 100644
index 2998dd1769a..00000000000
--- a/net/bridge/br_notify.c
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Device event handling
- * Linux ethernet bridge
- *
- * Authors:
- * Lennert Buytenhek <buytenh@gnu.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/rtnetlink.h>
-#include <net/net_namespace.h>
-
-#include "br_private.h"
-
-static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr);
-
-struct notifier_block br_device_notifier = {
- .notifier_call = br_device_event
-};
-
-/*
- * Handle changes in state of network devices enslaved to a bridge.
- *
- * Note: don't care about up/down if bridge itself is down, because
- * port state is checked when bridge is brought up.
- */
-static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr)
-{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct net_bridge_port *p;
- struct net_bridge *br;
- bool changed_addr;
- int err;
-
- /* register of bridge completed, add sysfs entries */
- if ((dev->priv_flags & IFF_EBRIDGE) && event == NETDEV_REGISTER) {
- br_sysfs_addbr(dev);
- return NOTIFY_DONE;
- }
-
- /* not a port of a bridge */
- p = br_port_get_rtnl(dev);
- if (!p)
- return NOTIFY_DONE;
-
- br = p->br;
-
- switch (event) {
- case NETDEV_CHANGEMTU:
- dev_set_mtu(br->dev, br_min_mtu(br));
- break;
-
- case NETDEV_CHANGEADDR:
- spin_lock_bh(&br->lock);
- br_fdb_changeaddr(p, dev->dev_addr);
- changed_addr = br_stp_recalculate_bridge_id(br);
- spin_unlock_bh(&br->lock);
-
- if (changed_addr)
- call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
-
- break;
-
- case NETDEV_CHANGE:
- br_port_carrier_check(p);
- break;
-
- case NETDEV_FEAT_CHANGE:
- netdev_update_features(br->dev);
- break;
-
- case NETDEV_DOWN:
- spin_lock_bh(&br->lock);
- if (br->dev->flags & IFF_UP)
- br_stp_disable_port(p);
- spin_unlock_bh(&br->lock);
- break;
-
- case NETDEV_UP:
- if (netif_running(br->dev) && netif_oper_up(dev)) {
- spin_lock_bh(&br->lock);
- br_stp_enable_port(p);
- spin_unlock_bh(&br->lock);
- }
- break;
-
- case NETDEV_UNREGISTER:
- br_del_if(br, dev);
- break;
-
- case NETDEV_CHANGENAME:
- err = br_sysfs_renameif(p);
- if (err)
- return notifier_from_errno(err);
- break;
-
- case NETDEV_PRE_TYPE_CHANGE:
- /* Forbid underlaying device to change its type. */
- return NOTIFY_BAD;
-
- case NETDEV_RESEND_IGMP:
- /* Propagate to master device */
- call_netdevice_notifiers(event, br->dev);
- break;
- }
-
- /* Events that may cause spanning tree to refresh */
- if (event == NETDEV_CHANGEADDR || event == NETDEV_UP ||
- event == NETDEV_CHANGE || event == NETDEV_DOWN)
- br_ifinfo_notify(RTM_NEWLINK, p);
-
- return NOTIFY_DONE;
-}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index af067711574..53d6e32965f 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -333,8 +333,6 @@ struct br_input_skb_cb {
#define br_debug(br, format, args...) \
pr_debug("%s: " format, (br)->dev->name, ##args)
-extern struct notifier_block br_device_notifier;
-
/* called under bridge lock */
static inline int br_is_root_bridge(const struct net_bridge *br)
{
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index 5ca74a0e595..3baf29d34e6 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -2,13 +2,25 @@
# Bridge netfilter configuration
#
#
-config NF_TABLES_BRIDGE
+menuconfig NF_TABLES_BRIDGE
depends on NF_TABLES
+ select BRIDGE_NETFILTER
tristate "Ethernet Bridge nf_tables support"
+if NF_TABLES_BRIDGE
+
+config NFT_BRIDGE_META
+ tristate "Netfilter nf_table bridge meta support"
+ depends on NFT_META
+ help
+ Add support for bridge dedicated meta key.
+
+endif # NF_TABLES_BRIDGE
+
menuconfig BRIDGE_NF_EBTABLES
tristate "Ethernet Bridge tables (ebtables) support"
depends on BRIDGE && NETFILTER
+ select BRIDGE_NETFILTER
select NETFILTER_XTABLES
help
ebtables is a general, extensible frame/packet identification
diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile
index ea7629f58b3..6f2f3943d66 100644
--- a/net/bridge/netfilter/Makefile
+++ b/net/bridge/netfilter/Makefile
@@ -3,6 +3,7 @@
#
obj-$(CONFIG_NF_TABLES_BRIDGE) += nf_tables_bridge.o
+obj-$(CONFIG_NFT_BRIDGE_META) += nft_meta_bridge.o
obj-$(CONFIG_BRIDGE_NF_EBTABLES) += ebtables.o
diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c
new file mode 100644
index 00000000000..4f02109d708
--- /dev/null
+++ b/net/bridge/netfilter/nft_meta_bridge.c
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2014 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_meta.h>
+
+#include "../br_private.h"
+
+static void nft_meta_bridge_get_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_meta *priv = nft_expr_priv(expr);
+ const struct net_device *in = pkt->in, *out = pkt->out;
+ struct nft_data *dest = &data[priv->dreg];
+ const struct net_bridge_port *p;
+
+ switch (priv->key) {
+ case NFT_META_BRI_IIFNAME:
+ if (in == NULL || (p = br_port_get_rcu(in)) == NULL)
+ goto err;
+ break;
+ case NFT_META_BRI_OIFNAME:
+ if (out == NULL || (p = br_port_get_rcu(out)) == NULL)
+ goto err;
+ break;
+ default:
+ goto out;
+ }
+
+ strncpy((char *)dest->data, p->br->dev->name, sizeof(dest->data));
+ return;
+out:
+ return nft_meta_get_eval(expr, data, pkt);
+err:
+ data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static int nft_meta_bridge_get_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_meta *priv = nft_expr_priv(expr);
+ int err;
+
+ priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
+ switch (priv->key) {
+ case NFT_META_BRI_IIFNAME:
+ case NFT_META_BRI_OIFNAME:
+ break;
+ default:
+ return nft_meta_get_init(ctx, expr, tb);
+ }
+
+ priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG]));
+ err = nft_validate_output_register(priv->dreg);
+ if (err < 0)
+ return err;
+
+ err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+static struct nft_expr_type nft_meta_bridge_type;
+static const struct nft_expr_ops nft_meta_bridge_get_ops = {
+ .type = &nft_meta_bridge_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
+ .eval = nft_meta_bridge_get_eval,
+ .init = nft_meta_bridge_get_init,
+ .dump = nft_meta_get_dump,
+};
+
+static const struct nft_expr_ops nft_meta_bridge_set_ops = {
+ .type = &nft_meta_bridge_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
+ .eval = nft_meta_set_eval,
+ .init = nft_meta_set_init,
+ .dump = nft_meta_set_dump,
+};
+
+static const struct nft_expr_ops *
+nft_meta_bridge_select_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+{
+ if (tb[NFTA_META_KEY] == NULL)
+ return ERR_PTR(-EINVAL);
+
+ if (tb[NFTA_META_DREG] && tb[NFTA_META_SREG])
+ return ERR_PTR(-EINVAL);
+
+ if (tb[NFTA_META_DREG])
+ return &nft_meta_bridge_get_ops;
+
+ if (tb[NFTA_META_SREG])
+ return &nft_meta_bridge_set_ops;
+
+ return ERR_PTR(-EINVAL);
+}
+
+static struct nft_expr_type nft_meta_bridge_type __read_mostly = {
+ .family = NFPROTO_BRIDGE,
+ .name = "meta",
+ .select_ops = &nft_meta_bridge_select_ops,
+ .policy = nft_meta_policy,
+ .maxattr = NFTA_META_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_meta_bridge_module_init(void)
+{
+ return nft_register_expr(&nft_meta_bridge_type);
+}
+
+static void __exit nft_meta_bridge_module_exit(void)
+{
+ nft_unregister_expr(&nft_meta_bridge_type);
+}
+
+module_init(nft_meta_bridge_module_init);
+module_exit(nft_meta_bridge_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_BRIDGE, "meta");
diff --git a/net/core/Makefile b/net/core/Makefile
index 826b925aa45..71093d94ad2 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -9,7 +9,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
- sock_diag.o dev_ioctl.o
+ sock_diag.o dev_ioctl.o tso.o
obj-$(CONFIG_XFRM) += flow.o
obj-y += net-sysfs.o
diff --git a/net/core/filter.c b/net/core/filter.c
index 32c5b44c537..7067cb240d3 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -153,7 +153,7 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
* keep, 0 for none. @ctx is the data we are operating on, @insn is the
* array of filter instructions.
*/
-unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn)
+static unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn)
{
u64 stack[MAX_BPF_STACK / sizeof(u64)];
u64 regs[MAX_BPF_REG], tmp;
@@ -571,15 +571,6 @@ load_byte:
return 0;
}
-u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx,
- const struct sock_filter_int *insni)
- __attribute__ ((alias ("__sk_run_filter")));
-
-u32 sk_run_filter_int_skb(const struct sk_buff *ctx,
- const struct sock_filter_int *insni)
- __attribute__ ((alias ("__sk_run_filter")));
-EXPORT_SYMBOL_GPL(sk_run_filter_int_skb);
-
/* Helper to find the offset of pkt_type in sk_buff structure. We want
* to make sure its still a 3bit field starting at a byte boundary;
* taken from arch/x86/net/bpf_jit_comp.c.
@@ -1397,7 +1388,7 @@ static void sk_filter_release_rcu(struct rcu_head *rcu)
struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
sk_release_orig_filter(fp);
- bpf_jit_free(fp);
+ sk_filter_free(fp);
}
/**
@@ -1497,7 +1488,6 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
goto out_err_free;
}
- fp->bpf_func = sk_run_filter_int_skb;
fp->len = new_len;
/* 2nd pass: remap sock_filter insns into sock_filter_int insns. */
@@ -1510,6 +1500,8 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
*/
goto out_err_free;
+ sk_filter_select_runtime(fp);
+
kfree(old_prog);
return fp;
@@ -1528,6 +1520,29 @@ void __weak bpf_int_jit_compile(struct sk_filter *prog)
{
}
+/**
+ * sk_filter_select_runtime - select execution runtime for BPF program
+ * @fp: sk_filter populated with internal BPF program
+ *
+ * try to JIT internal BPF program, if JIT is not available select interpreter
+ * BPF program will be executed via SK_RUN_FILTER() macro
+ */
+void sk_filter_select_runtime(struct sk_filter *fp)
+{
+ fp->bpf_func = (void *) __sk_run_filter;
+
+ /* Probe if internal BPF can be JITed */
+ bpf_int_jit_compile(fp);
+}
+EXPORT_SYMBOL_GPL(sk_filter_select_runtime);
+
+/* free internal BPF program */
+void sk_filter_free(struct sk_filter *fp)
+{
+ bpf_jit_free(fp);
+}
+EXPORT_SYMBOL_GPL(sk_filter_free);
+
static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
struct sock *sk)
{
@@ -1548,12 +1563,9 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
/* JIT compiler couldn't process this filter, so do the
* internal BPF translation for the optimized interpreter.
*/
- if (!fp->jited) {
+ if (!fp->jited)
fp = __sk_migrate_filter(fp, sk);
- /* Probe if internal BPF can be jit-ed */
- bpf_int_jit_compile(fp);
- }
return fp;
}
diff --git a/net/core/tso.c b/net/core/tso.c
new file mode 100644
index 00000000000..097821dd3a8
--- /dev/null
+++ b/net/core/tso.c
@@ -0,0 +1,72 @@
+#include <net/ip.h>
+#include <net/tso.h>
+
+/* Calculate expected number of TX descriptors */
+int tso_count_descs(struct sk_buff *skb)
+{
+ /* The Marvell Way */
+ return skb_shinfo(skb)->gso_segs * 2 + skb_shinfo(skb)->nr_frags;
+}
+
+void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso,
+ int size, bool is_last)
+{
+ struct iphdr *iph;
+ struct tcphdr *tcph;
+ int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ int mac_hdr_len = skb_network_offset(skb);
+
+ memcpy(hdr, skb->data, hdr_len);
+ iph = (struct iphdr *)(hdr + mac_hdr_len);
+ iph->id = htons(tso->ip_id);
+ iph->tot_len = htons(size + hdr_len - mac_hdr_len);
+ tcph = (struct tcphdr *)(hdr + skb_transport_offset(skb));
+ tcph->seq = htonl(tso->tcp_seq);
+ tso->ip_id++;
+
+ if (!is_last) {
+ /* Clear all special flags for not last packet */
+ tcph->psh = 0;
+ tcph->fin = 0;
+ tcph->rst = 0;
+ }
+}
+
+void tso_build_data(struct sk_buff *skb, struct tso_t *tso, int size)
+{
+ tso->tcp_seq += size;
+ tso->size -= size;
+ tso->data += size;
+
+ if ((tso->size == 0) &&
+ (tso->next_frag_idx < skb_shinfo(skb)->nr_frags)) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[tso->next_frag_idx];
+
+ /* Move to next segment */
+ tso->size = frag->size;
+ tso->data = page_address(frag->page.p) + frag->page_offset;
+ tso->next_frag_idx++;
+ }
+}
+
+void tso_start(struct sk_buff *skb, struct tso_t *tso)
+{
+ int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+
+ tso->ip_id = ntohs(ip_hdr(skb)->id);
+ tso->tcp_seq = ntohl(tcp_hdr(skb)->seq);
+ tso->next_frag_idx = 0;
+
+ /* Build first data */
+ tso->size = skb_headlen(skb) - hdr_len;
+ tso->data = skb->data + hdr_len;
+ if ((tso->size == 0) &&
+ (tso->next_frag_idx < skb_shinfo(skb)->nr_frags)) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[tso->next_frag_idx];
+
+ /* Move to next segment */
+ tso->size = frag->size;
+ tso->data = page_address(frag->page.p) + frag->page_offset;
+ tso->next_frag_idx++;
+ }
+}
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 16f0b223102..1cd46a345cb 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -280,7 +280,7 @@ static ktime_t dccp_timestamp_seed;
*/
u32 dccp_timestamp(void)
{
- s64 delta = ktime_us_delta(ktime_get_real(), dccp_timestamp_seed);
+ u64 delta = (u64)ktime_us_delta(ktime_get_real(), dccp_timestamp_seed);
do_div(delta, 10);
return delta;
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 812b1835146..4bc508f0db9 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -486,4 +486,5 @@ static void __exit ipip_fini(void)
module_init(ipip_init);
module_exit(ipip_fini);
MODULE_LICENSE("GPL");
+MODULE_ALIAS_RTNL_LINK("ipip");
MODULE_ALIAS_NETDEV("tunl0");
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 3d61c52bdf7..d463c35db33 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1402,11 +1402,19 @@ static void tcp_cwnd_application_limited(struct sock *sk)
tp->snd_cwnd_stamp = tcp_time_stamp;
}
-static void tcp_cwnd_validate(struct sock *sk, u32 unsent_segs)
+static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
{
struct tcp_sock *tp = tcp_sk(sk);
- tp->lsnd_pending = tp->packets_out + unsent_segs;
+ /* Track the maximum number of outstanding packets in each
+ * window, and remember whether we were cwnd-limited then.
+ */
+ if (!before(tp->snd_una, tp->max_packets_seq) ||
+ tp->packets_out > tp->max_packets_out) {
+ tp->max_packets_out = tp->packets_out;
+ tp->max_packets_seq = tp->snd_nxt;
+ tp->is_cwnd_limited = is_cwnd_limited;
+ }
if (tcp_is_cwnd_limited(sk)) {
/* Network is feed fully. */
@@ -1660,7 +1668,8 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
*
* This algorithm is from John Heffner.
*/
-static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
+static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
+ bool *is_cwnd_limited)
{
struct tcp_sock *tp = tcp_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -1724,6 +1733,9 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
if (!tp->tso_deferred)
tp->tso_deferred = 1 | (jiffies << 1);
+ if (cong_win < send_win && cong_win < skb->len)
+ *is_cwnd_limited = true;
+
return true;
send_now:
@@ -1881,9 +1893,10 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
- unsigned int tso_segs, sent_pkts, unsent_segs = 0;
+ unsigned int tso_segs, sent_pkts;
int cwnd_quota;
int result;
+ bool is_cwnd_limited = false;
sent_pkts = 0;
@@ -1908,6 +1921,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
cwnd_quota = tcp_cwnd_test(tp, skb);
if (!cwnd_quota) {
+ is_cwnd_limited = true;
if (push_one == 2)
/* Force out a loss probe pkt. */
cwnd_quota = 1;
@@ -1924,8 +1938,9 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
nonagle : TCP_NAGLE_PUSH))))
break;
} else {
- if (!push_one && tcp_tso_should_defer(sk, skb))
- goto compute_unsent_segs;
+ if (!push_one &&
+ tcp_tso_should_defer(sk, skb, &is_cwnd_limited))
+ break;
}
/* TCP Small Queues :
@@ -1950,14 +1965,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
* there is no smp_mb__after_set_bit() yet
*/
smp_mb__after_clear_bit();
- if (atomic_read(&sk->sk_wmem_alloc) > limit) {
- u32 unsent_bytes;
-
-compute_unsent_segs:
- unsent_bytes = tp->write_seq - tp->snd_nxt;
- unsent_segs = DIV_ROUND_UP(unsent_bytes, mss_now);
+ if (atomic_read(&sk->sk_wmem_alloc) > limit)
break;
- }
}
limit = mss_now;
@@ -1997,7 +2006,7 @@ repair:
/* Send one loss probe per tail loss episode. */
if (push_one != 2)
tcp_schedule_loss_probe(sk);
- tcp_cwnd_validate(sk, unsent_segs);
+ tcp_cwnd_validate(sk, is_cwnd_limited);
return false;
}
return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk));
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index b05b609f69d..fe61545dde7 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -61,6 +61,7 @@
MODULE_AUTHOR("Ville Nuorvala");
MODULE_DESCRIPTION("IPv6 tunneling device");
MODULE_LICENSE("GPL");
+MODULE_ALIAS_RTNL_LINK("ip6tnl");
MODULE_ALIAS_NETDEV("ip6tnl0");
#ifdef IP6_TNL_DEBUG
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index f0a8ff9ed89..aa883afa652 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1455,7 +1455,7 @@ static int ip6_dst_gc(struct dst_ops *ops)
goto out;
net->ipv6.ip6_rt_gc_expire++;
- fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size);
+ fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
entries = dst_entries_get_slow(ops);
if (entries < ops->gc_thresh)
net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index e5a453ca302..f4380041f5e 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1828,4 +1828,5 @@ xfrm_tunnel_failed:
module_init(sit_init);
module_exit(sit_cleanup);
MODULE_LICENSE("GPL");
+MODULE_ALIAS_RTNL_LINK("sit");
MODULE_ALIAS_NETDEV("sit0");
diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c
index a83674edaaf..e4a25589ec1 100644
--- a/net/mac802154/llsec.c
+++ b/net/mac802154/llsec.c
@@ -207,6 +207,8 @@ static bool llsec_key_id_equal(const struct ieee802154_llsec_key_id *a,
return false;
switch (a->mode) {
+ case IEEE802154_SCF_KEY_INDEX:
+ return true;
case IEEE802154_SCF_KEY_SHORT_INDEX:
return a->short_source == b->short_source;
case IEEE802154_SCF_KEY_HW_INDEX:
@@ -773,10 +775,10 @@ int mac802154_llsec_encrypt(struct mac802154_llsec *sec, struct sk_buff *skb)
rc = llsec_do_encrypt(skb, sec, &hdr, key);
llsec_key_put(key);
- return rc < 0 ? rc : 0;
+ return rc;
fail_read:
- read_unlock(&sec->lock);
+ read_unlock_bh(&sec->lock);
fail:
rcu_read_unlock();
return rc;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 3fd159db9f0..04788477658 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -88,6 +88,45 @@ nf_tables_afinfo_lookup(struct net *net, int family, bool autoload)
return ERR_PTR(-EAFNOSUPPORT);
}
+static void nft_ctx_init(struct nft_ctx *ctx,
+ const struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ struct nft_af_info *afi,
+ struct nft_table *table,
+ struct nft_chain *chain,
+ const struct nlattr * const *nla)
+{
+ ctx->net = sock_net(skb->sk);
+ ctx->afi = afi;
+ ctx->table = table;
+ ctx->chain = chain;
+ ctx->nla = nla;
+ ctx->portid = NETLINK_CB(skb).portid;
+ ctx->report = nlmsg_report(nlh);
+ ctx->seq = nlh->nlmsg_seq;
+}
+
+static struct nft_trans *nft_trans_alloc(struct nft_ctx *ctx, int msg_type,
+ u32 size)
+{
+ struct nft_trans *trans;
+
+ trans = kzalloc(sizeof(struct nft_trans) + size, GFP_KERNEL);
+ if (trans == NULL)
+ return NULL;
+
+ trans->msg_type = msg_type;
+ trans->ctx = *ctx;
+
+ return trans;
+}
+
+static void nft_trans_destroy(struct nft_trans *trans)
+{
+ list_del(&trans->list);
+ kfree(trans);
+}
+
/*
* Tables
*/
@@ -197,20 +236,13 @@ nla_put_failure:
return -1;
}
-static int nf_tables_table_notify(const struct sk_buff *oskb,
- const struct nlmsghdr *nlh,
- const struct nft_table *table,
- int event, int family)
+static int nf_tables_table_notify(const struct nft_ctx *ctx, int event)
{
struct sk_buff *skb;
- u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
- u32 seq = nlh ? nlh->nlmsg_seq : 0;
- struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
- bool report;
int err;
- report = nlh ? nlmsg_report(nlh) : false;
- if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+ if (!ctx->report &&
+ !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
return 0;
err = -ENOBUFS;
@@ -218,18 +250,20 @@ static int nf_tables_table_notify(const struct sk_buff *oskb,
if (skb == NULL)
goto err;
- err = nf_tables_fill_table_info(skb, portid, seq, event, 0,
- family, table);
+ err = nf_tables_fill_table_info(skb, ctx->portid, ctx->seq, event, 0,
+ ctx->afi->family, ctx->table);
if (err < 0) {
kfree_skb(skb);
goto err;
}
- err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
- GFP_KERNEL);
+ err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+ ctx->report, GFP_KERNEL);
err:
- if (err < 0)
- nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+ if (err < 0) {
+ nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+ err);
+ }
return err;
}
@@ -269,6 +303,9 @@ done:
return skb->len;
}
+/* Internal table flags */
+#define NFT_TABLE_INACTIVE (1 << 15)
+
static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -295,6 +332,8 @@ static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb,
table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]);
if (IS_ERR(table))
return PTR_ERR(table);
+ if (table->flags & NFT_TABLE_INACTIVE)
+ return -ENOENT;
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb2)
@@ -343,7 +382,7 @@ err:
return err;
}
-static int nf_tables_table_disable(const struct nft_af_info *afi,
+static void nf_tables_table_disable(const struct nft_af_info *afi,
struct nft_table *table)
{
struct nft_chain *chain;
@@ -353,45 +392,63 @@ static int nf_tables_table_disable(const struct nft_af_info *afi,
nf_unregister_hooks(nft_base_chain(chain)->ops,
afi->nops);
}
-
- return 0;
}
-static int nf_tables_updtable(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const nla[],
- struct nft_af_info *afi, struct nft_table *table)
+static int nf_tables_updtable(struct nft_ctx *ctx)
{
- const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- int family = nfmsg->nfgen_family, ret = 0;
+ struct nft_trans *trans;
+ u32 flags;
+ int ret = 0;
- if (nla[NFTA_TABLE_FLAGS]) {
- u32 flags;
+ if (!ctx->nla[NFTA_TABLE_FLAGS])
+ return 0;
- flags = ntohl(nla_get_be32(nla[NFTA_TABLE_FLAGS]));
- if (flags & ~NFT_TABLE_F_DORMANT)
- return -EINVAL;
+ flags = ntohl(nla_get_be32(ctx->nla[NFTA_TABLE_FLAGS]));
+ if (flags & ~NFT_TABLE_F_DORMANT)
+ return -EINVAL;
+
+ trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE,
+ sizeof(struct nft_trans_table));
+ if (trans == NULL)
+ return -ENOMEM;
- if ((flags & NFT_TABLE_F_DORMANT) &&
- !(table->flags & NFT_TABLE_F_DORMANT)) {
- ret = nf_tables_table_disable(afi, table);
- if (ret >= 0)
- table->flags |= NFT_TABLE_F_DORMANT;
- } else if (!(flags & NFT_TABLE_F_DORMANT) &&
- table->flags & NFT_TABLE_F_DORMANT) {
- ret = nf_tables_table_enable(afi, table);
- if (ret >= 0)
- table->flags &= ~NFT_TABLE_F_DORMANT;
+ if ((flags & NFT_TABLE_F_DORMANT) &&
+ !(ctx->table->flags & NFT_TABLE_F_DORMANT)) {
+ nft_trans_table_enable(trans) = false;
+ } else if (!(flags & NFT_TABLE_F_DORMANT) &&
+ ctx->table->flags & NFT_TABLE_F_DORMANT) {
+ ret = nf_tables_table_enable(ctx->afi, ctx->table);
+ if (ret >= 0) {
+ ctx->table->flags &= ~NFT_TABLE_F_DORMANT;
+ nft_trans_table_enable(trans) = true;
}
- if (ret < 0)
- goto err;
}
+ if (ret < 0)
+ goto err;
- nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family);
+ nft_trans_table_update(trans) = true;
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+ return 0;
err:
+ nft_trans_destroy(trans);
return ret;
}
+static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
+{
+ struct nft_trans *trans;
+
+ trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_table));
+ if (trans == NULL)
+ return -ENOMEM;
+
+ if (msg_type == NFT_MSG_NEWTABLE)
+ ctx->table->flags |= NFT_TABLE_INACTIVE;
+
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+ return 0;
+}
+
static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -403,6 +460,8 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
u32 flags = 0;
+ struct nft_ctx ctx;
+ int err;
afi = nf_tables_afinfo_lookup(net, family, true);
if (IS_ERR(afi))
@@ -417,11 +476,15 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
}
if (table != NULL) {
+ if (table->flags & NFT_TABLE_INACTIVE)
+ return -ENOENT;
if (nlh->nlmsg_flags & NLM_F_EXCL)
return -EEXIST;
if (nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
- return nf_tables_updtable(nlsk, skb, nlh, nla, afi, table);
+
+ nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
+ return nf_tables_updtable(&ctx);
}
if (nla[NFTA_TABLE_FLAGS]) {
@@ -444,8 +507,14 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
INIT_LIST_HEAD(&table->sets);
table->flags = flags;
+ nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
+ err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
+ if (err < 0) {
+ kfree(table);
+ module_put(afi->owner);
+ return err;
+ }
list_add_tail(&table->list, &afi->tables);
- nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family);
return 0;
}
@@ -457,7 +526,8 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb,
struct nft_af_info *afi;
struct nft_table *table;
struct net *net = sock_net(skb->sk);
- int family = nfmsg->nfgen_family;
+ int family = nfmsg->nfgen_family, err;
+ struct nft_ctx ctx;
afi = nf_tables_afinfo_lookup(net, family, false);
if (IS_ERR(afi))
@@ -466,17 +536,27 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb,
table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]);
if (IS_ERR(table))
return PTR_ERR(table);
+ if (table->flags & NFT_TABLE_INACTIVE)
+ return -ENOENT;
if (!list_empty(&table->chains) || !list_empty(&table->sets))
return -EBUSY;
+ nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
+ err = nft_trans_table_add(&ctx, NFT_MSG_DELTABLE);
+ if (err < 0)
+ return err;
+
list_del(&table->list);
- nf_tables_table_notify(skb, nlh, table, NFT_MSG_DELTABLE, family);
- kfree(table);
- module_put(afi->owner);
return 0;
}
+static void nf_tables_table_destroy(struct nft_ctx *ctx)
+{
+ kfree(ctx->table);
+ module_put(ctx->afi->owner);
+}
+
int nft_register_chain_type(const struct nf_chain_type *ctype)
{
int err = 0;
@@ -541,7 +621,7 @@ static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = {
.len = NFT_CHAIN_MAXNAMELEN - 1 },
[NFTA_CHAIN_HOOK] = { .type = NLA_NESTED },
[NFTA_CHAIN_POLICY] = { .type = NLA_U32 },
- [NFTA_CHAIN_TYPE] = { .type = NLA_NUL_STRING },
+ [NFTA_CHAIN_TYPE] = { .type = NLA_STRING },
[NFTA_CHAIN_COUNTERS] = { .type = NLA_NESTED },
};
@@ -637,21 +717,13 @@ nla_put_failure:
return -1;
}
-static int nf_tables_chain_notify(const struct sk_buff *oskb,
- const struct nlmsghdr *nlh,
- const struct nft_table *table,
- const struct nft_chain *chain,
- int event, int family)
+static int nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
{
struct sk_buff *skb;
- u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
- struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
- u32 seq = nlh ? nlh->nlmsg_seq : 0;
- bool report;
int err;
- report = nlh ? nlmsg_report(nlh) : false;
- if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+ if (!ctx->report &&
+ !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
return 0;
err = -ENOBUFS;
@@ -659,18 +731,21 @@ static int nf_tables_chain_notify(const struct sk_buff *oskb,
if (skb == NULL)
goto err;
- err = nf_tables_fill_chain_info(skb, portid, seq, event, 0, family,
- table, chain);
+ err = nf_tables_fill_chain_info(skb, ctx->portid, ctx->seq, event, 0,
+ ctx->afi->family, ctx->table,
+ ctx->chain);
if (err < 0) {
kfree_skb(skb);
goto err;
}
- err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
- GFP_KERNEL);
+ err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+ ctx->report, GFP_KERNEL);
err:
- if (err < 0)
- nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+ if (err < 0) {
+ nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+ err);
+ }
return err;
}
@@ -740,10 +815,14 @@ static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb,
table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
if (IS_ERR(table))
return PTR_ERR(table);
+ if (table->flags & NFT_TABLE_INACTIVE)
+ return -ENOENT;
chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
if (IS_ERR(chain))
return PTR_ERR(chain);
+ if (chain->flags & NFT_CHAIN_INACTIVE)
+ return -ENOENT;
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb2)
@@ -767,8 +846,7 @@ static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = {
[NFTA_COUNTER_BYTES] = { .type = NLA_U64 },
};
-static int
-nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr)
+static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr)
{
struct nlattr *tb[NFTA_COUNTER_MAX+1];
struct nft_stats __percpu *newstats;
@@ -777,14 +855,14 @@ nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr)
err = nla_parse_nested(tb, NFTA_COUNTER_MAX, attr, nft_counter_policy);
if (err < 0)
- return err;
+ return ERR_PTR(err);
if (!tb[NFTA_COUNTER_BYTES] || !tb[NFTA_COUNTER_PACKETS])
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
newstats = alloc_percpu(struct nft_stats);
if (newstats == NULL)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
/* Restore old counters on this cpu, no problem. Per-cpu statistics
* are not exposed to userspace.
@@ -793,6 +871,12 @@ nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr)
stats->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
stats->pkts = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
+ return newstats;
+}
+
+static void nft_chain_stats_replace(struct nft_base_chain *chain,
+ struct nft_stats __percpu *newstats)
+{
if (chain->stats) {
struct nft_stats __percpu *oldstats =
nft_dereference(chain->stats);
@@ -802,17 +886,43 @@ nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr)
free_percpu(oldstats);
} else
rcu_assign_pointer(chain->stats, newstats);
+}
+
+static int nft_trans_chain_add(struct nft_ctx *ctx, int msg_type)
+{
+ struct nft_trans *trans;
+
+ trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_chain));
+ if (trans == NULL)
+ return -ENOMEM;
+
+ if (msg_type == NFT_MSG_NEWCHAIN)
+ ctx->chain->flags |= NFT_CHAIN_INACTIVE;
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
return 0;
}
+static void nf_tables_chain_destroy(struct nft_chain *chain)
+{
+ BUG_ON(chain->use > 0);
+
+ if (chain->flags & NFT_BASE_CHAIN) {
+ module_put(nft_base_chain(chain)->type->owner);
+ free_percpu(nft_base_chain(chain)->stats);
+ kfree(nft_base_chain(chain));
+ } else {
+ kfree(chain);
+ }
+}
+
static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
const struct nlattr * uninitialized_var(name);
- const struct nft_af_info *afi;
+ struct nft_af_info *afi;
struct nft_table *table;
struct nft_chain *chain;
struct nft_base_chain *basechain = NULL;
@@ -822,8 +932,10 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
u8 policy = NF_ACCEPT;
u64 handle = 0;
unsigned int i;
+ struct nft_stats __percpu *stats;
int err;
bool create;
+ struct nft_ctx ctx;
create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
@@ -869,6 +981,11 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
}
if (chain != NULL) {
+ struct nft_stats *stats = NULL;
+ struct nft_trans *trans;
+
+ if (chain->flags & NFT_CHAIN_INACTIVE)
+ return -ENOENT;
if (nlh->nlmsg_flags & NLM_F_EXCL)
return -EEXIST;
if (nlh->nlmsg_flags & NLM_F_REPLACE)
@@ -882,19 +999,31 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
if (!(chain->flags & NFT_BASE_CHAIN))
return -EOPNOTSUPP;
- err = nf_tables_counters(nft_base_chain(chain),
- nla[NFTA_CHAIN_COUNTERS]);
- if (err < 0)
- return err;
+ stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
+ if (IS_ERR(stats))
+ return PTR_ERR(stats);
}
- if (nla[NFTA_CHAIN_POLICY])
- nft_base_chain(chain)->policy = policy;
+ nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+ trans = nft_trans_alloc(&ctx, NFT_MSG_NEWCHAIN,
+ sizeof(struct nft_trans_chain));
+ if (trans == NULL)
+ return -ENOMEM;
- if (nla[NFTA_CHAIN_HANDLE] && name)
- nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
+ nft_trans_chain_stats(trans) = stats;
+ nft_trans_chain_update(trans) = true;
- goto notify;
+ if (nla[NFTA_CHAIN_POLICY])
+ nft_trans_chain_policy(trans) = policy;
+ else
+ nft_trans_chain_policy(trans) = -1;
+
+ if (nla[NFTA_CHAIN_HANDLE] && name) {
+ nla_strlcpy(nft_trans_chain_name(trans), name,
+ NFT_CHAIN_MAXNAMELEN);
+ }
+ list_add_tail(&trans->list, &net->nft.commit_list);
+ return 0;
}
if (table->use == UINT_MAX)
@@ -939,23 +1068,21 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
return -ENOMEM;
if (nla[NFTA_CHAIN_COUNTERS]) {
- err = nf_tables_counters(basechain,
- nla[NFTA_CHAIN_COUNTERS]);
- if (err < 0) {
+ stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
+ if (IS_ERR(stats)) {
module_put(type->owner);
kfree(basechain);
- return err;
+ return PTR_ERR(stats);
}
+ basechain->stats = stats;
} else {
- struct nft_stats __percpu *newstats;
-
- newstats = alloc_percpu(struct nft_stats);
- if (newstats == NULL) {
+ stats = alloc_percpu(struct nft_stats);
+ if (IS_ERR(stats)) {
module_put(type->owner);
kfree(basechain);
- return -ENOMEM;
+ return PTR_ERR(stats);
}
- rcu_assign_pointer(basechain->stats, newstats);
+ rcu_assign_pointer(basechain->stats, stats);
}
basechain->type = type;
@@ -992,31 +1119,26 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
if (!(table->flags & NFT_TABLE_F_DORMANT) &&
chain->flags & NFT_BASE_CHAIN) {
err = nf_register_hooks(nft_base_chain(chain)->ops, afi->nops);
- if (err < 0) {
- module_put(basechain->type->owner);
- free_percpu(basechain->stats);
- kfree(basechain);
- return err;
- }
+ if (err < 0)
+ goto err1;
}
- list_add_tail(&chain->list, &table->chains);
- table->use++;
-notify:
- nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_NEWCHAIN,
- family);
- return 0;
-}
-static void nf_tables_chain_destroy(struct nft_chain *chain)
-{
- BUG_ON(chain->use > 0);
+ nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+ err = nft_trans_chain_add(&ctx, NFT_MSG_NEWCHAIN);
+ if (err < 0)
+ goto err2;
- if (chain->flags & NFT_BASE_CHAIN) {
- module_put(nft_base_chain(chain)->type->owner);
- free_percpu(nft_base_chain(chain)->stats);
- kfree(nft_base_chain(chain));
- } else
- kfree(chain);
+ list_add_tail(&chain->list, &table->chains);
+ return 0;
+err2:
+ if (!(table->flags & NFT_TABLE_F_DORMANT) &&
+ chain->flags & NFT_BASE_CHAIN) {
+ nf_unregister_hooks(nft_base_chain(chain)->ops,
+ afi->nops);
+ }
+err1:
+ nf_tables_chain_destroy(chain);
+ return err;
}
static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
@@ -1024,11 +1146,13 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- const struct nft_af_info *afi;
+ struct nft_af_info *afi;
struct nft_table *table;
struct nft_chain *chain;
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
+ struct nft_ctx ctx;
+ int err;
afi = nf_tables_afinfo_lookup(net, family, false);
if (IS_ERR(afi))
@@ -1037,48 +1161,26 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
if (IS_ERR(table))
return PTR_ERR(table);
+ if (table->flags & NFT_TABLE_INACTIVE)
+ return -ENOENT;
chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
if (IS_ERR(chain))
return PTR_ERR(chain);
-
+ if (chain->flags & NFT_CHAIN_INACTIVE)
+ return -ENOENT;
if (!list_empty(&chain->rules) || chain->use > 0)
return -EBUSY;
- list_del(&chain->list);
- table->use--;
-
- if (!(table->flags & NFT_TABLE_F_DORMANT) &&
- chain->flags & NFT_BASE_CHAIN)
- nf_unregister_hooks(nft_base_chain(chain)->ops, afi->nops);
-
- nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_DELCHAIN,
- family);
-
- /* Make sure all rule references are gone before this is released */
- synchronize_rcu();
+ nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+ err = nft_trans_chain_add(&ctx, NFT_MSG_DELCHAIN);
+ if (err < 0)
+ return err;
- nf_tables_chain_destroy(chain);
+ list_del(&chain->list);
return 0;
}
-static void nft_ctx_init(struct nft_ctx *ctx,
- const struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nft_af_info *afi,
- const struct nft_table *table,
- const struct nft_chain *chain,
- const struct nlattr * const *nla)
-{
- ctx->net = sock_net(skb->sk);
- ctx->skb = skb;
- ctx->nlh = nlh;
- ctx->afi = afi;
- ctx->table = table;
- ctx->chain = chain;
- ctx->nla = nla;
-}
-
/*
* Expressions
*/
@@ -1093,7 +1195,10 @@ static void nft_ctx_init(struct nft_ctx *ctx,
int nft_register_expr(struct nft_expr_type *type)
{
nfnl_lock(NFNL_SUBSYS_NFTABLES);
- list_add_tail(&type->list, &nf_tables_expressions);
+ if (type->family == NFPROTO_UNSPEC)
+ list_add_tail(&type->list, &nf_tables_expressions);
+ else
+ list_add(&type->list, &nf_tables_expressions);
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
return 0;
}
@@ -1361,22 +1466,15 @@ nla_put_failure:
return -1;
}
-static int nf_tables_rule_notify(const struct sk_buff *oskb,
- const struct nlmsghdr *nlh,
- const struct nft_table *table,
- const struct nft_chain *chain,
+static int nf_tables_rule_notify(const struct nft_ctx *ctx,
const struct nft_rule *rule,
- int event, u32 flags, int family)
+ int event)
{
struct sk_buff *skb;
- u32 portid = NETLINK_CB(oskb).portid;
- struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
- u32 seq = nlh->nlmsg_seq;
- bool report;
int err;
- report = nlmsg_report(nlh);
- if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+ if (!ctx->report &&
+ !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
return 0;
err = -ENOBUFS;
@@ -1384,18 +1482,21 @@ static int nf_tables_rule_notify(const struct sk_buff *oskb,
if (skb == NULL)
goto err;
- err = nf_tables_fill_rule_info(skb, portid, seq, event, flags,
- family, table, chain, rule);
+ err = nf_tables_fill_rule_info(skb, ctx->portid, ctx->seq, event, 0,
+ ctx->afi->family, ctx->table,
+ ctx->chain, rule);
if (err < 0) {
kfree_skb(skb);
goto err;
}
- err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
- GFP_KERNEL);
+ err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+ ctx->report, GFP_KERNEL);
err:
- if (err < 0)
- nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+ if (err < 0) {
+ nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+ err);
+ }
return err;
}
@@ -1511,10 +1612,14 @@ static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb,
table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]);
if (IS_ERR(table))
return PTR_ERR(table);
+ if (table->flags & NFT_TABLE_INACTIVE)
+ return -ENOENT;
chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
if (IS_ERR(chain))
return PTR_ERR(chain);
+ if (chain->flags & NFT_CHAIN_INACTIVE)
+ return -ENOENT;
rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
if (IS_ERR(rule))
@@ -1554,37 +1659,36 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
kfree(rule);
}
-#define NFT_RULE_MAXEXPRS 128
-
-static struct nft_expr_info *info;
-
-static struct nft_rule_trans *
-nf_tables_trans_add(struct nft_ctx *ctx, struct nft_rule *rule)
+static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type,
+ struct nft_rule *rule)
{
- struct nft_rule_trans *rupd;
+ struct nft_trans *trans;
- rupd = kmalloc(sizeof(struct nft_rule_trans), GFP_KERNEL);
- if (rupd == NULL)
- return NULL;
+ trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_rule));
+ if (trans == NULL)
+ return NULL;
- rupd->ctx = *ctx;
- rupd->rule = rule;
- list_add_tail(&rupd->list, &ctx->net->nft.commit_list);
+ nft_trans_rule(trans) = rule;
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
- return rupd;
+ return trans;
}
+#define NFT_RULE_MAXEXPRS 128
+
+static struct nft_expr_info *info;
+
static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- const struct nft_af_info *afi;
+ struct nft_af_info *afi;
struct net *net = sock_net(skb->sk);
struct nft_table *table;
struct nft_chain *chain;
struct nft_rule *rule, *old_rule = NULL;
- struct nft_rule_trans *repl = NULL;
+ struct nft_trans *trans = NULL;
struct nft_expr *expr;
struct nft_ctx ctx;
struct nlattr *tmp;
@@ -1682,8 +1786,9 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
if (nlh->nlmsg_flags & NLM_F_REPLACE) {
if (nft_rule_is_active_next(net, old_rule)) {
- repl = nf_tables_trans_add(&ctx, old_rule);
- if (repl == NULL) {
+ trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE,
+ old_rule);
+ if (trans == NULL) {
err = -ENOMEM;
goto err2;
}
@@ -1705,7 +1810,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
list_add_rcu(&rule->list, &chain->rules);
}
- if (nf_tables_trans_add(&ctx, rule) == NULL) {
+ if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) {
err = -ENOMEM;
goto err3;
}
@@ -1713,11 +1818,10 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
err3:
list_del_rcu(&rule->list);
- if (repl) {
- list_del_rcu(&repl->rule->list);
- list_del(&repl->list);
- nft_rule_clear(net, repl->rule);
- kfree(repl);
+ if (trans) {
+ list_del_rcu(&nft_trans_rule(trans)->list);
+ nft_rule_clear(net, nft_trans_rule(trans));
+ nft_trans_destroy(trans);
}
err2:
nf_tables_rule_destroy(&ctx, rule);
@@ -1734,7 +1838,7 @@ nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule)
{
/* You cannot delete the same rule twice */
if (nft_rule_is_active_next(ctx->net, rule)) {
- if (nf_tables_trans_add(ctx, rule) == NULL)
+ if (nft_trans_rule_add(ctx, NFT_MSG_DELRULE, rule) == NULL)
return -ENOMEM;
nft_rule_disactivate_next(ctx->net, rule);
return 0;
@@ -1760,9 +1864,9 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- const struct nft_af_info *afi;
+ struct nft_af_info *afi;
struct net *net = sock_net(skb->sk);
- const struct nft_table *table;
+ struct nft_table *table;
struct nft_chain *chain = NULL;
struct nft_rule *rule;
int family = nfmsg->nfgen_family, err = 0;
@@ -1775,6 +1879,8 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]);
if (IS_ERR(table))
return PTR_ERR(table);
+ if (table->flags & NFT_TABLE_INACTIVE)
+ return -ENOENT;
if (nla[NFTA_RULE_CHAIN]) {
chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
@@ -1807,88 +1913,6 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
return err;
}
-static int nf_tables_commit(struct sk_buff *skb)
-{
- struct net *net = sock_net(skb->sk);
- struct nft_rule_trans *rupd, *tmp;
-
- /* Bump generation counter, invalidate any dump in progress */
- net->nft.genctr++;
-
- /* A new generation has just started */
- net->nft.gencursor = gencursor_next(net);
-
- /* Make sure all packets have left the previous generation before
- * purging old rules.
- */
- synchronize_rcu();
-
- list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
- /* This rule was inactive in the past and just became active.
- * Clear the next bit of the genmask since its meaning has
- * changed, now it is the future.
- */
- if (nft_rule_is_active(net, rupd->rule)) {
- nft_rule_clear(net, rupd->rule);
- nf_tables_rule_notify(skb, rupd->ctx.nlh,
- rupd->ctx.table, rupd->ctx.chain,
- rupd->rule, NFT_MSG_NEWRULE, 0,
- rupd->ctx.afi->family);
- list_del(&rupd->list);
- kfree(rupd);
- continue;
- }
-
- /* This rule is in the past, get rid of it */
- list_del_rcu(&rupd->rule->list);
- nf_tables_rule_notify(skb, rupd->ctx.nlh,
- rupd->ctx.table, rupd->ctx.chain,
- rupd->rule, NFT_MSG_DELRULE, 0,
- rupd->ctx.afi->family);
- }
-
- /* Make sure we don't see any packet traversing old rules */
- synchronize_rcu();
-
- /* Now we can safely release unused old rules */
- list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
- nf_tables_rule_destroy(&rupd->ctx, rupd->rule);
- list_del(&rupd->list);
- kfree(rupd);
- }
-
- return 0;
-}
-
-static int nf_tables_abort(struct sk_buff *skb)
-{
- struct net *net = sock_net(skb->sk);
- struct nft_rule_trans *rupd, *tmp;
-
- list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
- if (!nft_rule_is_active_next(net, rupd->rule)) {
- nft_rule_clear(net, rupd->rule);
- list_del(&rupd->list);
- kfree(rupd);
- continue;
- }
-
- /* This rule is inactive, get rid of it */
- list_del_rcu(&rupd->rule->list);
- }
-
- /* Make sure we don't see any packet accessing aborted rules */
- synchronize_rcu();
-
- list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
- nf_tables_rule_destroy(&rupd->ctx, rupd->rule);
- list_del(&rupd->list);
- kfree(rupd);
- }
-
- return 0;
-}
-
/*
* Sets
*/
@@ -1912,9 +1936,18 @@ void nft_unregister_set(struct nft_set_ops *ops)
}
EXPORT_SYMBOL_GPL(nft_unregister_set);
-static const struct nft_set_ops *nft_select_set_ops(const struct nlattr * const nla[])
+/*
+ * Select a set implementation based on the data characteristics and the
+ * given policy. The total memory use might not be known if no size is
+ * given, in that case the amount of memory per element is used.
+ */
+static const struct nft_set_ops *
+nft_select_set_ops(const struct nlattr * const nla[],
+ const struct nft_set_desc *desc,
+ enum nft_set_policies policy)
{
- const struct nft_set_ops *ops;
+ const struct nft_set_ops *ops, *bops;
+ struct nft_set_estimate est, best;
u32 features;
#ifdef CONFIG_MODULES
@@ -1932,15 +1965,45 @@ static const struct nft_set_ops *nft_select_set_ops(const struct nlattr * const
features &= NFT_SET_INTERVAL | NFT_SET_MAP;
}
- // FIXME: implement selection properly
+ bops = NULL;
+ best.size = ~0;
+ best.class = ~0;
+
list_for_each_entry(ops, &nf_tables_set_ops, list) {
if ((ops->features & features) != features)
continue;
+ if (!ops->estimate(desc, features, &est))
+ continue;
+
+ switch (policy) {
+ case NFT_SET_POL_PERFORMANCE:
+ if (est.class < best.class)
+ break;
+ if (est.class == best.class && est.size < best.size)
+ break;
+ continue;
+ case NFT_SET_POL_MEMORY:
+ if (est.size < best.size)
+ break;
+ if (est.size == best.size && est.class < best.class)
+ break;
+ continue;
+ default:
+ break;
+ }
+
if (!try_module_get(ops->owner))
continue;
- return ops;
+ if (bops != NULL)
+ module_put(bops->owner);
+
+ bops = ops;
+ best = est;
}
+ if (bops != NULL)
+ return bops;
+
return ERR_PTR(-EOPNOTSUPP);
}
@@ -1953,6 +2016,13 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
[NFTA_SET_KEY_LEN] = { .type = NLA_U32 },
[NFTA_SET_DATA_TYPE] = { .type = NLA_U32 },
[NFTA_SET_DATA_LEN] = { .type = NLA_U32 },
+ [NFTA_SET_POLICY] = { .type = NLA_U32 },
+ [NFTA_SET_DESC] = { .type = NLA_NESTED },
+ [NFTA_SET_ID] = { .type = NLA_U32 },
+};
+
+static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
+ [NFTA_SET_DESC_SIZE] = { .type = NLA_U32 },
};
static int nft_ctx_init_from_setattr(struct nft_ctx *ctx,
@@ -1962,8 +2032,8 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx,
{
struct net *net = sock_net(skb->sk);
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- const struct nft_af_info *afi = NULL;
- const struct nft_table *table = NULL;
+ struct nft_af_info *afi = NULL;
+ struct nft_table *table = NULL;
if (nfmsg->nfgen_family != NFPROTO_UNSPEC) {
afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
@@ -1978,6 +2048,8 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx,
table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]);
if (IS_ERR(table))
return PTR_ERR(table);
+ if (table->flags & NFT_TABLE_INACTIVE)
+ return -ENOENT;
}
nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla);
@@ -1999,13 +2071,27 @@ struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
return ERR_PTR(-ENOENT);
}
+struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
+ const struct nlattr *nla)
+{
+ struct nft_trans *trans;
+ u32 id = ntohl(nla_get_be32(nla));
+
+ list_for_each_entry(trans, &net->nft.commit_list, list) {
+ if (trans->msg_type == NFT_MSG_NEWSET &&
+ id == nft_trans_set_id(trans))
+ return nft_trans_set(trans);
+ }
+ return ERR_PTR(-ENOENT);
+}
+
static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
const char *name)
{
const struct nft_set *i;
const char *p;
unsigned long *inuse;
- unsigned int n = 0;
+ unsigned int n = 0, min = 0;
p = strnchr(name, IFNAMSIZ, '%');
if (p != NULL) {
@@ -2015,23 +2101,28 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
inuse = (unsigned long *)get_zeroed_page(GFP_KERNEL);
if (inuse == NULL)
return -ENOMEM;
-
+cont:
list_for_each_entry(i, &ctx->table->sets, list) {
int tmp;
if (!sscanf(i->name, name, &tmp))
continue;
- if (tmp < 0 || tmp >= BITS_PER_BYTE * PAGE_SIZE)
+ if (tmp < min || tmp >= min + BITS_PER_BYTE * PAGE_SIZE)
continue;
- set_bit(tmp, inuse);
+ set_bit(tmp - min, inuse);
}
n = find_first_zero_bit(inuse, BITS_PER_BYTE * PAGE_SIZE);
+ if (n >= BITS_PER_BYTE * PAGE_SIZE) {
+ min += BITS_PER_BYTE * PAGE_SIZE;
+ memset(inuse, 0, PAGE_SIZE);
+ goto cont;
+ }
free_page((unsigned long)inuse);
}
- snprintf(set->name, sizeof(set->name), name, n);
+ snprintf(set->name, sizeof(set->name), name, min + n);
list_for_each_entry(i, &ctx->table->sets, list) {
if (!strcmp(set->name, i->name))
return -ENFILE;
@@ -2044,8 +2135,9 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
{
struct nfgenmsg *nfmsg;
struct nlmsghdr *nlh;
- u32 portid = NETLINK_CB(ctx->skb).portid;
- u32 seq = ctx->nlh->nlmsg_seq;
+ struct nlattr *desc;
+ u32 portid = ctx->portid;
+ u32 seq = ctx->seq;
event |= NFNL_SUBSYS_NFTABLES << 8;
nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg),
@@ -2077,6 +2169,14 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
goto nla_put_failure;
}
+ desc = nla_nest_start(skb, NFTA_SET_DESC);
+ if (desc == NULL)
+ goto nla_put_failure;
+ if (set->size &&
+ nla_put_be32(skb, NFTA_SET_DESC_SIZE, htonl(set->size)))
+ goto nla_put_failure;
+ nla_nest_end(skb, desc);
+
return nlmsg_end(skb, nlh);
nla_put_failure:
@@ -2089,12 +2189,11 @@ static int nf_tables_set_notify(const struct nft_ctx *ctx,
int event)
{
struct sk_buff *skb;
- u32 portid = NETLINK_CB(ctx->skb).portid;
- bool report;
+ u32 portid = ctx->portid;
int err;
- report = nlmsg_report(ctx->nlh);
- if (!report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
+ if (!ctx->report &&
+ !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
return 0;
err = -ENOBUFS;
@@ -2108,8 +2207,8 @@ static int nf_tables_set_notify(const struct nft_ctx *ctx,
goto err;
}
- err = nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES, report,
- GFP_KERNEL);
+ err = nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES,
+ ctx->report, GFP_KERNEL);
err:
if (err < 0)
nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, err);
@@ -2183,7 +2282,7 @@ static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb,
{
const struct nft_set *set;
unsigned int idx, s_idx = cb->args[0];
- const struct nft_af_info *afi;
+ struct nft_af_info *afi;
struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2];
struct net *net = sock_net(skb->sk);
int cur_family = cb->args[3];
@@ -2260,6 +2359,8 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
return ret;
}
+#define NFT_SET_INACTIVE (1 << 15) /* Internal set flag */
+
static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -2289,6 +2390,8 @@ static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb,
set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
if (IS_ERR(set))
return PTR_ERR(set);
+ if (set->flags & NFT_SET_INACTIVE)
+ return -ENOENT;
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (skb2 == NULL)
@@ -2305,13 +2408,50 @@ err:
return err;
}
+static int nf_tables_set_desc_parse(const struct nft_ctx *ctx,
+ struct nft_set_desc *desc,
+ const struct nlattr *nla)
+{
+ struct nlattr *da[NFTA_SET_DESC_MAX + 1];
+ int err;
+
+ err = nla_parse_nested(da, NFTA_SET_DESC_MAX, nla, nft_set_desc_policy);
+ if (err < 0)
+ return err;
+
+ if (da[NFTA_SET_DESC_SIZE] != NULL)
+ desc->size = ntohl(nla_get_be32(da[NFTA_SET_DESC_SIZE]));
+
+ return 0;
+}
+
+static int nft_trans_set_add(struct nft_ctx *ctx, int msg_type,
+ struct nft_set *set)
+{
+ struct nft_trans *trans;
+
+ trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_set));
+ if (trans == NULL)
+ return -ENOMEM;
+
+ if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] != NULL) {
+ nft_trans_set_id(trans) =
+ ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID]));
+ set->flags |= NFT_SET_INACTIVE;
+ }
+ nft_trans_set(trans) = set;
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+
+ return 0;
+}
+
static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
const struct nft_set_ops *ops;
- const struct nft_af_info *afi;
+ struct nft_af_info *afi;
struct net *net = sock_net(skb->sk);
struct nft_table *table;
struct nft_set *set;
@@ -2319,14 +2459,18 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
char name[IFNAMSIZ];
unsigned int size;
bool create;
- u32 ktype, klen, dlen, dtype, flags;
+ u32 ktype, dtype, flags, policy;
+ struct nft_set_desc desc;
int err;
if (nla[NFTA_SET_TABLE] == NULL ||
nla[NFTA_SET_NAME] == NULL ||
- nla[NFTA_SET_KEY_LEN] == NULL)
+ nla[NFTA_SET_KEY_LEN] == NULL ||
+ nla[NFTA_SET_ID] == NULL)
return -EINVAL;
+ memset(&desc, 0, sizeof(desc));
+
ktype = NFT_DATA_VALUE;
if (nla[NFTA_SET_KEY_TYPE] != NULL) {
ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE]));
@@ -2334,8 +2478,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
return -EINVAL;
}
- klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN]));
- if (klen == 0 || klen > FIELD_SIZEOF(struct nft_data, data))
+ desc.klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN]));
+ if (desc.klen == 0 || desc.klen > FIELD_SIZEOF(struct nft_data, data))
return -EINVAL;
flags = 0;
@@ -2347,7 +2491,6 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
}
dtype = 0;
- dlen = 0;
if (nla[NFTA_SET_DATA_TYPE] != NULL) {
if (!(flags & NFT_SET_MAP))
return -EINVAL;
@@ -2360,15 +2503,25 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
if (dtype != NFT_DATA_VERDICT) {
if (nla[NFTA_SET_DATA_LEN] == NULL)
return -EINVAL;
- dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN]));
- if (dlen == 0 ||
- dlen > FIELD_SIZEOF(struct nft_data, data))
+ desc.dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN]));
+ if (desc.dlen == 0 ||
+ desc.dlen > FIELD_SIZEOF(struct nft_data, data))
return -EINVAL;
} else
- dlen = sizeof(struct nft_data);
+ desc.dlen = sizeof(struct nft_data);
} else if (flags & NFT_SET_MAP)
return -EINVAL;
+ policy = NFT_SET_POL_PERFORMANCE;
+ if (nla[NFTA_SET_POLICY] != NULL)
+ policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY]));
+
+ if (nla[NFTA_SET_DESC] != NULL) {
+ err = nf_tables_set_desc_parse(&ctx, &desc, nla[NFTA_SET_DESC]);
+ if (err < 0)
+ return err;
+ }
+
create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create);
@@ -2399,7 +2552,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
if (!(nlh->nlmsg_flags & NLM_F_CREATE))
return -ENOENT;
- ops = nft_select_set_ops(nla);
+ ops = nft_select_set_ops(nla, &desc, policy);
if (IS_ERR(ops))
return PTR_ERR(ops);
@@ -2420,17 +2573,21 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
INIT_LIST_HEAD(&set->bindings);
set->ops = ops;
set->ktype = ktype;
- set->klen = klen;
+ set->klen = desc.klen;
set->dtype = dtype;
- set->dlen = dlen;
+ set->dlen = desc.dlen;
set->flags = flags;
+ set->size = desc.size;
- err = ops->init(set, nla);
+ err = ops->init(set, &desc, nla);
+ if (err < 0)
+ goto err2;
+
+ err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set);
if (err < 0)
goto err2;
list_add_tail(&set->list, &table->sets);
- nf_tables_set_notify(&ctx, set, NFT_MSG_NEWSET);
return 0;
err2:
@@ -2440,16 +2597,20 @@ err1:
return err;
}
-static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
+static void nft_set_destroy(struct nft_set *set)
{
- list_del(&set->list);
- nf_tables_set_notify(ctx, set, NFT_MSG_DELSET);
-
set->ops->destroy(set);
module_put(set->ops->owner);
kfree(set);
}
+static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ list_del(&set->list);
+ nf_tables_set_notify(ctx, set, NFT_MSG_DELSET);
+ nft_set_destroy(set);
+}
+
static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -2471,10 +2632,16 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
if (IS_ERR(set))
return PTR_ERR(set);
+ if (set->flags & NFT_SET_INACTIVE)
+ return -ENOENT;
if (!list_empty(&set->bindings))
return -EBUSY;
- nf_tables_set_destroy(&ctx, set);
+ err = nft_trans_set_add(&ctx, NFT_MSG_DELSET, set);
+ if (err < 0)
+ return err;
+
+ list_del(&set->list);
return 0;
}
@@ -2534,7 +2701,8 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
{
list_del(&binding->list);
- if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS)
+ if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS &&
+ !(set->flags & NFT_SET_INACTIVE))
nf_tables_set_destroy(ctx, set);
}
@@ -2552,16 +2720,18 @@ static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX +
[NFTA_SET_ELEM_LIST_TABLE] = { .type = NLA_STRING },
[NFTA_SET_ELEM_LIST_SET] = { .type = NLA_STRING },
[NFTA_SET_ELEM_LIST_ELEMENTS] = { .type = NLA_NESTED },
+ [NFTA_SET_ELEM_LIST_SET_ID] = { .type = NLA_U32 },
};
static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx,
const struct sk_buff *skb,
const struct nlmsghdr *nlh,
- const struct nlattr * const nla[])
+ const struct nlattr * const nla[],
+ bool trans)
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- const struct nft_af_info *afi;
- const struct nft_table *table;
+ struct nft_af_info *afi;
+ struct nft_table *table;
struct net *net = sock_net(skb->sk);
afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
@@ -2571,6 +2741,8 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx,
table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE]);
if (IS_ERR(table))
return PTR_ERR(table);
+ if (!trans && (table->flags & NFT_TABLE_INACTIVE))
+ return -ENOENT;
nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla);
return 0;
@@ -2644,13 +2816,16 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
if (err < 0)
return err;
- err = nft_ctx_init_from_elemattr(&ctx, cb->skb, cb->nlh, (void *)nla);
+ err = nft_ctx_init_from_elemattr(&ctx, cb->skb, cb->nlh, (void *)nla,
+ false);
if (err < 0)
return err;
set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
if (IS_ERR(set))
return PTR_ERR(set);
+ if (set->flags & NFT_SET_INACTIVE)
+ return -ENOENT;
event = NFT_MSG_NEWSETELEM;
event |= NFNL_SUBSYS_NFTABLES << 8;
@@ -2707,13 +2882,15 @@ static int nf_tables_getsetelem(struct sock *nlsk, struct sk_buff *skb,
struct nft_ctx ctx;
int err;
- err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla);
+ err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, false);
if (err < 0)
return err;
set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
if (IS_ERR(set))
return PTR_ERR(set);
+ if (set->flags & NFT_SET_INACTIVE)
+ return -ENOENT;
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
@@ -2724,7 +2901,98 @@ static int nf_tables_getsetelem(struct sock *nlsk, struct sk_buff *skb,
return -EOPNOTSUPP;
}
-static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set,
+static int nf_tables_fill_setelem_info(struct sk_buff *skb,
+ const struct nft_ctx *ctx, u32 seq,
+ u32 portid, int event, u16 flags,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nfgenmsg *nfmsg;
+ struct nlmsghdr *nlh;
+ struct nlattr *nest;
+ int err;
+
+ event |= NFNL_SUBSYS_NFTABLES << 8;
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg),
+ flags);
+ if (nlh == NULL)
+ goto nla_put_failure;
+
+ nfmsg = nlmsg_data(nlh);
+ nfmsg->nfgen_family = ctx->afi->family;
+ nfmsg->version = NFNETLINK_V0;
+ nfmsg->res_id = 0;
+
+ if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name))
+ goto nla_put_failure;
+ if (nla_put_string(skb, NFTA_SET_NAME, set->name))
+ goto nla_put_failure;
+
+ nest = nla_nest_start(skb, NFTA_SET_ELEM_LIST_ELEMENTS);
+ if (nest == NULL)
+ goto nla_put_failure;
+
+ err = nf_tables_fill_setelem(skb, set, elem);
+ if (err < 0)
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+
+ return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+ nlmsg_trim(skb, nlh);
+ return -1;
+}
+
+static int nf_tables_setelem_notify(const struct nft_ctx *ctx,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem,
+ int event, u16 flags)
+{
+ struct net *net = ctx->net;
+ u32 portid = ctx->portid;
+ struct sk_buff *skb;
+ int err;
+
+ if (!ctx->report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+ return 0;
+
+ err = -ENOBUFS;
+ skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (skb == NULL)
+ goto err;
+
+ err = nf_tables_fill_setelem_info(skb, ctx, 0, portid, event, flags,
+ set, elem);
+ if (err < 0) {
+ kfree_skb(skb);
+ goto err;
+ }
+
+ err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, ctx->report,
+ GFP_KERNEL);
+err:
+ if (err < 0)
+ nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+ return err;
+}
+
+static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx,
+ int msg_type,
+ struct nft_set *set)
+{
+ struct nft_trans *trans;
+
+ trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_elem));
+ if (trans == NULL)
+ return NULL;
+
+ nft_trans_elem_set(trans) = set;
+ return trans;
+}
+
+static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
const struct nlattr *attr)
{
struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
@@ -2732,8 +3000,12 @@ static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_elem elem;
struct nft_set_binding *binding;
enum nft_registers dreg;
+ struct nft_trans *trans;
int err;
+ if (set->size && set->nelems == set->size)
+ return -ENFILE;
+
err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
nft_set_elem_policy);
if (err < 0)
@@ -2786,7 +3058,7 @@ static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_ctx bind_ctx = {
.afi = ctx->afi,
.table = ctx->table,
- .chain = binding->chain,
+ .chain = (struct nft_chain *)binding->chain,
};
err = nft_validate_data_load(&bind_ctx, dreg,
@@ -2796,12 +3068,20 @@ static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set,
}
}
+ trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set);
+ if (trans == NULL)
+ goto err3;
+
err = set->ops->insert(set, &elem);
if (err < 0)
- goto err3;
+ goto err4;
+ nft_trans_elem(trans) = elem;
+ list_add(&trans->list, &ctx->net->nft.commit_list);
return 0;
+err4:
+ kfree(trans);
err3:
if (nla[NFTA_SET_ELEM_DATA] != NULL)
nft_data_uninit(&elem.data, d2.type);
@@ -2815,35 +3095,44 @@ static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
+ struct net *net = sock_net(skb->sk);
const struct nlattr *attr;
struct nft_set *set;
struct nft_ctx ctx;
- int rem, err;
+ int rem, err = 0;
- err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla);
+ err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, true);
if (err < 0)
return err;
set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
- if (IS_ERR(set))
- return PTR_ERR(set);
+ if (IS_ERR(set)) {
+ if (nla[NFTA_SET_ELEM_LIST_SET_ID]) {
+ set = nf_tables_set_lookup_byid(net,
+ nla[NFTA_SET_ELEM_LIST_SET_ID]);
+ }
+ if (IS_ERR(set))
+ return PTR_ERR(set);
+ }
+
if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
return -EBUSY;
nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
err = nft_add_set_elem(&ctx, set, attr);
if (err < 0)
- return err;
+ break;
}
- return 0;
+ return err;
}
-static int nft_del_setelem(const struct nft_ctx *ctx, struct nft_set *set,
+static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
const struct nlattr *attr)
{
struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
struct nft_data_desc desc;
struct nft_set_elem elem;
+ struct nft_trans *trans;
int err;
err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
@@ -2867,7 +3156,12 @@ static int nft_del_setelem(const struct nft_ctx *ctx, struct nft_set *set,
if (err < 0)
goto err2;
- set->ops->remove(set, &elem);
+ trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set);
+ if (trans == NULL)
+ goto err2;
+
+ nft_trans_elem(trans) = elem;
+ list_add(&trans->list, &ctx->net->nft.commit_list);
nft_data_uninit(&elem.key, NFT_DATA_VALUE);
if (set->flags & NFT_SET_MAP)
@@ -2886,9 +3180,9 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb,
const struct nlattr *attr;
struct nft_set *set;
struct nft_ctx ctx;
- int rem, err;
+ int rem, err = 0;
- err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla);
+ err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, false);
if (err < 0)
return err;
@@ -2901,14 +3195,14 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb,
nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
err = nft_del_setelem(&ctx, set, attr);
if (err < 0)
- return err;
+ break;
}
- return 0;
+ return err;
}
static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
[NFT_MSG_NEWTABLE] = {
- .call = nf_tables_newtable,
+ .call_batch = nf_tables_newtable,
.attr_count = NFTA_TABLE_MAX,
.policy = nft_table_policy,
},
@@ -2918,12 +3212,12 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.policy = nft_table_policy,
},
[NFT_MSG_DELTABLE] = {
- .call = nf_tables_deltable,
+ .call_batch = nf_tables_deltable,
.attr_count = NFTA_TABLE_MAX,
.policy = nft_table_policy,
},
[NFT_MSG_NEWCHAIN] = {
- .call = nf_tables_newchain,
+ .call_batch = nf_tables_newchain,
.attr_count = NFTA_CHAIN_MAX,
.policy = nft_chain_policy,
},
@@ -2933,7 +3227,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.policy = nft_chain_policy,
},
[NFT_MSG_DELCHAIN] = {
- .call = nf_tables_delchain,
+ .call_batch = nf_tables_delchain,
.attr_count = NFTA_CHAIN_MAX,
.policy = nft_chain_policy,
},
@@ -2953,7 +3247,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.policy = nft_rule_policy,
},
[NFT_MSG_NEWSET] = {
- .call = nf_tables_newset,
+ .call_batch = nf_tables_newset,
.attr_count = NFTA_SET_MAX,
.policy = nft_set_policy,
},
@@ -2963,12 +3257,12 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.policy = nft_set_policy,
},
[NFT_MSG_DELSET] = {
- .call = nf_tables_delset,
+ .call_batch = nf_tables_delset,
.attr_count = NFTA_SET_MAX,
.policy = nft_set_policy,
},
[NFT_MSG_NEWSETELEM] = {
- .call = nf_tables_newsetelem,
+ .call_batch = nf_tables_newsetelem,
.attr_count = NFTA_SET_ELEM_LIST_MAX,
.policy = nft_set_elem_list_policy,
},
@@ -2978,12 +3272,270 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.policy = nft_set_elem_list_policy,
},
[NFT_MSG_DELSETELEM] = {
- .call = nf_tables_delsetelem,
+ .call_batch = nf_tables_delsetelem,
.attr_count = NFTA_SET_ELEM_LIST_MAX,
.policy = nft_set_elem_list_policy,
},
};
+static void nft_chain_commit_update(struct nft_trans *trans)
+{
+ struct nft_base_chain *basechain;
+
+ if (nft_trans_chain_name(trans)[0])
+ strcpy(trans->ctx.chain->name, nft_trans_chain_name(trans));
+
+ if (!(trans->ctx.chain->flags & NFT_BASE_CHAIN))
+ return;
+
+ basechain = nft_base_chain(trans->ctx.chain);
+ nft_chain_stats_replace(basechain, nft_trans_chain_stats(trans));
+
+ switch (nft_trans_chain_policy(trans)) {
+ case NF_DROP:
+ case NF_ACCEPT:
+ basechain->policy = nft_trans_chain_policy(trans);
+ break;
+ }
+}
+
+/* Schedule objects for release via rcu to make sure no packets are accesing
+ * removed rules.
+ */
+static void nf_tables_commit_release_rcu(struct rcu_head *rt)
+{
+ struct nft_trans *trans = container_of(rt, struct nft_trans, rcu_head);
+
+ switch (trans->msg_type) {
+ case NFT_MSG_DELTABLE:
+ nf_tables_table_destroy(&trans->ctx);
+ break;
+ case NFT_MSG_DELCHAIN:
+ nf_tables_chain_destroy(trans->ctx.chain);
+ break;
+ case NFT_MSG_DELRULE:
+ nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
+ break;
+ case NFT_MSG_DELSET:
+ nft_set_destroy(nft_trans_set(trans));
+ break;
+ }
+ kfree(trans);
+}
+
+static int nf_tables_commit(struct sk_buff *skb)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nft_trans *trans, *next;
+ struct nft_set *set;
+
+ /* Bump generation counter, invalidate any dump in progress */
+ net->nft.genctr++;
+
+ /* A new generation has just started */
+ net->nft.gencursor = gencursor_next(net);
+
+ /* Make sure all packets have left the previous generation before
+ * purging old rules.
+ */
+ synchronize_rcu();
+
+ list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWTABLE:
+ if (nft_trans_table_update(trans)) {
+ if (!nft_trans_table_enable(trans)) {
+ nf_tables_table_disable(trans->ctx.afi,
+ trans->ctx.table);
+ trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
+ }
+ } else {
+ trans->ctx.table->flags &= ~NFT_TABLE_INACTIVE;
+ }
+ nf_tables_table_notify(&trans->ctx, NFT_MSG_NEWTABLE);
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_DELTABLE:
+ nf_tables_table_notify(&trans->ctx, NFT_MSG_DELTABLE);
+ break;
+ case NFT_MSG_NEWCHAIN:
+ if (nft_trans_chain_update(trans))
+ nft_chain_commit_update(trans);
+ else {
+ trans->ctx.chain->flags &= ~NFT_CHAIN_INACTIVE;
+ trans->ctx.table->use++;
+ }
+ nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN);
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_DELCHAIN:
+ trans->ctx.table->use--;
+ nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN);
+ if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT) &&
+ trans->ctx.chain->flags & NFT_BASE_CHAIN) {
+ nf_unregister_hooks(nft_base_chain(trans->ctx.chain)->ops,
+ trans->ctx.afi->nops);
+ }
+ break;
+ case NFT_MSG_NEWRULE:
+ nft_rule_clear(trans->ctx.net, nft_trans_rule(trans));
+ nf_tables_rule_notify(&trans->ctx,
+ nft_trans_rule(trans),
+ NFT_MSG_NEWRULE);
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_DELRULE:
+ list_del_rcu(&nft_trans_rule(trans)->list);
+ nf_tables_rule_notify(&trans->ctx,
+ nft_trans_rule(trans),
+ NFT_MSG_DELRULE);
+ break;
+ case NFT_MSG_NEWSET:
+ nft_trans_set(trans)->flags &= ~NFT_SET_INACTIVE;
+ nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
+ NFT_MSG_NEWSET);
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_DELSET:
+ nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
+ NFT_MSG_DELSET);
+ break;
+ case NFT_MSG_NEWSETELEM:
+ nft_trans_elem_set(trans)->nelems++;
+ nf_tables_setelem_notify(&trans->ctx,
+ nft_trans_elem_set(trans),
+ &nft_trans_elem(trans),
+ NFT_MSG_NEWSETELEM, 0);
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_DELSETELEM:
+ nft_trans_elem_set(trans)->nelems--;
+ nf_tables_setelem_notify(&trans->ctx,
+ nft_trans_elem_set(trans),
+ &nft_trans_elem(trans),
+ NFT_MSG_DELSETELEM, 0);
+ set = nft_trans_elem_set(trans);
+ set->ops->get(set, &nft_trans_elem(trans));
+ set->ops->remove(set, &nft_trans_elem(trans));
+ nft_trans_destroy(trans);
+ break;
+ }
+ }
+
+ list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+ list_del(&trans->list);
+ trans->ctx.nla = NULL;
+ call_rcu(&trans->rcu_head, nf_tables_commit_release_rcu);
+ }
+
+ return 0;
+}
+
+/* Schedule objects for release via rcu to make sure no packets are accesing
+ * aborted rules.
+ */
+static void nf_tables_abort_release_rcu(struct rcu_head *rt)
+{
+ struct nft_trans *trans = container_of(rt, struct nft_trans, rcu_head);
+
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWTABLE:
+ nf_tables_table_destroy(&trans->ctx);
+ break;
+ case NFT_MSG_NEWCHAIN:
+ nf_tables_chain_destroy(trans->ctx.chain);
+ break;
+ case NFT_MSG_NEWRULE:
+ nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
+ break;
+ case NFT_MSG_NEWSET:
+ nft_set_destroy(nft_trans_set(trans));
+ break;
+ }
+ kfree(trans);
+}
+
+static int nf_tables_abort(struct sk_buff *skb)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nft_trans *trans, *next;
+ struct nft_set *set;
+
+ list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWTABLE:
+ if (nft_trans_table_update(trans)) {
+ if (nft_trans_table_enable(trans)) {
+ nf_tables_table_disable(trans->ctx.afi,
+ trans->ctx.table);
+ trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
+ }
+ nft_trans_destroy(trans);
+ } else {
+ list_del(&trans->ctx.table->list);
+ }
+ break;
+ case NFT_MSG_DELTABLE:
+ list_add_tail(&trans->ctx.table->list,
+ &trans->ctx.afi->tables);
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_NEWCHAIN:
+ if (nft_trans_chain_update(trans)) {
+ if (nft_trans_chain_stats(trans))
+ free_percpu(nft_trans_chain_stats(trans));
+
+ nft_trans_destroy(trans);
+ } else {
+ list_del(&trans->ctx.chain->list);
+ if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT) &&
+ trans->ctx.chain->flags & NFT_BASE_CHAIN) {
+ nf_unregister_hooks(nft_base_chain(trans->ctx.chain)->ops,
+ trans->ctx.afi->nops);
+ }
+ }
+ break;
+ case NFT_MSG_DELCHAIN:
+ list_add_tail(&trans->ctx.chain->list,
+ &trans->ctx.table->chains);
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_NEWRULE:
+ list_del_rcu(&nft_trans_rule(trans)->list);
+ break;
+ case NFT_MSG_DELRULE:
+ nft_rule_clear(trans->ctx.net, nft_trans_rule(trans));
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_NEWSET:
+ list_del(&nft_trans_set(trans)->list);
+ break;
+ case NFT_MSG_DELSET:
+ list_add_tail(&nft_trans_set(trans)->list,
+ &trans->ctx.table->sets);
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_NEWSETELEM:
+ set = nft_trans_elem_set(trans);
+ set->ops->get(set, &nft_trans_elem(trans));
+ set->ops->remove(set, &nft_trans_elem(trans));
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_DELSETELEM:
+ nft_trans_destroy(trans);
+ break;
+ }
+ }
+
+ list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+ list_del(&trans->list);
+ trans->ctx.nla = NULL;
+ call_rcu(&trans->rcu_head, nf_tables_abort_release_rcu);
+ }
+
+ return 0;
+}
+
static const struct nfnetlink_subsystem nf_tables_subsys = {
.name = "nf_tables",
.subsys_id = NFNL_SUBSYS_NFTABLES,
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index bd0d41e6934..cc560301624 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -215,22 +215,14 @@ static void nft_ct_l3proto_module_put(uint8_t family)
nf_ct_l3proto_module_put(family);
}
-static int nft_ct_init_validate_get(const struct nft_expr *expr,
- const struct nlattr * const tb[])
+static int nft_ct_get_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
{
struct nft_ct *priv = nft_expr_priv(expr);
+ int err;
- if (tb[NFTA_CT_DIRECTION] != NULL) {
- priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]);
- switch (priv->dir) {
- case IP_CT_DIR_ORIGINAL:
- case IP_CT_DIR_REPLY:
- break;
- default:
- return -EINVAL;
- }
- }
-
+ priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
switch (priv->key) {
case NFT_CT_STATE:
case NFT_CT_DIRECTION:
@@ -262,55 +254,55 @@ static int nft_ct_init_validate_get(const struct nft_expr *expr,
return -EOPNOTSUPP;
}
- return 0;
-}
-
-static int nft_ct_init_validate_set(uint32_t key)
-{
- switch (key) {
- case NFT_CT_MARK:
- break;
- default:
- return -EOPNOTSUPP;
+ if (tb[NFTA_CT_DIRECTION] != NULL) {
+ priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]);
+ switch (priv->dir) {
+ case IP_CT_DIR_ORIGINAL:
+ case IP_CT_DIR_REPLY:
+ break;
+ default:
+ return -EINVAL;
+ }
}
+ priv->dreg = ntohl(nla_get_be32(tb[NFTA_CT_DREG]));
+ err = nft_validate_output_register(priv->dreg);
+ if (err < 0)
+ return err;
+
+ err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+ if (err < 0)
+ return err;
+
+ err = nft_ct_l3proto_try_module_get(ctx->afi->family);
+ if (err < 0)
+ return err;
+
return 0;
}
-static int nft_ct_init(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nlattr * const tb[])
+static int nft_ct_set_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
{
struct nft_ct *priv = nft_expr_priv(expr);
int err;
priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
-
- if (tb[NFTA_CT_DREG]) {
- err = nft_ct_init_validate_get(expr, tb);
- if (err < 0)
- return err;
-
- priv->dreg = ntohl(nla_get_be32(tb[NFTA_CT_DREG]));
- err = nft_validate_output_register(priv->dreg);
- if (err < 0)
- return err;
-
- err = nft_validate_data_load(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE);
- if (err < 0)
- return err;
- } else {
- err = nft_ct_init_validate_set(priv->key);
- if (err < 0)
- return err;
-
- priv->sreg = ntohl(nla_get_be32(tb[NFTA_CT_SREG]));
- err = nft_validate_input_register(priv->sreg);
- if (err < 0)
- return err;
+ switch (priv->key) {
+#ifdef CONFIG_NF_CONNTRACK_MARK
+ case NFT_CT_MARK:
+ break;
+#endif
+ default:
+ return -EOPNOTSUPP;
}
+ priv->sreg = ntohl(nla_get_be32(tb[NFTA_CT_SREG]));
+ err = nft_validate_input_register(priv->sreg);
+ if (err < 0)
+ return err;
+
err = nft_ct_l3proto_try_module_get(ctx->afi->family);
if (err < 0)
return err;
@@ -370,7 +362,7 @@ static const struct nft_expr_ops nft_ct_get_ops = {
.type = &nft_ct_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
.eval = nft_ct_get_eval,
- .init = nft_ct_init,
+ .init = nft_ct_get_init,
.destroy = nft_ct_destroy,
.dump = nft_ct_get_dump,
};
@@ -379,7 +371,7 @@ static const struct nft_expr_ops nft_ct_set_ops = {
.type = &nft_ct_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
.eval = nft_ct_set_eval,
- .init = nft_ct_init,
+ .init = nft_ct_set_init,
.destroy = nft_ct_destroy,
.dump = nft_ct_set_dump,
};
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 3b1ad876d6b..1dfeb678683 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -12,6 +12,7 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/list.h>
+#include <linux/log2.h>
#include <linux/jhash.h>
#include <linux/netlink.h>
#include <linux/vmalloc.h>
@@ -19,7 +20,7 @@
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
-#define NFT_HASH_MIN_SIZE 4
+#define NFT_HASH_MIN_SIZE 4UL
struct nft_hash {
struct nft_hash_table __rcu *tbl;
@@ -27,7 +28,6 @@ struct nft_hash {
struct nft_hash_table {
unsigned int size;
- unsigned int elements;
struct nft_hash_elem __rcu *buckets[];
};
@@ -82,6 +82,11 @@ static void nft_hash_tbl_free(const struct nft_hash_table *tbl)
kfree(tbl);
}
+static unsigned int nft_hash_tbl_size(unsigned int nelem)
+{
+ return max(roundup_pow_of_two(nelem * 4 / 3), NFT_HASH_MIN_SIZE);
+}
+
static struct nft_hash_table *nft_hash_tbl_alloc(unsigned int nbuckets)
{
struct nft_hash_table *tbl;
@@ -161,7 +166,6 @@ static int nft_hash_tbl_expand(const struct nft_set *set, struct nft_hash *priv)
break;
}
}
- ntbl->elements = tbl->elements;
/* Publish new table */
rcu_assign_pointer(priv->tbl, ntbl);
@@ -201,7 +205,6 @@ static int nft_hash_tbl_shrink(const struct nft_set *set, struct nft_hash *priv)
;
RCU_INIT_POINTER(*pprev, tbl->buckets[i + ntbl->size]);
}
- ntbl->elements = tbl->elements;
/* Publish new table */
rcu_assign_pointer(priv->tbl, ntbl);
@@ -237,10 +240,9 @@ static int nft_hash_insert(const struct nft_set *set,
h = nft_hash_data(&he->key, tbl->size, set->klen);
RCU_INIT_POINTER(he->next, tbl->buckets[h]);
rcu_assign_pointer(tbl->buckets[h], he);
- tbl->elements++;
/* Expand table when exceeding 75% load */
- if (tbl->elements > tbl->size / 4 * 3)
+ if (set->nelems + 1 > tbl->size / 4 * 3)
nft_hash_tbl_expand(set, priv);
return 0;
@@ -268,10 +270,9 @@ static void nft_hash_remove(const struct nft_set *set,
RCU_INIT_POINTER(*pprev, he->next);
synchronize_rcu();
kfree(he);
- tbl->elements--;
/* Shrink table beneath 30% load */
- if (tbl->elements < tbl->size * 3 / 10 &&
+ if (set->nelems - 1 < tbl->size * 3 / 10 &&
tbl->size > NFT_HASH_MIN_SIZE)
nft_hash_tbl_shrink(set, priv);
}
@@ -335,17 +336,23 @@ static unsigned int nft_hash_privsize(const struct nlattr * const nla[])
}
static int nft_hash_init(const struct nft_set *set,
+ const struct nft_set_desc *desc,
const struct nlattr * const tb[])
{
struct nft_hash *priv = nft_set_priv(set);
struct nft_hash_table *tbl;
+ unsigned int size;
if (unlikely(!nft_hash_rnd_initted)) {
get_random_bytes(&nft_hash_rnd, 4);
nft_hash_rnd_initted = true;
}
- tbl = nft_hash_tbl_alloc(NFT_HASH_MIN_SIZE);
+ size = NFT_HASH_MIN_SIZE;
+ if (desc->size)
+ size = nft_hash_tbl_size(desc->size);
+
+ tbl = nft_hash_tbl_alloc(size);
if (tbl == NULL)
return -ENOMEM;
RCU_INIT_POINTER(priv->tbl, tbl);
@@ -369,8 +376,37 @@ static void nft_hash_destroy(const struct nft_set *set)
kfree(tbl);
}
+static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
+ struct nft_set_estimate *est)
+{
+ unsigned int esize;
+
+ esize = sizeof(struct nft_hash_elem);
+ if (features & NFT_SET_MAP)
+ esize += FIELD_SIZEOF(struct nft_hash_elem, data[0]);
+
+ if (desc->size) {
+ est->size = sizeof(struct nft_hash) +
+ nft_hash_tbl_size(desc->size) *
+ sizeof(struct nft_hash_elem *) +
+ desc->size * esize;
+ } else {
+ /* Resizing happens when the load drops below 30% or goes
+ * above 75%. The average of 52.5% load (approximated by 50%)
+ * is used for the size estimation of the hash buckets,
+ * meaning we calculate two buckets per element.
+ */
+ est->size = esize + 2 * sizeof(struct nft_hash_elem *);
+ }
+
+ est->class = NFT_SET_CLASS_O_1;
+
+ return true;
+}
+
static struct nft_set_ops nft_hash_ops __read_mostly = {
.privsize = nft_hash_privsize,
+ .estimate = nft_hash_estimate,
.init = nft_hash_init,
.destroy = nft_hash_destroy,
.get = nft_hash_get,
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 7fd2bea8aa2..6404a726d17 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -56,8 +56,14 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
return -EINVAL;
set = nf_tables_set_lookup(ctx->table, tb[NFTA_LOOKUP_SET]);
- if (IS_ERR(set))
- return PTR_ERR(set);
+ if (IS_ERR(set)) {
+ if (tb[NFTA_LOOKUP_SET_ID]) {
+ set = nf_tables_set_lookup_byid(ctx->net,
+ tb[NFTA_LOOKUP_SET_ID]);
+ }
+ if (IS_ERR(set))
+ return PTR_ERR(set);
+ }
priv->sreg = ntohl(nla_get_be32(tb[NFTA_LOOKUP_SREG]));
err = nft_validate_input_register(priv->sreg);
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 425cf39af89..852b178c6ae 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -18,18 +18,11 @@
#include <net/sock.h>
#include <net/tcp_states.h> /* for TCP_TIME_WAIT */
#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_meta.h>
-struct nft_meta {
- enum nft_meta_keys key:8;
- union {
- enum nft_registers dreg:8;
- enum nft_registers sreg:8;
- };
-};
-
-static void nft_meta_get_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
- const struct nft_pktinfo *pkt)
+void nft_meta_get_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
{
const struct nft_meta *priv = nft_expr_priv(expr);
const struct sk_buff *skb = pkt->skb;
@@ -140,10 +133,11 @@ static void nft_meta_get_eval(const struct nft_expr *expr,
err:
data[NFT_REG_VERDICT].verdict = NFT_BREAK;
}
+EXPORT_SYMBOL_GPL(nft_meta_get_eval);
-static void nft_meta_set_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
- const struct nft_pktinfo *pkt)
+void nft_meta_set_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
{
const struct nft_meta *meta = nft_expr_priv(expr);
struct sk_buff *skb = pkt->skb;
@@ -163,28 +157,24 @@ static void nft_meta_set_eval(const struct nft_expr *expr,
WARN_ON(1);
}
}
+EXPORT_SYMBOL_GPL(nft_meta_set_eval);
-static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
+const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
[NFTA_META_DREG] = { .type = NLA_U32 },
[NFTA_META_KEY] = { .type = NLA_U32 },
[NFTA_META_SREG] = { .type = NLA_U32 },
};
+EXPORT_SYMBOL_GPL(nft_meta_policy);
-static int nft_meta_init_validate_set(uint32_t key)
+int nft_meta_get_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
{
- switch (key) {
- case NFT_META_MARK:
- case NFT_META_PRIORITY:
- case NFT_META_NFTRACE:
- return 0;
- default:
- return -EOPNOTSUPP;
- }
-}
+ struct nft_meta *priv = nft_expr_priv(expr);
+ int err;
-static int nft_meta_init_validate_get(uint32_t key)
-{
- switch (key) {
+ priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
+ switch (priv->key) {
case NFT_META_LEN:
case NFT_META_PROTOCOL:
case NFT_META_NFPROTO:
@@ -205,39 +195,41 @@ static int nft_meta_init_validate_get(uint32_t key)
#ifdef CONFIG_NETWORK_SECMARK
case NFT_META_SECMARK:
#endif
- return 0;
+ break;
default:
return -EOPNOTSUPP;
}
+ priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG]));
+ err = nft_validate_output_register(priv->dreg);
+ if (err < 0)
+ return err;
+
+ err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+ if (err < 0)
+ return err;
+
+ return 0;
}
+EXPORT_SYMBOL_GPL(nft_meta_get_init);
-static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
- const struct nlattr * const tb[])
+int nft_meta_set_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
{
struct nft_meta *priv = nft_expr_priv(expr);
int err;
priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
-
- if (tb[NFTA_META_DREG]) {
- err = nft_meta_init_validate_get(priv->key);
- if (err < 0)
- return err;
-
- priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG]));
- err = nft_validate_output_register(priv->dreg);
- if (err < 0)
- return err;
-
- return nft_validate_data_load(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE);
+ switch (priv->key) {
+ case NFT_META_MARK:
+ case NFT_META_PRIORITY:
+ case NFT_META_NFTRACE:
+ break;
+ default:
+ return -EOPNOTSUPP;
}
- err = nft_meta_init_validate_set(priv->key);
- if (err < 0)
- return err;
-
priv->sreg = ntohl(nla_get_be32(tb[NFTA_META_SREG]));
err = nft_validate_input_register(priv->sreg);
if (err < 0)
@@ -245,9 +237,10 @@ static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
return 0;
}
+EXPORT_SYMBOL_GPL(nft_meta_set_init);
-static int nft_meta_get_dump(struct sk_buff *skb,
- const struct nft_expr *expr)
+int nft_meta_get_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
{
const struct nft_meta *priv = nft_expr_priv(expr);
@@ -260,9 +253,10 @@ static int nft_meta_get_dump(struct sk_buff *skb,
nla_put_failure:
return -1;
}
+EXPORT_SYMBOL_GPL(nft_meta_get_dump);
-static int nft_meta_set_dump(struct sk_buff *skb,
- const struct nft_expr *expr)
+int nft_meta_set_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
{
const struct nft_meta *priv = nft_expr_priv(expr);
@@ -276,13 +270,14 @@ static int nft_meta_set_dump(struct sk_buff *skb,
nla_put_failure:
return -1;
}
+EXPORT_SYMBOL_GPL(nft_meta_set_dump);
static struct nft_expr_type nft_meta_type;
static const struct nft_expr_ops nft_meta_get_ops = {
.type = &nft_meta_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
.eval = nft_meta_get_eval,
- .init = nft_meta_init,
+ .init = nft_meta_get_init,
.dump = nft_meta_get_dump,
};
@@ -290,7 +285,7 @@ static const struct nft_expr_ops nft_meta_set_ops = {
.type = &nft_meta_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
.eval = nft_meta_set_eval,
- .init = nft_meta_init,
+ .init = nft_meta_set_init,
.dump = nft_meta_set_dump,
};
diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c
index e21d69d1350..072e611e9f7 100644
--- a/net/netfilter/nft_rbtree.c
+++ b/net/netfilter/nft_rbtree.c
@@ -201,6 +201,7 @@ static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[])
}
static int nft_rbtree_init(const struct nft_set *set,
+ const struct nft_set_desc *desc,
const struct nlattr * const nla[])
{
struct nft_rbtree *priv = nft_set_priv(set);
@@ -222,8 +223,28 @@ static void nft_rbtree_destroy(const struct nft_set *set)
}
}
+static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
+ struct nft_set_estimate *est)
+{
+ unsigned int nsize;
+
+ nsize = sizeof(struct nft_rbtree_elem);
+ if (features & NFT_SET_MAP)
+ nsize += FIELD_SIZEOF(struct nft_rbtree_elem, data[0]);
+
+ if (desc->size)
+ est->size = sizeof(struct nft_rbtree) + desc->size * nsize;
+ else
+ est->size = nsize;
+
+ est->class = NFT_SET_CLASS_O_LOG_N;
+
+ return true;
+}
+
static struct nft_set_ops nft_rbtree_ops __read_mostly = {
.privsize = nft_rbtree_privsize,
+ .estimate = nft_rbtree_estimate,
.init = nft_rbtree_init,
.destroy = nft_rbtree_destroy,
.insert = nft_rbtree_insert,