summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-07-31 18:43:13 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2012-07-31 18:43:13 -0700
commitfd37ce34bd512f2b1a503f82abf8768da556a955 (patch)
tree557ff43ff5291d1704527e31293633fbc2f956d5
parent4b24ff71108164e047cf2c95990b77651163e315 (diff)
parentcaacf05e5ad1abf0a2864863da4e33024bc68ec6 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking update from David S. Miller: "I think Eric Dumazet and I have dealt with all of the known routing cache removal fallout. Some other minor fixes all around. 1) Fix RCU of cached routes, particular of output routes which require liberation via call_rcu() instead of call_rcu_bh(). From Eric Dumazet. 2) Make sure we purge net device references in cached routes properly. 3) TG3 driver bug fixes from Michael Chan. 4) Fix reported 'expires' value in ipv6 routes, from Li Wei. 5) TUN driver ioctl leaks kernel bytes to userspace, from Mathias Krause." * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (22 commits) ipv4: Properly purge netdev references on uncached routes. ipv4: Cache routes in nexthop exception entries. ipv4: percpu nh_rth_output cache ipv4: Restore old dst_free() behavior. bridge: make port attributes const ipv4: remove rt_cache_rebuild_count net: ipv4: fix RCU races on dst refcounts net: TCP early demux cleanup tun: Fix formatting. net/tun: fix ioctl() based info leaks tg3: Update version to 3.124 tg3: Fix race condition in tg3_get_stats64() tg3: Add New 5719 Read DMA workaround tg3: Fix Read DMA workaround for 5719 A0. tg3: Request APE_LOCK_PHY before PHY access ipv6: fix incorrect route 'expires' value passed to userspace mISDN: Bugfix only few bytes are transfered on a connection seeq: use PTR_RET at init_module of driver bnx2x: remove cast around the kmalloc in bnx2x_prev_mark_path ipv4: clean up put_child ...
-rw-r--r--Documentation/networking/ip-sysctl.txt6
-rw-r--r--drivers/isdn/hardware/mISDN/avmfritz.c7
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c3
-rw-r--r--drivers/net/ethernet/broadcom/tg3.c71
-rw-r--r--drivers/net/ethernet/broadcom/tg3.h8
-rw-r--r--drivers/net/ethernet/qlogic/qlge/qlge_main.c1
-rw-r--r--drivers/net/ethernet/seeq/seeq8005.c4
-rw-r--r--drivers/net/tun.c6
-rw-r--r--include/net/inet_sock.h9
-rw-r--r--include/net/ip_fib.h6
-rw-r--r--include/net/netns/ipv4.h2
-rw-r--r--include/net/route.h3
-rw-r--r--net/bridge/br_sysfs_if.c6
-rw-r--r--net/core/rtnetlink.c8
-rw-r--r--net/ipv4/fib_frontend.c1
-rw-r--r--net/ipv4/fib_semantics.c42
-rw-r--r--net/ipv4/fib_trie.c53
-rw-r--r--net/ipv4/ip_input.c2
-rw-r--r--net/ipv4/route.c183
-rw-r--r--net/ipv4/sysctl_net_ipv4.c11
-rw-r--r--net/ipv4/tcp_input.c3
-rw-r--r--net/ipv4/tcp_ipv4.c12
-rw-r--r--net/ipv4/tcp_minisocks.c3
-rw-r--r--net/ipv4/xfrm4_policy.c1
-rw-r--r--net/ipv6/ip6_input.c2
-rw-r--r--net/ipv6/route.c8
26 files changed, 316 insertions, 145 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 406a5226220..ca447b35b83 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -48,12 +48,6 @@ min_adv_mss - INTEGER
The advertised MSS depends on the first hop route MTU, but will
never be lower than this setting.
-rt_cache_rebuild_count - INTEGER
- The per net-namespace route cache emergency rebuild threshold.
- Any net-namespace having its route cache rebuilt due to
- a hash bucket chain being too long more than this many times
- will have its route caching disabled
-
IP Fragmentation:
ipfrag_high_thresh - INTEGER
diff --git a/drivers/isdn/hardware/mISDN/avmfritz.c b/drivers/isdn/hardware/mISDN/avmfritz.c
index c08fc605e56..fa6ca473372 100644
--- a/drivers/isdn/hardware/mISDN/avmfritz.c
+++ b/drivers/isdn/hardware/mISDN/avmfritz.c
@@ -449,7 +449,8 @@ hdlc_fill_fifo(struct bchannel *bch)
{
struct fritzcard *fc = bch->hw;
struct hdlc_hw *hdlc;
- int count, fs, cnt = 0, idx, fillempty = 0;
+ int count, fs, cnt = 0, idx;
+ bool fillempty = false;
u8 *p;
u32 *ptr, val, addr;
@@ -462,7 +463,7 @@ hdlc_fill_fifo(struct bchannel *bch)
return;
count = fs;
p = bch->fill;
- fillempty = 1;
+ fillempty = true;
} else {
count = bch->tx_skb->len - bch->tx_idx;
if (count <= 0)
@@ -477,7 +478,7 @@ hdlc_fill_fifo(struct bchannel *bch)
hdlc->ctrl.sr.cmd |= HDLC_CMD_XME;
}
ptr = (u32 *)p;
- if (fillempty) {
+ if (!fillempty) {
pr_debug("%s.B%d: %d/%d/%d", fc->name, bch->nr, count,
bch->tx_idx, bch->tx_skb->len);
bch->tx_idx += count;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 9aaf863b423..dd451c3dd83 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -9360,8 +9360,7 @@ static int __devinit bnx2x_prev_mark_path(struct bnx2x *bp)
struct bnx2x_prev_path_list *tmp_list;
int rc;
- tmp_list = (struct bnx2x_prev_path_list *)
- kmalloc(sizeof(struct bnx2x_prev_path_list), GFP_KERNEL);
+ tmp_list = kmalloc(sizeof(struct bnx2x_prev_path_list), GFP_KERNEL);
if (!tmp_list) {
BNX2X_ERR("Failed to allocate 'bnx2x_prev_path_list'\n");
return -ENOMEM;
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 9a009fd6ea1..bf906c51d82 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -92,7 +92,7 @@ static inline void _tg3_flag_clear(enum TG3_FLAGS flag, unsigned long *bits)
#define DRV_MODULE_NAME "tg3"
#define TG3_MAJ_NUM 3
-#define TG3_MIN_NUM 123
+#define TG3_MIN_NUM 124
#define DRV_MODULE_VERSION \
__stringify(TG3_MAJ_NUM) "." __stringify(TG3_MIN_NUM)
#define DRV_MODULE_RELDATE "March 21, 2012"
@@ -672,6 +672,12 @@ static int tg3_ape_lock(struct tg3 *tp, int locknum)
else
bit = 1 << tp->pci_fn;
break;
+ case TG3_APE_LOCK_PHY0:
+ case TG3_APE_LOCK_PHY1:
+ case TG3_APE_LOCK_PHY2:
+ case TG3_APE_LOCK_PHY3:
+ bit = APE_LOCK_REQ_DRIVER;
+ break;
default:
return -EINVAL;
}
@@ -723,6 +729,12 @@ static void tg3_ape_unlock(struct tg3 *tp, int locknum)
else
bit = 1 << tp->pci_fn;
break;
+ case TG3_APE_LOCK_PHY0:
+ case TG3_APE_LOCK_PHY1:
+ case TG3_APE_LOCK_PHY2:
+ case TG3_APE_LOCK_PHY3:
+ bit = APE_LOCK_GRANT_DRIVER;
+ break;
default:
return;
}
@@ -1052,6 +1064,8 @@ static int tg3_readphy(struct tg3 *tp, int reg, u32 *val)
udelay(80);
}
+ tg3_ape_lock(tp, tp->phy_ape_lock);
+
*val = 0x0;
frame_val = ((tp->phy_addr << MI_COM_PHY_ADDR_SHIFT) &
@@ -1086,6 +1100,8 @@ static int tg3_readphy(struct tg3 *tp, int reg, u32 *val)
udelay(80);
}
+ tg3_ape_unlock(tp, tp->phy_ape_lock);
+
return ret;
}
@@ -1105,6 +1121,8 @@ static int tg3_writephy(struct tg3 *tp, int reg, u32 val)
udelay(80);
}
+ tg3_ape_lock(tp, tp->phy_ape_lock);
+
frame_val = ((tp->phy_addr << MI_COM_PHY_ADDR_SHIFT) &
MI_COM_PHY_ADDR_MASK);
frame_val |= ((reg << MI_COM_REG_ADDR_SHIFT) &
@@ -1135,6 +1153,8 @@ static int tg3_writephy(struct tg3 *tp, int reg, u32 val)
udelay(80);
}
+ tg3_ape_unlock(tp, tp->phy_ape_lock);
+
return ret;
}
@@ -9066,8 +9086,7 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy)
GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_57780 ||
tg3_flag(tp, 57765_PLUS)) {
val = tr32(TG3_RDMA_RSRVCTRL_REG);
- if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719 ||
- GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5720) {
+ if (tp->pci_chip_rev_id == CHIPREV_ID_5719_A0) {
val &= ~(TG3_RDMA_RSRVCTRL_TXMRGN_MASK |
TG3_RDMA_RSRVCTRL_FIFO_LWM_MASK |
TG3_RDMA_RSRVCTRL_FIFO_HWM_MASK);
@@ -9257,6 +9276,19 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy)
tw32_f(RDMAC_MODE, rdmac_mode);
udelay(40);
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719) {
+ for (i = 0; i < TG3_NUM_RDMA_CHANNELS; i++) {
+ if (tr32(TG3_RDMA_LENGTH + (i << 2)) > TG3_MAX_MTU(tp))
+ break;
+ }
+ if (i < TG3_NUM_RDMA_CHANNELS) {
+ val = tr32(TG3_LSO_RD_DMA_CRPTEN_CTRL);
+ val |= TG3_LSO_RD_DMA_TX_LENGTH_WA;
+ tw32(TG3_LSO_RD_DMA_CRPTEN_CTRL, val);
+ tg3_flag_set(tp, 5719_RDMA_BUG);
+ }
+ }
+
tw32(RCVDCC_MODE, RCVDCC_MODE_ENABLE | RCVDCC_MODE_ATTN_ENABLE);
if (!tg3_flag(tp, 5705_PLUS))
tw32(MBFREE_MODE, MBFREE_MODE_ENABLE);
@@ -9616,6 +9648,16 @@ static void tg3_periodic_fetch_stats(struct tg3 *tp)
TG3_STAT_ADD32(&sp->tx_ucast_packets, MAC_TX_STATS_UCAST);
TG3_STAT_ADD32(&sp->tx_mcast_packets, MAC_TX_STATS_MCAST);
TG3_STAT_ADD32(&sp->tx_bcast_packets, MAC_TX_STATS_BCAST);
+ if (unlikely(tg3_flag(tp, 5719_RDMA_BUG) &&
+ (sp->tx_ucast_packets.low + sp->tx_mcast_packets.low +
+ sp->tx_bcast_packets.low) > TG3_NUM_RDMA_CHANNELS)) {
+ u32 val;
+
+ val = tr32(TG3_LSO_RD_DMA_CRPTEN_CTRL);
+ val &= ~TG3_LSO_RD_DMA_TX_LENGTH_WA;
+ tw32(TG3_LSO_RD_DMA_CRPTEN_CTRL, val);
+ tg3_flag_clear(tp, 5719_RDMA_BUG);
+ }
TG3_STAT_ADD32(&sp->rx_octets, MAC_RX_STATS_OCTETS);
TG3_STAT_ADD32(&sp->rx_fragments, MAC_RX_STATS_FRAGMENTS);
@@ -12482,10 +12524,12 @@ static struct rtnl_link_stats64 *tg3_get_stats64(struct net_device *dev,
{
struct tg3 *tp = netdev_priv(dev);
- if (!tp->hw_stats)
+ spin_lock_bh(&tp->lock);
+ if (!tp->hw_stats) {
+ spin_unlock_bh(&tp->lock);
return &tp->net_stats_prev;
+ }
- spin_lock_bh(&tp->lock);
tg3_get_nstats(tp, stats);
spin_unlock_bh(&tp->lock);
@@ -13648,6 +13692,23 @@ static int __devinit tg3_phy_probe(struct tg3 *tp)
tg3_flag_set(tp, PAUSE_AUTONEG);
tp->link_config.flowctrl = FLOW_CTRL_TX | FLOW_CTRL_RX;
+ if (tg3_flag(tp, ENABLE_APE)) {
+ switch (tp->pci_fn) {
+ case 0:
+ tp->phy_ape_lock = TG3_APE_LOCK_PHY0;
+ break;
+ case 1:
+ tp->phy_ape_lock = TG3_APE_LOCK_PHY1;
+ break;
+ case 2:
+ tp->phy_ape_lock = TG3_APE_LOCK_PHY2;
+ break;
+ case 3:
+ tp->phy_ape_lock = TG3_APE_LOCK_PHY3;
+ break;
+ }
+ }
+
if (tg3_flag(tp, USE_PHYLIB))
return tg3_phy_init(tp);
diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h
index a1b75cd67b9..6d52cb28682 100644
--- a/drivers/net/ethernet/broadcom/tg3.h
+++ b/drivers/net/ethernet/broadcom/tg3.h
@@ -1376,7 +1376,11 @@
#define TG3_LSO_RD_DMA_CRPTEN_CTRL 0x00004910
#define TG3_LSO_RD_DMA_CRPTEN_CTRL_BLEN_BD_4K 0x00030000
#define TG3_LSO_RD_DMA_CRPTEN_CTRL_BLEN_LSO_4K 0x000c0000
-/* 0x4914 --> 0x4c00 unused */
+#define TG3_LSO_RD_DMA_TX_LENGTH_WA 0x02000000
+/* 0x4914 --> 0x4be0 unused */
+
+#define TG3_NUM_RDMA_CHANNELS 4
+#define TG3_RDMA_LENGTH 0x00004be0
/* Write DMA control registers */
#define WDMAC_MODE 0x00004c00
@@ -2959,6 +2963,7 @@ enum TG3_FLAGS {
TG3_FLAG_L1PLLPD_EN,
TG3_FLAG_APE_HAS_NCSI,
TG3_FLAG_4K_FIFO_LIMIT,
+ TG3_FLAG_5719_RDMA_BUG,
TG3_FLAG_RESET_TASK_PENDING,
TG3_FLAG_5705_PLUS,
TG3_FLAG_IS_5788,
@@ -3107,6 +3112,7 @@ struct tg3 {
int old_link;
u8 phy_addr;
+ u8 phy_ape_lock;
/* PHY info */
u32 phy_id;
diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
index 3769f5711cc..b53a3b60b64 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c
+++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
@@ -4682,6 +4682,7 @@ static int __devinit qlge_probe(struct pci_dev *pdev,
NETIF_F_HW_VLAN_TX | NETIF_F_RXCSUM;
ndev->features = ndev->hw_features |
NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_FILTER;
+ ndev->vlan_features = ndev->hw_features;
if (test_bit(QL_DMA64, &qdev->flags))
ndev->features |= NETIF_F_HIGHDMA;
diff --git a/drivers/net/ethernet/seeq/seeq8005.c b/drivers/net/ethernet/seeq/seeq8005.c
index 698edbbfc14..d6e50de7118 100644
--- a/drivers/net/ethernet/seeq/seeq8005.c
+++ b/drivers/net/ethernet/seeq/seeq8005.c
@@ -736,9 +736,7 @@ MODULE_PARM_DESC(irq, "SEEQ 8005 IRQ number");
int __init init_module(void)
{
dev_seeq = seeq8005_probe(-1);
- if (IS_ERR(dev_seeq))
- return PTR_ERR(dev_seeq);
- return 0;
+ return PTR_RET(dev_seeq);
}
void __exit cleanup_module(void)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index c62163e272c..926d4db5cb3 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1379,10 +1379,12 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
int vnet_hdr_sz;
int ret;
- if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89)
+ if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89) {
if (copy_from_user(&ifr, argp, ifreq_len))
return -EFAULT;
-
+ } else {
+ memset(&ifr, 0, sizeof(ifr));
+ }
if (cmd == TUNGETFEATURES) {
/* Currently this just means: "what IFF flags are valid?".
* This is needed because we never checked for invalid flags on
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 613cfa40167..83b567fe194 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -249,4 +249,13 @@ static inline __u8 inet_sk_flowi_flags(const struct sock *sk)
return flags;
}
+static inline void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+
+ dst_hold(dst);
+ sk->sk_rx_dst = dst;
+ inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
+}
+
#endif /* _INET_SOCK_H */
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index e69c3a47153..926142ed8d7 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -21,6 +21,7 @@
#include <linux/rcupdate.h>
#include <net/fib_rules.h>
#include <net/inetpeer.h>
+#include <linux/percpu.h>
struct fib_config {
u8 fc_dst_len;
@@ -54,6 +55,7 @@ struct fib_nh_exception {
u32 fnhe_pmtu;
__be32 fnhe_gw;
unsigned long fnhe_expires;
+ struct rtable __rcu *fnhe_rth;
unsigned long fnhe_stamp;
};
@@ -81,8 +83,8 @@ struct fib_nh {
__be32 nh_gw;
__be32 nh_saddr;
int nh_saddr_genid;
- struct rtable *nh_rth_output;
- struct rtable *nh_rth_input;
+ struct rtable __rcu * __percpu *nh_pcpu_rth_output;
+ struct rtable __rcu *nh_rth_input;
struct fnhe_hash_bucket *nh_exceptions;
};
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 0ffb8e31f3c..1474dd65c66 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -61,8 +61,6 @@ struct netns_ipv4 {
int sysctl_icmp_ratelimit;
int sysctl_icmp_ratemask;
int sysctl_icmp_errors_use_inbound_ifaddr;
- int sysctl_rt_cache_rebuild_count;
- int current_rt_cache_rebuild_count;
unsigned int sysctl_ping_group_range[2];
long sysctl_tcp_mem[3];
diff --git a/include/net/route.h b/include/net/route.h
index 8c52bc6f1c9..776a27f1ab7 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -57,6 +57,8 @@ struct rtable {
/* Miscellaneous cached information */
u32 rt_pmtu;
+
+ struct list_head rt_uncached;
};
static inline bool rt_is_input_route(const struct rtable *rt)
@@ -107,6 +109,7 @@ extern struct ip_rt_acct __percpu *ip_rt_acct;
struct in_device;
extern int ip_rt_init(void);
extern void rt_cache_flush(struct net *net, int how);
+extern void rt_flush_dev(struct net_device *dev);
extern struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp);
extern struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp,
struct sock *sk);
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 6229b62749e..13b36bdc76a 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -27,7 +27,7 @@ struct brport_attribute {
};
#define BRPORT_ATTR(_name,_mode,_show,_store) \
-struct brport_attribute brport_attr_##_name = { \
+const struct brport_attribute brport_attr_##_name = { \
.attr = {.name = __stringify(_name), \
.mode = _mode }, \
.show = _show, \
@@ -164,7 +164,7 @@ static BRPORT_ATTR(multicast_router, S_IRUGO | S_IWUSR, show_multicast_router,
store_multicast_router);
#endif
-static struct brport_attribute *brport_attrs[] = {
+static const struct brport_attribute *brport_attrs[] = {
&brport_attr_path_cost,
&brport_attr_priority,
&brport_attr_port_id,
@@ -241,7 +241,7 @@ const struct sysfs_ops brport_sysfs_ops = {
int br_sysfs_addif(struct net_bridge_port *p)
{
struct net_bridge *br = p->br;
- struct brport_attribute **a;
+ const struct brport_attribute **a;
int err;
err = sysfs_create_link(&p->kobj, &br->dev->dev.kobj,
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index bc9e380f0ab..5ff949dc954 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -625,9 +625,13 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
.rta_id = id,
};
- if (expires)
- ci.rta_expires = jiffies_to_clock_t(expires);
+ if (expires) {
+ unsigned long clock;
+ clock = jiffies_to_clock_t(abs(expires));
+ clock = min_t(unsigned long, clock, INT_MAX);
+ ci.rta_expires = (expires > 0) ? clock : -clock;
+ }
return nla_put(skb, RTA_CACHEINFO, sizeof(ci), &ci);
}
EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 8732cc7920e..c43ae3fba79 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1046,6 +1046,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
if (event == NETDEV_UNREGISTER) {
fib_disable_ip(dev, 2, -1);
+ rt_flush_dev(dev);
return NOTIFY_DONE;
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index da0cc2e6b25..da80dc14cc7 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -140,6 +140,21 @@ const struct fib_prop fib_props[RTN_MAX + 1] = {
},
};
+static void rt_fibinfo_free(struct rtable __rcu **rtp)
+{
+ struct rtable *rt = rcu_dereference_protected(*rtp, 1);
+
+ if (!rt)
+ return;
+
+ /* Not even needed : RCU_INIT_POINTER(*rtp, NULL);
+ * because we waited an RCU grace period before calling
+ * free_fib_info_rcu()
+ */
+
+ dst_free(&rt->dst);
+}
+
static void free_nh_exceptions(struct fib_nh *nh)
{
struct fnhe_hash_bucket *hash = nh->nh_exceptions;
@@ -153,6 +168,9 @@ static void free_nh_exceptions(struct fib_nh *nh)
struct fib_nh_exception *next;
next = rcu_dereference_protected(fnhe->fnhe_next, 1);
+
+ rt_fibinfo_free(&fnhe->fnhe_rth);
+
kfree(fnhe);
fnhe = next;
@@ -161,6 +179,23 @@ static void free_nh_exceptions(struct fib_nh *nh)
kfree(hash);
}
+static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp)
+{
+ int cpu;
+
+ if (!rtp)
+ return;
+
+ for_each_possible_cpu(cpu) {
+ struct rtable *rt;
+
+ rt = rcu_dereference_protected(*per_cpu_ptr(rtp, cpu), 1);
+ if (rt)
+ dst_free(&rt->dst);
+ }
+ free_percpu(rtp);
+}
+
/* Release a nexthop info record */
static void free_fib_info_rcu(struct rcu_head *head)
{
@@ -171,10 +206,8 @@ static void free_fib_info_rcu(struct rcu_head *head)
dev_put(nexthop_nh->nh_dev);
if (nexthop_nh->nh_exceptions)
free_nh_exceptions(nexthop_nh);
- if (nexthop_nh->nh_rth_output)
- dst_free(&nexthop_nh->nh_rth_output->dst);
- if (nexthop_nh->nh_rth_input)
- dst_free(&nexthop_nh->nh_rth_input->dst);
+ rt_fibinfo_free_cpus(nexthop_nh->nh_pcpu_rth_output);
+ rt_fibinfo_free(&nexthop_nh->nh_rth_input);
} endfor_nexthops(fi);
release_net(fi->fib_net);
@@ -804,6 +837,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
fi->fib_nhs = nhs;
change_nexthops(fi) {
nexthop_nh->nh_parent = fi;
+ nexthop_nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *);
} endfor_nexthops(fi)
if (cfg->fc_mx) {
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 18cbc15b20d..f0cdb30921c 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -159,7 +159,6 @@ struct trie {
#endif
};
-static void put_child(struct trie *t, struct tnode *tn, int i, struct rt_trie_node *n);
static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n,
int wasfull);
static struct rt_trie_node *resize(struct trie *t, struct tnode *tn);
@@ -473,7 +472,7 @@ static struct tnode *tnode_new(t_key key, int pos, int bits)
}
pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode),
- sizeof(struct rt_trie_node) << bits);
+ sizeof(struct rt_trie_node *) << bits);
return tn;
}
@@ -490,7 +489,7 @@ static inline int tnode_full(const struct tnode *tn, const struct rt_trie_node *
return ((struct tnode *) n)->pos == tn->pos + tn->bits;
}
-static inline void put_child(struct trie *t, struct tnode *tn, int i,
+static inline void put_child(struct tnode *tn, int i,
struct rt_trie_node *n)
{
tnode_put_child_reorg(tn, i, n, -1);
@@ -754,8 +753,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
goto nomem;
}
- put_child(t, tn, 2*i, (struct rt_trie_node *) left);
- put_child(t, tn, 2*i+1, (struct rt_trie_node *) right);
+ put_child(tn, 2*i, (struct rt_trie_node *) left);
+ put_child(tn, 2*i+1, (struct rt_trie_node *) right);
}
}
@@ -776,9 +775,9 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
if (tkey_extract_bits(node->key,
oldtnode->pos + oldtnode->bits,
1) == 0)
- put_child(t, tn, 2*i, node);
+ put_child(tn, 2*i, node);
else
- put_child(t, tn, 2*i+1, node);
+ put_child(tn, 2*i+1, node);
continue;
}
@@ -786,8 +785,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
inode = (struct tnode *) node;
if (inode->bits == 1) {
- put_child(t, tn, 2*i, rtnl_dereference(inode->child[0]));
- put_child(t, tn, 2*i+1, rtnl_dereference(inode->child[1]));
+ put_child(tn, 2*i, rtnl_dereference(inode->child[0]));
+ put_child(tn, 2*i+1, rtnl_dereference(inode->child[1]));
tnode_free_safe(inode);
continue;
@@ -817,22 +816,22 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
*/
left = (struct tnode *) tnode_get_child(tn, 2*i);
- put_child(t, tn, 2*i, NULL);
+ put_child(tn, 2*i, NULL);
BUG_ON(!left);
right = (struct tnode *) tnode_get_child(tn, 2*i+1);
- put_child(t, tn, 2*i+1, NULL);
+ put_child(tn, 2*i+1, NULL);
BUG_ON(!right);
size = tnode_child_length(left);
for (j = 0; j < size; j++) {
- put_child(t, left, j, rtnl_dereference(inode->child[j]));
- put_child(t, right, j, rtnl_dereference(inode->child[j + size]));
+ put_child(left, j, rtnl_dereference(inode->child[j]));
+ put_child(right, j, rtnl_dereference(inode->child[j + size]));
}
- put_child(t, tn, 2*i, resize(t, left));
- put_child(t, tn, 2*i+1, resize(t, right));
+ put_child(tn, 2*i, resize(t, left));
+ put_child(tn, 2*i+1, resize(t, right));
tnode_free_safe(inode);
}
@@ -877,7 +876,7 @@ static struct tnode *halve(struct trie *t, struct tnode *tn)
if (!newn)
goto nomem;
- put_child(t, tn, i/2, (struct rt_trie_node *)newn);
+ put_child(tn, i/2, (struct rt_trie_node *)newn);
}
}
@@ -892,21 +891,21 @@ static struct tnode *halve(struct trie *t, struct tnode *tn)
if (left == NULL) {
if (right == NULL) /* Both are empty */
continue;
- put_child(t, tn, i/2, right);
+ put_child(tn, i/2, right);
continue;
}
if (right == NULL) {
- put_child(t, tn, i/2, left);
+ put_child(tn, i/2, left);
continue;
}
/* Two nonempty children */
newBinNode = (struct tnode *) tnode_get_child(tn, i/2);
- put_child(t, tn, i/2, NULL);
- put_child(t, newBinNode, 0, left);
- put_child(t, newBinNode, 1, right);
- put_child(t, tn, i/2, resize(t, newBinNode));
+ put_child(tn, i/2, NULL);
+ put_child(newBinNode, 0, left);
+ put_child(newBinNode, 1, right);
+ put_child(tn, i/2, resize(t, newBinNode));
}
tnode_free_safe(oldtnode);
return tn;
@@ -1125,7 +1124,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
node_set_parent((struct rt_trie_node *)l, tp);
cindex = tkey_extract_bits(key, tp->pos, tp->bits);
- put_child(t, tp, cindex, (struct rt_trie_node *)l);
+ put_child(tp, cindex, (struct rt_trie_node *)l);
} else {
/* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
/*
@@ -1155,12 +1154,12 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
node_set_parent((struct rt_trie_node *)tn, tp);
missbit = tkey_extract_bits(key, newpos, 1);
- put_child(t, tn, missbit, (struct rt_trie_node *)l);
- put_child(t, tn, 1-missbit, n);
+ put_child(tn, missbit, (struct rt_trie_node *)l);
+ put_child(tn, 1-missbit, n);
if (tp) {
cindex = tkey_extract_bits(key, tp->pos, tp->bits);
- put_child(t, tp, cindex, (struct rt_trie_node *)tn);
+ put_child(tp, cindex, (struct rt_trie_node *)tn);
} else {
rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
tp = tn;
@@ -1619,7 +1618,7 @@ static void trie_leaf_remove(struct trie *t, struct leaf *l)
if (tp) {
t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits);
- put_child(t, tp, cindex, NULL);
+ put_child(tp, cindex, NULL);
trie_rebalance(t, tp);
} else
RCU_INIT_POINTER(t->trie, NULL);
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 981ff1eef28..f1395a6fb35 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -325,14 +325,12 @@ static int ip_rcv_finish(struct sk_buff *skb)
const struct net_protocol *ipprot;
int protocol = iph->protocol;
- rcu_read_lock();
ipprot = rcu_dereference(inet_protos[protocol]);
if (ipprot && ipprot->early_demux) {
ipprot->early_demux(skb);
/* must reload iph, skb->head might have changed */
iph = ip_hdr(skb);
}
- rcu_read_unlock();
}
/*
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index fc1a81ca79a..c035251beb0 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -147,6 +147,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu);
static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb);
+static void ipv4_dst_destroy(struct dst_entry *dst);
static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
int how)
@@ -170,6 +171,7 @@ static struct dst_ops ipv4_dst_ops = {
.default_advmss = ipv4_default_advmss,
.mtu = ipv4_mtu,
.cow_metrics = ipv4_cow_metrics,
+ .destroy = ipv4_dst_destroy,
.ifdown = ipv4_dst_ifdown,
.negative_advice = ipv4_negative_advice,
.link_failure = ipv4_link_failure,
@@ -587,11 +589,17 @@ static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
build_sk_flow_key(fl4, sk);
}
-static DEFINE_SEQLOCK(fnhe_seqlock);
+static inline void rt_free(struct rtable *rt)
+{
+ call_rcu(&rt->dst.rcu_head, dst_rcu_free);
+}
+
+static DEFINE_SPINLOCK(fnhe_lock);
static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
{
struct fib_nh_exception *fnhe, *oldest;
+ struct rtable *orig;
oldest = rcu_dereference(hash->chain);
for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
@@ -599,6 +607,11 @@ static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
oldest = fnhe;
}
+ orig = rcu_dereference(oldest->fnhe_rth);
+ if (orig) {
+ RCU_INIT_POINTER(oldest->fnhe_rth, NULL);
+ rt_free(orig);
+ }
return oldest;
}
@@ -620,7 +633,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
int depth;
u32 hval = fnhe_hashfun(daddr);
- write_seqlock_bh(&fnhe_seqlock);
+ spin_lock_bh(&fnhe_lock);
hash = nh->nh_exceptions;
if (!hash) {
@@ -667,7 +680,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
fnhe->fnhe_stamp = jiffies;
out_unlock:
- write_sequnlock_bh(&fnhe_seqlock);
+ spin_unlock_bh(&fnhe_lock);
return;
}
@@ -1164,53 +1177,62 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
return NULL;
}
-static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
+static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
__be32 daddr)
{
- __be32 fnhe_daddr, gw;
- unsigned long expires;
- unsigned int seq;
- u32 pmtu;
-
-restart:
- seq = read_seqbegin(&fnhe_seqlock);
- fnhe_daddr = fnhe->fnhe_daddr;
- gw = fnhe->fnhe_gw;
- pmtu = fnhe->fnhe_pmtu;
- expires = fnhe->fnhe_expires;
- if (read_seqretry(&fnhe_seqlock, seq))
- goto restart;
-
- if (daddr != fnhe_daddr)
- return;
+ bool ret = false;
+
+ spin_lock_bh(&fnhe_lock);
- if (pmtu) {
- unsigned long diff = expires - jiffies;
+ if (daddr == fnhe->fnhe_daddr) {
+ struct rtable *orig;
- if (time_before(jiffies, expires)) {
- rt->rt_pmtu = pmtu;
- dst_set_expires(&rt->dst, diff);
+ if (fnhe->fnhe_pmtu) {
+ unsigned long expires = fnhe->fnhe_expires;
+ unsigned long diff = expires - jiffies;
+
+ if (time_before(jiffies, expires)) {
+ rt->rt_pmtu = fnhe->fnhe_pmtu;
+ dst_set_expires(&rt->dst, diff);
+ }
}
+ if (fnhe->fnhe_gw) {
+ rt->rt_flags |= RTCF_REDIRECTED;
+ rt->rt_gateway = fnhe->fnhe_gw;
+ }
+
+ orig = rcu_dereference(fnhe->fnhe_rth);
+ rcu_assign_pointer(fnhe->fnhe_rth, rt);
+ if (orig)
+ rt_free(orig);
+
+ fnhe->fnhe_stamp = jiffies;
+ ret = true;
+ } else {
+ /* Routes we intend to cache in nexthop exception have
+ * the DST_NOCACHE bit clear. However, if we are
+ * unsuccessful at storing this route into the cache
+ * we really need to set it.
+ */
+ rt->dst.flags |= DST_NOCACHE;
}
- if (gw) {
- rt->rt_flags |= RTCF_REDIRECTED;
- rt->rt_gateway = gw;
- }
- fnhe->fnhe_stamp = jiffies;
-}
+ spin_unlock_bh(&fnhe_lock);
-static inline void rt_free(struct rtable *rt)
-{
- call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
+ return ret;
}
-static void rt_cache_route(struct fib_nh *nh, struct rtable *rt)
+static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
{
- struct rtable *orig, *prev, **p = &nh->nh_rth_output;
-
- if (rt_is_input_route(rt))
- p = &nh->nh_rth_input;
+ struct rtable *orig, *prev, **p;
+ bool ret = true;
+ if (rt_is_input_route(rt)) {
+ p = (struct rtable **)&nh->nh_rth_input;
+ } else {
+ if (!nh->nh_pcpu_rth_output)
+ goto nocache;
+ p = (struct rtable **)__this_cpu_ptr(nh->nh_pcpu_rth_output);
+ }
orig = *p;
prev = cmpxchg(p, orig, rt);
@@ -1223,7 +1245,50 @@ static void rt_cache_route(struct fib_nh *nh, struct rtable *rt)
* unsuccessful at storing this route into the cache
* we really need to set it.
*/
+nocache:
rt->dst.flags |= DST_NOCACHE;
+ ret = false;
+ }
+
+ return ret;
+}
+
+static DEFINE_SPINLOCK(rt_uncached_lock);
+static LIST_HEAD(rt_uncached_list);
+
+static void rt_add_uncached_list(struct rtable *rt)
+{
+ spin_lock_bh(&rt_uncached_lock);
+ list_add_tail(&rt->rt_uncached, &rt_uncached_list);
+ spin_unlock_bh(&rt_uncached_lock);
+}
+
+static void ipv4_dst_destroy(struct dst_entry *dst)
+{
+ struct rtable *rt = (struct rtable *) dst;
+
+ if (dst->flags & DST_NOCACHE) {
+ spin_lock_bh(&rt_uncached_lock);
+ list_del(&rt->rt_uncached);
+ spin_unlock_bh(&rt_uncached_lock);
+ }
+}
+
+void rt_flush_dev(struct net_device *dev)
+{
+ if (!list_empty(&rt_uncached_list)) {
+ struct net *net = dev_net(dev);
+ struct rtable *rt;
+
+ spin_lock_bh(&rt_uncached_lock);
+ list_for_each_entry(rt, &rt_uncached_list, rt_uncached) {
+ if (rt->dst.dev != dev)
+ continue;
+ rt->dst.dev = net->loopback_dev;
+ dev_hold(rt->dst.dev);
+ dev_put(dev);
+ }
+ spin_unlock_bh(&rt_uncached_lock);
}
}
@@ -1239,20 +1304,24 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
struct fib_nh_exception *fnhe,
struct fib_info *fi, u16 type, u32 itag)
{
+ bool cached = false;
+
if (fi) {
struct fib_nh *nh = &FIB_RES_NH(*res);
if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
rt->rt_gateway = nh->nh_gw;
- if (unlikely(fnhe))
- rt_bind_exception(rt, fnhe, daddr);
dst_init_metrics(&rt->dst, fi->fib_metrics, true);
#ifdef CONFIG_IP_ROUTE_CLASSID
rt->dst.tclassid = nh->nh_tclassid;
#endif
- if (!(rt->dst.flags & DST_NOCACHE))
- rt_cache_route(nh, rt);
+ if (unlikely(fnhe))
+ cached = rt_bind_exception(rt, fnhe, daddr);
+ else if (!(rt->dst.flags & DST_NOCACHE))
+ cached = rt_cache_route(nh, rt);
}
+ if (unlikely(!cached))
+ rt_add_uncached_list(rt);
#ifdef CONFIG_IP_ROUTE_CLASSID
#ifdef CONFIG_IP_MULTIPLE_TABLES
@@ -1319,6 +1388,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rth->rt_iif = 0;
rth->rt_pmtu = 0;
rth->rt_gateway = 0;
+ INIT_LIST_HEAD(&rth->rt_uncached);
if (our) {
rth->dst.input= ip_local_deliver;
rth->rt_flags |= RTCF_LOCAL;
@@ -1420,7 +1490,7 @@ static int __mkroute_input(struct sk_buff *skb,
do_cache = false;
if (res->fi) {
if (!itag) {
- rth = FIB_RES_NH(*res).nh_rth_input;
+ rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
if (rt_cache_valid(rth)) {
skb_dst_set_noref(skb, &rth->dst);
goto out;
@@ -1444,6 +1514,7 @@ static int __mkroute_input(struct sk_buff *skb,
rth->rt_iif = 0;
rth->rt_pmtu = 0;
rth->rt_gateway = 0;
+ INIT_LIST_HEAD(&rth->rt_uncached);
rth->dst.input = ip_forward;
rth->dst.output = ip_output;
@@ -1582,7 +1653,7 @@ local_input:
do_cache = false;
if (res.fi) {
if (!itag) {
- rth = FIB_RES_NH(res).nh_rth_input;
+ rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input);
if (rt_cache_valid(rth)) {
skb_dst_set_noref(skb, &rth->dst);
err = 0;
@@ -1610,6 +1681,7 @@ local_input:
rth->rt_iif = 0;
rth->rt_pmtu = 0;
rth->rt_gateway = 0;
+ INIT_LIST_HEAD(&rth->rt_uncached);
if (res.type == RTN_UNREACHABLE) {
rth->dst.input= ip_error;
rth->dst.error= -err;
@@ -1748,19 +1820,23 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
fnhe = NULL;
if (fi) {
+ struct rtable __rcu **prth;
+
fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr);
- if (!fnhe) {
- rth = FIB_RES_NH(*res).nh_rth_output;
- if (rt_cache_valid(rth)) {
- dst_hold(&rth->dst);
- return rth;
- }
+ if (fnhe)
+ prth = &fnhe->fnhe_rth;
+ else
+ prth = __this_cpu_ptr(FIB_RES_NH(*res).nh_pcpu_rth_output);
+ rth = rcu_dereference(*prth);
+ if (rt_cache_valid(rth)) {
+ dst_hold(&rth->dst);
+ return rth;
}
}
rth = rt_dst_alloc(dev_out,
IN_DEV_CONF_GET(in_dev, NOPOLICY),
IN_DEV_CONF_GET(in_dev, NOXFRM),
- fi && !fnhe);
+ fi);
if (!rth)
return ERR_PTR(-ENOBUFS);
@@ -1773,6 +1849,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
rth->rt_iif = orig_oif ? : 0;
rth->rt_pmtu = 0;
rth->rt_gateway = 0;
+ INIT_LIST_HEAD(&rth->rt_uncached);
RT_CACHE_STAT_INC(out_slow_tot);
@@ -2052,6 +2129,8 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
rt->rt_type = ort->rt_type;
rt->rt_gateway = ort->rt_gateway;
+ INIT_LIST_HEAD(&rt->rt_uncached);
+
dst_free(new);
}
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 5840c325572..4b6487a6827 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -784,13 +784,6 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dointvec
},
{
- .procname = "rt_cache_rebuild_count",
- .data = &init_net.ipv4.sysctl_rt_cache_rebuild_count,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
.procname = "ping_group_range",
.data = &init_net.ipv4.sysctl_ping_group_range,
.maxlen = sizeof(init_net.ipv4.sysctl_ping_group_range),
@@ -829,8 +822,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
table[5].data =
&net->ipv4.sysctl_icmp_ratemask;
table[6].data =
- &net->ipv4.sysctl_rt_cache_rebuild_count;
- table[7].data =
&net->ipv4.sysctl_ping_group_range;
}
@@ -842,8 +833,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
net->ipv4.sysctl_ping_group_range[0] = 1;
net->ipv4.sysctl_ping_group_range[1] = 0;
- net->ipv4.sysctl_rt_cache_rebuild_count = 4;
-
tcp_init_mem(net);
net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a356e1fecf9..9be30b039ae 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5604,8 +5604,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
tcp_set_state(sk, TCP_ESTABLISHED);
if (skb != NULL) {
- sk->sk_rx_dst = dst_clone(skb_dst(skb));
- inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
+ inet_sk_rx_dst_set(sk, skb);
security_inet_conn_established(sk, skb);
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 2fbd9921253..7f91e5ac827 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1617,19 +1617,19 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
#endif
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
+ struct dst_entry *dst = sk->sk_rx_dst;
+
sock_rps_save_rxhash(sk, skb);
- if (sk->sk_rx_dst) {
- struct dst_entry *dst = sk->sk_rx_dst;
+ if (dst) {
if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
dst->ops->check(dst, 0) == NULL) {
dst_release(dst);
sk->sk_rx_dst = NULL;
}
}
- if (unlikely(sk->sk_rx_dst == NULL)) {
- sk->sk_rx_dst = dst_clone(skb_dst(skb));
- inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
- }
+ if (unlikely(sk->sk_rx_dst == NULL))
+ inet_sk_rx_dst_set(sk, skb);
+
if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
rsk = sk;
goto reset;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 3f1cc2028ed..232a90c3ec8 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -387,8 +387,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
struct tcp_sock *oldtp = tcp_sk(sk);
struct tcp_cookie_values *oldcvp = oldtp->cookie_values;
- newsk->sk_rx_dst = dst_clone(skb_dst(skb));
- inet_sk(newsk)->rx_dst_ifindex = skb->skb_iif;
+ inet_sk_rx_dst_set(newsk, skb);
/* TCP Cookie Transactions require space for the cookie pair,
* as it differs for each connection. There is no need to
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index c6281847f16..681ea2f413e 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -92,6 +92,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
xdst->u.rt.rt_type = rt->rt_type;
xdst->u.rt.rt_gateway = rt->rt_gateway;
xdst->u.rt.rt_pmtu = rt->rt_pmtu;
+ INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);
return 0;
}
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 47975e363fc..a52d864d562 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -52,11 +52,9 @@ int ip6_rcv_finish(struct sk_buff *skb)
if (sysctl_ip_early_demux && !skb_dst(skb)) {
const struct inet6_protocol *ipprot;
- rcu_read_lock();
ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);
if (ipprot && ipprot->early_demux)
ipprot->early_demux(skb);
- rcu_read_unlock();
}
if (!skb_dst(skb))
ip6_route_input(skb);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index cf02cb97bbd..8e80fd27910 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2480,12 +2480,8 @@ static int rt6_fill_node(struct net *net,
goto nla_put_failure;
if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
goto nla_put_failure;
- if (!(rt->rt6i_flags & RTF_EXPIRES))
- expires = 0;
- else if (rt->dst.expires - jiffies < INT_MAX)
- expires = rt->dst.expires - jiffies;
- else
- expires = INT_MAX;
+
+ expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
goto nla_put_failure;