summaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_metrics.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-01-25 11:17:34 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2014-01-25 11:17:34 -0800
commit4ba9920e5e9c0e16b5ed24292d45322907bb9035 (patch)
tree7d023baea59ed0886ded1f0b6d1c6385690b88f7 /net/ipv4/tcp_metrics.c
parent82c477669a4665eb4e52030792051e0559ee2a36 (diff)
parent8b662fe70c68282f78482dc272df0c4f355e49f5 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: 1) BPF debugger and asm tool by Daniel Borkmann. 2) Speed up create/bind in AF_PACKET, also from Daniel Borkmann. 3) Correct reciprocal_divide and update users, from Hannes Frederic Sowa and Daniel Borkmann. 4) Currently we only have a "set" operation for the hw timestamp socket ioctl, add a "get" operation to match. From Ben Hutchings. 5) Add better trace events for debugging driver datapath problems, also from Ben Hutchings. 6) Implement auto corking in TCP, from Eric Dumazet. Basically, if we have a small send and a previous packet is already in the qdisc or device queue, defer until TX completion or we get more data. 7) Allow userspace to manage ipv6 temporary addresses, from Jiri Pirko. 8) Add a qdisc bypass option for AF_PACKET sockets, from Daniel Borkmann. 9) Share IP header compression code between Bluetooth and IEEE802154 layers, from Jukka Rissanen. 10) Fix ipv6 router reachability probing, from Jiri Benc. 11) Allow packets to be captured on macvtap devices, from Vlad Yasevich. 12) Support tunneling in GRO layer, from Jerry Chu. 13) Allow bonding to be configured fully using netlink, from Scott Feldman. 14) Allow AF_PACKET users to obtain the VLAN TPID, just like they can already get the TCI. From Atzm Watanabe. 15) New "Heavy Hitter" qdisc, from Terry Lam. 16) Significantly improve the IPSEC support in pktgen, from Fan Du. 17) Allow ipv4 tunnels to cache routes, just like sockets. From Tom Herbert. 18) Add Proportional Integral Enhanced packet scheduler, from Vijay Subramanian. 19) Allow openvswitch to mmap'd netlink, from Thomas Graf. 20) Key TCP metrics blobs also by source address, not just destination address. From Christoph Paasch. 21) Support 10G in generic phylib. From Andy Fleming. 22) Try to short-circuit GRO flow compares using device provided RX hash, if provided. From Tom Herbert. The wireless and netfilter folks have been busy little bees too. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (2064 commits) net/cxgb4: Fix referencing freed adapter ipv6: reallocate addrconf router for ipv6 address when lo device up fib_frontend: fix possible NULL pointer dereference rtnetlink: remove IFLA_BOND_SLAVE definition rtnetlink: remove check for fill_slave_info in rtnl_have_link_slave_info qlcnic: update version to 5.3.55 qlcnic: Enhance logic to calculate msix vectors. qlcnic: Refactor interrupt coalescing code for all adapters. qlcnic: Update poll controller code path qlcnic: Interrupt code cleanup qlcnic: Enhance Tx timeout debugging. qlcnic: Use bool for rx_mac_learn. bonding: fix u64 division rtnetlink: add missing IFLA_BOND_AD_INFO_UNSPEC sfc: Use the correct maximum TX DMA ring size for SFC9100 Add Shradha Shah as the sfc driver maintainer. net/vxlan: Share RX skb de-marking and checksum checks with ovs tulip: cleanup by using ARRAY_SIZE() ip_tunnel: clear IPCB in ip_tunnel_xmit() in case dst_link_failure() is called net/cxgb4: Don't retrieve stats during recovery ...
Diffstat (limited to 'net/ipv4/tcp_metrics.c')
-rw-r--r--net/ipv4/tcp_metrics.c194
1 files changed, 131 insertions, 63 deletions
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 098b3a29f6f..d547075d830 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -22,7 +22,8 @@
int sysctl_tcp_nometrics_save __read_mostly;
-static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *addr,
+static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *saddr,
+ const struct inetpeer_addr *daddr,
struct net *net, unsigned int hash);
struct tcp_fastopen_metrics {
@@ -34,7 +35,8 @@ struct tcp_fastopen_metrics {
struct tcp_metrics_block {
struct tcp_metrics_block __rcu *tcpm_next;
- struct inetpeer_addr tcpm_addr;
+ struct inetpeer_addr tcpm_saddr;
+ struct inetpeer_addr tcpm_daddr;
unsigned long tcpm_stamp;
u32 tcpm_ts;
u32 tcpm_ts_stamp;
@@ -145,7 +147,8 @@ static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst
#define TCP_METRICS_RECLAIM_PTR (struct tcp_metrics_block *) 0x1UL
static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
- struct inetpeer_addr *addr,
+ struct inetpeer_addr *saddr,
+ struct inetpeer_addr *daddr,
unsigned int hash)
{
struct tcp_metrics_block *tm;
@@ -158,7 +161,7 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
/* While waiting for the spin-lock the cache might have been populated
* with this entry and so we have to check again.
*/
- tm = __tcp_get_metrics(addr, net, hash);
+ tm = __tcp_get_metrics(saddr, daddr, net, hash);
if (tm == TCP_METRICS_RECLAIM_PTR) {
reclaim = true;
tm = NULL;
@@ -183,7 +186,8 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
if (!tm)
goto out_unlock;
}
- tm->tcpm_addr = *addr;
+ tm->tcpm_saddr = *saddr;
+ tm->tcpm_daddr = *daddr;
tcpm_suck_dst(tm, dst, true);
@@ -206,7 +210,8 @@ static struct tcp_metrics_block *tcp_get_encode(struct tcp_metrics_block *tm, in
return NULL;
}
-static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *addr,
+static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *saddr,
+ const struct inetpeer_addr *daddr,
struct net *net, unsigned int hash)
{
struct tcp_metrics_block *tm;
@@ -214,7 +219,8 @@ static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *a
for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm;
tm = rcu_dereference(tm->tcpm_next)) {
- if (addr_same(&tm->tcpm_addr, addr))
+ if (addr_same(&tm->tcpm_saddr, saddr) &&
+ addr_same(&tm->tcpm_daddr, daddr))
break;
depth++;
}
@@ -225,19 +231,22 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req,
struct dst_entry *dst)
{
struct tcp_metrics_block *tm;
- struct inetpeer_addr addr;
+ struct inetpeer_addr saddr, daddr;
unsigned int hash;
struct net *net;
- addr.family = req->rsk_ops->family;
- switch (addr.family) {
+ saddr.family = req->rsk_ops->family;
+ daddr.family = req->rsk_ops->family;
+ switch (daddr.family) {
case AF_INET:
- addr.addr.a4 = inet_rsk(req)->ir_rmt_addr;
- hash = (__force unsigned int) addr.addr.a4;
+ saddr.addr.a4 = inet_rsk(req)->ir_loc_addr;
+ daddr.addr.a4 = inet_rsk(req)->ir_rmt_addr;
+ hash = (__force unsigned int) daddr.addr.a4;
break;
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
- *(struct in6_addr *)addr.addr.a6 = inet_rsk(req)->ir_v6_rmt_addr;
+ *(struct in6_addr *)saddr.addr.a6 = inet_rsk(req)->ir_v6_loc_addr;
+ *(struct in6_addr *)daddr.addr.a6 = inet_rsk(req)->ir_v6_rmt_addr;
hash = ipv6_addr_hash(&inet_rsk(req)->ir_v6_rmt_addr);
break;
#endif
@@ -250,7 +259,8 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req,
for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm;
tm = rcu_dereference(tm->tcpm_next)) {
- if (addr_same(&tm->tcpm_addr, &addr))
+ if (addr_same(&tm->tcpm_saddr, &saddr) &&
+ addr_same(&tm->tcpm_daddr, &daddr))
break;
}
tcpm_check_stamp(tm, dst);
@@ -260,32 +270,44 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req,
static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock *tw)
{
struct tcp_metrics_block *tm;
- struct inetpeer_addr addr;
+ struct inetpeer_addr saddr, daddr;
unsigned int hash;
struct net *net;
- addr.family = tw->tw_family;
- switch (addr.family) {
- case AF_INET:
- addr.addr.a4 = tw->tw_daddr;
- hash = (__force unsigned int) addr.addr.a4;
- break;
+ if (tw->tw_family == AF_INET) {
+ saddr.family = AF_INET;
+ saddr.addr.a4 = tw->tw_rcv_saddr;
+ daddr.family = AF_INET;
+ daddr.addr.a4 = tw->tw_daddr;
+ hash = (__force unsigned int) daddr.addr.a4;
+ }
#if IS_ENABLED(CONFIG_IPV6)
- case AF_INET6:
- *(struct in6_addr *)addr.addr.a6 = tw->tw_v6_daddr;
- hash = ipv6_addr_hash(&tw->tw_v6_daddr);
- break;
+ else if (tw->tw_family == AF_INET6) {
+ if (ipv6_addr_v4mapped(&tw->tw_v6_daddr)) {
+ saddr.family = AF_INET;
+ saddr.addr.a4 = tw->tw_rcv_saddr;
+ daddr.family = AF_INET;
+ daddr.addr.a4 = tw->tw_daddr;
+ hash = (__force unsigned int) daddr.addr.a4;
+ } else {
+ saddr.family = AF_INET6;
+ *(struct in6_addr *)saddr.addr.a6 = tw->tw_v6_rcv_saddr;
+ daddr.family = AF_INET6;
+ *(struct in6_addr *)daddr.addr.a6 = tw->tw_v6_daddr;
+ hash = ipv6_addr_hash(&tw->tw_v6_daddr);
+ }
+ }
#endif
- default:
+ else
return NULL;
- }
net = twsk_net(tw);
hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log);
for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm;
tm = rcu_dereference(tm->tcpm_next)) {
- if (addr_same(&tm->tcpm_addr, &addr))
+ if (addr_same(&tm->tcpm_saddr, &saddr) &&
+ addr_same(&tm->tcpm_daddr, &daddr))
break;
}
return tm;
@@ -296,34 +318,45 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk,
bool create)
{
struct tcp_metrics_block *tm;
- struct inetpeer_addr addr;
+ struct inetpeer_addr saddr, daddr;
unsigned int hash;
struct net *net;
- addr.family = sk->sk_family;
- switch (addr.family) {
- case AF_INET:
- addr.addr.a4 = inet_sk(sk)->inet_daddr;
- hash = (__force unsigned int) addr.addr.a4;
- break;
+ if (sk->sk_family == AF_INET) {
+ saddr.family = AF_INET;
+ saddr.addr.a4 = inet_sk(sk)->inet_saddr;
+ daddr.family = AF_INET;
+ daddr.addr.a4 = inet_sk(sk)->inet_daddr;
+ hash = (__force unsigned int) daddr.addr.a4;
+ }
#if IS_ENABLED(CONFIG_IPV6)
- case AF_INET6:
- *(struct in6_addr *)addr.addr.a6 = sk->sk_v6_daddr;
- hash = ipv6_addr_hash(&sk->sk_v6_daddr);
- break;
+ else if (sk->sk_family == AF_INET6) {
+ if (ipv6_addr_v4mapped(&sk->sk_v6_daddr)) {
+ saddr.family = AF_INET;
+ saddr.addr.a4 = inet_sk(sk)->inet_saddr;
+ daddr.family = AF_INET;
+ daddr.addr.a4 = inet_sk(sk)->inet_daddr;
+ hash = (__force unsigned int) daddr.addr.a4;
+ } else {
+ saddr.family = AF_INET6;
+ *(struct in6_addr *)saddr.addr.a6 = sk->sk_v6_rcv_saddr;
+ daddr.family = AF_INET6;
+ *(struct in6_addr *)daddr.addr.a6 = sk->sk_v6_daddr;
+ hash = ipv6_addr_hash(&sk->sk_v6_daddr);
+ }
+ }
#endif
- default:
+ else
return NULL;
- }
net = dev_net(dst->dev);
hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log);
- tm = __tcp_get_metrics(&addr, net, hash);
+ tm = __tcp_get_metrics(&saddr, &daddr, net, hash);
if (tm == TCP_METRICS_RECLAIM_PTR)
tm = NULL;
if (!tm && create)
- tm = tcpm_new(dst, &addr, hash);
+ tm = tcpm_new(dst, &saddr, &daddr, hash);
else
tcpm_check_stamp(tm, dst);
@@ -737,15 +770,21 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
struct nlattr *nest;
int i;
- switch (tm->tcpm_addr.family) {
+ switch (tm->tcpm_daddr.family) {
case AF_INET:
if (nla_put_be32(msg, TCP_METRICS_ATTR_ADDR_IPV4,
- tm->tcpm_addr.addr.a4) < 0)
+ tm->tcpm_daddr.addr.a4) < 0)
+ goto nla_put_failure;
+ if (nla_put_be32(msg, TCP_METRICS_ATTR_SADDR_IPV4,
+ tm->tcpm_saddr.addr.a4) < 0)
goto nla_put_failure;
break;
case AF_INET6:
if (nla_put(msg, TCP_METRICS_ATTR_ADDR_IPV6, 16,
- tm->tcpm_addr.addr.a6) < 0)
+ tm->tcpm_daddr.addr.a6) < 0)
+ goto nla_put_failure;
+ if (nla_put(msg, TCP_METRICS_ATTR_SADDR_IPV6, 16,
+ tm->tcpm_saddr.addr.a6) < 0)
goto nla_put_failure;
break;
default:
@@ -868,44 +907,66 @@ done:
return skb->len;
}
-static int parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr,
- unsigned int *hash, int optional)
+static int __parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr,
+ unsigned int *hash, int optional, int v4, int v6)
{
struct nlattr *a;
- a = info->attrs[TCP_METRICS_ATTR_ADDR_IPV4];
+ a = info->attrs[v4];
if (a) {
addr->family = AF_INET;
addr->addr.a4 = nla_get_be32(a);
- *hash = (__force unsigned int) addr->addr.a4;
+ if (hash)
+ *hash = (__force unsigned int) addr->addr.a4;
return 0;
}
- a = info->attrs[TCP_METRICS_ATTR_ADDR_IPV6];
+ a = info->attrs[v6];
if (a) {
if (nla_len(a) != sizeof(struct in6_addr))
return -EINVAL;
addr->family = AF_INET6;
memcpy(addr->addr.a6, nla_data(a), sizeof(addr->addr.a6));
- *hash = ipv6_addr_hash((struct in6_addr *) addr->addr.a6);
+ if (hash)
+ *hash = ipv6_addr_hash((struct in6_addr *) addr->addr.a6);
return 0;
}
return optional ? 1 : -EAFNOSUPPORT;
}
+static int parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr,
+ unsigned int *hash, int optional)
+{
+ return __parse_nl_addr(info, addr, hash, optional,
+ TCP_METRICS_ATTR_ADDR_IPV4,
+ TCP_METRICS_ATTR_ADDR_IPV6);
+}
+
+static int parse_nl_saddr(struct genl_info *info, struct inetpeer_addr *addr)
+{
+ return __parse_nl_addr(info, addr, NULL, 0,
+ TCP_METRICS_ATTR_SADDR_IPV4,
+ TCP_METRICS_ATTR_SADDR_IPV6);
+}
+
static int tcp_metrics_nl_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
struct tcp_metrics_block *tm;
- struct inetpeer_addr addr;
+ struct inetpeer_addr saddr, daddr;
unsigned int hash;
struct sk_buff *msg;
struct net *net = genl_info_net(info);
void *reply;
int ret;
+ bool src = true;
- ret = parse_nl_addr(info, &addr, &hash, 0);
+ ret = parse_nl_addr(info, &daddr, &hash, 0);
if (ret < 0)
return ret;
+ ret = parse_nl_saddr(info, &saddr);
+ if (ret < 0)
+ src = false;
+
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
return -ENOMEM;
@@ -920,7 +981,8 @@ static int tcp_metrics_nl_cmd_get(struct sk_buff *skb, struct genl_info *info)
rcu_read_lock();
for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm;
tm = rcu_dereference(tm->tcpm_next)) {
- if (addr_same(&tm->tcpm_addr, &addr)) {
+ if (addr_same(&tm->tcpm_daddr, &daddr) &&
+ (!src || addr_same(&tm->tcpm_saddr, &saddr))) {
ret = tcp_metrics_fill_info(msg, tm);
break;
}
@@ -975,32 +1037,38 @@ static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info)
struct tcpm_hash_bucket *hb;
struct tcp_metrics_block *tm;
struct tcp_metrics_block __rcu **pp;
- struct inetpeer_addr addr;
+ struct inetpeer_addr saddr, daddr;
unsigned int hash;
struct net *net = genl_info_net(info);
int ret;
+ bool src = true, found = false;
- ret = parse_nl_addr(info, &addr, &hash, 1);
+ ret = parse_nl_addr(info, &daddr, &hash, 1);
if (ret < 0)
return ret;
if (ret > 0)
return tcp_metrics_flush_all(net);
+ ret = parse_nl_saddr(info, &saddr);
+ if (ret < 0)
+ src = false;
hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log);
hb = net->ipv4.tcp_metrics_hash + hash;
pp = &hb->chain;
spin_lock_bh(&tcp_metrics_lock);
- for (tm = deref_locked_genl(*pp); tm;
- pp = &tm->tcpm_next, tm = deref_locked_genl(*pp)) {
- if (addr_same(&tm->tcpm_addr, &addr)) {
+ for (tm = deref_locked_genl(*pp); tm; tm = deref_locked_genl(*pp)) {
+ if (addr_same(&tm->tcpm_daddr, &daddr) &&
+ (!src || addr_same(&tm->tcpm_saddr, &saddr))) {
*pp = tm->tcpm_next;
- break;
+ kfree_rcu(tm, rcu_head);
+ found = true;
+ } else {
+ pp = &tm->tcpm_next;
}
}
spin_unlock_bh(&tcp_metrics_lock);
- if (!tm)
+ if (!found)
return -ESRCH;
- kfree_rcu(tm, rcu_head);
return 0;
}