From 324fd55a19827b7191cc6ab73865e30c0e6e6423 Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Wed, 8 Jan 2014 16:05:55 +0100 Subject: tcp: metrics: rename tcpm_addr to tcpm_daddr As we will add also the source-address, we rename all accesses to the tcp-metrics address to use "daddr". Signed-off-by: Christoph Paasch Signed-off-by: David S. Miller --- net/ipv4/tcp_metrics.c | 72 +++++++++++++++++++++++++------------------------- 1 file changed, 36 insertions(+), 36 deletions(-) (limited to 'net/ipv4/tcp_metrics.c') diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 06493736fbc..f9b5f519a4e 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -31,7 +31,7 @@ struct tcp_fastopen_metrics { struct tcp_metrics_block { struct tcp_metrics_block __rcu *tcpm_next; - struct inetpeer_addr tcpm_addr; + struct inetpeer_addr tcpm_daddr; unsigned long tcpm_stamp; u32 tcpm_ts; u32 tcpm_ts_stamp; @@ -131,7 +131,7 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst, } static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, - struct inetpeer_addr *addr, + struct inetpeer_addr *daddr, unsigned int hash, bool reclaim) { @@ -155,7 +155,7 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, if (!tm) goto out_unlock; } - tm->tcpm_addr = *addr; + tm->tcpm_daddr = *daddr; tcpm_suck_dst(tm, dst, true); @@ -189,7 +189,7 @@ static struct tcp_metrics_block *tcp_get_encode(struct tcp_metrics_block *tm, in return NULL; } -static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *addr, +static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *daddr, struct net *net, unsigned int hash) { struct tcp_metrics_block *tm; @@ -197,7 +197,7 @@ static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *a for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; tm = rcu_dereference(tm->tcpm_next)) { - if (addr_same(&tm->tcpm_addr, addr)) + if (addr_same(&tm->tcpm_daddr, daddr)) break; depth++; } @@ -208,19 +208,19 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, struct dst_entry *dst) { struct tcp_metrics_block *tm; - struct inetpeer_addr addr; + struct inetpeer_addr daddr; unsigned int hash; struct net *net; - addr.family = req->rsk_ops->family; - switch (addr.family) { + daddr.family = req->rsk_ops->family; + switch (daddr.family) { case AF_INET: - addr.addr.a4 = inet_rsk(req)->ir_rmt_addr; - hash = (__force unsigned int) addr.addr.a4; + daddr.addr.a4 = inet_rsk(req)->ir_rmt_addr; + hash = (__force unsigned int) daddr.addr.a4; break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: - *(struct in6_addr *)addr.addr.a6 = inet_rsk(req)->ir_v6_rmt_addr; + *(struct in6_addr *)daddr.addr.a6 = inet_rsk(req)->ir_v6_rmt_addr; hash = ipv6_addr_hash(&inet_rsk(req)->ir_v6_rmt_addr); break; #endif @@ -233,7 +233,7 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; tm = rcu_dereference(tm->tcpm_next)) { - if (addr_same(&tm->tcpm_addr, &addr)) + if (addr_same(&tm->tcpm_daddr, &daddr)) break; } tcpm_check_stamp(tm, dst); @@ -243,19 +243,19 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock *tw) { struct tcp_metrics_block *tm; - struct inetpeer_addr addr; + struct inetpeer_addr daddr; unsigned int hash; struct net *net; - addr.family = tw->tw_family; - switch (addr.family) { + daddr.family = tw->tw_family; + switch (daddr.family) { case AF_INET: - addr.addr.a4 = tw->tw_daddr; - hash = (__force unsigned int) addr.addr.a4; + daddr.addr.a4 = tw->tw_daddr; + hash = (__force unsigned int) daddr.addr.a4; break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: - *(struct in6_addr *)addr.addr.a6 = tw->tw_v6_daddr; + *(struct in6_addr *)daddr.addr.a6 = tw->tw_v6_daddr; hash = ipv6_addr_hash(&tw->tw_v6_daddr); break; #endif @@ -268,7 +268,7 @@ static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; tm = rcu_dereference(tm->tcpm_next)) { - if (addr_same(&tm->tcpm_addr, &addr)) + if (addr_same(&tm->tcpm_daddr, &daddr)) break; } return tm; @@ -279,20 +279,20 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, bool create) { struct tcp_metrics_block *tm; - struct inetpeer_addr addr; + struct inetpeer_addr daddr; unsigned int hash; struct net *net; bool reclaim; - addr.family = sk->sk_family; - switch (addr.family) { + daddr.family = sk->sk_family; + switch (daddr.family) { case AF_INET: - addr.addr.a4 = inet_sk(sk)->inet_daddr; - hash = (__force unsigned int) addr.addr.a4; + daddr.addr.a4 = inet_sk(sk)->inet_daddr; + hash = (__force unsigned int) daddr.addr.a4; break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: - *(struct in6_addr *)addr.addr.a6 = sk->sk_v6_daddr; + *(struct in6_addr *)daddr.addr.a6 = sk->sk_v6_daddr; hash = ipv6_addr_hash(&sk->sk_v6_daddr); break; #endif @@ -303,14 +303,14 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, net = dev_net(dst->dev); hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); - tm = __tcp_get_metrics(&addr, net, hash); + tm = __tcp_get_metrics(&daddr, net, hash); reclaim = false; if (tm == TCP_METRICS_RECLAIM_PTR) { reclaim = true; tm = NULL; } if (!tm && create) - tm = tcpm_new(dst, &addr, hash, reclaim); + tm = tcpm_new(dst, &daddr, hash, reclaim); else tcpm_check_stamp(tm, dst); @@ -724,15 +724,15 @@ static int tcp_metrics_fill_info(struct sk_buff *msg, struct nlattr *nest; int i; - switch (tm->tcpm_addr.family) { + switch (tm->tcpm_daddr.family) { case AF_INET: if (nla_put_be32(msg, TCP_METRICS_ATTR_ADDR_IPV4, - tm->tcpm_addr.addr.a4) < 0) + tm->tcpm_daddr.addr.a4) < 0) goto nla_put_failure; break; case AF_INET6: if (nla_put(msg, TCP_METRICS_ATTR_ADDR_IPV6, 16, - tm->tcpm_addr.addr.a6) < 0) + tm->tcpm_daddr.addr.a6) < 0) goto nla_put_failure; break; default: @@ -882,14 +882,14 @@ static int parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr, static int tcp_metrics_nl_cmd_get(struct sk_buff *skb, struct genl_info *info) { struct tcp_metrics_block *tm; - struct inetpeer_addr addr; + struct inetpeer_addr daddr; unsigned int hash; struct sk_buff *msg; struct net *net = genl_info_net(info); void *reply; int ret; - ret = parse_nl_addr(info, &addr, &hash, 0); + ret = parse_nl_addr(info, &daddr, &hash, 0); if (ret < 0) return ret; @@ -907,7 +907,7 @@ static int tcp_metrics_nl_cmd_get(struct sk_buff *skb, struct genl_info *info) rcu_read_lock(); for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; tm = rcu_dereference(tm->tcpm_next)) { - if (addr_same(&tm->tcpm_addr, &addr)) { + if (addr_same(&tm->tcpm_daddr, &daddr)) { ret = tcp_metrics_fill_info(msg, tm); break; } @@ -962,12 +962,12 @@ static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info) struct tcpm_hash_bucket *hb; struct tcp_metrics_block *tm; struct tcp_metrics_block __rcu **pp; - struct inetpeer_addr addr; + struct inetpeer_addr daddr; unsigned int hash; struct net *net = genl_info_net(info); int ret; - ret = parse_nl_addr(info, &addr, &hash, 1); + ret = parse_nl_addr(info, &daddr, &hash, 1); if (ret < 0) return ret; if (ret > 0) @@ -979,7 +979,7 @@ static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info) spin_lock_bh(&tcp_metrics_lock); for (tm = deref_locked_genl(*pp); tm; pp = &tm->tcpm_next, tm = deref_locked_genl(*pp)) { - if (addr_same(&tm->tcpm_addr, &addr)) { + if (addr_same(&tm->tcpm_daddr, &daddr)) { *pp = tm->tcpm_next; break; } -- cgit v1.2.3-70-g09d2 From a544302820db12660b15de185b9e67c781a6b74e Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Wed, 8 Jan 2014 16:05:56 +0100 Subject: tcp: metrics: Add source-address to tcp-metrics We add the source-address to the tcp-metrics, so that different metrics will be used per source/destination-pair. We use the destination-hash to store the metric inside the hash-table. That way, deleting and dumping via "ip tcp_metrics" is easy. Signed-off-by: Christoph Paasch Signed-off-by: David S. Miller --- net/ipv4/tcp_metrics.c | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) (limited to 'net/ipv4/tcp_metrics.c') diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index f9b5f519a4e..de32aa41a84 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -31,6 +31,7 @@ struct tcp_fastopen_metrics { struct tcp_metrics_block { struct tcp_metrics_block __rcu *tcpm_next; + struct inetpeer_addr tcpm_saddr; struct inetpeer_addr tcpm_daddr; unsigned long tcpm_stamp; u32 tcpm_ts; @@ -131,6 +132,7 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst, } static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, + struct inetpeer_addr *saddr, struct inetpeer_addr *daddr, unsigned int hash, bool reclaim) @@ -155,6 +157,7 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, if (!tm) goto out_unlock; } + tm->tcpm_saddr = *saddr; tm->tcpm_daddr = *daddr; tcpm_suck_dst(tm, dst, true); @@ -189,7 +192,8 @@ static struct tcp_metrics_block *tcp_get_encode(struct tcp_metrics_block *tm, in return NULL; } -static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *daddr, +static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *saddr, + const struct inetpeer_addr *daddr, struct net *net, unsigned int hash) { struct tcp_metrics_block *tm; @@ -197,7 +201,8 @@ static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *d for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; tm = rcu_dereference(tm->tcpm_next)) { - if (addr_same(&tm->tcpm_daddr, daddr)) + if (addr_same(&tm->tcpm_saddr, saddr) && + addr_same(&tm->tcpm_daddr, daddr)) break; depth++; } @@ -208,18 +213,21 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, struct dst_entry *dst) { struct tcp_metrics_block *tm; - struct inetpeer_addr daddr; + struct inetpeer_addr saddr, daddr; unsigned int hash; struct net *net; + saddr.family = req->rsk_ops->family; daddr.family = req->rsk_ops->family; switch (daddr.family) { case AF_INET: + saddr.addr.a4 = inet_rsk(req)->ir_loc_addr; daddr.addr.a4 = inet_rsk(req)->ir_rmt_addr; hash = (__force unsigned int) daddr.addr.a4; break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: + *(struct in6_addr *)saddr.addr.a6 = inet_rsk(req)->ir_v6_loc_addr; *(struct in6_addr *)daddr.addr.a6 = inet_rsk(req)->ir_v6_rmt_addr; hash = ipv6_addr_hash(&inet_rsk(req)->ir_v6_rmt_addr); break; @@ -233,7 +241,8 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; tm = rcu_dereference(tm->tcpm_next)) { - if (addr_same(&tm->tcpm_daddr, &daddr)) + if (addr_same(&tm->tcpm_saddr, &saddr) && + addr_same(&tm->tcpm_daddr, &daddr)) break; } tcpm_check_stamp(tm, dst); @@ -243,18 +252,21 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock *tw) { struct tcp_metrics_block *tm; - struct inetpeer_addr daddr; + struct inetpeer_addr saddr, daddr; unsigned int hash; struct net *net; + saddr.family = tw->tw_family; daddr.family = tw->tw_family; switch (daddr.family) { case AF_INET: + saddr.addr.a4 = tw->tw_rcv_saddr; daddr.addr.a4 = tw->tw_daddr; hash = (__force unsigned int) daddr.addr.a4; break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: + *(struct in6_addr *)saddr.addr.a6 = tw->tw_v6_rcv_saddr; *(struct in6_addr *)daddr.addr.a6 = tw->tw_v6_daddr; hash = ipv6_addr_hash(&tw->tw_v6_daddr); break; @@ -268,7 +280,8 @@ static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; tm = rcu_dereference(tm->tcpm_next)) { - if (addr_same(&tm->tcpm_daddr, &daddr)) + if (addr_same(&tm->tcpm_saddr, &saddr) && + addr_same(&tm->tcpm_daddr, &daddr)) break; } return tm; @@ -279,19 +292,22 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, bool create) { struct tcp_metrics_block *tm; - struct inetpeer_addr daddr; + struct inetpeer_addr saddr, daddr; unsigned int hash; struct net *net; bool reclaim; + saddr.family = sk->sk_family; daddr.family = sk->sk_family; switch (daddr.family) { case AF_INET: + saddr.addr.a4 = inet_sk(sk)->inet_saddr; daddr.addr.a4 = inet_sk(sk)->inet_daddr; hash = (__force unsigned int) daddr.addr.a4; break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: + *(struct in6_addr *)saddr.addr.a6 = sk->sk_v6_rcv_saddr; *(struct in6_addr *)daddr.addr.a6 = sk->sk_v6_daddr; hash = ipv6_addr_hash(&sk->sk_v6_daddr); break; @@ -303,14 +319,14 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, net = dev_net(dst->dev); hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); - tm = __tcp_get_metrics(&daddr, net, hash); + tm = __tcp_get_metrics(&saddr, &daddr, net, hash); reclaim = false; if (tm == TCP_METRICS_RECLAIM_PTR) { reclaim = true; tm = NULL; } if (!tm && create) - tm = tcpm_new(dst, &daddr, hash, reclaim); + tm = tcpm_new(dst, &saddr, &daddr, hash, reclaim); else tcpm_check_stamp(tm, dst); -- cgit v1.2.3-70-g09d2 From 8a59359cb80f448923a7bc9f555d477e74547d7a Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Wed, 8 Jan 2014 16:05:57 +0100 Subject: tcp: metrics: New netlink attribute for src IP and dumped in netlink reply This patch adds a new netlink attribute for the source-IP and appends it to the netlink reply. Now, iproute2 can have access to the source-IP. Signed-off-by: Christoph Paasch Signed-off-by: David S. Miller --- include/uapi/linux/tcp_metrics.h | 2 ++ net/ipv4/tcp_metrics.c | 6 ++++++ 2 files changed, 8 insertions(+) (limited to 'net/ipv4/tcp_metrics.c') diff --git a/include/uapi/linux/tcp_metrics.h b/include/uapi/linux/tcp_metrics.h index cb5157b55f3..54a37b13f2c 100644 --- a/include/uapi/linux/tcp_metrics.h +++ b/include/uapi/linux/tcp_metrics.h @@ -35,6 +35,8 @@ enum { TCP_METRICS_ATTR_FOPEN_SYN_DROPS, /* u16, count of drops */ TCP_METRICS_ATTR_FOPEN_SYN_DROP_TS, /* msecs age */ TCP_METRICS_ATTR_FOPEN_COOKIE, /* binary */ + TCP_METRICS_ATTR_SADDR_IPV4, /* u32 */ + TCP_METRICS_ATTR_SADDR_IPV6, /* binary */ __TCP_METRICS_ATTR_MAX, }; diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index de32aa41a84..199659f7a87 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -745,11 +745,17 @@ static int tcp_metrics_fill_info(struct sk_buff *msg, if (nla_put_be32(msg, TCP_METRICS_ATTR_ADDR_IPV4, tm->tcpm_daddr.addr.a4) < 0) goto nla_put_failure; + if (nla_put_be32(msg, TCP_METRICS_ATTR_SADDR_IPV4, + tm->tcpm_saddr.addr.a4) < 0) + goto nla_put_failure; break; case AF_INET6: if (nla_put(msg, TCP_METRICS_ATTR_ADDR_IPV6, 16, tm->tcpm_daddr.addr.a6) < 0) goto nla_put_failure; + if (nla_put(msg, TCP_METRICS_ATTR_SADDR_IPV6, 16, + tm->tcpm_saddr.addr.a6) < 0) + goto nla_put_failure; break; default: return -EAFNOSUPPORT; -- cgit v1.2.3-70-g09d2 From bbf852b96ebdc6d1be7a67143824523280bbcf44 Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Wed, 8 Jan 2014 16:05:58 +0100 Subject: tcp: metrics: Delete all entries matching a certain destination As we now can have multiple entries per destination-IP, the "ip tcp_metrics delete address ADDRESS" command deletes all of them. Signed-off-by: Christoph Paasch Signed-off-by: David S. Miller --- net/ipv4/tcp_metrics.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'net/ipv4/tcp_metrics.c') diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 199659f7a87..e150f264c8e 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -982,7 +982,7 @@ static int tcp_metrics_flush_all(struct net *net) static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info) { struct tcpm_hash_bucket *hb; - struct tcp_metrics_block *tm; + struct tcp_metrics_block *tm, *tmlist = NULL; struct tcp_metrics_block __rcu **pp; struct inetpeer_addr daddr; unsigned int hash; @@ -999,17 +999,22 @@ static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info) hb = net->ipv4.tcp_metrics_hash + hash; pp = &hb->chain; spin_lock_bh(&tcp_metrics_lock); - for (tm = deref_locked_genl(*pp); tm; - pp = &tm->tcpm_next, tm = deref_locked_genl(*pp)) { + for (tm = deref_locked_genl(*pp); tm; tm = deref_locked_genl(*pp)) { if (addr_same(&tm->tcpm_daddr, &daddr)) { *pp = tm->tcpm_next; - break; + tm->tcpm_next = tmlist; + tmlist = tm; + } else { + pp = &tm->tcpm_next; } } spin_unlock_bh(&tcp_metrics_lock); - if (!tm) + if (!tmlist) return -ESRCH; - kfree_rcu(tm, rcu_head); + for (tm = tmlist; tm; tm = tmlist) { + tmlist = tm->tcpm_next; + kfree_rcu(tm, rcu_head); + } return 0; } -- cgit v1.2.3-70-g09d2 From 3e7013ddf55af7bc191792b8aea0c2b94fb0fef5 Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Wed, 8 Jan 2014 16:05:59 +0100 Subject: tcp: metrics: Allow selective get/del of tcp-metrics based on src IP We want to be able to get/del tcp-metrics based on the src IP. This patch adds the necessary parsing of the netlink attribute and if the source address is set, it will match on this one too. Signed-off-by: Christoph Paasch Signed-off-by: David S. Miller --- net/ipv4/tcp_metrics.c | 48 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 10 deletions(-) (limited to 'net/ipv4/tcp_metrics.c') diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index e150f264c8e..699a42faab9 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -877,44 +877,66 @@ done: return skb->len; } -static int parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr, - unsigned int *hash, int optional) +static int __parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr, + unsigned int *hash, int optional, int v4, int v6) { struct nlattr *a; - a = info->attrs[TCP_METRICS_ATTR_ADDR_IPV4]; + a = info->attrs[v4]; if (a) { addr->family = AF_INET; addr->addr.a4 = nla_get_be32(a); - *hash = (__force unsigned int) addr->addr.a4; + if (hash) + *hash = (__force unsigned int) addr->addr.a4; return 0; } - a = info->attrs[TCP_METRICS_ATTR_ADDR_IPV6]; + a = info->attrs[v6]; if (a) { if (nla_len(a) != sizeof(struct in6_addr)) return -EINVAL; addr->family = AF_INET6; memcpy(addr->addr.a6, nla_data(a), sizeof(addr->addr.a6)); - *hash = ipv6_addr_hash((struct in6_addr *) addr->addr.a6); + if (hash) + *hash = ipv6_addr_hash((struct in6_addr *) addr->addr.a6); return 0; } return optional ? 1 : -EAFNOSUPPORT; } +static int parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr, + unsigned int *hash, int optional) +{ + return __parse_nl_addr(info, addr, hash, optional, + TCP_METRICS_ATTR_ADDR_IPV4, + TCP_METRICS_ATTR_ADDR_IPV6); +} + +static int parse_nl_saddr(struct genl_info *info, struct inetpeer_addr *addr) +{ + return __parse_nl_addr(info, addr, NULL, 0, + TCP_METRICS_ATTR_SADDR_IPV4, + TCP_METRICS_ATTR_SADDR_IPV6); +} + static int tcp_metrics_nl_cmd_get(struct sk_buff *skb, struct genl_info *info) { struct tcp_metrics_block *tm; - struct inetpeer_addr daddr; + struct inetpeer_addr saddr, daddr; unsigned int hash; struct sk_buff *msg; struct net *net = genl_info_net(info); void *reply; int ret; + bool src = true; ret = parse_nl_addr(info, &daddr, &hash, 0); if (ret < 0) return ret; + ret = parse_nl_saddr(info, &saddr); + if (ret < 0) + src = false; + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -929,7 +951,8 @@ static int tcp_metrics_nl_cmd_get(struct sk_buff *skb, struct genl_info *info) rcu_read_lock(); for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; tm = rcu_dereference(tm->tcpm_next)) { - if (addr_same(&tm->tcpm_daddr, &daddr)) { + if (addr_same(&tm->tcpm_daddr, &daddr) && + (!src || addr_same(&tm->tcpm_saddr, &saddr))) { ret = tcp_metrics_fill_info(msg, tm); break; } @@ -984,23 +1007,28 @@ static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info) struct tcpm_hash_bucket *hb; struct tcp_metrics_block *tm, *tmlist = NULL; struct tcp_metrics_block __rcu **pp; - struct inetpeer_addr daddr; + struct inetpeer_addr saddr, daddr; unsigned int hash; struct net *net = genl_info_net(info); int ret; + bool src = true; ret = parse_nl_addr(info, &daddr, &hash, 1); if (ret < 0) return ret; if (ret > 0) return tcp_metrics_flush_all(net); + ret = parse_nl_saddr(info, &saddr); + if (ret < 0) + src = false; hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); hb = net->ipv4.tcp_metrics_hash + hash; pp = &hb->chain; spin_lock_bh(&tcp_metrics_lock); for (tm = deref_locked_genl(*pp); tm; tm = deref_locked_genl(*pp)) { - if (addr_same(&tm->tcpm_daddr, &daddr)) { + if (addr_same(&tm->tcpm_daddr, &daddr) && + (!src || addr_same(&tm->tcpm_saddr, &saddr))) { *pp = tm->tcpm_next; tm->tcpm_next = tmlist; tmlist = tm; -- cgit v1.2.3-70-g09d2 From 00ca9c5b2b11d44eaf20a4b647efc999734323ec Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Tue, 21 Jan 2014 13:30:26 +0100 Subject: tcp: metrics: Fix rcu-race when deleting multiple entries In bbf852b96ebdc6d1 I introduced the tmlist, which allows to delete multiple entries from the cache that match a specified destination if no source-IP is specified. However, as the cache is an RCU-list, we should not create this tmlist, as it will change the tcpm_next pointer of the element that will be deleted and so a thread iterating over the cache's entries while holding the RCU-lock might get "redirected" to this tmlist. This patch fixes this, by reverting back to the old behavior prior to bbf852b96ebdc6d1, which means that we simply change the tcpm_next pointer of the previous element (pp) to jump over the one we are deleting. The difference is that we call kfree_rcu() directly on the cache entry, which allows us to delete multiple entries from the list. Fixes: bbf852b96ebdc6d1 (tcp: metrics: Delete all entries matching a certain destination) Signed-off-by: Christoph Paasch Signed-off-by: David S. Miller --- net/ipv4/tcp_metrics.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'net/ipv4/tcp_metrics.c') diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index fa950941de6..9ae48b4a37d 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -1019,13 +1019,13 @@ static int tcp_metrics_flush_all(struct net *net) static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info) { struct tcpm_hash_bucket *hb; - struct tcp_metrics_block *tm, *tmlist = NULL; + struct tcp_metrics_block *tm; struct tcp_metrics_block __rcu **pp; struct inetpeer_addr saddr, daddr; unsigned int hash; struct net *net = genl_info_net(info); int ret; - bool src = true; + bool src = true, found = false; ret = parse_nl_addr(info, &daddr, &hash, 1); if (ret < 0) @@ -1044,19 +1044,15 @@ static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info) if (addr_same(&tm->tcpm_daddr, &daddr) && (!src || addr_same(&tm->tcpm_saddr, &saddr))) { *pp = tm->tcpm_next; - tm->tcpm_next = tmlist; - tmlist = tm; + kfree_rcu(tm, rcu_head); + found = true; } else { pp = &tm->tcpm_next; } } spin_unlock_bh(&tcp_metrics_lock); - if (!tmlist) + if (!found) return -ESRCH; - for (tm = tmlist; tm; tm = tmlist) { - tmlist = tm->tcpm_next; - kfree_rcu(tm, rcu_head); - } return 0; } -- cgit v1.2.3-70-g09d2 From 3ad88cf70af79e6f19c4f89dd85453ba4fdf425e Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Wed, 22 Jan 2014 13:58:44 +0100 Subject: tcp: metrics: Handle v6/v4-mapped sockets in tcp-metrics A socket may be v6/v4-mapped. In that case sk->sk_family is AF_INET6, but the IP being used is actually an IPv4-address. Current's tcp-metrics will thus represent it as an IPv6-address: root@server:~# ip tcp_metrics ::ffff:10.1.1.2 age 22.920sec rtt 18750us rttvar 15000us cwnd 10 10.1.1.2 age 47.970sec rtt 16250us rttvar 10000us cwnd 10 This patch modifies the tcp-metrics so that they are able to handle the v6/v4-mapped sockets correctly. Signed-off-by: Christoph Paasch Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_metrics.c | 64 +++++++++++++++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 24 deletions(-) (limited to 'net/ipv4/tcp_metrics.c') diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 9ae48b4a37d..d547075d830 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -274,24 +274,32 @@ static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock unsigned int hash; struct net *net; - saddr.family = tw->tw_family; - daddr.family = tw->tw_family; - switch (daddr.family) { - case AF_INET: + if (tw->tw_family == AF_INET) { + saddr.family = AF_INET; saddr.addr.a4 = tw->tw_rcv_saddr; + daddr.family = AF_INET; daddr.addr.a4 = tw->tw_daddr; hash = (__force unsigned int) daddr.addr.a4; - break; + } #if IS_ENABLED(CONFIG_IPV6) - case AF_INET6: - *(struct in6_addr *)saddr.addr.a6 = tw->tw_v6_rcv_saddr; - *(struct in6_addr *)daddr.addr.a6 = tw->tw_v6_daddr; - hash = ipv6_addr_hash(&tw->tw_v6_daddr); - break; + else if (tw->tw_family == AF_INET6) { + if (ipv6_addr_v4mapped(&tw->tw_v6_daddr)) { + saddr.family = AF_INET; + saddr.addr.a4 = tw->tw_rcv_saddr; + daddr.family = AF_INET; + daddr.addr.a4 = tw->tw_daddr; + hash = (__force unsigned int) daddr.addr.a4; + } else { + saddr.family = AF_INET6; + *(struct in6_addr *)saddr.addr.a6 = tw->tw_v6_rcv_saddr; + daddr.family = AF_INET6; + *(struct in6_addr *)daddr.addr.a6 = tw->tw_v6_daddr; + hash = ipv6_addr_hash(&tw->tw_v6_daddr); + } + } #endif - default: + else return NULL; - } net = twsk_net(tw); hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); @@ -314,24 +322,32 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, unsigned int hash; struct net *net; - saddr.family = sk->sk_family; - daddr.family = sk->sk_family; - switch (daddr.family) { - case AF_INET: + if (sk->sk_family == AF_INET) { + saddr.family = AF_INET; saddr.addr.a4 = inet_sk(sk)->inet_saddr; + daddr.family = AF_INET; daddr.addr.a4 = inet_sk(sk)->inet_daddr; hash = (__force unsigned int) daddr.addr.a4; - break; + } #if IS_ENABLED(CONFIG_IPV6) - case AF_INET6: - *(struct in6_addr *)saddr.addr.a6 = sk->sk_v6_rcv_saddr; - *(struct in6_addr *)daddr.addr.a6 = sk->sk_v6_daddr; - hash = ipv6_addr_hash(&sk->sk_v6_daddr); - break; + else if (sk->sk_family == AF_INET6) { + if (ipv6_addr_v4mapped(&sk->sk_v6_daddr)) { + saddr.family = AF_INET; + saddr.addr.a4 = inet_sk(sk)->inet_saddr; + daddr.family = AF_INET; + daddr.addr.a4 = inet_sk(sk)->inet_daddr; + hash = (__force unsigned int) daddr.addr.a4; + } else { + saddr.family = AF_INET6; + *(struct in6_addr *)saddr.addr.a6 = sk->sk_v6_rcv_saddr; + daddr.family = AF_INET6; + *(struct in6_addr *)daddr.addr.a6 = sk->sk_v6_daddr; + hash = ipv6_addr_hash(&sk->sk_v6_daddr); + } + } #endif - default: + else return NULL; - } net = dev_net(dst->dev); hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); -- cgit v1.2.3-70-g09d2