From 8f22ba61b5d730a870cd6b10d299d23280d060fa Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sun, 26 Jan 2014 11:39:26 +0000 Subject: RxRPC: do not unlock unheld spinlock in rxrpc_connect_exclusive() If rx->conn is not NULL, rxrpc_connect_exclusive() does not acquire the transport's client lock, but it still releases it. The patch adds locking of the spinlock to this path. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: David Howells --- net/rxrpc/ar-connection.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c index 4106ca95ec8..7bf5b5b9e8b 100644 --- a/net/rxrpc/ar-connection.c +++ b/net/rxrpc/ar-connection.c @@ -381,6 +381,8 @@ static int rxrpc_connect_exclusive(struct rxrpc_sock *rx, rxrpc_assign_connection_id(conn); rx->conn = conn; + } else { + spin_lock(&trans->client_lock); } /* we've got a connection with a free channel and we can now attach the -- cgit v1.2.3-70-g09d2 From 24a9981ee9c7266fa171bbece62062bf78c4d246 Mon Sep 17 00:00:00 2001 From: Tim Smith Date: Sun, 26 Jan 2014 11:39:28 +0000 Subject: af_rxrpc: Avoid setting up double-free on checksum error skb_kill_datagram() does not dequeue the skb when MSG_PEEK is unset. This leaves a freed skb on the queue, resulting in a double-free later. Without this patch, the following oops can occur: BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 IP: [] skb_dequeue+0x47/0x70 PGD 0 Oops: 0002 [#1] SMP Modules linked in: af_rxrpc ... 
CPU: 0 PID: 1191 Comm: listen Not tainted 3.12.0+ #4 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 task: ffff8801183536b0 ti: ffff880035c92000 task.ti: ffff880035c92000 RIP: 0010:[] skb_dequeue+0x47/0x70 RSP: 0018:ffff880035c93db8 EFLAGS: 00010097 RAX: 0000000000000246 RBX: ffff8800d2754b00 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000202 RDI: ffff8800d254c084 RBP: ffff880035c93dd0 R08: ffff880035c93cf0 R09: ffff8800d968f270 R10: 0000000000000000 R11: 0000000000000293 R12: ffff8800d254c070 R13: ffff8800d254c084 R14: ffff8800cd861240 R15: ffff880119b39720 FS: 00007f37a969d740(0000) GS:ffff88011fc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000008 CR3: 00000000d4413000 CR4: 00000000000006f0 Stack: ffff8800d254c000 ffff8800d254c070 ffff8800d254c2c0 ffff880035c93df8 ffffffffa041a5b8 ffff8800cd844c80 ffffffffa04385a0 ffff8800cd844cb0 ffff880035c93e18 ffffffff81546cef ffff8800d45fea00 0000000000000008 Call Trace: [] rxrpc_release+0x128/0x2e0 [af_rxrpc] [] sock_release+0x1f/0x80 [] sock_close+0x12/0x20 [] __fput+0xe1/0x230 [] ____fput+0xe/0x10 [] task_work_run+0xbc/0xe0 [] do_exit+0x2be/0xa10 [] ? 
do_munmap+0x297/0x3b0 [] do_group_exit+0x3f/0xa0 [] SyS_exit_group+0x14/0x20 [] system_call_fastpath+0x16/0x1b Signed-off-by: Tim Smith Signed-off-by: David Howells --- net/rxrpc/ar-recvmsg.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c index 898492a8d61..64cba2e3515 100644 --- a/net/rxrpc/ar-recvmsg.c +++ b/net/rxrpc/ar-recvmsg.c @@ -353,6 +353,10 @@ csum_copy_error: if (continue_call) rxrpc_put_call(continue_call); rxrpc_kill_skb(skb); + if (!(flags & MSG_PEEK)) { + if (skb_dequeue(&rx->sk.sk_receive_queue) != skb) + BUG(); + } skb_kill_datagram(&rx->sk, skb, flags); rxrpc_put_call(call); return -EAGAIN; -- cgit v1.2.3-70-g09d2 From 1ea427359dde1573815e19c411ce08fdf0c42cfe Mon Sep 17 00:00:00 2001 From: Tim Smith Date: Sun, 26 Jan 2014 11:39:31 +0000 Subject: af_rxrpc: Handle frames delivered from another VM On input, CHECKSUM_PARTIAL should be treated the same way as CHECKSUM_UNNECESSARY. See include/linux/skbuff.h. Signed-off-by: Tim Smith Signed-off-by: David Howells --- net/rxrpc/ar-recvmsg.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c index 64cba2e3515..34b5490dde6 100644 --- a/net/rxrpc/ar-recvmsg.c +++ b/net/rxrpc/ar-recvmsg.c @@ -180,7 +180,8 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock, if (copy > len - copied) copy = len - copied; - if (skb->ip_summed == CHECKSUM_UNNECESSARY) { + if (skb->ip_summed == CHECKSUM_UNNECESSARY || + skb->ip_summed == CHECKSUM_PARTIAL) { ret = skb_copy_datagram_iovec(skb, offset, msg->msg_iov, copy); } else { -- cgit v1.2.3-70-g09d2 From de960aa9ab4decc3304959f69533eef64d05d8e8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 26 Jan 2014 10:58:16 +0100 Subject: net: add and use skb_gso_transport_seglen() This moves part of Eric Dumazet's skb_gso_seglen helper from the tbf scheduler to the skbuff core so it may be reused by the upcoming ip forwarding path 
patch. Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + net/core/skbuff.c | 25 +++++++++++++++++++++++++ net/sched/sch_tbf.c | 13 +++---------- 3 files changed, 29 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 1f689e62e4c..f589c9af8cb 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2456,6 +2456,7 @@ void skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); void skb_scrub_packet(struct sk_buff *skb, bool xnet); +unsigned int skb_gso_transport_seglen(const struct sk_buff *skb); struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); struct skb_checksum_ops { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8f519dbb358..9ae6d11374d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -47,6 +47,8 @@ #include #include #include +#include +#include #include #ifdef CONFIG_NET_CLS_ACT #include @@ -3916,3 +3918,26 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) nf_reset_trace(skb); } EXPORT_SYMBOL_GPL(skb_scrub_packet); + +/** + * skb_gso_transport_seglen - Return length of individual segments of a gso packet + * + * @skb: GSO skb + * + * skb_gso_transport_seglen is used to determine the real size of the + * individual segments, including Layer4 headers (TCP/UDP). + * + * The MAC/L2 or network (IP, IPv6) headers are not accounted for. 
+ */ +unsigned int skb_gso_transport_seglen(const struct sk_buff *skb) +{ + const struct skb_shared_info *shinfo = skb_shinfo(skb); + unsigned int hdr_len; + + if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) + hdr_len = tcp_hdrlen(skb); + else + hdr_len = sizeof(struct udphdr); + return hdr_len + shinfo->gso_size; +} +EXPORT_SYMBOL_GPL(skb_gso_transport_seglen); diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index fbba5b0ec12..1cb413fead8 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -21,7 +21,6 @@ #include #include #include -#include /* Simple Token Bucket Filter. @@ -148,16 +147,10 @@ static u64 psched_ns_t2l(const struct psched_ratecfg *r, * Return length of individual segments of a gso packet, * including all headers (MAC, IP, TCP/UDP) */ -static unsigned int skb_gso_seglen(const struct sk_buff *skb) +static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb) { unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb); - const struct skb_shared_info *shinfo = skb_shinfo(skb); - - if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) - hdr_len += tcp_hdrlen(skb); - else - hdr_len += sizeof(struct udphdr); - return hdr_len + shinfo->gso_size; + return hdr_len + skb_gso_transport_seglen(skb); } /* GSO packet is too big, segment it so that tbf can transmit @@ -202,7 +195,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch) int ret; if (qdisc_pkt_len(skb) > q->max_size) { - if (skb_is_gso(skb) && skb_gso_seglen(skb) <= q->max_size) + if (skb_is_gso(skb) && skb_gso_mac_seglen(skb) <= q->max_size) return tbf_segment(skb, sch); return qdisc_reshape_fail(skb, sch); } -- cgit v1.2.3-70-g09d2 From 27d79f3b1071b2a2d58443a130e92c381c838e5d Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Mon, 27 Jan 2014 12:13:57 +0530 Subject: net: ipv4: Use PTR_ERR_OR_ZERO PTR_RET is deprecated. Use PTR_ERR_OR_ZERO instead. While at it also include missing err.h header. 
Signed-off-by: Sachin Kamat Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index c0e3cb72ad7..bd28f386bd0 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -930,7 +931,7 @@ int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, } rtnl_unlock(); - return PTR_RET(itn->fb_tunnel_dev); + return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev); } EXPORT_SYMBOL_GPL(ip_tunnel_init_net); -- cgit v1.2.3-70-g09d2 From a452ce345d63ddf92cd101e4196569f8718ad319 Mon Sep 17 00:00:00 2001 From: Holger Eitzenberger Date: Mon, 27 Jan 2014 10:33:18 +0100 Subject: net: Fix memory leak if TPROXY used with TCP early demux I see a memory leak when using a transparent HTTP proxy using TPROXY together with TCP early demux and Kernel v3.8.13.15 (Ubuntu stable): unreferenced object 0xffff88008cba4a40 (size 1696): comm "softirq", pid 0, jiffies 4294944115 (age 8907.520s) hex dump (first 32 bytes): 0a e0 20 6a 40 04 1b 37 92 be 32 e2 e8 b4 00 00 .. j@..7..2..... 02 00 07 01 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] kmem_cache_alloc+0xad/0xb9 [] sk_prot_alloc+0x29/0xc5 [] sk_clone_lock+0x14/0x283 [] inet_csk_clone_lock+0xf/0x7b [] netlink_broadcast+0x14/0x16 [] tcp_create_openreq_child+0x1b/0x4c3 [] tcp_v4_syn_recv_sock+0x38/0x25d [] tcp_check_req+0x25c/0x3d0 [] tcp_v4_do_rcv+0x287/0x40e [] ip_route_input_noref+0x843/0xa55 [] tcp_v4_rcv+0x4c9/0x725 [] ip_local_deliver_finish+0xe9/0x154 [] __netif_receive_skb+0x4b2/0x514 [] process_backlog+0xee/0x1c5 [] net_rx_action+0xa7/0x200 [] add_interrupt_randomness+0x39/0x157 But there are many more, resulting in the machine going OOM after some days. 
From looking at the TPROXY code, and with help from Florian, I see that the memory leak is introduced in tcp_v4_early_demux(): void tcp_v4_early_demux(struct sk_buff *skb) { /* ... */ iph = ip_hdr(skb); th = tcp_hdr(skb); if (th->doff < sizeof(struct tcphdr) / 4) return; sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo, iph->saddr, th->source, iph->daddr, ntohs(th->dest), skb->skb_iif); if (sk) { skb->sk = sk; where the socket is assigned unconditionally to skb->sk, also bumping the refcnt on it. This is problematic, because in our case the skb already has a socket assigned in the TPROXY target. This then results in the leak I see. The very same issue seems to exist with IPv6, but I haven't tested it. Reviewed-by: Florian Westphal Signed-off-by: Holger Eitzenberger Signed-off-by: David S. Miller --- net/ipv4/ip_input.c | 2 +- net/ipv6/ip6_input.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 054a3e97d82..3d4da2c16b6 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -314,7 +314,7 @@ static int ip_rcv_finish(struct sk_buff *skb) const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; - if (sysctl_ip_early_demux && !skb_dst(skb)) { + if (sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { const struct net_protocol *ipprot; int protocol = iph->protocol; diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 302d6fb1ff2..51d54dc376f 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -49,7 +49,7 @@ int ip6_rcv_finish(struct sk_buff *skb) { - if (sysctl_ip_early_demux && !skb_dst(skb)) { + if (sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { const struct inet6_protocol *ipprot; ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]); -- cgit v1.2.3-70-g09d2 From ce60e0c4df5f95086d5c2662c5cfa0beb8181c6d Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 7 Jan 2014 12:52:43 +1100 Subject: net: 6lowpan: fixup 
for code movement Signed-off-by: Stephen Rothwell Signed-off-by: David S. Miller --- net/ieee802154/6lowpan_iphc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ieee802154/6lowpan_iphc.c b/net/ieee802154/6lowpan_iphc.c index 083f905bf10..860aa2d445b 100644 --- a/net/ieee802154/6lowpan_iphc.c +++ b/net/ieee802154/6lowpan_iphc.c @@ -678,7 +678,7 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev, hc06_ptr += 3; } else { /* compress nothing */ - memcpy(hc06_ptr, &hdr, 4); + memcpy(hc06_ptr, hdr, 4); /* replace the top byte with new ECN | DSCP format */ *hc06_ptr = tmp; hc06_ptr += 4; -- cgit v1.2.3-70-g09d2 From c0c0c50ff7c3e331c90bab316d21f724fb9e1994 Mon Sep 17 00:00:00 2001 From: Duan Jiong Date: Tue, 28 Jan 2014 11:49:43 +0800 Subject: net: gre: use icmp_hdr() to get inner ip header When dealing with icmp messages, skb->data points to the ip header that triggered the sending of the icmp message. In gre_cisco_err(), parse_gre_header() is called, and iptunnel_pull_header() is called at the end of parse_gre_header() to pull the skb, so skb->data no longer points to the inner ip header. Unfortunately, ipgre_err() still needs the ip addresses in the inner ip header to look up the tunnel with ip_tunnel_lookup(). So just use icmp_hdr() to get the inner ip header instead of skb->data. Signed-off-by: Duan Jiong Signed-off-by: David S. 
Miller --- net/ipv4/ip_gre.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index e7a92fdb36f..ec4f762efda 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -178,7 +178,7 @@ static int ipgre_err(struct sk_buff *skb, u32 info, else itn = net_generic(net, ipgre_net_id); - iph = (const struct iphdr *)skb->data; + iph = (const struct iphdr *)(icmp_hdr(skb) + 1); t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags, iph->daddr, iph->saddr, tpi->key); -- cgit v1.2.3-70-g09d2 From 0f1a24c9a9f4682dd61f5c39b9952f915c5e952c Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Tue, 28 Jan 2014 16:30:52 -0500 Subject: llc: remove noisy WARN from llc_mac_hdr_init Sending malformed llc packets triggers this spew, which seems excessive. WARNING: CPU: 1 PID: 6917 at net/llc/llc_output.c:46 llc_mac_hdr_init+0x85/0x90 [llc]() device type not supported: 0 CPU: 1 PID: 6917 Comm: trinity-c1 Not tainted 3.13.0+ #95 0000000000000009 00000000007e257d ffff88009232fbe8 ffffffffac737325 ffff88009232fc30 ffff88009232fc20 ffffffffac06d28d ffff88020e07f180 ffff88009232fec0 00000000000000c8 0000000000000000 ffff88009232fe70 Call Trace: [] dump_stack+0x4e/0x7a [] warn_slowpath_common+0x7d/0xa0 [] warn_slowpath_fmt+0x5c/0x80 [] llc_mac_hdr_init+0x85/0x90 [llc] [] llc_build_and_send_ui_pkt+0x79/0x90 [llc] [] llc_ui_sendmsg+0x23a/0x400 [llc2] [] sock_sendmsg+0x9c/0xe0 [] ? might_fault+0x47/0x50 [] SYSC_sendto+0x121/0x1c0 [] ? syscall_trace_enter+0x207/0x270 [] SyS_sendto+0xe/0x10 [] tracesys+0xdd/0xe2 Until 2009, this was a printk, when it was changed in bf9ae5386bc: "llc: use dev_hard_header". Let userland figure out what -EINVAL means by itself. Signed-off-by: Dave Jones Signed-off-by: David S. 
Miller --- net/llc/llc_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/llc/llc_output.c b/net/llc/llc_output.c index 2dae8a5df23..94425e42121 100644 --- a/net/llc/llc_output.c +++ b/net/llc/llc_output.c @@ -43,7 +43,7 @@ int llc_mac_hdr_init(struct sk_buff *skb, rc = 0; break; default: - WARN(1, "device type not supported: %d\n", skb->dev->type); + break; } return rc; } -- cgit v1.2.3-70-g09d2 From 7fceb4de75f993a598d27af835e87b19b8be514e Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Wed, 29 Jan 2014 01:05:28 +0900 Subject: net: Fix warning on make htmldocs caused by skbuff.c This patch fixes the following warnings emitted while executing "make htmldocs": Warning(/net/core/skbuff.c:2164): No description found for parameter 'from' Warning(/net/core/skbuff.c:2164): Excess function parameter 'source' description in 'skb_zerocopy' Replacing "@source" with "@from" fixes the warnings. Signed-off-by: Masanari Iida Signed-off-by: David S. Miller --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 9ae6d11374d..5976ef0846b 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2121,7 +2121,7 @@ EXPORT_SYMBOL_GPL(skb_zerocopy_headlen); /** * skb_zerocopy - Zero copy skb to skb * @to: destination buffer - * @source: source buffer + * @from: source buffer * @len: number of bytes to copy from source buffer * @hlen: size of linear headroom in destination buffer * -- cgit v1.2.3-70-g09d2