From a2bd1140a264b561e38d99e656cd843c2d840e86 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 4 Apr 2012 16:10:46 -0700 Subject: netdma: adding alignment check for NETDMA ops This is the fallout from adding memcpy alignment workaround for certain IOATDMA hardware. NetDMA will only use DMA engine that can handle byte align ops. Acked-by: David S. Miller Signed-off-by: Dave Jiang Signed-off-by: Dan Williams --- net/ipv4/tcp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv4/tcp.c') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 22ef5f9fd2f..8712c5d4f91 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1450,7 +1450,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if ((available < target) && (len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) && !sysctl_tcp_low_latency && - dma_find_channel(DMA_MEMCPY)) { + net_dma_find_channel()) { preempt_enable_no_resched(); tp->ucopy.pinned_list = dma_pin_iovec_pages(msg->msg_iov, len); @@ -1665,7 +1665,7 @@ do_prequeue: if (!(flags & MSG_TRUNC)) { #ifdef CONFIG_NET_DMA if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) - tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY); + tp->ucopy.dma_chan = net_dma_find_channel(); if (tp->ucopy.dma_chan) { tp->ucopy.dma_cookie = dma_skb_copy_datagram_iovec( -- cgit v1.2.3-70-g09d2 From 5fb84b1428b271f8767e0eb3fcd7231896edfaa4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 10 Apr 2012 00:56:42 +0000 Subject: tcp: restore correct limit Commit c43b874d5d714f (tcp: properly initialize tcp memory limits) tried to fix a regression added in commits 4acb4190 & 3dc43e3, but still get it wrong. Result is machines with low amount of memory have too small tcp_rmem[2] value and slow tcp receives : Per socket limit being 1/1024 of memory instead of 1/128 in old kernels, so rcv window is capped to small values. Fix this to match comment and previous behavior. Signed-off-by: Eric Dumazet Cc: Jason Wang Cc: Glauber Costa Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net/ipv4/tcp.c') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 5d54ed30e82..7758a83f98f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3302,8 +3302,7 @@ void __init tcp_init(void) tcp_init_mem(&init_net); /* Set per-socket limits to no more than 1/128 the pressure threshold */ - limit = nr_free_buffer_pages() << (PAGE_SHIFT - 10); - limit = max(limit, 128UL); + limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7); max_share = min(4UL*1024*1024, limit); sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; -- cgit v1.2.3-70-g09d2 From a21d45726acacc963d8baddf74607d9b74e2b723 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 10 Apr 2012 20:30:48 +0000 Subject: tcp: avoid order-1 allocations on wifi and tx path Marc Merlin reported many order-1 allocations failures in TX path on its wireless setup, that dont make any sense with MTU=1500 network, and non SG capable hardware. After investigation, it turns out TCP uses sk_stream_alloc_skb() and used as a convention skb_tailroom(skb) to know how many bytes of data payload could be put in this skb (for non SG capable devices) Note : these skb used kmalloc-4096 (MTU=1500 + MAX_HEADER + sizeof(struct skb_shared_info) being above 2048) Later, mac80211 layer need to add some bytes at the tail of skb (IEEE80211_ENCRYPT_TAILROOM = 18 bytes) and since no more tailroom is available has to call pskb_expand_head() and request order-1 allocations. This patch changes sk_stream_alloc_skb() so that only sk->sk_prot->max_header bytes of headroom are reserved, and use a new skb field, avail_size to hold the data payload limit. This way, order-0 allocations done by TCP stack can leave more than 2 KB of tailroom and no more allocation is performed in mac80211 layer (or any layer needing some tailroom) avail_size is unioned with mark/dropcount, since mark will be set later in IP stack for output packets. Therefore, skb size is unchanged. Reported-by: Marc MERLIN Tested-by: Marc MERLIN Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 13 +++++++++++++ net/ipv4/tcp.c | 8 ++++---- net/ipv4/tcp_output.c | 2 +- 3 files changed, 18 insertions(+), 5 deletions(-) (limited to 'net/ipv4/tcp.c') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 33370271b8b..70a3f8d4911 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -481,6 +481,7 @@ struct sk_buff { union { __u32 mark; __u32 dropcount; + __u32 avail_size; }; sk_buff_data_t transport_header; @@ -1365,6 +1366,18 @@ static inline int skb_tailroom(const struct sk_buff *skb) return skb_is_nonlinear(skb) ? 0 : skb->end - skb->tail; } +/** + * skb_availroom - bytes at buffer end + * @skb: buffer to check + * + * Return the number of bytes of free space at the tail of an sk_buff + * allocated by sk_stream_alloc() + */ +static inline int skb_availroom(const struct sk_buff *skb) +{ + return skb_is_nonlinear(skb) ? 0 : skb->avail_size - skb->len; +} + /** * skb_reserve - adjust headroom * @skb: buffer to alter diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7758a83f98f..a5daa211a8e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -701,11 +701,12 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp) skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp); if (skb) { if (sk_wmem_schedule(sk, skb->truesize)) { + skb_reserve(skb, sk->sk_prot->max_header); /* * Make sure that we have exactly size bytes * available to the caller, no more, no less. */ - skb_reserve(skb, skb_tailroom(skb) - size); + skb->avail_size = size; return skb; } __kfree_skb(skb); @@ -995,10 +996,9 @@ new_segment: copy = seglen; /* Where to copy to? */ - if (skb_tailroom(skb) > 0) { + if (skb_availroom(skb) > 0) { /* We have some space in skb head. Superb! */ - if (copy > skb_tailroom(skb)) - copy = skb_tailroom(skb); + copy = min_t(int, copy, skb_availroom(skb)); err = skb_add_data_nocache(sk, skb, from, copy); if (err) goto do_fault; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 364784a9193..376b2cfbb68 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2060,7 +2060,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, /* Punt if not enough space exists in the first SKB for * the data in the second */ - if (skb->len > skb_tailroom(to)) + if (skb->len > skb_availroom(to)) break; if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp))) -- cgit v1.2.3-70-g09d2