From a9fe8e29945d56f35235a3a0fba99b4cf181d211 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Tue, 2 Sep 2014 15:49:26 +0200 Subject: ipv4: implement igmp_qrv sysctl to tune igmp robustness variable As in IPv6, people might want to increase the IGMP query robustness variable to make sure unsolicited state change reports aren't lost on the network. Add and document this new knob in the IGMP code. The RFCs have allowed tuning this parameter ever since the first IGMP RFC, so we also use this setting for all counters, including source-specific multicast. Also take over the sysctl value when bringing up the interface, instead of reusing the last value seen on the interface. Cc: Flavio Leitner Signed-off-by: Hannes Frederic Sowa Acked-by: Flavio Leitner Signed-off-by: David S. Miller --- net/ipv4/sysctl_net_ipv4.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'net/ipv4/sysctl_net_ipv4.c') diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 79a007c5255..45d156dacd6 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -450,6 +450,16 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, +#ifdef CONFIG_IP_MULTICAST + { + .procname = "igmp_qrv", + .data = &sysctl_igmp_qrv, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one + }, +#endif { .procname = "inet_peer_threshold", .data = &inet_peer_threshold, -- cgit v1.2.3-70-g09d2 From 49a601589caaf0e93194c0cc9b4ecddbe75dd2d5 Mon Sep 17 00:00:00 2001 From: Vincent Bernat Date: Fri, 5 Sep 2014 15:09:03 +0200 Subject: net/ipv4: bind ip_nonlocal_bind to current netns The net.ipv4.ip_nonlocal_bind sysctl was global to all network namespaces. This patch allows a different value to be set in each network namespace. Signed-off-by: Vincent Bernat Signed-off-by: David S. Miller --- include/net/ip.h | 2 -- include/net/netns/ipv4.h | 1 + net/ipv4/af_inet.c | 6 +----- net/ipv4/ping.c | 2 +- net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv6/af_inet6.c | 2 +- net/sctp/protocol.c | 2 +- 7 files changed, 12 insertions(+), 17 deletions(-) (limited to 'net/ipv4/sysctl_net_ipv4.c') diff --git a/include/net/ip.h b/include/net/ip.h index c8fd6112bd0..14bfc8e1bcf 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -229,8 +229,6 @@ static inline int inet_is_local_reserved_port(struct net *net, int port) } #endif -extern int sysctl_ip_nonlocal_bind; - /* From inetpeer.c */ extern int inet_peer_threshold; extern int inet_peer_minttl; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index aec5e12f9f1..24945cefc4f 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -76,6 +76,7 @@ struct netns_ipv4 { int sysctl_tcp_ecn; int sysctl_ip_no_pmtu_disc; int sysctl_ip_fwd_use_pmtu; + int sysctl_ip_nonlocal_bind; int sysctl_fwmark_reflect; int sysctl_tcp_fwmark_accept; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index d156b3c5f36..b537bd94906 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -418,10 +418,6 @@ int inet_release(struct socket *sock) } EXPORT_SYMBOL(inet_release); -/* It is off by default, see below. 
*/ -int sysctl_ip_nonlocal_bind __read_mostly; -EXPORT_SYMBOL(sysctl_ip_nonlocal_bind); - int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in *addr = (struct sockaddr_in *)uaddr; @@ -461,7 +457,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) * is temporarily down) */ err = -EADDRNOTAVAIL; - if (!sysctl_ip_nonlocal_bind && + if (!net->ipv4.sysctl_ip_nonlocal_bind && !(inet->freebind || inet->transparent) && addr->sin_addr.s_addr != htonl(INADDR_ANY) && chk_addr_ret != RTN_LOCAL && diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index a3c59a077a5..57f7c980413 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -311,7 +311,7 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, if (addr->sin_addr.s_addr == htonl(INADDR_ANY)) chk_addr_ret = RTN_LOCAL; - if ((sysctl_ip_nonlocal_bind == 0 && + if ((net->ipv4.sysctl_ip_nonlocal_bind == 0 && isk->freebind == 0 && isk->transparent == 0 && chk_addr_ret != RTN_LOCAL) || chk_addr_ret == RTN_MULTICAST || diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 45d156dacd6..1599966f463 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -285,13 +285,6 @@ static struct ctl_table ipv4_table[] = { .extra1 = &ip_ttl_min, .extra2 = &ip_ttl_max, }, - { - .procname = "ip_nonlocal_bind", - .data = &sysctl_ip_nonlocal_bind, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_syn_retries", .data = &sysctl_tcp_syn_retries, @@ -848,6 +841,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "ip_nonlocal_bind", + .data = &init_net.ipv4.sysctl_ip_nonlocal_bind, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { .procname = "fwmark_reflect", .data = &init_net.ipv4.sysctl_fwmark_reflect, diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index b9393e6a21f..e4865a3ebe1 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -302,7 +302,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) /* Reproduce AF_INET checks to make the bindings consistent */ v4addr = addr->sin6_addr.s6_addr32[3]; chk_addr_ret = inet_addr_type(net, v4addr); - if (!sysctl_ip_nonlocal_bind && + if (!net->ipv4.sysctl_ip_nonlocal_bind && !(inet->freebind || inet->transparent) && v4addr != htonl(INADDR_ANY) && chk_addr_ret != RTN_LOCAL && diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 6240834f4b9..9d2c6c9facb 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -366,7 +366,7 @@ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp) if (addr->v4.sin_addr.s_addr != htonl(INADDR_ANY) && ret != RTN_LOCAL && !sp->inet.freebind && - !sysctl_ip_nonlocal_bind) + !net->ipv4.sysctl_ip_nonlocal_bind) return 0; if (ipv6_only_sock(sctp_opt2sk(sp))) -- cgit v1.2.3-70-g09d2 From 4cdf507d54525842dfd9f6313fdafba039084046 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 19 Sep 2014 07:38:40 -0700 Subject: icmp: add a global rate limitation Current ICMP rate limiting uses the inetpeer cache, which is an AVL tree protected by a lock, meaning that hosts can be stuck hard if all CPUs want to check ICMP limits. When, say, a DNS or NTP server process is restarted, the inetpeer tree grows quickly and the machine is brought to its knees. iptables cannot help because the bottleneck happens before ICMP messages are even cooked and sent. 
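A global token bucket is a natural fit here: credit accrues at a fixed rate up to a burst ceiling, each transmitted ICMP message consumes one unit of credit, and the check itself is a few arithmetic operations under a single briefly-held spinlock. As a minimal sketch of the arithmetic (simplified from the icmp_global_allow() added below; an illustration, not the exact kernel code):

	u32 delta = min_t(u32, now - stamp, HZ);	/* clamp the refill window to one second */
	credit = min_t(u32, credit + sysctl_icmp_msgs_per_sec * delta / HZ,
		       sysctl_icmp_msgs_burst);		/* refill, capped at the burst size */
	if (credit) {
		credit--;				/* consume one token per ICMP message */
		/* allowed to send */
	}

(The real function below additionally skips the spinlock when the bucket is clearly empty, and only advances the stamp when at least one token was earned, so fractional credit is not lost.)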
This patch adds a new global limitation, using a token bucket filter, controlled by two new sysctls: icmp_msgs_per_sec - INTEGER Limit the maximal number of ICMP packets sent per second from this host. Only messages whose type matches icmp_ratemask are controlled by this limit. Default: 1000 icmp_msgs_burst - INTEGER icmp_msgs_per_sec controls the number of ICMP packets sent per second, while icmp_msgs_burst controls the burst size of these packets. Default: 50 Note that if we really want to send millions of ICMP messages per second, we might extend the idea and infrastructure added in commit 04ca6973f7c1a ("ip: make IP identifiers less predictable"): add a token bucket in the ip_idents hash and no longer rely on inetpeer. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 13 +++++++ include/net/ip.h | 4 +++ net/ipv4/icmp.c | 64 +++++++++++++++++++++++++++++++--- net/ipv4/sysctl_net_ipv4.c | 16 +++++++++ net/ipv6/icmp.c | 20 ++++++----- 5 files changed, 105 insertions(+), 12 deletions(-) (limited to 'net/ipv4/sysctl_net_ipv4.c') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 1b5581a30d7..c7a81ace35d 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -769,8 +769,21 @@ icmp_ratelimit - INTEGER icmp_ratemask (see below) to specific targets. 0 to disable any limiting, otherwise the minimal space between responses in milliseconds. + Note that another sysctl, icmp_msgs_per_sec, limits the number + of ICMP packets sent on all targets. Default: 1000 +icmp_msgs_per_sec - INTEGER + Limit the maximal number of ICMP packets sent per second from this host. + Only messages whose type matches icmp_ratemask (see below) are + controlled by this limit. + Default: 1000 + +icmp_msgs_burst - INTEGER + icmp_msgs_per_sec controls the number of ICMP packets sent per second, + while icmp_msgs_burst controls the burst size of these packets. + Default: 50 + icmp_ratemask - INTEGER Mask made of ICMP types for which rates are being limited. Significant bits: IHGFEDCBA9876543210 diff --git a/include/net/ip.h b/include/net/ip.h index 14bfc8e1bcf..fcd9068fb8c 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -548,6 +548,10 @@ void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport, u32 info); +bool icmp_global_allow(void); +extern int sysctl_icmp_msgs_per_sec; +extern int sysctl_icmp_msgs_burst; + #ifdef CONFIG_PROC_FS int ip_misc_proc_init(void); #endif diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index ea7d4afe820..5882f584910 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -231,12 +231,62 @@ static inline void icmp_xmit_unlock(struct sock *sk) spin_unlock_bh(&sk->sk_lock.slock); } +int sysctl_icmp_msgs_per_sec __read_mostly = 1000; +int sysctl_icmp_msgs_burst __read_mostly = 50; + +static struct { + spinlock_t lock; + u32 credit; + u32 stamp; +} icmp_global = { + .lock = __SPIN_LOCK_UNLOCKED(icmp_global.lock), +}; + +/** + * icmp_global_allow - Are we allowed to send one more ICMP message? + * + * Uses a token bucket to limit our ICMP messages to sysctl_icmp_msgs_per_sec. + * Returns false if we reached the limit and cannot send another packet. + * Note: called with BH disabled + */ +bool icmp_global_allow(void) +{ + u32 credit, delta, incr = 0, now = (u32)jiffies; + bool rc = false; + + /* Check if token bucket is empty and cannot be refilled + * without taking the spinlock. 
+ */ + if (!icmp_global.credit) { + delta = min_t(u32, now - icmp_global.stamp, HZ); + if (delta < HZ / 50) + return false; + } + + spin_lock(&icmp_global.lock); + delta = min_t(u32, now - icmp_global.stamp, HZ); + if (delta >= HZ / 50) { + incr = sysctl_icmp_msgs_per_sec * delta / HZ ; + if (incr) + icmp_global.stamp = now; + } + credit = min_t(u32, icmp_global.credit + incr, sysctl_icmp_msgs_burst); + if (credit) { + credit--; + rc = true; + } + icmp_global.credit = credit; + spin_unlock(&icmp_global.lock); + return rc; +} +EXPORT_SYMBOL(icmp_global_allow); + /* * Send an ICMP frame. */ -static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, - struct flowi4 *fl4, int type, int code) +static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, + struct flowi4 *fl4, int type, int code) { struct dst_entry *dst = &rt->dst; bool rc = true; @@ -253,8 +303,14 @@ static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, goto out; /* Limit if icmp type is enabled in ratemask. */ - if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) { - struct inet_peer *peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, 1); + if (!((1 << type) & net->ipv4.sysctl_icmp_ratemask)) + goto out; + + rc = false; + if (icmp_global_allow()) { + struct inet_peer *peer; + + peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, 1); rc = inet_peer_xrlim_allow(peer, net->ipv4.sysctl_icmp_ratelimit); if (peer) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 1599966f463..8a25509c35b 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -730,6 +730,22 @@ static struct ctl_table ipv4_table[] = { .extra1 = &zero, .extra2 = &one, }, + { + .procname = "icmp_msgs_per_sec", + .data = &sysctl_icmp_msgs_per_sec, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + }, + { + .procname = "icmp_msgs_burst", + .data = &sysctl_icmp_msgs_burst, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + }, { .procname = "udp_mem", .data = &sysctl_udp_mem, diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 394bb824fe4..141e1f3ab74 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -170,11 +170,11 @@ static bool is_ineligible(const struct sk_buff *skb) /* * Check the ICMP output rate limit */ -static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type, - struct flowi6 *fl6) +static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, + struct flowi6 *fl6) { - struct dst_entry *dst; struct net *net = sock_net(sk); + struct dst_entry *dst; bool res = false; /* Informational messages are not limited. */ @@ -199,16 +199,20 @@ static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type, } else { struct rt6_info *rt = (struct rt6_info *)dst; int tmo = net->ipv6.sysctl.icmpv6_time; - struct inet_peer *peer; /* Give more bandwidth to wider prefixes. 
*/ if (rt->rt6i_dst.plen < 128) tmo >>= ((128 - rt->rt6i_dst.plen)>>5); - peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); - res = inet_peer_xrlim_allow(peer, tmo); - if (peer) - inet_putpeer(peer); + if (icmp_global_allow()) { + struct inet_peer *peer; + + peer = inet_getpeer_v6(net->ipv6.peers, + &rt->rt6i_dst.addr, 1); + res = inet_peer_xrlim_allow(peer, tmo); + if (peer) + inet_putpeer(peer); + } } dst_release(dst); return res; -- cgit v1.2.3-70-g09d2 From 7bced397510ab569d31de4c70b39e13355046387 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 30 Dec 2013 12:37:29 -0800 Subject: net_dma: simple removal Per commit 77873803363c ("net_dma: mark broken"), net_dma is no longer used and there is no plan to fix it. This is the mechanical removal of the bits inside CONFIG_NET_DMA ifdef guards. Reverting the remainder of the net_dma-induced changes is deferred to subsequent patches. Marked for stable due to Roman's report of a memory leak in dma_pin_iovec_pages(): https://lkml.org/lkml/2014/9/3/177 Cc: Dave Jiang Cc: Vinod Koul Cc: David Whipple Cc: Alexander Duyck Cc: Reported-by: Roman Gushchin Acked-by: David S. Miller Signed-off-by: Dan Williams --- Documentation/ABI/removed/net_dma | 8 + Documentation/networking/ip-sysctl.txt | 6 - drivers/dma/Kconfig | 12 -- drivers/dma/Makefile | 1 - drivers/dma/dmaengine.c | 104 ------------ drivers/dma/ioat/dma.c | 1 - drivers/dma/ioat/dma.h | 7 - drivers/dma/ioat/dma_v2.c | 1 - drivers/dma/ioat/dma_v3.c | 1 - drivers/dma/iovlock.c | 280 --------------------------------- include/linux/dmaengine.h | 22 +-- include/linux/skbuff.h | 8 +- include/linux/tcp.h | 8 - include/net/netdma.h | 32 ---- include/net/sock.h | 19 +-- include/net/tcp.h | 8 - kernel/sysctl_binary.c | 1 - net/core/Makefile | 1 - net/core/dev.c | 10 -- net/core/sock.c | 6 - net/core/user_dma.c | 131 --------------- net/dccp/proto.c | 4 +- net/ipv4/sysctl_net_ipv4.c | 9 -- net/ipv4/tcp.c | 147 ++--------------- net/ipv4/tcp_input.c | 61 ------- net/ipv4/tcp_ipv4.c | 18 +-- net/ipv6/tcp_ipv6.c | 13 +- net/llc/af_llc.c | 10 +- 28 files changed, 35 insertions(+), 894 deletions(-) create mode 100644 Documentation/ABI/removed/net_dma delete mode 100644 drivers/dma/iovlock.c delete mode 100644 include/net/netdma.h delete mode 100644 net/core/user_dma.c (limited to 'net/ipv4/sysctl_net_ipv4.c') diff --git a/Documentation/ABI/removed/net_dma b/Documentation/ABI/removed/net_dma new file mode 100644 index 00000000000..a173aecc2f1 --- /dev/null +++ b/Documentation/ABI/removed/net_dma @@ -0,0 +1,8 @@ +What: tcp_dma_copybreak sysctl +Date: Removed in kernel v3.13 +Contact: Dan Williams +Description: + Formerly the lower limit, in bytes, of the size of socket reads + that would be offloaded to a DMA copy engine. Removed due to + coherency issues: the CPU could touch the buffers + while DMA was still in flight. diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index ab42c95f998..ea8f3b182e7 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -582,12 +582,6 @@ tcp_workaround_signed_windows - BOOLEAN not receive a window scaling option from them. Default: 0 -tcp_dma_copybreak - INTEGER - Lower limit, in bytes, of the size of socket reads that will be - offloaded to a DMA copy engine, if one is present in the system - and CONFIG_NET_DMA is enabled. - Default: 4096 - tcp_thin_linear_timeouts - BOOLEAN Enable dynamic triggering of linear timeouts for thin streams. 
If set, a check is performed upon retransmission by timeout to diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 605b016bcea..6b5f37e01a7 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -368,18 +368,6 @@ config DMA_OF comment "DMA Clients" depends on DMA_ENGINE -config NET_DMA - bool "Network: TCP receive copy offload" - depends on DMA_ENGINE && NET - default (INTEL_IOATDMA || FSL_DMA) - depends on BROKEN - help - This enables the use of DMA engines in the network stack to - offload receive copy-to-user operations, freeing CPU cycles. - - Say Y here if you enabled INTEL_IOATDMA or FSL_DMA, otherwise - say N. - config ASYNC_TX_DMA bool "Async_tx: Offload support for the async_tx api" depends on DMA_ENGINE diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index a029d0f4a1b..0c9dc754932 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -6,7 +6,6 @@ obj-$(CONFIG_DMA_VIRTUAL_CHANNELS) += virt-dma.o obj-$(CONFIG_DMA_ACPI) += acpi-dma.o obj-$(CONFIG_DMA_OF) += of-dma.o -obj-$(CONFIG_NET_DMA) += iovlock.o obj-$(CONFIG_INTEL_MID_DMAC) += intel_mid_dma.o obj-$(CONFIG_DMATEST) += dmatest.o obj-$(CONFIG_INTEL_IOATDMA) += ioat/ diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index ed610b49751..268de183b51 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -1084,110 +1084,6 @@ dmaengine_get_unmap_data(struct device *dev, int nr, gfp_t flags) } EXPORT_SYMBOL(dmaengine_get_unmap_data); -/** - * dma_async_memcpy_pg_to_pg - offloaded copy from page to page - * @chan: DMA channel to offload copy to - * @dest_pg: destination page - * @dest_off: offset in page to copy to - * @src_pg: source page - * @src_off: offset in page to copy from - * @len: length - * - * Both @dest_page/@dest_off and @src_page/@src_off must be mappable to a bus - * address according to the DMA mapping API rules for streaming mappings. - * Both @dest_page/@dest_off and @src_page/@src_off must stay memory resident - * (kernel memory or locked user space pages). - */ -dma_cookie_t -dma_async_memcpy_pg_to_pg(struct dma_chan *chan, struct page *dest_pg, - unsigned int dest_off, struct page *src_pg, unsigned int src_off, - size_t len) -{ - struct dma_device *dev = chan->device; - struct dma_async_tx_descriptor *tx; - struct dmaengine_unmap_data *unmap; - dma_cookie_t cookie; - unsigned long flags; - - unmap = dmaengine_get_unmap_data(dev->dev, 2, GFP_NOWAIT); - if (!unmap) - return -ENOMEM; - - unmap->to_cnt = 1; - unmap->from_cnt = 1; - unmap->addr[0] = dma_map_page(dev->dev, src_pg, src_off, len, - DMA_TO_DEVICE); - unmap->addr[1] = dma_map_page(dev->dev, dest_pg, dest_off, len, - DMA_FROM_DEVICE); - unmap->len = len; - flags = DMA_CTRL_ACK; - tx = dev->device_prep_dma_memcpy(chan, unmap->addr[1], unmap->addr[0], - len, flags); - - if (!tx) { - dmaengine_unmap_put(unmap); - return -ENOMEM; - } - - dma_set_unmap(tx, unmap); - cookie = tx->tx_submit(tx); - dmaengine_unmap_put(unmap); - - preempt_disable(); - __this_cpu_add(chan->local->bytes_transferred, len); - __this_cpu_inc(chan->local->memcpy_count); - preempt_enable(); - - return cookie; -} -EXPORT_SYMBOL(dma_async_memcpy_pg_to_pg); - -/** - * dma_async_memcpy_buf_to_buf - offloaded copy between virtual addresses - * @chan: DMA channel to offload copy to - * @dest: destination address (virtual) - * @src: source address (virtual) - * @len: length - * - * Both @dest and @src must be mappable to a bus address according to the - * DMA mapping API rules for streaming mappings. 
- * Both @dest and @src must stay memory resident (kernel memory or locked - * user space pages). - */ -dma_cookie_t -dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest, - void *src, size_t len) -{ - return dma_async_memcpy_pg_to_pg(chan, virt_to_page(dest), - (unsigned long) dest & ~PAGE_MASK, - virt_to_page(src), - (unsigned long) src & ~PAGE_MASK, len); -} -EXPORT_SYMBOL(dma_async_memcpy_buf_to_buf); - -/** - * dma_async_memcpy_buf_to_pg - offloaded copy from address to page - * @chan: DMA channel to offload copy to - * @page: destination page - * @offset: offset in page to copy to - * @kdata: source address (virtual) - * @len: length - * - * Both @page/@offset and @kdata must be mappable to a bus address according - * to the DMA mapping API rules for streaming mappings. - * Both @page/@offset and @kdata must stay memory resident (kernel memory or - * locked user space pages) - */ -dma_cookie_t -dma_async_memcpy_buf_to_pg(struct dma_chan *chan, struct page *page, - unsigned int offset, void *kdata, size_t len) -{ - return dma_async_memcpy_pg_to_pg(chan, page, offset, - virt_to_page(kdata), - (unsigned long) kdata & ~PAGE_MASK, len); -} -EXPORT_SYMBOL(dma_async_memcpy_buf_to_pg); - void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx, struct dma_chan *chan) { diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index b76c1485933..940c1502a8b 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -1222,7 +1222,6 @@ int ioat1_dma_probe(struct ioatdma_device *device, int dca) err = ioat_probe(device); if (err) return err; - ioat_set_tcp_copy_break(4096); err = ioat_register(device); if (err) return err; diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index e982f00a984..d63f68b1aa3 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -214,13 +214,6 @@ __dump_desc_dbg(struct ioat_chan_common *chan, struct ioat_dma_descriptor *hw, #define dump_desc_dbg(c, d) \ ({ if (d) __dump_desc_dbg(&c->base, d->hw, &d->txd, desc_id(d)); 0; }) -static inline void ioat_set_tcp_copy_break(unsigned long copybreak) -{ - #ifdef CONFIG_NET_DMA - sysctl_tcp_dma_copybreak = copybreak; - #endif -} - static inline struct ioat_chan_common * ioat_chan_by_index(struct ioatdma_device *device, int index) { diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c index 2ce9be49860..695483e6be3 100644 --- a/drivers/dma/ioat/dma_v2.c +++ b/drivers/dma/ioat/dma_v2.c @@ -900,7 +900,6 @@ int ioat2_dma_probe(struct ioatdma_device *device, int dca) err = ioat_probe(device); if (err) return err; - ioat_set_tcp_copy_break(2048); list_for_each_entry(c, &dma->channels, device_node) { chan = to_chan_common(c); diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index 85971d6e964..895f869d6c2 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -1655,7 +1655,6 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca) err = ioat_probe(device); if (err) return err; - ioat_set_tcp_copy_break(262144); list_for_each_entry(c, &dma->channels, device_node) { chan = to_chan_common(c); diff --git a/drivers/dma/iovlock.c b/drivers/dma/iovlock.c deleted file mode 100644 index bb48a57c2fc..00000000000 --- a/drivers/dma/iovlock.c +++ /dev/null @@ -1,280 +0,0 @@ -/* - * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved. - * Portions based on net/core/datagram.c and copyrighted by their authors. 
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 - * Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * The full GNU General Public License is included in this distribution in the - * file called COPYING. - */ - -/* - * This code allows the net stack to make use of a DMA engine for - * skb to iovec copies. - */ - -#include -#include -#include -#include /* for memcpy_toiovec */ -#include -#include - -static int num_pages_spanned(struct iovec *iov) -{ - return - ((PAGE_ALIGN((unsigned long)iov->iov_base + iov->iov_len) - - ((unsigned long)iov->iov_base & PAGE_MASK)) >> PAGE_SHIFT); -} - -/* - * Pin down all the iovec pages needed for len bytes. - * Return a struct dma_pinned_list to keep track of pages pinned down. - * - * We are allocating a single chunk of memory, and then carving it up into - * 3 sections, the latter 2 whose size depends on the number of iovecs and the - * total number of pages, respectively. - */ -struct dma_pinned_list *dma_pin_iovec_pages(struct iovec *iov, size_t len) -{ - struct dma_pinned_list *local_list; - struct page **pages; - int i; - int ret; - int nr_iovecs = 0; - int iovec_len_used = 0; - int iovec_pages_used = 0; - - /* don't pin down non-user-based iovecs */ - if (segment_eq(get_fs(), KERNEL_DS)) - return NULL; - - /* determine how many iovecs/pages there are, up front */ - do { - iovec_len_used += iov[nr_iovecs].iov_len; - iovec_pages_used += num_pages_spanned(&iov[nr_iovecs]); - nr_iovecs++; - } while (iovec_len_used < len); - - /* single kmalloc for pinned list, page_list[], and the page arrays */ - local_list = kmalloc(sizeof(*local_list) - + (nr_iovecs * sizeof (struct dma_page_list)) - + (iovec_pages_used * sizeof (struct page*)), GFP_KERNEL); - if (!local_list) - goto out; - - /* list of pages starts right after the page list array */ - pages = (struct page **) &local_list->page_list[nr_iovecs]; - - local_list->nr_iovecs = 0; - - for (i = 0; i < nr_iovecs; i++) { - struct dma_page_list *page_list = &local_list->page_list[i]; - - len -= iov[i].iov_len; - - if (!access_ok(VERIFY_WRITE, iov[i].iov_base, iov[i].iov_len)) - goto unpin; - - page_list->nr_pages = num_pages_spanned(&iov[i]); - page_list->base_address = iov[i].iov_base; - - page_list->pages = pages; - pages += page_list->nr_pages; - - /* pin pages down */ - down_read(¤t->mm->mmap_sem); - ret = get_user_pages( - current, - current->mm, - (unsigned long) iov[i].iov_base, - page_list->nr_pages, - 1, /* write */ - 0, /* force */ - page_list->pages, - NULL); - up_read(¤t->mm->mmap_sem); - - if (ret != page_list->nr_pages) - goto unpin; - - local_list->nr_iovecs = i + 1; - } - - return local_list; - -unpin: - dma_unpin_iovec_pages(local_list); -out: - return NULL; -} - -void dma_unpin_iovec_pages(struct dma_pinned_list *pinned_list) -{ - int i, j; - - if (!pinned_list) - return; - - for (i = 0; i < pinned_list->nr_iovecs; i++) { - struct dma_page_list *page_list = 
&pinned_list->page_list[i]; - for (j = 0; j < page_list->nr_pages; j++) { - set_page_dirty_lock(page_list->pages[j]); - page_cache_release(page_list->pages[j]); - } - } - - kfree(pinned_list); -} - - -/* - * We have already pinned down the pages we will be using in the iovecs. - * Each entry in iov array has corresponding entry in pinned_list->page_list. - * Using array indexing to keep iov[] and page_list[] in sync. - * Initial elements in iov array's iov->iov_len will be 0 if already copied into - * by another call. - * iov array length remaining guaranteed to be bigger than len. - */ -dma_cookie_t dma_memcpy_to_iovec(struct dma_chan *chan, struct iovec *iov, - struct dma_pinned_list *pinned_list, unsigned char *kdata, size_t len) -{ - int iov_byte_offset; - int copy; - dma_cookie_t dma_cookie = 0; - int iovec_idx; - int page_idx; - - if (!chan) - return memcpy_toiovec(iov, kdata, len); - - iovec_idx = 0; - while (iovec_idx < pinned_list->nr_iovecs) { - struct dma_page_list *page_list; - - /* skip already used-up iovecs */ - while (!iov[iovec_idx].iov_len) - iovec_idx++; - - page_list = &pinned_list->page_list[iovec_idx]; - - iov_byte_offset = ((unsigned long)iov[iovec_idx].iov_base & ~PAGE_MASK); - page_idx = (((unsigned long)iov[iovec_idx].iov_base & PAGE_MASK) - - ((unsigned long)page_list->base_address & PAGE_MASK)) >> PAGE_SHIFT; - - /* break up copies to not cross page boundary */ - while (iov[iovec_idx].iov_len) { - copy = min_t(int, PAGE_SIZE - iov_byte_offset, len); - copy = min_t(int, copy, iov[iovec_idx].iov_len); - - dma_cookie = dma_async_memcpy_buf_to_pg(chan, - page_list->pages[page_idx], - iov_byte_offset, - kdata, - copy); - /* poll for a descriptor slot */ - if (unlikely(dma_cookie < 0)) { - dma_async_issue_pending(chan); - continue; - } - - len -= copy; - iov[iovec_idx].iov_len -= copy; - iov[iovec_idx].iov_base += copy; - - if (!len) - return dma_cookie; - - kdata += copy; - iov_byte_offset = 0; - page_idx++; - } - iovec_idx++; - } - - /* really bad if we ever run out of iovecs */ - BUG(); - return -EFAULT; -} - -dma_cookie_t dma_memcpy_pg_to_iovec(struct dma_chan *chan, struct iovec *iov, - struct dma_pinned_list *pinned_list, struct page *page, - unsigned int offset, size_t len) -{ - int iov_byte_offset; - int copy; - dma_cookie_t dma_cookie = 0; - int iovec_idx; - int page_idx; - int err; - - /* this needs as-yet-unimplemented buf-to-buff, so punt. 
*/ - /* TODO: use dma for this */ - if (!chan || !pinned_list) { - u8 *vaddr = kmap(page); - err = memcpy_toiovec(iov, vaddr + offset, len); - kunmap(page); - return err; - } - - iovec_idx = 0; - while (iovec_idx < pinned_list->nr_iovecs) { - struct dma_page_list *page_list; - - /* skip already used-up iovecs */ - while (!iov[iovec_idx].iov_len) - iovec_idx++; - - page_list = &pinned_list->page_list[iovec_idx]; - - iov_byte_offset = ((unsigned long)iov[iovec_idx].iov_base & ~PAGE_MASK); - page_idx = (((unsigned long)iov[iovec_idx].iov_base & PAGE_MASK) - - ((unsigned long)page_list->base_address & PAGE_MASK)) >> PAGE_SHIFT; - - /* break up copies to not cross page boundary */ - while (iov[iovec_idx].iov_len) { - copy = min_t(int, PAGE_SIZE - iov_byte_offset, len); - copy = min_t(int, copy, iov[iovec_idx].iov_len); - - dma_cookie = dma_async_memcpy_pg_to_pg(chan, - page_list->pages[page_idx], - iov_byte_offset, - page, - offset, - copy); - /* poll for a descriptor slot */ - if (unlikely(dma_cookie < 0)) { - dma_async_issue_pending(chan); - continue; - } - - len -= copy; - iov[iovec_idx].iov_len -= copy; - iov[iovec_idx].iov_base += copy; - - if (!len) - return dma_cookie; - - offset += copy; - iov_byte_offset = 0; - page_idx++; - } - iovec_idx++; - } - - /* really bad if we ever run out of iovecs */ - BUG(); - return -EFAULT; -} diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index c5c92d59e53..3e382ecd192 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -903,18 +903,6 @@ static inline void dmaengine_put(void) } #endif -#ifdef CONFIG_NET_DMA -#define net_dmaengine_get() dmaengine_get() -#define net_dmaengine_put() dmaengine_put() -#else -static inline void net_dmaengine_get(void) -{ -} -static inline void net_dmaengine_put(void) -{ -} -#endif - #ifdef CONFIG_ASYNC_TX_DMA #define async_dmaengine_get() dmaengine_get() #define async_dmaengine_put() dmaengine_put() @@ -936,16 +924,8 @@ async_dma_find_channel(enum dma_transaction_type type) return NULL; } #endif /* CONFIG_ASYNC_TX_DMA */ - -dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan, - void *dest, void *src, size_t len); -dma_cookie_t dma_async_memcpy_buf_to_pg(struct dma_chan *chan, - struct page *page, unsigned int offset, void *kdata, size_t len); -dma_cookie_t dma_async_memcpy_pg_to_pg(struct dma_chan *chan, - struct page *dest_pg, unsigned int dest_off, struct page *src_pg, - unsigned int src_off, size_t len); void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx, - struct dma_chan *chan); + struct dma_chan *chan); static inline void async_tx_ack(struct dma_async_tx_descriptor *tx) { diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5e1e6f2d98c..bdbf7afad6b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include @@ -515,11 +514,8 @@ struct sk_buff { /* 6/8 bit hole (depending on ndisc_nodetype presence) */ kmemcheck_bitfield_end(flags2); -#if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL - union { - unsigned int napi_id; - dma_cookie_t dma_cookie; - }; +#ifdef CONFIG_NET_RX_BUSY_POLL + unsigned int napi_id; #endif #ifdef CONFIG_NETWORK_SECMARK __u32 secmark; diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 4ad0706d40e..90895b8dc7f 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -19,7 +19,6 @@ #include -#include #include #include #include @@ -169,13 +168,6 @@ struct tcp_sock { struct iovec *iov; int memory; int len; 
-#ifdef CONFIG_NET_DMA - /* members for async copy */ - struct dma_chan *dma_chan; - int wakeup; - struct dma_pinned_list *pinned_list; - dma_cookie_t dma_cookie; -#endif } ucopy; u32 snd_wl1; /* Sequence for window update */ diff --git a/include/net/netdma.h b/include/net/netdma.h deleted file mode 100644 index 8ba8ce284ee..00000000000 --- a/include/net/netdma.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 - * Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * The full GNU General Public License is included in this distribution in the - * file called COPYING. - */ -#ifndef NETDMA_H -#define NETDMA_H -#ifdef CONFIG_NET_DMA -#include -#include - -int dma_skb_copy_datagram_iovec(struct dma_chan* chan, - struct sk_buff *skb, int offset, struct iovec *to, - size_t len, struct dma_pinned_list *pinned_list); - -#endif /* CONFIG_NET_DMA */ -#endif /* NETDMA_H */ diff --git a/include/net/sock.h b/include/net/sock.h index b9586a137ca..3353b47f3d4 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -231,7 +231,6 @@ struct cg_proto; * @sk_receive_queue: incoming packets * @sk_wmem_alloc: transmit queue bytes committed * @sk_write_queue: Packet sending queue - * @sk_async_wait_queue: DMA copied packets * @sk_omem_alloc: "o" is "option" or "other" * @sk_wmem_queued: persistent queue size * @sk_forward_alloc: space allocated forward @@ -354,10 +353,6 @@ struct sock { struct sk_filter __rcu *sk_filter; struct socket_wq __rcu *sk_wq; -#ifdef CONFIG_NET_DMA - struct sk_buff_head sk_async_wait_queue; -#endif - #ifdef CONFIG_XFRM struct xfrm_policy *sk_policy[2]; #endif @@ -2214,27 +2209,15 @@ void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags); * sk_eat_skb - Release a skb if it is no longer needed * @sk: socket to eat this skb from * @skb: socket buffer to eat - * @copied_early: flag indicating whether DMA operations copied this data early * * This routine must be called with interrupts disabled or with the socket * locked so that the sk_buff queue operation is ok. 
*/ -#ifdef CONFIG_NET_DMA -static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb, bool copied_early) -{ - __skb_unlink(skb, &sk->sk_receive_queue); - if (!copied_early) - __kfree_skb(skb); - else - __skb_queue_tail(&sk->sk_async_wait_queue, skb); -} -#else -static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb, bool copied_early) +static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb) { __skb_unlink(skb, &sk->sk_receive_queue); __kfree_skb(skb); } -#endif static inline struct net *sock_net(const struct sock *sk) diff --git a/include/net/tcp.h b/include/net/tcp.h index 8c4dd63134d..2c2f24ffa38 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include @@ -267,7 +266,6 @@ extern int sysctl_tcp_adv_win_scale; extern int sysctl_tcp_tw_reuse; extern int sysctl_tcp_frto; extern int sysctl_tcp_low_latency; -extern int sysctl_tcp_dma_copybreak; extern int sysctl_tcp_nometrics_save; extern int sysctl_tcp_moderate_rcvbuf; extern int sysctl_tcp_tso_win_divisor; @@ -1023,12 +1021,6 @@ static inline void tcp_prequeue_init(struct tcp_sock *tp) tp->ucopy.len = 0; tp->ucopy.memory = 0; skb_queue_head_init(&tp->ucopy.prequeue); -#ifdef CONFIG_NET_DMA - tp->ucopy.dma_chan = NULL; - tp->ucopy.wakeup = 0; - tp->ucopy.pinned_list = NULL; - tp->ucopy.dma_cookie = 0; -#endif } bool tcp_prequeue(struct sock *sk, struct sk_buff *skb); diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index 653cbbd9e7a..d457005aced 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -390,7 +390,6 @@ static const struct bin_table bin_net_ipv4_table[] = { { CTL_INT, NET_TCP_MTU_PROBING, "tcp_mtu_probing" }, { CTL_INT, NET_TCP_BASE_MSS, "tcp_base_mss" }, { CTL_INT, NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS, "tcp_workaround_signed_windows" }, - { CTL_INT, NET_TCP_DMA_COPYBREAK, "tcp_dma_copybreak" }, { CTL_INT, NET_TCP_SLOW_START_AFTER_IDLE, "tcp_slow_start_after_idle" }, { CTL_INT, NET_CIPSOV4_CACHE_ENABLE, "cipso_cache_enable" }, { CTL_INT, NET_CIPSOV4_CACHE_BUCKET_SIZE, "cipso_cache_bucket_size" }, diff --git a/net/core/Makefile b/net/core/Makefile index 9628c20acff..5038f1ea034 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -16,7 +16,6 @@ obj-y += net-sysfs.o obj-$(CONFIG_PROC_FS) += net-procfs.o obj-$(CONFIG_NET_PKTGEN) += pktgen.o obj-$(CONFIG_NETPOLL) += netpoll.o -obj-$(CONFIG_NET_DMA) += user_dma.o obj-$(CONFIG_FIB_RULES) += fib_rules.o obj-$(CONFIG_TRACEPOINTS) += net-traces.o obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o diff --git a/net/core/dev.c b/net/core/dev.c index b1b0c8d4d7d..5e37e9abe8c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1266,7 +1266,6 @@ static int __dev_open(struct net_device *dev) clear_bit(__LINK_STATE_START, &dev->state); else { dev->flags |= IFF_UP; - net_dmaengine_get(); dev_set_rx_mode(dev); dev_activate(dev); add_device_randomness(dev->dev_addr, dev->addr_len); @@ -1342,7 +1341,6 @@ static int __dev_close_many(struct list_head *head) ops->ndo_stop(dev); dev->flags &= ~IFF_UP; - net_dmaengine_put(); } return 0; @@ -4405,14 +4403,6 @@ static void net_rx_action(struct softirq_action *h) out: net_rps_action_and_irq_enable(sd); -#ifdef CONFIG_NET_DMA - /* - * There may not be any more sk_buffs coming right now, so push - * any pending DMA copies to hardware - */ - dma_issue_pending_all(); -#endif - return; softnet_break: diff --git a/net/core/sock.c b/net/core/sock.c index c0fc6bdad1e..2f143c3b190 100644 --- a/net/core/sock.c +++ 
b/net/core/sock.c @@ -1452,9 +1452,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) atomic_set(&newsk->sk_omem_alloc, 0); skb_queue_head_init(&newsk->sk_receive_queue); skb_queue_head_init(&newsk->sk_write_queue); -#ifdef CONFIG_NET_DMA - skb_queue_head_init(&newsk->sk_async_wait_queue); -#endif spin_lock_init(&newsk->sk_dst_lock); rwlock_init(&newsk->sk_callback_lock); @@ -2265,9 +2262,6 @@ void sock_init_data(struct socket *sock, struct sock *sk) skb_queue_head_init(&sk->sk_receive_queue); skb_queue_head_init(&sk->sk_write_queue); skb_queue_head_init(&sk->sk_error_queue); -#ifdef CONFIG_NET_DMA - skb_queue_head_init(&sk->sk_async_wait_queue); -#endif sk->sk_send_head = NULL; diff --git a/net/core/user_dma.c b/net/core/user_dma.c deleted file mode 100644 index 1b5fefdb819..00000000000 --- a/net/core/user_dma.c +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved. - * Portions based on net/core/datagram.c and copyrighted by their authors. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 - * Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * The full GNU General Public License is included in this distribution in the - * file called COPYING. - */ - -/* - * This code allows the net stack to make use of a DMA engine for - * skb to iovec copies. - */ - -#include -#include -#include -#include -#include - -#define NET_DMA_DEFAULT_COPYBREAK 4096 - -int sysctl_tcp_dma_copybreak = NET_DMA_DEFAULT_COPYBREAK; -EXPORT_SYMBOL(sysctl_tcp_dma_copybreak); - -/** - * dma_skb_copy_datagram_iovec - Copy a datagram to an iovec. - * @skb - buffer to copy - * @offset - offset in the buffer to start copying from - * @iovec - io vector to copy to - * @len - amount of data to copy from buffer to iovec - * @pinned_list - locked iovec buffer data - * - * Note: the iovec is modified during the copy. - */ -int dma_skb_copy_datagram_iovec(struct dma_chan *chan, - struct sk_buff *skb, int offset, struct iovec *to, - size_t len, struct dma_pinned_list *pinned_list) -{ - int start = skb_headlen(skb); - int i, copy = start - offset; - struct sk_buff *frag_iter; - dma_cookie_t cookie = 0; - - /* Copy header. */ - if (copy > 0) { - if (copy > len) - copy = len; - cookie = dma_memcpy_to_iovec(chan, to, pinned_list, - skb->data + offset, copy); - if (cookie < 0) - goto fault; - len -= copy; - if (len == 0) - goto end; - offset += copy; - } - - /* Copy paged appendix. Hmm... why does this look so complicated? 
*/ - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - int end; - const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - - WARN_ON(start > offset + len); - - end = start + skb_frag_size(frag); - copy = end - offset; - if (copy > 0) { - struct page *page = skb_frag_page(frag); - - if (copy > len) - copy = len; - - cookie = dma_memcpy_pg_to_iovec(chan, to, pinned_list, page, - frag->page_offset + offset - start, copy); - if (cookie < 0) - goto fault; - len -= copy; - if (len == 0) - goto end; - offset += copy; - } - start = end; - } - - skb_walk_frags(skb, frag_iter) { - int end; - - WARN_ON(start > offset + len); - - end = start + frag_iter->len; - copy = end - offset; - if (copy > 0) { - if (copy > len) - copy = len; - cookie = dma_skb_copy_datagram_iovec(chan, frag_iter, - offset - start, - to, copy, - pinned_list); - if (cookie < 0) - goto fault; - len -= copy; - if (len == 0) - goto end; - offset += copy; - } - start = end; - } - -end: - if (!len) { - skb->dma_cookie = cookie; - return cookie; - } - -fault: - return -EFAULT; -} diff --git a/net/dccp/proto.c b/net/dccp/proto.c index eb892b4f481..f9076f295b1 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -848,7 +848,7 @@ int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, default: dccp_pr_debug("packet_type=%s\n", dccp_packet_name(dh->dccph_type)); - sk_eat_skb(sk, skb, false); + sk_eat_skb(sk, skb); } verify_sock_status: if (sock_flag(sk, SOCK_DONE)) { @@ -905,7 +905,7 @@ verify_sock_status: len = skb->len; found_fin_ok: if (!(flags & MSG_PEEK)) - sk_eat_skb(sk, skb, false); + sk_eat_skb(sk, skb); break; } while (1); out: diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 44eba052b43..c3d2a48481f 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -635,15 +635,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, -#ifdef CONFIG_NET_DMA - { - .procname = "tcp_dma_copybreak", - .data = &sysctl_tcp_dma_copybreak, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, -#endif { .procname = "tcp_slow_start_after_idle", .data = &sysctl_tcp_slow_start_after_idle, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 97c8f5620c4..28595a364f0 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -274,7 +274,6 @@ #include #include #include -#include #include #include @@ -1454,39 +1453,6 @@ static void tcp_prequeue_process(struct sock *sk) tp->ucopy.memory = 0; } -#ifdef CONFIG_NET_DMA -static void tcp_service_net_dma(struct sock *sk, bool wait) -{ - dma_cookie_t done, used; - dma_cookie_t last_issued; - struct tcp_sock *tp = tcp_sk(sk); - - if (!tp->ucopy.dma_chan) - return; - - last_issued = tp->ucopy.dma_cookie; - dma_async_issue_pending(tp->ucopy.dma_chan); - - do { - if (dma_async_is_tx_complete(tp->ucopy.dma_chan, - last_issued, &done, - &used) == DMA_COMPLETE) { - /* Safe to free early-copied skbs now */ - __skb_queue_purge(&sk->sk_async_wait_queue); - break; - } else { - struct sk_buff *skb; - while ((skb = skb_peek(&sk->sk_async_wait_queue)) && - (dma_async_is_complete(skb->dma_cookie, done, - used) == DMA_COMPLETE)) { - __skb_dequeue(&sk->sk_async_wait_queue); - kfree_skb(skb); - } - } - } while (wait); -} -#endif - static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) { struct sk_buff *skb; @@ -1504,7 +1470,7 @@ static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) * splitted a fat GRO packet, while we released socket lock * in skb_splice_bits() */ - sk_eat_skb(sk, 
skb, false); + sk_eat_skb(sk, skb); } return NULL; } @@ -1570,11 +1536,11 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, continue; } if (tcp_hdr(skb)->fin) { - sk_eat_skb(sk, skb, false); + sk_eat_skb(sk, skb); ++seq; break; } - sk_eat_skb(sk, skb, false); + sk_eat_skb(sk, skb); if (!desc->count) break; tp->copied_seq = seq; @@ -1612,7 +1578,6 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, int target; /* Read at least this many bytes */ long timeo; struct task_struct *user_recv = NULL; - bool copied_early = false; struct sk_buff *skb; u32 urg_hole = 0; @@ -1655,28 +1620,6 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); -#ifdef CONFIG_NET_DMA - tp->ucopy.dma_chan = NULL; - preempt_disable(); - skb = skb_peek_tail(&sk->sk_receive_queue); - { - int available = 0; - - if (skb) - available = TCP_SKB_CB(skb)->seq + skb->len - (*seq); - if ((available < target) && - (len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) && - !sysctl_tcp_low_latency && - net_dma_find_channel()) { - preempt_enable(); - tp->ucopy.pinned_list = - dma_pin_iovec_pages(msg->msg_iov, len); - } else { - preempt_enable(); - } - } -#endif - do { u32 offset; @@ -1807,16 +1750,6 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, /* __ Set realtime policy in scheduler __ */ } -#ifdef CONFIG_NET_DMA - if (tp->ucopy.dma_chan) { - if (tp->rcv_wnd == 0 && - !skb_queue_empty(&sk->sk_async_wait_queue)) { - tcp_service_net_dma(sk, true); - tcp_cleanup_rbuf(sk, copied); - } else - dma_async_issue_pending(tp->ucopy.dma_chan); - } -#endif if (copied >= target) { /* Do not sleep, just process backlog. */ release_sock(sk); @@ -1824,11 +1757,6 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, } else sk_wait_data(sk, &timeo); -#ifdef CONFIG_NET_DMA - tcp_service_net_dma(sk, false); /* Don't block */ - tp->ucopy.wakeup = 0; -#endif - if (user_recv) { int chunk; @@ -1886,43 +1814,13 @@ do_prequeue: } if (!(flags & MSG_TRUNC)) { -#ifdef CONFIG_NET_DMA - if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) - tp->ucopy.dma_chan = net_dma_find_channel(); - - if (tp->ucopy.dma_chan) { - tp->ucopy.dma_cookie = dma_skb_copy_datagram_iovec( - tp->ucopy.dma_chan, skb, offset, - msg->msg_iov, used, - tp->ucopy.pinned_list); - - if (tp->ucopy.dma_cookie < 0) { - - pr_alert("%s: dma_cookie < 0\n", - __func__); - - /* Exception. Bailout! */ - if (!copied) - copied = -EFAULT; - break; - } - - dma_async_issue_pending(tp->ucopy.dma_chan); - - if ((offset + used) == skb->len) - copied_early = true; - - } else -#endif - { - err = skb_copy_datagram_iovec(skb, offset, - msg->msg_iov, used); - if (err) { - /* Exception. Bailout! */ - if (!copied) - copied = -EFAULT; - break; - } + err = skb_copy_datagram_iovec(skb, offset, + msg->msg_iov, used); + if (err) { + /* Exception. Bailout! */ + if (!copied) + copied = -EFAULT; + break; } } @@ -1942,19 +1840,15 @@ skip_copy: if (tcp_hdr(skb)->fin) goto found_fin_ok; - if (!(flags & MSG_PEEK)) { - sk_eat_skb(sk, skb, copied_early); - copied_early = false; - } + if (!(flags & MSG_PEEK)) + sk_eat_skb(sk, skb); continue; found_fin_ok: /* Process the FIN. 
*/ ++*seq; - if (!(flags & MSG_PEEK)) { - sk_eat_skb(sk, skb, copied_early); - copied_early = false; - } + if (!(flags & MSG_PEEK)) + sk_eat_skb(sk, skb); break; } while (len > 0); @@ -1977,16 +1871,6 @@ skip_copy: tp->ucopy.len = 0; } -#ifdef CONFIG_NET_DMA - tcp_service_net_dma(sk, true); /* Wait for queue to drain */ - tp->ucopy.dma_chan = NULL; - - if (tp->ucopy.pinned_list) { - dma_unpin_iovec_pages(tp->ucopy.pinned_list); - tp->ucopy.pinned_list = NULL; - } -#endif - /* According to UNIX98, msg_name/msg_namelen are ignored * on connected socket. I was just happy when found this 8) --ANK */ @@ -2330,9 +2214,6 @@ int tcp_disconnect(struct sock *sk, int flags) __skb_queue_purge(&sk->sk_receive_queue); tcp_write_queue_purge(sk); __skb_queue_purge(&tp->out_of_order_queue); -#ifdef CONFIG_NET_DMA - __skb_queue_purge(&sk->sk_async_wait_queue); -#endif inet->inet_dport = 0; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index eeaac399420..1342e9851f9 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -73,7 +73,6 @@ #include #include #include -#include int sysctl_tcp_timestamps __read_mostly = 1; int sysctl_tcp_window_scaling __read_mostly = 1; @@ -4970,53 +4969,6 @@ static inline bool tcp_checksum_complete_user(struct sock *sk, __tcp_checksum_complete_user(sk, skb); } -#ifdef CONFIG_NET_DMA -static bool tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, - int hlen) -{ - struct tcp_sock *tp = tcp_sk(sk); - int chunk = skb->len - hlen; - int dma_cookie; - bool copied_early = false; - - if (tp->ucopy.wakeup) - return false; - - if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) - tp->ucopy.dma_chan = net_dma_find_channel(); - - if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) { - - dma_cookie = dma_skb_copy_datagram_iovec(tp->ucopy.dma_chan, - skb, hlen, - tp->ucopy.iov, chunk, - tp->ucopy.pinned_list); - - if (dma_cookie < 0) - goto out; - - tp->ucopy.dma_cookie = dma_cookie; - copied_early = true; - - tp->ucopy.len -= chunk; - tp->copied_seq += chunk; - tcp_rcv_space_adjust(sk); - - if ((tp->ucopy.len == 0) || - (tcp_flag_word(tcp_hdr(skb)) & TCP_FLAG_PSH) || - (atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1))) { - tp->ucopy.wakeup = 1; - sk->sk_data_ready(sk, 0); - } - } else if (chunk > 0) { - tp->ucopy.wakeup = 1; - sk->sk_data_ready(sk, 0); - } -out: - return copied_early; -} -#endif /* CONFIG_NET_DMA */ - /* Does PAWS and seqno based validation of an incoming segment, flags will * play significant role here. 
*/ @@ -5201,14 +5153,6 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, if (tp->copied_seq == tp->rcv_nxt && len - tcp_header_len <= tp->ucopy.len) { -#ifdef CONFIG_NET_DMA - if (tp->ucopy.task == current && - sock_owned_by_user(sk) && - tcp_dma_try_early_copy(sk, skb, tcp_header_len)) { - copied_early = 1; - eaten = 1; - } -#endif if (tp->ucopy.task == current && sock_owned_by_user(sk) && !copied_early) { __set_current_state(TASK_RUNNING); @@ -5274,11 +5218,6 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, if (!copied_early || tp->rcv_nxt != tp->rcv_wup) __tcp_ack_snd_check(sk, 0); no_ack: -#ifdef CONFIG_NET_DMA - if (copied_early) - __skb_queue_tail(&sk->sk_async_wait_queue, skb); - else -#endif if (eaten) kfree_skb_partial(skb, fragstolen); sk->sk_data_ready(sk, 0); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3cf97651049..737c2e270ee 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -72,7 +72,6 @@ #include #include #include -#include #include #include #include @@ -1999,18 +1998,8 @@ process: bh_lock_sock_nested(sk); ret = 0; if (!sock_owned_by_user(sk)) { -#ifdef CONFIG_NET_DMA - struct tcp_sock *tp = tcp_sk(sk); - if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) - tp->ucopy.dma_chan = net_dma_find_channel(); - if (tp->ucopy.dma_chan) + if (!tcp_prequeue(sk, skb)) ret = tcp_v4_do_rcv(sk, skb); - else -#endif - { - if (!tcp_prequeue(sk, skb)) - ret = tcp_v4_do_rcv(sk, skb); - } } else if (unlikely(sk_add_backlog(sk, skb, sk->sk_rcvbuf + sk->sk_sndbuf))) { bh_unlock_sock(sk); @@ -2169,11 +2158,6 @@ void tcp_v4_destroy_sock(struct sock *sk) } #endif -#ifdef CONFIG_NET_DMA - /* Cleans up our sk_async_wait_queue */ - __skb_queue_purge(&sk->sk_async_wait_queue); -#endif - /* Clean prequeue, it must be empty really */ __skb_queue_purge(&tp->ucopy.prequeue); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 889079b2ea8..cb21fccf208 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -59,7 +59,6 @@ #include #include #include -#include #include #include #include @@ -1520,18 +1519,8 @@ process: bh_lock_sock_nested(sk); ret = 0; if (!sock_owned_by_user(sk)) { -#ifdef CONFIG_NET_DMA - struct tcp_sock *tp = tcp_sk(sk); - if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) - tp->ucopy.dma_chan = net_dma_find_channel(); - if (tp->ucopy.dma_chan) + if (!tcp_prequeue(sk, skb)) ret = tcp_v6_do_rcv(sk, skb); - else -#endif - { - if (!tcp_prequeue(sk, skb)) - ret = tcp_v6_do_rcv(sk, skb); - } } else if (unlikely(sk_add_backlog(sk, skb, sk->sk_rcvbuf + sk->sk_sndbuf))) { bh_unlock_sock(sk); diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 0080d2b0a8a..bb9cbc17d92 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -839,7 +839,7 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, if (!(flags & MSG_PEEK)) { spin_lock_irqsave(&sk->sk_receive_queue.lock, cpu_flags); - sk_eat_skb(sk, skb, false); + sk_eat_skb(sk, skb); spin_unlock_irqrestore(&sk->sk_receive_queue.lock, cpu_flags); *seq = 0; } @@ -861,10 +861,10 @@ copy_uaddr: llc_cmsg_rcv(msg, skb); if (!(flags & MSG_PEEK)) { - spin_lock_irqsave(&sk->sk_receive_queue.lock, cpu_flags); - sk_eat_skb(sk, skb, false); - spin_unlock_irqrestore(&sk->sk_receive_queue.lock, cpu_flags); - *seq = 0; + spin_lock_irqsave(&sk->sk_receive_queue.lock, cpu_flags); + sk_eat_skb(sk, skb); + spin_unlock_irqrestore(&sk->sk_receive_queue.lock, cpu_flags); + *seq = 0; } goto out; -- cgit v1.2.3-70-g09d2