From 111cc8b913b42ef07793648b1699288332f273e1 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen
Date: Mon, 24 Nov 2008 21:27:22 -0800
Subject: tcp: add some mibs to track collapsing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ilpo Järvinen
Signed-off-by: David S. Miller
---
 net/ipv4/proc.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net/ipv4/proc.c')

diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index a631a1f110c..731789bb499 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -234,6 +234,9 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPSpuriousRTOs", LINUX_MIB_TCPSPURIOUSRTOS),
 	SNMP_MIB_ITEM("TCPMD5NotFound", LINUX_MIB_TCPMD5NOTFOUND),
 	SNMP_MIB_ITEM("TCPMD5Unexpected", LINUX_MIB_TCPMD5UNEXPECTED),
+	SNMP_MIB_ITEM("TCPSackShifted", LINUX_MIB_SACKSHIFTED),
+	SNMP_MIB_ITEM("TCPSackMerged", LINUX_MIB_SACKMERGED),
+	SNMP_MIB_ITEM("TCPSackShiftFallback", LINUX_MIB_SACKSHIFTFALLBACK),
 	SNMP_MIB_SENTINEL
 };
--
cgit v1.2.3-70-g09d2


From 1748376b6626acf59c24e9592ac67b3fe2a0e026 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Tue, 25 Nov 2008 21:16:35 -0800
Subject: net: Use a percpu_counter for sockets_allocated

Instead of using one atomic_t per protocol, use a percpu_counter
for "sockets_allocated", to reduce cache line contention on
heavy duty network servers.

Note: we revert commit 248969ae31e1b3276fc4399d67ce29a5d81e6fd9
("net: af_unix can make unix_nr_socks visbile in /proc"), since it is
no longer used after the sock_prot_inuse_add() addition.

Signed-off-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 include/net/sctp/sctp.h |  1 +
 include/net/sock.h      |  2 +-
 include/net/tcp.h       |  2 +-
 net/core/sock.c         | 10 +++++++---
 net/ipv4/proc.c         |  3 ++-
 net/ipv4/tcp.c          |  8 ++++++--
 net/ipv4/tcp_ipv4.c     |  4 ++--
 net/ipv6/tcp_ipv6.c     |  2 +-
 net/sctp/protocol.c     |  6 +++++-
 net/sctp/socket.c       |  6 +++---
 net/unix/af_unix.c      |  1 -
 11 files changed, 29 insertions(+), 16 deletions(-)

(limited to 'net/ipv4/proc.c')

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 23797506f59..bbb7742195b 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -138,6 +138,7 @@ void sctp_write_space(struct sock *sk);
 unsigned int sctp_poll(struct file *file, struct socket *sock,
 		poll_table *wait);
 void sctp_sock_rfree(struct sk_buff *skb);
+extern struct percpu_counter sctp_sockets_allocated;
 
 /*
  * sctp/primitive.c
diff --git a/include/net/sock.h b/include/net/sock.h
index 00cd486d362..a2a3890959c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -649,7 +649,7 @@ struct proto {
 	/* Memory pressure */
 	void			(*enter_memory_pressure)(struct sock *sk);
 	atomic_t		*memory_allocated;	/* Current allocated memory. */
-	atomic_t		*sockets_allocated;	/* Current number of sockets. */
+	struct percpu_counter	*sockets_allocated;	/* Current number of sockets. */
 	/*
 	 * Pressure flag: try to collapse.
 	 * Technical note: it is used by multiple contexts non atomically.
diff --git a/include/net/tcp.h b/include/net/tcp.h
index e8ae90a8c35..cbca3b8a133 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -238,7 +238,7 @@ extern int sysctl_tcp_slow_start_after_idle;
 extern int sysctl_tcp_max_ssthresh;
 
 extern atomic_t tcp_memory_allocated;
-extern atomic_t tcp_sockets_allocated;
+extern struct percpu_counter tcp_sockets_allocated;
 extern int tcp_memory_pressure;
 
 /*
diff --git a/net/core/sock.c b/net/core/sock.c
index a4e840e5a05..7a081b647bf 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1071,7 +1071,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
 		newsk->sk_sleep	 = NULL;
 
 		if (newsk->sk_prot->sockets_allocated)
-			atomic_inc(newsk->sk_prot->sockets_allocated);
+			percpu_counter_inc(newsk->sk_prot->sockets_allocated);
 	}
 out:
 	return newsk;
@@ -1463,8 +1463,12 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
 	}
 
 	if (prot->memory_pressure) {
-		if (!*prot->memory_pressure ||
-		    prot->sysctl_mem[2] > atomic_read(prot->sockets_allocated) *
+		int alloc;
+
+		if (!*prot->memory_pressure)
+			return 1;
+		alloc = percpu_counter_read_positive(prot->sockets_allocated);
+		if (prot->sysctl_mem[2] > alloc *
 		    sk_mem_pages(sk->sk_wmem_queued +
 				 atomic_read(&sk->sk_rmem_alloc) +
 				 sk->sk_forward_alloc))
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 731789bb499..4944b47ad62 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -55,7 +55,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
 	seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
 		   sock_prot_inuse_get(net, &tcp_prot),
 		   atomic_read(&tcp_orphan_count),
-		   tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated),
+		   tcp_death_row.tw_count,
+		   (int)percpu_counter_sum_positive(&tcp_sockets_allocated),
 		   atomic_read(&tcp_memory_allocated));
 	seq_printf(seq, "UDP: inuse %d mem %d\n",
 		   sock_prot_inuse_get(net, &udp_prot),
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 044224a341e..e6fade9ebf6 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -290,9 +290,12 @@ EXPORT_SYMBOL(sysctl_tcp_rmem);
 EXPORT_SYMBOL(sysctl_tcp_wmem);
 
 atomic_t tcp_memory_allocated;	/* Current allocated memory. */
-atomic_t tcp_sockets_allocated;	/* Current number of TCP sockets. */
-
 EXPORT_SYMBOL(tcp_memory_allocated);
+
+/*
+ * Current number of TCP sockets.
+ */
+struct percpu_counter tcp_sockets_allocated;
 EXPORT_SYMBOL(tcp_sockets_allocated);
 
 /*
@@ -2685,6 +2688,7 @@ void __init tcp_init(void)
 
 	BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
 
+	percpu_counter_init(&tcp_sockets_allocated, 0);
 	tcp_hashinfo.bind_bucket_cachep =
 		kmem_cache_create("tcp_bind_bucket",
 				  sizeof(struct inet_bind_bucket), 0,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index cab2458f86f..26b9030747c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1797,7 +1797,7 @@ static int tcp_v4_init_sock(struct sock *sk)
 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
 
-	atomic_inc(&tcp_sockets_allocated);
+	percpu_counter_inc(&tcp_sockets_allocated);
 
 	return 0;
 }
@@ -1845,7 +1845,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
 		sk->sk_sndmsg_page = NULL;
 	}
 
-	atomic_dec(&tcp_sockets_allocated);
+	percpu_counter_dec(&tcp_sockets_allocated);
 }
 
 EXPORT_SYMBOL(tcp_v4_destroy_sock);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f259c9671f3..8702b06cb60 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1830,7 +1830,7 @@ static int tcp_v6_init_sock(struct sock *sk)
 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
 
-	atomic_inc(&tcp_sockets_allocated);
+	percpu_counter_inc(&tcp_sockets_allocated);
 
 	return 0;
 }
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index a8ca743241e..d5ea232c912 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -102,6 +102,8 @@ struct sock *sctp_get_ctl_sock(void)
 /* Set up the proc fs entry for the SCTP protocol. */
 static __init int sctp_proc_init(void)
 {
+	if (percpu_counter_init(&sctp_sockets_allocated, 0))
+		goto out_nomem;
 #ifdef CONFIG_PROC_FS
 	if (!proc_net_sctp) {
 		struct proc_dir_entry *ent;
@@ -110,7 +112,7 @@ static __init int sctp_proc_init(void)
 			ent->owner = THIS_MODULE;
 			proc_net_sctp = ent;
 		} else
-			goto out_nomem;
+			goto out_free_percpu;
 	}
 
 	if (sctp_snmp_proc_init())
@@ -135,6 +137,8 @@ out_snmp_proc_init:
 		proc_net_sctp = NULL;
 		remove_proc_entry("sctp", init_net.proc_net);
 	}
+out_free_percpu:
+	percpu_counter_destroy(&sctp_sockets_allocated);
 out_nomem:
 	return -ENOMEM;
 #else
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index ba81fe3ccab..a2de585888d 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -114,7 +114,7 @@ extern int sysctl_sctp_wmem[3];
 
 static int sctp_memory_pressure;
 static atomic_t sctp_memory_allocated;
-static atomic_t sctp_sockets_allocated;
+struct percpu_counter sctp_sockets_allocated;
 
 static void sctp_enter_memory_pressure(struct sock *sk)
 {
@@ -3613,7 +3613,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
 	sp->hmac = NULL;
 
 	SCTP_DBG_OBJCNT_INC(sock);
-	atomic_inc(&sctp_sockets_allocated);
+	percpu_counter_inc(&sctp_sockets_allocated);
 
 	local_bh_disable();
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
@@ -3632,7 +3632,7 @@ SCTP_STATIC void sctp_destroy_sock(struct sock *sk)
 	/* Release our hold on the endpoint. */
 	ep = sctp_sk(sk)->ep;
 	sctp_endpoint_free(ep);
-	atomic_dec(&sctp_sockets_allocated);
+	percpu_counter_dec(&sctp_sockets_allocated);
 	local_bh_disable();
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 	local_bh_enable();
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 3a35a6e8bf9..5aaf23e43f1 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -571,7 +571,6 @@ static const struct proto_ops unix_seqpacket_ops = {
 static struct proto unix_proto = {
 	.name			= "UNIX",
 	.owner			= THIS_MODULE,
-	.sockets_allocated	= &unix_nr_socks,
 	.obj_size		= sizeof(struct unix_sock),
 };
--
cgit v1.2.3-70-g09d2


From dd24c00191d5e4a1ae896aafe33c6b8095ab4bd1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Tue, 25 Nov 2008 21:17:14 -0800
Subject: net: Use a percpu_counter for orphan_count

Instead of using one atomic_t per protocol, use a percpu_counter
for "orphan_count", to reduce cache line contention on
heavy duty network servers.

Signed-off-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 include/net/sock.h              |  2 +-
 include/net/tcp.h               |  2 +-
 net/dccp/dccp.h                 |  2 +-
 net/dccp/proto.c                | 16 ++++++++++------
 net/ipv4/inet_connection_sock.c |  4 ++--
 net/ipv4/proc.c                 |  2 +-
 net/ipv4/tcp.c                  | 12 +++++++-----
 net/ipv4/tcp_timer.c            |  2 +-
 8 files changed, 24 insertions(+), 18 deletions(-)

(limited to 'net/ipv4/proc.c')

diff --git a/include/net/sock.h b/include/net/sock.h
index a2a3890959c..5a3a151bd73 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -666,7 +666,7 @@ struct proto {
 	unsigned int		obj_size;
 	int			slab_flags;
 
-	atomic_t		*orphan_count;
+	struct percpu_counter	*orphan_count;
 
 	struct request_sock_ops	*rsk_prot;
 	struct timewait_sock_ops *twsk_prot;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index cbca3b8a133..de1e91d959b 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -46,7 +46,7 @@
 
 extern struct inet_hashinfo tcp_hashinfo;
 
-extern atomic_t tcp_orphan_count;
+extern struct percpu_counter tcp_orphan_count;
 extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 
 #define MAX_TCP_HEADER	(128 + MAX_HEADER)
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 031ce350d3c..33a1127270c 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -49,7 +49,7 @@ extern int dccp_debug;
 
 extern struct inet_hashinfo dccp_hashinfo;
 
-extern atomic_t dccp_orphan_count;
+extern struct percpu_counter dccp_orphan_count;
 
 extern void dccp_time_wait(struct sock *sk, int state, int timeo);
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index ea85c423cdb..db225f93cd5 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -40,8 +40,7 @@ DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
 
 EXPORT_SYMBOL_GPL(dccp_statistics);
 
-atomic_t dccp_orphan_count = ATOMIC_INIT(0);
-
+struct percpu_counter dccp_orphan_count;
 EXPORT_SYMBOL_GPL(dccp_orphan_count);
 
 struct inet_hashinfo dccp_hashinfo;
@@ -1000,7 +999,7 @@ adjudge_to_death:
 	state = sk->sk_state;
 	sock_hold(sk);
 	sock_orphan(sk);
-	atomic_inc(sk->sk_prot->orphan_count);
+	percpu_counter_inc(sk->sk_prot->orphan_count);
 
 	/*
 	 * It is the last release_sock in its life. It will remove backlog.
@@ -1064,18 +1063,21 @@ static int __init dccp_init(void)
 {
 	unsigned long goal;
 	int ehash_order, bhash_order, i;
-	int rc = -ENOBUFS;
+	int rc;
 
 	BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
 		     FIELD_SIZEOF(struct sk_buff, cb));
-
+	rc = percpu_counter_init(&dccp_orphan_count, 0);
+	if (rc)
+		goto out;
+	rc = -ENOBUFS;
 	inet_hashinfo_init(&dccp_hashinfo);
 	dccp_hashinfo.bind_bucket_cachep =
 		kmem_cache_create("dccp_bind_bucket",
 				  sizeof(struct inet_bind_bucket), 0,
 				  SLAB_HWCACHE_ALIGN, NULL);
 	if (!dccp_hashinfo.bind_bucket_cachep)
-		goto out;
+		goto out_free_percpu;
 
 	/*
 	 * Size and allocate the main established and bind bucket
@@ -1168,6 +1170,8 @@ out_free_dccp_ehash:
 out_free_bind_bucket_cachep:
 	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
 	dccp_hashinfo.bind_bucket_cachep = NULL;
+out_free_percpu:
+	percpu_counter_destroy(&dccp_orphan_count);
 	goto out;
 }
 
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 05af807ca9b..1ccdbba528b 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -561,7 +561,7 @@ void inet_csk_destroy_sock(struct sock *sk)
 
 	sk_refcnt_debug_release(sk);
 
-	atomic_dec(sk->sk_prot->orphan_count);
+	percpu_counter_dec(sk->sk_prot->orphan_count);
 	sock_put(sk);
 }
 
@@ -641,7 +641,7 @@ void inet_csk_listen_stop(struct sock *sk)
 
 		sock_orphan(child);
 
-		atomic_inc(sk->sk_prot->orphan_count);
+		percpu_counter_inc(sk->sk_prot->orphan_count);
 
 		inet_csk_destroy_sock(child);
 
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 4944b47ad62..614958b7c27 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -54,7 +54,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
 	socket_seq_show(seq);
 	seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
 		   sock_prot_inuse_get(net, &tcp_prot),
-		   atomic_read(&tcp_orphan_count),
+		   (int)percpu_counter_sum_positive(&tcp_orphan_count),
 		   tcp_death_row.tw_count,
 		   (int)percpu_counter_sum_positive(&tcp_sockets_allocated),
 		   atomic_read(&tcp_memory_allocated));
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e6fade9ebf6..01924340862 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -277,8 +277,7 @@
 int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
 
-atomic_t tcp_orphan_count = ATOMIC_INIT(0);
-
+struct percpu_counter tcp_orphan_count;
 EXPORT_SYMBOL_GPL(tcp_orphan_count);
 
 int sysctl_tcp_mem[3] __read_mostly;
@@ -1837,7 +1836,7 @@ adjudge_to_death:
 	state = sk->sk_state;
 	sock_hold(sk);
 	sock_orphan(sk);
-	atomic_inc(sk->sk_prot->orphan_count);
+	percpu_counter_inc(sk->sk_prot->orphan_count);
 
 	/* It is the last release_sock in its life. It will remove backlog. */
 	release_sock(sk);
@@ -1888,9 +1887,11 @@ adjudge_to_death:
 		}
 	}
 	if (sk->sk_state != TCP_CLOSE) {
+		int orphan_count = percpu_counter_read_positive(
+						sk->sk_prot->orphan_count);
+
 		sk_mem_reclaim(sk);
-		if (tcp_too_many_orphans(sk,
-				atomic_read(sk->sk_prot->orphan_count))) {
+		if (tcp_too_many_orphans(sk, orphan_count)) {
 			if (net_ratelimit())
 				printk(KERN_INFO "TCP: too many of orphaned "
 				       "sockets\n");
@@ -2689,6 +2690,7 @@ void __init tcp_init(void)
 	BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
 
 	percpu_counter_init(&tcp_sockets_allocated, 0);
+	percpu_counter_init(&tcp_orphan_count, 0);
 	tcp_hashinfo.bind_bucket_cachep =
 		kmem_cache_create("tcp_bind_bucket",
 				  sizeof(struct inet_bind_bucket), 0,
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 3df339e3e36..cc4e6d27ded 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -65,7 +65,7 @@ static void tcp_write_err(struct sock *sk)
 static int tcp_out_of_resources(struct sock *sk, int do_reset)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	int orphans = atomic_read(&tcp_orphan_count);
+	int orphans = percpu_counter_read_positive(&tcp_orphan_count);
 
 	/* If peer does not open window for long time, or did not transmit
	 * anything for long time, penalize it. */
--
cgit v1.2.3-70-g09d2
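
A note on the conversion pattern used by the two Eric Dumazet patches above: in every
protocol it is the same four steps. Declare a struct percpu_counter where the atomic_t
used to be, initialise it once with percpu_counter_init(), bump it with
percpu_counter_inc()/percpu_counter_dec() on the socket create/destroy paths, then read
it cheaply but approximately with percpu_counter_read_positive() on hot paths
(__sk_mem_schedule(), tcp_close(), tcp_out_of_resources()) and fold the per-cpu deltas
into an exact value with percpu_counter_sum_positive() only on the slow
/proc/net/sockstat path. The sketch below restates that pattern in one place. It is
illustrative kernel-style C only, not code from the patches: the "foo" protocol, its
counter and its hook functions are made up for the example.

	#include <linux/percpu_counter.h>
	#include <linux/seq_file.h>
	#include <net/sock.h>

	/* Illustrative "foo" protocol counter; mirrors what the patches do
	 * for tcp_sockets_allocated and tcp_orphan_count.
	 */
	static struct percpu_counter foo_sockets_allocated;

	static int __init foo_counters_init(void)
	{
		/* One-time setup, as tcp_init()/dccp_init()/sctp_proc_init() do. */
		return percpu_counter_init(&foo_sockets_allocated, 0);
	}

	static int foo_init_sock(struct sock *sk)
	{
		/* Hot path: touches only this CPU's slot, no shared cache line. */
		percpu_counter_inc(&foo_sockets_allocated);
		return 0;
	}

	static void foo_destroy_sock(struct sock *sk)
	{
		/* Hot path: per-CPU decrement. */
		percpu_counter_dec(&foo_sockets_allocated);
	}

	static int foo_under_limit(int limit)
	{
		/* Hot path: fast, approximate read (may lag by per-cpu batches). */
		return percpu_counter_read_positive(&foo_sockets_allocated) < limit;
	}

	static int foo_sockstat_show(struct seq_file *seq, void *v)
	{
		/* Slow path (/proc): exact sum across all CPUs. */
		seq_printf(seq, "FOO: alloc %d\n",
			   (int)percpu_counter_sum_positive(&foo_sockets_allocated));
		return 0;
	}

	static void foo_counters_exit(void)
	{
		percpu_counter_destroy(&foo_sockets_allocated);
	}

The trade-off is visible in the patches themselves: fast paths tolerate the slack of
percpu_counter_read_positive(), while the rare /proc report pays the cost of the exact
percpu_counter_sum_positive() walk over all CPUs.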