From 286ab3d46058840d68e5d7d52e316c1f7e98c59f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 5 Nov 2007 23:38:39 -0800 Subject: [NET]: Define infrastructure to keep 'inuse' changes in an efficent SMP/NUMA way. "struct proto" currently uses an array stats[NR_CPUS] to track change on 'inuse' sockets per protocol. If NR_CPUS is big, this means we use a big memory area for this. Moreover, all this memory area is located on a single node on NUMA machines, increasing memory pressure on the boot node. In this patch, I tried to : - Keep a fast !CONFIG_SMP implementation - Keep a fast CONFIG_SMP implementation for often used protocols (tcp,udp,raw,...) - Introduce a NUMA efficient implementation Some helper macros are defined in include/net/sock.h These macros take into account CONFIG_SMP If a "struct proto" is declared without using DEFINE_PROTO_INUSE / REF_PROTO_INUSE macros, it will automatically use a default implementation, using a dynamically allocated percpu zone. This default implementation will be NUMA efficient, but might use 32/64 bytes per possible cpu because of current alloc_percpu() implementation. However it still should be better than previous implementation based on stats[NR_CPUS] field. When a "struct proto" is changed to use the new macros, we use a single static "int" percpu variable, lowering the memory and cpu costs, still preserving NUMA efficiency. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/sock.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++- net/ipv4/proc.c | 19 ++++--------------- net/ipv6/proc.c | 19 ++++--------------- 3 files changed, 55 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 12ad2067a98..e077f263b73 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1801,12 +1801,41 @@ EXPORT_SYMBOL(sk_common_release); static DEFINE_RWLOCK(proto_list_lock); static LIST_HEAD(proto_list); +#ifdef CONFIG_SMP +/* + * Define default functions to keep track of inuse sockets per protocol + * Note that often used protocols use dedicated functions to get a speed increase. + * (see DEFINE_PROTO_INUSE/REF_PROTO_INUSE) + */ +static void inuse_add(struct proto *prot, int inc) +{ + per_cpu_ptr(prot->inuse_ptr, smp_processor_id())[0] += inc; +} + +static int inuse_get(const struct proto *prot) +{ + int res = 0, cpu; + for_each_possible_cpu(cpu) + res += per_cpu_ptr(prot->inuse_ptr, cpu)[0]; + return res; +} +#endif + int proto_register(struct proto *prot, int alloc_slab) { char *request_sock_slab_name = NULL; char *timewait_sock_slab_name; int rc = -ENOBUFS; +#ifdef CONFIG_SMP + if (!prot->inuse_getval || !prot->inuse_add) { + prot->inuse_ptr = alloc_percpu(int); + if (prot->inuse_ptr == NULL) + goto out; + prot->inuse_getval = inuse_get; + prot->inuse_add = inuse_add; + } +#endif if (alloc_slab) { prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0, SLAB_HWCACHE_ALIGN, NULL); @@ -1814,7 +1843,7 @@ int proto_register(struct proto *prot, int alloc_slab) if (prot->slab == NULL) { printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n", prot->name); - goto out; + goto out_free_inuse; } if (prot->rsk_prot != NULL) { @@ -1873,6 +1902,15 @@ out_free_request_sock_slab_name: out_free_sock_slab: kmem_cache_destroy(prot->slab); prot->slab = NULL; +out_free_inuse: +#ifdef CONFIG_SMP + if (prot->inuse_ptr != NULL) { + free_percpu(prot->inuse_ptr); + prot->inuse_ptr = NULL; + prot->inuse_getval = NULL; + prot->inuse_add = NULL; + } +#endif goto out; } @@ -1884,6 +1922,14 @@ void proto_unregister(struct proto *prot) list_del(&prot->node); write_unlock(&proto_list_lock); +#ifdef CONFIG_SMP + if (prot->inuse_ptr != NULL) { + free_percpu(prot->inuse_ptr); + prot->inuse_ptr = NULL; + prot->inuse_getval = NULL; + prot->inuse_add = NULL; + } +#endif if (prot->slab != NULL) { kmem_cache_destroy(prot->slab); prot->slab = NULL; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index ffdccc0972e..ce34b281803 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -46,17 +46,6 @@ #include #include -static int fold_prot_inuse(struct proto *proto) -{ - int res = 0; - int cpu; - - for_each_possible_cpu(cpu) - res += proto->stats[cpu].inuse; - - return res; -} - /* * Report socket allocation statistics [mea@utu.fi] */ @@ -64,12 +53,12 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) { socket_seq_show(seq); seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", - fold_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count), + sock_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count), tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated), atomic_read(&tcp_memory_allocated)); - seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot)); - seq_printf(seq, "UDPLITE: inuse %d\n", fold_prot_inuse(&udplite_prot)); - seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot)); + seq_printf(seq, "UDP: inuse %d\n", sock_prot_inuse(&udp_prot)); + seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse(&udplite_prot)); + seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse(&raw_prot)); seq_printf(seq, "FRAG: inuse %d memory %d\n", ip_frag_nqueues(), ip_frag_mem()); return 0; diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index be526ad9254..8631ed7fe8a 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -32,27 +32,16 @@ static struct proc_dir_entry *proc_net_devsnmp6; -static int fold_prot_inuse(struct proto *proto) -{ - int res = 0; - int cpu; - - for_each_possible_cpu(cpu) - res += proto->stats[cpu].inuse; - - return res; -} - static int sockstat6_seq_show(struct seq_file *seq, void *v) { seq_printf(seq, "TCP6: inuse %d\n", - fold_prot_inuse(&tcpv6_prot)); + sock_prot_inuse(&tcpv6_prot)); seq_printf(seq, "UDP6: inuse %d\n", - fold_prot_inuse(&udpv6_prot)); + sock_prot_inuse(&udpv6_prot)); seq_printf(seq, "UDPLITE6: inuse %d\n", - fold_prot_inuse(&udplitev6_prot)); + sock_prot_inuse(&udplitev6_prot)); seq_printf(seq, "RAW6: inuse %d\n", - fold_prot_inuse(&rawv6_prot)); + sock_prot_inuse(&rawv6_prot)); seq_printf(seq, "FRAG6: inuse %d memory %d\n", ip6_frag_nqueues(), ip6_frag_mem()); return 0; -- cgit v1.2.3-70-g09d2