From 8d8ad9d7c4bfe79bc91b7fc419ecfb9dcdfe6a51 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 26 Nov 2007 20:10:50 +0800 Subject: [NET]: Name magic constants in sock_wake_async() The sock_wake_async() performs a bit different actions depending on "how" argument. Unfortunately this argument ony has numerical magic values. I propose to give names to their constants to help people reading this function callers understand what's going on without looking into this function all the time. I suppose this is 2.6.25 material, but if it's not (or the naming seems poor/bad/awful), I can rework it against the current net-2.6 tree. Signed-off-by: Pavel Emelyanov Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/sctp/socket.c') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index ea9649ca0b2..dc2f9221f09 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6008,7 +6008,8 @@ static void __sctp_write_space(struct sctp_association *asoc) */ if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) - sock_wake_async(sock, 2, POLL_OUT); + sock_wake_async(sock, + SOCK_WAKE_SPACE, POLL_OUT); } } } -- cgit v1.2.3-70-g09d2 From f57d96b2e92d209ab3991bba9a44e0d6ef7614a8 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 20 Dec 2007 14:12:24 -0800 Subject: [SCTP]: Change use_as_src into a full address state Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 10 ++++++++-- net/sctp/bind_addr.c | 9 +++++---- net/sctp/ipv6.c | 2 +- net/sctp/protocol.c | 8 ++++---- net/sctp/sm_make_chunk.c | 6 +++--- net/sctp/socket.c | 8 ++++---- 6 files changed, 25 insertions(+), 18 deletions(-) (limited to 'net/sctp/socket.c') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index fa87873fee7..2528f8a837d 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -760,12 +760,18 @@ void sctp_init_addrs(struct sctp_chunk *, union sctp_addr *, union sctp_addr *); const union sctp_addr *sctp_source(const struct sctp_chunk *chunk); +enum { + SCTP_ADDR_NEW, /* new address added to assoc/ep */ + SCTP_ADDR_SRC, /* address can be used as source */ + SCTP_ADDR_DEL, /* address about to be deleted */ +}; + /* This is a structure for holding either an IPv6 or an IPv4 address. */ struct sctp_sockaddr_entry { struct list_head list; struct rcu_head rcu; union sctp_addr a; - __u8 use_as_src; + __u8 state; __u8 valid; }; @@ -1190,7 +1196,7 @@ int sctp_bind_addr_dup(struct sctp_bind_addr *dest, const struct sctp_bind_addr *src, gfp_t gfp); int sctp_add_bind_addr(struct sctp_bind_addr *, union sctp_addr *, - __u8 use_as_src, gfp_t gfp); + __u8 addr_state, gfp_t gfp); int sctp_del_bind_addr(struct sctp_bind_addr *, union sctp_addr *); int sctp_bind_addr_match(struct sctp_bind_addr *, const union sctp_addr *, struct sctp_sock *); diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 6a7d01091f0..43266117478 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -171,7 +171,7 @@ void sctp_bind_addr_free(struct sctp_bind_addr *bp) /* Add an address to the bind address list in the SCTP_bind_addr structure. */ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new, - __u8 use_as_src, gfp_t gfp) + __u8 addr_state, gfp_t gfp) { struct sctp_sockaddr_entry *addr; @@ -188,7 +188,7 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new, if (!addr->a.v4.sin_port) addr->a.v4.sin_port = htons(bp->port); - addr->use_as_src = use_as_src; + addr->state = addr_state; addr->valid = 1; INIT_LIST_HEAD(&addr->list); @@ -312,7 +312,7 @@ int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw_addr_list, } af->from_addr_param(&addr, rawaddr, htons(port), 0); - retval = sctp_add_bind_addr(bp, &addr, 1, gfp); + retval = sctp_add_bind_addr(bp, &addr, SCTP_ADDR_SRC, gfp); if (retval) { /* Can't finish building the list, clean up. */ sctp_bind_addr_clean(bp); @@ -411,7 +411,8 @@ static int sctp_copy_one_addr(struct sctp_bind_addr *dest, (((AF_INET6 == addr->sa.sa_family) && (flags & SCTP_ADDR6_ALLOWED) && (flags & SCTP_ADDR6_PEERSUPP)))) - error = sctp_add_bind_addr(dest, addr, 1, gfp); + error = sctp_add_bind_addr(dest, addr, SCTP_ADDR_SRC, + gfp); } return error; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 7f31ff638bc..bd04aed673c 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -330,7 +330,7 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc, list_for_each_entry_rcu(laddr, &bp->address_list, list) { if (!laddr->valid) continue; - if ((laddr->use_as_src) && + if ((laddr->state == SCTP_ADDR_SRC) && (laddr->a.sa.sa_family == AF_INET6) && (scope <= sctp_scope(&laddr->a))) { bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index dc22d710849..e466e00b9a9 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -229,8 +229,8 @@ int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope, (((AF_INET6 == addr->a.sa.sa_family) && (copy_flags & SCTP_ADDR6_ALLOWED) && (copy_flags & SCTP_ADDR6_PEERSUPP)))) { - error = sctp_add_bind_addr(bp, &addr->a, 1, - GFP_ATOMIC); + error = sctp_add_bind_addr(bp, &addr->a, + SCTP_ADDR_SRC, GFP_ATOMIC); if (error) goto end_copy; } @@ -472,7 +472,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, */ rcu_read_lock(); list_for_each_entry_rcu(laddr, &bp->address_list, list) { - if (!laddr->valid || !laddr->use_as_src) + if (!laddr->valid || (laddr->state != SCTP_ADDR_SRC)) continue; sctp_v4_dst_saddr(&dst_saddr, dst, htons(bp->port)); if (sctp_v4_cmp_addr(&dst_saddr, &laddr->a)) @@ -494,7 +494,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, list_for_each_entry_rcu(laddr, &bp->address_list, list) { if (!laddr->valid) continue; - if ((laddr->use_as_src) && + if ((laddr->state == SCTP_ADDR_SRC) && (AF_INET == laddr->a.sa.sa_family)) { fl.fl4_src = laddr->a.v4.sin_addr.s_addr; if (!ip_route_output_key(&rt, &fl)) { diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 7fd6a6b6861..46f54188f00 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1692,8 +1692,8 @@ no_hmac: /* Also, add the destination address. */ if (list_empty(&retval->base.bind_addr.address_list)) { - sctp_add_bind_addr(&retval->base.bind_addr, &chunk->dest, 1, - GFP_ATOMIC); + sctp_add_bind_addr(&retval->base.bind_addr, &chunk->dest, + SCTP_ADDR_SRC, GFP_ATOMIC); } retval->next_tsn = retval->c.initial_tsn; @@ -3016,7 +3016,7 @@ static int sctp_asconf_param_success(struct sctp_association *asoc, local_bh_disable(); list_for_each_entry(saddr, &bp->address_list, list) { if (sctp_cmp_addr_exact(&saddr->a, &addr)) - saddr->use_as_src = 1; + saddr->state = SCTP_ADDR_SRC; } local_bh_enable(); break; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index dc2f9221f09..7a8650f01d0 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -390,7 +390,7 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) /* Add the address to the bind address list. * Use GFP_ATOMIC since BHs will be disabled. */ - ret = sctp_add_bind_addr(bp, addr, 1, GFP_ATOMIC); + ret = sctp_add_bind_addr(bp, addr, SCTP_ADDR_SRC, GFP_ATOMIC); /* Copy back into socket for getsockname() use. */ if (!ret) { @@ -585,8 +585,8 @@ static int sctp_send_asconf_add_ip(struct sock *sk, addr = (union sctp_addr *)addr_buf; af = sctp_get_af_specific(addr->v4.sin_family); memcpy(&saveaddr, addr, af->sockaddr_len); - retval = sctp_add_bind_addr(bp, &saveaddr, 0, - GFP_ATOMIC); + retval = sctp_add_bind_addr(bp, &saveaddr, + SCTP_ADDR_NEW, GFP_ATOMIC); addr_buf += af->sockaddr_len; } } @@ -777,7 +777,7 @@ static int sctp_send_asconf_del_ip(struct sock *sk, af = sctp_get_af_specific(laddr->v4.sin_family); list_for_each_entry(saddr, &bp->address_list, list) { if (sctp_cmp_addr_exact(&saddr->a, laddr)) - saddr->use_as_src = 0; + saddr->state = SCTP_ADDR_DEL; } addr_buf += af->sockaddr_len; } -- cgit v1.2.3-70-g09d2 From 3ab224be6d69de912ee21302745ea45a99274dbc Mon Sep 17 00:00:00 2001 From: Hideo Aoki Date: Mon, 31 Dec 2007 00:11:19 -0800 Subject: [NET] CORE: Introducing new memory accounting interface. This patch introduces new memory accounting functions for each network protocol. Most of them are renamed from memory accounting functions for stream protocols. At the same time, some stream memory accounting functions are removed since other functions do same thing. Renaming: sk_stream_free_skb() -> sk_wmem_free_skb() __sk_stream_mem_reclaim() -> __sk_mem_reclaim() sk_stream_mem_reclaim() -> sk_mem_reclaim() sk_stream_mem_schedule -> __sk_mem_schedule() sk_stream_pages() -> sk_mem_pages() sk_stream_rmem_schedule() -> sk_rmem_schedule() sk_stream_wmem_schedule() -> sk_wmem_schedule() sk_charge_skb() -> sk_mem_charge() Removeing sk_stream_rfree(): consolidates into sock_rfree() sk_stream_set_owner_r(): consolidates into skb_set_owner_r() sk_stream_mem_schedule() The following functions are added. sk_has_account(): check if the protocol supports accounting sk_mem_uncharge(): do the opposite of sk_mem_charge() In addition, to achieve consolidation, updating sk_wmem_queued is removed from sk_mem_charge(). Next, to consolidate memory accounting functions, this patch adds memory accounting calls to network core functions. Moreover, present memory accounting call is renamed to new accounting call. Finally we replace present memory accounting calls with new interface in TCP and SCTP. Signed-off-by: Takahiro Yasui Signed-off-by: Hideo Aoki Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 3 +- include/net/sock.h | 98 ++++++++++++++++++++++++++------------------- include/net/tcp.h | 4 +- net/core/datagram.c | 2 + net/core/sock.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++++ net/core/stream.c | 84 +------------------------------------- net/ipv4/tcp.c | 23 ++++++----- net/ipv4/tcp_input.c | 26 ++++++------ net/ipv4/tcp_output.c | 26 +++++++----- net/ipv4/tcp_timer.c | 8 ++-- net/sctp/protocol.c | 2 +- net/sctp/sm_statefuns.c | 2 +- net/sctp/socket.c | 11 ++--- net/sctp/ulpevent.c | 2 +- net/sctp/ulpqueue.c | 2 +- 15 files changed, 222 insertions(+), 175 deletions(-) (limited to 'net/sctp/socket.c') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 1b81ede7c2b..4977b0a8153 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -463,8 +463,7 @@ static inline void sctp_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) skb->destructor = sctp_sock_rfree; atomic_add(event->rmem_len, &sk->sk_rmem_alloc); /* - * This mimics the behavior of - * sk_stream_set_owner_r + * This mimics the behavior of skb_set_owner_r */ sk->sk_forward_alloc -= event->rmem_len; } diff --git a/include/net/sock.h b/include/net/sock.h index d27ba6fdd03..3d938f6c672 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -460,25 +460,6 @@ static inline int sk_stream_memory_free(struct sock *sk) return sk->sk_wmem_queued < sk->sk_sndbuf; } -extern void sk_stream_rfree(struct sk_buff *skb); - -static inline void sk_stream_set_owner_r(struct sk_buff *skb, struct sock *sk) -{ - skb->sk = sk; - skb->destructor = sk_stream_rfree; - atomic_add(skb->truesize, &sk->sk_rmem_alloc); - sk->sk_forward_alloc -= skb->truesize; -} - -static inline void sk_stream_free_skb(struct sock *sk, struct sk_buff *skb) -{ - skb_truesize_check(skb); - sock_set_flag(sk, SOCK_QUEUE_SHRUNK); - sk->sk_wmem_queued -= skb->truesize; - sk->sk_forward_alloc += skb->truesize; - __kfree_skb(skb); -} - /* The per-socket spinlock must be held here. */ static inline void sk_add_backlog(struct sock *sk, struct sk_buff *skb) { @@ -576,7 +557,7 @@ struct proto { /* * Pressure flag: try to collapse. * Technical note: it is used by multiple contexts non atomically. - * All the sk_stream_mem_schedule() is of this nature: accounting + * All the __sk_mem_schedule() is of this nature: accounting * is strict, actions are advisory and have some latency. */ int *memory_pressure; @@ -712,33 +693,73 @@ static inline struct inode *SOCK_INODE(struct socket *socket) return &container_of(socket, struct socket_alloc, socket)->vfs_inode; } -extern void __sk_stream_mem_reclaim(struct sock *sk); -extern int sk_stream_mem_schedule(struct sock *sk, int size, int kind); +/* + * Functions for memory accounting + */ +extern int __sk_mem_schedule(struct sock *sk, int size, int kind); +extern void __sk_mem_reclaim(struct sock *sk); -#define SK_STREAM_MEM_QUANTUM ((int)PAGE_SIZE) -#define SK_STREAM_MEM_QUANTUM_SHIFT ilog2(SK_STREAM_MEM_QUANTUM) +#define SK_MEM_QUANTUM ((int)PAGE_SIZE) +#define SK_MEM_QUANTUM_SHIFT ilog2(SK_MEM_QUANTUM) +#define SK_MEM_SEND 0 +#define SK_MEM_RECV 1 -static inline int sk_stream_pages(int amt) +static inline int sk_mem_pages(int amt) { - return (amt + SK_STREAM_MEM_QUANTUM - 1) >> SK_STREAM_MEM_QUANTUM_SHIFT; + return (amt + SK_MEM_QUANTUM - 1) >> SK_MEM_QUANTUM_SHIFT; } -static inline void sk_stream_mem_reclaim(struct sock *sk) +static inline int sk_has_account(struct sock *sk) { - if (sk->sk_forward_alloc >= SK_STREAM_MEM_QUANTUM) - __sk_stream_mem_reclaim(sk); + /* return true if protocol supports memory accounting */ + return !!sk->sk_prot->memory_allocated; } -static inline int sk_stream_rmem_schedule(struct sock *sk, struct sk_buff *skb) +static inline int sk_wmem_schedule(struct sock *sk, int size) { - return (int)skb->truesize <= sk->sk_forward_alloc || - sk_stream_mem_schedule(sk, skb->truesize, 1); + if (!sk_has_account(sk)) + return 1; + return size <= sk->sk_forward_alloc || + __sk_mem_schedule(sk, size, SK_MEM_SEND); } -static inline int sk_stream_wmem_schedule(struct sock *sk, int size) +static inline int sk_rmem_schedule(struct sock *sk, int size) { + if (!sk_has_account(sk)) + return 1; return size <= sk->sk_forward_alloc || - sk_stream_mem_schedule(sk, size, 0); + __sk_mem_schedule(sk, size, SK_MEM_RECV); +} + +static inline void sk_mem_reclaim(struct sock *sk) +{ + if (!sk_has_account(sk)) + return; + if (sk->sk_forward_alloc >= SK_MEM_QUANTUM) + __sk_mem_reclaim(sk); +} + +static inline void sk_mem_charge(struct sock *sk, int size) +{ + if (!sk_has_account(sk)) + return; + sk->sk_forward_alloc -= size; +} + +static inline void sk_mem_uncharge(struct sock *sk, int size) +{ + if (!sk_has_account(sk)) + return; + sk->sk_forward_alloc += size; +} + +static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb) +{ + skb_truesize_check(skb); + sock_set_flag(sk, SOCK_QUEUE_SHRUNK); + sk->sk_wmem_queued -= skb->truesize; + sk_mem_uncharge(sk, skb->truesize); + __kfree_skb(skb); } /* Used by processes to "lock" a socket state, so that @@ -1076,12 +1097,6 @@ static inline int sk_can_gso(const struct sock *sk) extern void sk_setup_caps(struct sock *sk, struct dst_entry *dst); -static inline void sk_charge_skb(struct sock *sk, struct sk_buff *skb) -{ - sk->sk_wmem_queued += skb->truesize; - sk->sk_forward_alloc -= skb->truesize; -} - static inline int skb_copy_to_page(struct sock *sk, char __user *from, struct sk_buff *skb, struct page *page, int off, int copy) @@ -1101,7 +1116,7 @@ static inline int skb_copy_to_page(struct sock *sk, char __user *from, skb->data_len += copy; skb->truesize += copy; sk->sk_wmem_queued += copy; - sk->sk_forward_alloc -= copy; + sk_mem_charge(sk, copy); return 0; } @@ -1127,6 +1142,7 @@ static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk) skb->sk = sk; skb->destructor = sock_rfree; atomic_add(skb->truesize, &sk->sk_rmem_alloc); + sk_mem_charge(sk, skb->truesize); } extern void sk_reset_timer(struct sock *sk, struct timer_list* timer, diff --git a/include/net/tcp.h b/include/net/tcp.h index 13ebe11a0af..76286e80205 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1196,8 +1196,8 @@ static inline void tcp_write_queue_purge(struct sock *sk) struct sk_buff *skb; while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) - sk_stream_free_skb(sk, skb); - sk_stream_mem_reclaim(sk); + sk_wmem_free_skb(sk, skb); + sk_mem_reclaim(sk); } static inline struct sk_buff *tcp_write_queue_head(struct sock *sk) diff --git a/net/core/datagram.c b/net/core/datagram.c index 2d733131d7c..8a28fc93b72 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -209,6 +209,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, void skb_free_datagram(struct sock *sk, struct sk_buff *skb) { kfree_skb(skb); + sk_mem_reclaim(sk); } /** @@ -248,6 +249,7 @@ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags) } kfree_skb(skb); + sk_mem_reclaim(sk); return err; } diff --git a/net/core/sock.c b/net/core/sock.c index 118214047ed..8c184c4a381 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -282,6 +282,11 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if (err) goto out; + if (!sk_rmem_schedule(sk, skb->truesize)) { + err = -ENOBUFS; + goto out; + } + skb->dev = NULL; skb_set_owner_r(skb, sk); @@ -1107,7 +1112,9 @@ void sock_rfree(struct sk_buff *skb) { struct sock *sk = skb->sk; + skb_truesize_check(skb); atomic_sub(skb->truesize, &sk->sk_rmem_alloc); + sk_mem_uncharge(skb->sk, skb->truesize); } @@ -1384,6 +1391,103 @@ int sk_wait_data(struct sock *sk, long *timeo) EXPORT_SYMBOL(sk_wait_data); +/** + * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated + * @sk: socket + * @size: memory size to allocate + * @kind: allocation type + * + * If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means + * rmem allocation. This function assumes that protocols which have + * memory_pressure use sk_wmem_queued as write buffer accounting. + */ +int __sk_mem_schedule(struct sock *sk, int size, int kind) +{ + struct proto *prot = sk->sk_prot; + int amt = sk_mem_pages(size); + int allocated; + + sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; + allocated = atomic_add_return(amt, prot->memory_allocated); + + /* Under limit. */ + if (allocated <= prot->sysctl_mem[0]) { + if (prot->memory_pressure && *prot->memory_pressure) + *prot->memory_pressure = 0; + return 1; + } + + /* Under pressure. */ + if (allocated > prot->sysctl_mem[1]) + if (prot->enter_memory_pressure) + prot->enter_memory_pressure(); + + /* Over hard limit. */ + if (allocated > prot->sysctl_mem[2]) + goto suppress_allocation; + + /* guarantee minimum buffer size under pressure */ + if (kind == SK_MEM_RECV) { + if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0]) + return 1; + } else { /* SK_MEM_SEND */ + if (sk->sk_type == SOCK_STREAM) { + if (sk->sk_wmem_queued < prot->sysctl_wmem[0]) + return 1; + } else if (atomic_read(&sk->sk_wmem_alloc) < + prot->sysctl_wmem[0]) + return 1; + } + + if (prot->memory_pressure) { + if (!*prot->memory_pressure || + prot->sysctl_mem[2] > atomic_read(prot->sockets_allocated) * + sk_mem_pages(sk->sk_wmem_queued + + atomic_read(&sk->sk_rmem_alloc) + + sk->sk_forward_alloc)) + return 1; + } + +suppress_allocation: + + if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) { + sk_stream_moderate_sndbuf(sk); + + /* Fail only if socket is _under_ its sndbuf. + * In this case we cannot block, so that we have to fail. + */ + if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) + return 1; + } + + /* Alas. Undo changes. */ + sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM; + atomic_sub(amt, prot->memory_allocated); + return 0; +} + +EXPORT_SYMBOL(__sk_mem_schedule); + +/** + * __sk_reclaim - reclaim memory_allocated + * @sk: socket + */ +void __sk_mem_reclaim(struct sock *sk) +{ + struct proto *prot = sk->sk_prot; + + atomic_sub(sk->sk_forward_alloc / SK_MEM_QUANTUM, + prot->memory_allocated); + sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1; + + if (prot->memory_pressure && *prot->memory_pressure && + (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0])) + *prot->memory_pressure = 0; +} + +EXPORT_SYMBOL(__sk_mem_reclaim); + + /* * Set of default routines for initialising struct proto_ops when * the protocol does not support a particular function. In certain diff --git a/net/core/stream.c b/net/core/stream.c index bf188ffdbdb..4a0ad152c9c 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -172,17 +172,6 @@ do_interrupted: EXPORT_SYMBOL(sk_stream_wait_memory); -void sk_stream_rfree(struct sk_buff *skb) -{ - struct sock *sk = skb->sk; - - skb_truesize_check(skb); - atomic_sub(skb->truesize, &sk->sk_rmem_alloc); - sk->sk_forward_alloc += skb->truesize; -} - -EXPORT_SYMBOL(sk_stream_rfree); - int sk_stream_error(struct sock *sk, int flags, int err) { if (err == -EPIPE) @@ -194,77 +183,6 @@ int sk_stream_error(struct sock *sk, int flags, int err) EXPORT_SYMBOL(sk_stream_error); -void __sk_stream_mem_reclaim(struct sock *sk) -{ - atomic_sub(sk->sk_forward_alloc >> SK_STREAM_MEM_QUANTUM_SHIFT, - sk->sk_prot->memory_allocated); - sk->sk_forward_alloc &= SK_STREAM_MEM_QUANTUM - 1; - if (*sk->sk_prot->memory_pressure && - (atomic_read(sk->sk_prot->memory_allocated) < - sk->sk_prot->sysctl_mem[0])) - *sk->sk_prot->memory_pressure = 0; -} - -EXPORT_SYMBOL(__sk_stream_mem_reclaim); - -int sk_stream_mem_schedule(struct sock *sk, int size, int kind) -{ - int amt = sk_stream_pages(size); - struct proto *prot = sk->sk_prot; - - sk->sk_forward_alloc += amt * SK_STREAM_MEM_QUANTUM; - atomic_add(amt, prot->memory_allocated); - - /* Under limit. */ - if (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0]) { - if (*prot->memory_pressure) - *prot->memory_pressure = 0; - return 1; - } - - /* Over hard limit. */ - if (atomic_read(prot->memory_allocated) > prot->sysctl_mem[2]) { - prot->enter_memory_pressure(); - goto suppress_allocation; - } - - /* Under pressure. */ - if (atomic_read(prot->memory_allocated) > prot->sysctl_mem[1]) - prot->enter_memory_pressure(); - - if (kind) { - if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0]) - return 1; - } else if (sk->sk_wmem_queued < prot->sysctl_wmem[0]) - return 1; - - if (!*prot->memory_pressure || - prot->sysctl_mem[2] > atomic_read(prot->sockets_allocated) * - sk_stream_pages(sk->sk_wmem_queued + - atomic_read(&sk->sk_rmem_alloc) + - sk->sk_forward_alloc)) - return 1; - -suppress_allocation: - - if (!kind) { - sk_stream_moderate_sndbuf(sk); - - /* Fail only if socket is _under_ its sndbuf. - * In this case we cannot block, so that we have to fail. - */ - if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) - return 1; - } - - /* Alas. Undo changes. */ - sk->sk_forward_alloc -= amt * SK_STREAM_MEM_QUANTUM; - atomic_sub(amt, prot->memory_allocated); - return 0; -} - -EXPORT_SYMBOL(sk_stream_mem_schedule); - void sk_stream_kill_queues(struct sock *sk) { /* First the read buffer. */ @@ -277,7 +195,7 @@ void sk_stream_kill_queues(struct sock *sk) BUG_TRAP(skb_queue_empty(&sk->sk_write_queue)); /* Account for returned memory. */ - sk_stream_mem_reclaim(sk); + sk_mem_reclaim(sk); BUG_TRAP(!sk->sk_wmem_queued); BUG_TRAP(!sk->sk_forward_alloc); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index fdaf965a679..2cbfa6df797 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -308,7 +308,7 @@ struct tcp_splice_state { /* * Pressure flag: try to collapse. * Technical note: it is used by multiple contexts non atomically. - * All the sk_stream_mem_schedule() is of this nature: accounting + * All the __sk_mem_schedule() is of this nature: accounting * is strict, actions are advisory and have some latency. */ int tcp_memory_pressure __read_mostly; @@ -485,7 +485,8 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb) tcb->sacked = 0; skb_header_release(skb); tcp_add_write_queue_tail(sk, skb); - sk_charge_skb(sk, skb); + sk->sk_wmem_queued += skb->truesize; + sk_mem_charge(sk, skb->truesize); if (tp->nonagle & TCP_NAGLE_PUSH) tp->nonagle &= ~TCP_NAGLE_PUSH; } @@ -638,7 +639,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp) skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp); if (skb) { - if (sk_stream_wmem_schedule(sk, skb->truesize)) { + if (sk_wmem_schedule(sk, skb->truesize)) { /* * Make sure that we have exactly size bytes * available to the caller, no more, no less. @@ -707,7 +708,7 @@ new_segment: tcp_mark_push(tp, skb); goto new_segment; } - if (!sk_stream_wmem_schedule(sk, copy)) + if (!sk_wmem_schedule(sk, copy)) goto wait_for_memory; if (can_coalesce) { @@ -721,7 +722,7 @@ new_segment: skb->data_len += copy; skb->truesize += copy; sk->sk_wmem_queued += copy; - sk->sk_forward_alloc -= copy; + sk_mem_charge(sk, copy); skb->ip_summed = CHECKSUM_PARTIAL; tp->write_seq += copy; TCP_SKB_CB(skb)->end_seq += copy; @@ -928,7 +929,7 @@ new_segment: if (copy > PAGE_SIZE - off) copy = PAGE_SIZE - off; - if (!sk_stream_wmem_schedule(sk, copy)) + if (!sk_wmem_schedule(sk, copy)) goto wait_for_memory; if (!page) { @@ -1019,7 +1020,7 @@ do_fault: * reset, where we can be unlinking the send_head. */ tcp_check_send_head(sk, skb); - sk_stream_free_skb(sk, skb); + sk_wmem_free_skb(sk, skb); } do_error: @@ -1738,7 +1739,7 @@ void tcp_close(struct sock *sk, long timeout) __kfree_skb(skb); } - sk_stream_mem_reclaim(sk); + sk_mem_reclaim(sk); /* As outlined in RFC 2525, section 2.17, we send a RST here because * data was lost. To witness the awful effects of the old behavior of @@ -1841,7 +1842,7 @@ adjudge_to_death: } } if (sk->sk_state != TCP_CLOSE) { - sk_stream_mem_reclaim(sk); + sk_mem_reclaim(sk); if (tcp_too_many_orphans(sk, atomic_read(sk->sk_prot->orphan_count))) { if (net_ratelimit()) @@ -2658,11 +2659,11 @@ void __init tcp_init(void) limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7); max_share = min(4UL*1024*1024, limit); - sysctl_tcp_wmem[0] = SK_STREAM_MEM_QUANTUM; + sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; sysctl_tcp_wmem[1] = 16*1024; sysctl_tcp_wmem[2] = max(64*1024, max_share); - sysctl_tcp_rmem[0] = SK_STREAM_MEM_QUANTUM; + sysctl_tcp_rmem[0] = SK_MEM_QUANTUM; sysctl_tcp_rmem[1] = 87380; sysctl_tcp_rmem[2] = max(87380, max_share); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index efea9873208..722c9cbb91e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -591,7 +591,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb) * restart window, so that we send ACKs quickly. */ tcp_incr_quickack(sk); - sk_stream_mem_reclaim(sk); + sk_mem_reclaim(sk); } } icsk->icsk_ack.lrcvtime = now; @@ -2851,7 +2851,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) break; tcp_unlink_write_queue(skb, sk); - sk_stream_free_skb(sk, skb); + sk_wmem_free_skb(sk, skb); tcp_clear_all_retrans_hints(tp); } @@ -3567,7 +3567,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th) __skb_queue_purge(&tp->out_of_order_queue); if (tcp_is_sack(tp)) tcp_sack_reset(&tp->rx_opt); - sk_stream_mem_reclaim(sk); + sk_mem_reclaim(sk); if (!sock_flag(sk, SOCK_DEAD)) { sk->sk_state_change(sk); @@ -3850,12 +3850,12 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) queue_and_out: if (eaten < 0 && (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || - !sk_stream_rmem_schedule(sk, skb))) { + !sk_rmem_schedule(sk, skb->truesize))) { if (tcp_prune_queue(sk) < 0 || - !sk_stream_rmem_schedule(sk, skb)) + !sk_rmem_schedule(sk, skb->truesize)) goto drop; } - sk_stream_set_owner_r(skb, sk); + skb_set_owner_r(skb, sk); __skb_queue_tail(&sk->sk_receive_queue, skb); } tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; @@ -3924,9 +3924,9 @@ drop: TCP_ECN_check_ce(tp, skb); if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || - !sk_stream_rmem_schedule(sk, skb)) { + !sk_rmem_schedule(sk, skb->truesize)) { if (tcp_prune_queue(sk) < 0 || - !sk_stream_rmem_schedule(sk, skb)) + !sk_rmem_schedule(sk, skb->truesize)) goto drop; } @@ -3937,7 +3937,7 @@ drop: SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n", tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); - sk_stream_set_owner_r(skb, sk); + skb_set_owner_r(skb, sk); if (!skb_peek(&tp->out_of_order_queue)) { /* Initial out of order segment, build 1 SACK. */ @@ -4079,7 +4079,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start; __skb_insert(nskb, skb->prev, skb, list); - sk_stream_set_owner_r(nskb, sk); + skb_set_owner_r(nskb, sk); /* Copy data, releasing collapsed skbs. */ while (copy > 0) { @@ -4177,7 +4177,7 @@ static int tcp_prune_queue(struct sock *sk) sk->sk_receive_queue.next, (struct sk_buff*)&sk->sk_receive_queue, tp->copied_seq, tp->rcv_nxt); - sk_stream_mem_reclaim(sk); + sk_mem_reclaim(sk); if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) return 0; @@ -4197,7 +4197,7 @@ static int tcp_prune_queue(struct sock *sk) */ if (tcp_is_sack(tp)) tcp_sack_reset(&tp->rx_opt); - sk_stream_mem_reclaim(sk); + sk_mem_reclaim(sk); } if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) @@ -4699,7 +4699,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, /* Bulk data transfer: receiver */ __skb_pull(skb,tcp_header_len); __skb_queue_tail(&sk->sk_receive_queue, skb); - sk_stream_set_owner_r(skb, sk); + skb_set_owner_r(skb, sk); tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 9058e0a2510..7a4834a2ae8 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -637,7 +637,8 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) tp->write_seq = TCP_SKB_CB(skb)->end_seq; skb_header_release(skb); tcp_add_write_queue_tail(sk, skb); - sk_charge_skb(sk, skb); + sk->sk_wmem_queued += skb->truesize; + sk_mem_charge(sk, skb->truesize); } static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) @@ -701,7 +702,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss if (buff == NULL) return -ENOMEM; /* We'll just try again later. */ - sk_charge_skb(sk, buff); + sk->sk_wmem_queued += buff->truesize; + sk_mem_charge(sk, buff->truesize); nlen = skb->len - len - nsize; buff->truesize += nlen; skb->truesize -= nlen; @@ -825,7 +827,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) skb->truesize -= len; sk->sk_wmem_queued -= len; - sk->sk_forward_alloc += len; + sk_mem_uncharge(sk, len); sock_set_flag(sk, SOCK_QUEUE_SHRUNK); /* Any change of skb->len requires recalculation of tso @@ -1197,7 +1199,8 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, if (unlikely(buff == NULL)) return -ENOMEM; - sk_charge_skb(sk, buff); + sk->sk_wmem_queued += buff->truesize; + sk_mem_charge(sk, buff->truesize); buff->truesize += nlen; skb->truesize -= nlen; @@ -1350,7 +1353,8 @@ static int tcp_mtu_probe(struct sock *sk) /* We're allowed to probe. Build it now. */ if ((nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC)) == NULL) return -1; - sk_charge_skb(sk, nskb); + sk->sk_wmem_queued += nskb->truesize; + sk_mem_charge(sk, nskb->truesize); skb = tcp_send_head(sk); @@ -1377,7 +1381,7 @@ static int tcp_mtu_probe(struct sock *sk) * Throw it away. */ TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags; tcp_unlink_write_queue(skb, sk); - sk_stream_free_skb(sk, skb); + sk_wmem_free_skb(sk, skb); } else { TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); @@ -1744,7 +1748,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m /* changed transmit queue under us so clear hints */ tcp_clear_retrans_hints_partial(tp); - sk_stream_free_skb(sk, next_skb); + sk_wmem_free_skb(sk, next_skb); } } @@ -2139,8 +2143,9 @@ int tcp_send_synack(struct sock *sk) tcp_unlink_write_queue(skb, sk); skb_header_release(nskb); __tcp_add_write_queue_head(sk, nskb); - sk_stream_free_skb(sk, skb); - sk_charge_skb(sk, nskb); + sk_wmem_free_skb(sk, skb); + sk->sk_wmem_queued += nskb->truesize; + sk_mem_charge(sk, nskb->truesize); skb = nskb; } @@ -2343,7 +2348,8 @@ int tcp_connect(struct sock *sk) tp->retrans_stamp = TCP_SKB_CB(buff)->when; skb_header_release(buff); __tcp_add_write_queue_tail(sk, buff); - sk_charge_skb(sk, buff); + sk->sk_wmem_queued += buff->truesize; + sk_mem_charge(sk, buff->truesize); tp->packets_out += tcp_skb_pcount(buff); tcp_transmit_skb(sk, buff, 1, GFP_KERNEL); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index ea85bc00c61..17931be6d58 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -186,7 +186,7 @@ static void tcp_delack_timer(unsigned long data) goto out_unlock; } - sk_stream_mem_reclaim(sk); + sk_mem_reclaim(sk); if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) goto out; @@ -226,7 +226,7 @@ static void tcp_delack_timer(unsigned long data) out: if (tcp_memory_pressure) - sk_stream_mem_reclaim(sk); + sk_mem_reclaim(sk); out_unlock: bh_unlock_sock(sk); sock_put(sk); @@ -420,7 +420,7 @@ static void tcp_write_timer(unsigned long data) TCP_CHECK_TIMER(sk); out: - sk_stream_mem_reclaim(sk); + sk_mem_reclaim(sk); out_unlock: bh_unlock_sock(sk); sock_put(sk); @@ -514,7 +514,7 @@ static void tcp_keepalive_timer (unsigned long data) } TCP_CHECK_TIMER(sk); - sk_stream_mem_reclaim(sk); + sk_mem_reclaim(sk); resched: inet_csk_reset_keepalive_timer (sk, elapsed); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index e466e00b9a9..b9219649502 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1109,7 +1109,7 @@ SCTP_STATIC __init int sctp_init(void) sysctl_sctp_rmem[1] = (1500 *(sizeof(struct sk_buff) + 1)); sysctl_sctp_rmem[2] = max(sysctl_sctp_rmem[1], max_share); - sysctl_sctp_wmem[0] = SK_STREAM_MEM_QUANTUM; + sysctl_sctp_wmem[0] = SK_MEM_QUANTUM; sysctl_sctp_wmem[1] = 16*1024; sysctl_sctp_wmem[2] = max(64*1024, max_share); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 511d8c9a171..b1267519183 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -5844,7 +5844,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, /* * Also try to renege to limit our memory usage in the event that * we are under memory pressure - * If we can't renege, don't worry about it, the sk_stream_rmem_schedule + * If we can't renege, don't worry about it, the sk_rmem_schedule * in sctp_ulpevent_make_rcvmsg will drop the frame if we grow our * memory usage too much */ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 7a8650f01d0..710df67a678 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -174,7 +174,8 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk) sizeof(struct sctp_chunk); atomic_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc); - sk_charge_skb(sk, chunk->skb); + sk->sk_wmem_queued += chunk->skb->truesize; + sk_mem_charge(sk, chunk->skb->truesize); } /* Verify that this is a valid address. */ @@ -6035,10 +6036,10 @@ static void sctp_wfree(struct sk_buff *skb) atomic_sub(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc); /* - * This undoes what is done via sk_charge_skb + * This undoes what is done via sctp_set_owner_w and sk_mem_charge */ sk->sk_wmem_queued -= skb->truesize; - sk->sk_forward_alloc += skb->truesize; + sk_mem_uncharge(sk, skb->truesize); sock_wfree(skb); __sctp_write_space(asoc); @@ -6059,9 +6060,9 @@ void sctp_sock_rfree(struct sk_buff *skb) atomic_sub(event->rmem_len, &sk->sk_rmem_alloc); /* - * Mimic the behavior of sk_stream_rfree + * Mimic the behavior of sock_rfree */ - sk->sk_forward_alloc += event->rmem_len; + sk_mem_uncharge(sk, event->rmem_len); } diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 307314356e1..047c27df98f 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -700,7 +700,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, if (rx_count >= asoc->base.sk->sk_rcvbuf) { if ((asoc->base.sk->sk_userlocks & SOCK_RCVBUF_LOCK) || - (!sk_stream_rmem_schedule(asoc->base.sk, chunk->skb))) + (!sk_rmem_schedule(asoc->base.sk, chunk->skb->truesize))) goto fail; } diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 1733fa29a50..c25caefa3bc 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -1046,7 +1046,7 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, sctp_ulpq_partial_delivery(ulpq, chunk, gfp); } - sk_stream_mem_reclaim(asoc->base.sk); + sk_mem_reclaim(asoc->base.sk); return; } -- cgit v1.2.3-70-g09d2