From 2a91525c20d3aae15b33c189514b9e20e30ef8a8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 May 2009 11:30:05 +0000 Subject: net: net/core/sock.c cleanup Pure style cleanup patch. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/sock.c | 103 ++++++++++++++++++++++++-------------------------------- 1 file changed, 44 insertions(+), 59 deletions(-) (limited to 'net/core/sock.c') diff --git a/net/core/sock.c b/net/core/sock.c index 7dbf3ffb35c..58dec9dff99 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -212,6 +212,7 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; /* Maximal space eaten by iovec or ancilliary data plus some space */ int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); +EXPORT_SYMBOL(sysctl_optmem_max); static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) { @@ -444,7 +445,7 @@ static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool) int sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen) { - struct sock *sk=sock->sk; + struct sock *sk = sock->sk; int val; int valbool; struct linger ling; @@ -463,15 +464,15 @@ int sock_setsockopt(struct socket *sock, int level, int optname, if (get_user(val, (int __user *)optval)) return -EFAULT; - valbool = val?1:0; + valbool = val ? 1 : 0; lock_sock(sk); - switch(optname) { + switch (optname) { case SO_DEBUG: - if (val && !capable(CAP_NET_ADMIN)) { + if (val && !capable(CAP_NET_ADMIN)) ret = -EACCES; - } else + else sock_valbool_flag(sk, SOCK_DBG, valbool); break; case SO_REUSEADDR: @@ -582,7 +583,7 @@ set_rcvbuf: ret = -EINVAL; /* 1003.1g */ break; } - if (copy_from_user(&ling,optval,sizeof(ling))) { + if (copy_from_user(&ling, optval, sizeof(ling))) { ret = -EFAULT; break; } @@ -690,9 +691,8 @@ set_rcvbuf: case SO_MARK: if (!capable(CAP_NET_ADMIN)) ret = -EPERM; - else { + else sk->sk_mark = val; - } break; /* We implement the SO_SNDLOWAT etc to @@ -704,6 +704,7 @@ set_rcvbuf: release_sock(sk); return ret; } +EXPORT_SYMBOL(sock_setsockopt); int sock_getsockopt(struct socket *sock, int level, int optname, @@ -727,7 +728,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, memset(&v, 0, sizeof(v)); - switch(optname) { + switch (optname) { case SO_DEBUG: v.val = sock_flag(sk, SOCK_DBG); break; @@ -762,7 +763,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, case SO_ERROR: v.val = -sock_error(sk); - if (v.val==0) + if (v.val == 0) v.val = xchg(&sk->sk_err_soft, 0); break; @@ -816,7 +817,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; case SO_RCVTIMEO: - lv=sizeof(struct timeval); + lv = sizeof(struct timeval); if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) { v.tm.tv_sec = 0; v.tm.tv_usec = 0; @@ -827,7 +828,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; case SO_SNDTIMEO: - lv=sizeof(struct timeval); + lv = sizeof(struct timeval); if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) { v.tm.tv_sec = 0; v.tm.tv_usec = 0; @@ -842,7 +843,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; case SO_SNDLOWAT: - v.val=1; + v.val = 1; break; case SO_PASSCRED: @@ -1002,6 +1003,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, return sk; } +EXPORT_SYMBOL(sk_alloc); void sk_free(struct sock *sk) { @@ -1026,6 +1028,7 @@ void sk_free(struct sock *sk) put_net(sock_net(sk)); sk_prot_free(sk->sk_prot_creator, sk); } +EXPORT_SYMBOL(sk_free); /* * Last sock_put should drop referrence to sk->sk_net. It has already @@ -1126,7 +1129,6 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) out: return newsk; } - EXPORT_SYMBOL_GPL(sk_clone); void sk_setup_caps(struct sock *sk, struct dst_entry *dst) @@ -1177,6 +1179,7 @@ void sock_wfree(struct sk_buff *skb) sk->sk_write_space(sk); sock_put(sk); } +EXPORT_SYMBOL(sock_wfree); /* * Read buffer destructor automatically called from kfree_skb. @@ -1188,6 +1191,7 @@ void sock_rfree(struct sk_buff *skb) atomic_sub(skb->truesize, &sk->sk_rmem_alloc); sk_mem_uncharge(skb->sk, skb->truesize); } +EXPORT_SYMBOL(sock_rfree); int sock_i_uid(struct sock *sk) @@ -1199,6 +1203,7 @@ int sock_i_uid(struct sock *sk) read_unlock(&sk->sk_callback_lock); return uid; } +EXPORT_SYMBOL(sock_i_uid); unsigned long sock_i_ino(struct sock *sk) { @@ -1209,6 +1214,7 @@ unsigned long sock_i_ino(struct sock *sk) read_unlock(&sk->sk_callback_lock); return ino; } +EXPORT_SYMBOL(sock_i_ino); /* * Allocate a skb from the socket's send buffer. @@ -1217,7 +1223,7 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, gfp_t priority) { if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { - struct sk_buff * skb = alloc_skb(size, priority); + struct sk_buff *skb = alloc_skb(size, priority); if (skb) { skb_set_owner_w(skb, sk); return skb; @@ -1225,6 +1231,7 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, } return NULL; } +EXPORT_SYMBOL(sock_wmalloc); /* * Allocate a skb from the socket's receive buffer. @@ -1261,6 +1268,7 @@ void *sock_kmalloc(struct sock *sk, int size, gfp_t priority) } return NULL; } +EXPORT_SYMBOL(sock_kmalloc); /* * Free an option memory block. @@ -1270,11 +1278,12 @@ void sock_kfree_s(struct sock *sk, void *mem, int size) kfree(mem); atomic_sub(size, &sk->sk_omem_alloc); } +EXPORT_SYMBOL(sock_kfree_s); /* It is almost wait_for_tcp_memory minus release_sock/lock_sock. I think, these locks should be removed for datagram sockets. */ -static long sock_wait_for_wmem(struct sock * sk, long timeo) +static long sock_wait_for_wmem(struct sock *sk, long timeo) { DEFINE_WAIT(wait); @@ -1392,6 +1401,7 @@ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, { return sock_alloc_send_pskb(sk, size, 0, noblock, errcode); } +EXPORT_SYMBOL(sock_alloc_send_skb); static void __lock_sock(struct sock *sk) { @@ -1460,7 +1470,6 @@ int sk_wait_data(struct sock *sk, long *timeo) finish_wait(sk->sk_sleep, &wait); return rc; } - EXPORT_SYMBOL(sk_wait_data); /** @@ -1541,7 +1550,6 @@ suppress_allocation: atomic_sub(amt, prot->memory_allocated); return 0; } - EXPORT_SYMBOL(__sk_mem_schedule); /** @@ -1560,7 +1568,6 @@ void __sk_mem_reclaim(struct sock *sk) (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0])) *prot->memory_pressure = 0; } - EXPORT_SYMBOL(__sk_mem_reclaim); @@ -1575,78 +1582,92 @@ int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len) { return -EOPNOTSUPP; } +EXPORT_SYMBOL(sock_no_bind); int sock_no_connect(struct socket *sock, struct sockaddr *saddr, int len, int flags) { return -EOPNOTSUPP; } +EXPORT_SYMBOL(sock_no_connect); int sock_no_socketpair(struct socket *sock1, struct socket *sock2) { return -EOPNOTSUPP; } +EXPORT_SYMBOL(sock_no_socketpair); int sock_no_accept(struct socket *sock, struct socket *newsock, int flags) { return -EOPNOTSUPP; } +EXPORT_SYMBOL(sock_no_accept); int sock_no_getname(struct socket *sock, struct sockaddr *saddr, int *len, int peer) { return -EOPNOTSUPP; } +EXPORT_SYMBOL(sock_no_getname); -unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt) +unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt) { return 0; } +EXPORT_SYMBOL(sock_no_poll); int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { return -EOPNOTSUPP; } +EXPORT_SYMBOL(sock_no_ioctl); int sock_no_listen(struct socket *sock, int backlog) { return -EOPNOTSUPP; } +EXPORT_SYMBOL(sock_no_listen); int sock_no_shutdown(struct socket *sock, int how) { return -EOPNOTSUPP; } +EXPORT_SYMBOL(sock_no_shutdown); int sock_no_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen) { return -EOPNOTSUPP; } +EXPORT_SYMBOL(sock_no_setsockopt); int sock_no_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { return -EOPNOTSUPP; } +EXPORT_SYMBOL(sock_no_getsockopt); int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t len) { return -EOPNOTSUPP; } +EXPORT_SYMBOL(sock_no_sendmsg); int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t len, int flags) { return -EOPNOTSUPP; } +EXPORT_SYMBOL(sock_no_recvmsg); int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma) { /* Mirror missing mmap method error code */ return -ENODEV; } +EXPORT_SYMBOL(sock_no_mmap); ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) { @@ -1660,6 +1681,7 @@ ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, siz kunmap(page); return res; } +EXPORT_SYMBOL(sock_no_sendpage); /* * Default Socket Callbacks @@ -1723,6 +1745,7 @@ void sk_send_sigurg(struct sock *sk) if (send_sigurg(&sk->sk_socket->file->f_owner)) sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI); } +EXPORT_SYMBOL(sk_send_sigurg); void sk_reset_timer(struct sock *sk, struct timer_list* timer, unsigned long expires) @@ -1730,7 +1753,6 @@ void sk_reset_timer(struct sock *sk, struct timer_list* timer, if (!mod_timer(timer, expires)) sock_hold(sk); } - EXPORT_SYMBOL(sk_reset_timer); void sk_stop_timer(struct sock *sk, struct timer_list* timer) @@ -1738,7 +1760,6 @@ void sk_stop_timer(struct sock *sk, struct timer_list* timer) if (timer_pending(timer) && del_timer(timer)) __sock_put(sk); } - EXPORT_SYMBOL(sk_stop_timer); void sock_init_data(struct socket *sock, struct sock *sk) @@ -1797,6 +1818,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) atomic_set(&sk->sk_refcnt, 1); atomic_set(&sk->sk_drops, 0); } +EXPORT_SYMBOL(sock_init_data); void lock_sock_nested(struct sock *sk, int subclass) { @@ -1812,7 +1834,6 @@ void lock_sock_nested(struct sock *sk, int subclass) mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_); local_bh_enable(); } - EXPORT_SYMBOL(lock_sock_nested); void release_sock(struct sock *sk) @@ -1895,7 +1916,6 @@ int sock_common_getsockopt(struct socket *sock, int level, int optname, return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen); } - EXPORT_SYMBOL(sock_common_getsockopt); #ifdef CONFIG_COMPAT @@ -1925,7 +1945,6 @@ int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock, msg->msg_namelen = addr_len; return err; } - EXPORT_SYMBOL(sock_common_recvmsg); /* @@ -1938,7 +1957,6 @@ int sock_common_setsockopt(struct socket *sock, int level, int optname, return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen); } - EXPORT_SYMBOL(sock_common_setsockopt); #ifdef CONFIG_COMPAT @@ -1989,7 +2007,6 @@ void sk_common_release(struct sock *sk) sk_refcnt_debug_release(sk); sock_put(sk); } - EXPORT_SYMBOL(sk_common_release); static DEFINE_RWLOCK(proto_list_lock); @@ -2171,7 +2188,6 @@ out_free_sock_slab: out: return -ENOBUFS; } - EXPORT_SYMBOL(proto_register); void proto_unregister(struct proto *prot) @@ -2198,7 +2214,6 @@ void proto_unregister(struct proto *prot) prot->twsk_prot->twsk_slab = NULL; } } - EXPORT_SYMBOL(proto_unregister); #ifdef CONFIG_PROC_FS @@ -2324,33 +2339,3 @@ static int __init proto_init(void) subsys_initcall(proto_init); #endif /* PROC_FS */ - -EXPORT_SYMBOL(sk_alloc); -EXPORT_SYMBOL(sk_free); -EXPORT_SYMBOL(sk_send_sigurg); -EXPORT_SYMBOL(sock_alloc_send_skb); -EXPORT_SYMBOL(sock_init_data); -EXPORT_SYMBOL(sock_kfree_s); -EXPORT_SYMBOL(sock_kmalloc); -EXPORT_SYMBOL(sock_no_accept); -EXPORT_SYMBOL(sock_no_bind); -EXPORT_SYMBOL(sock_no_connect); -EXPORT_SYMBOL(sock_no_getname); -EXPORT_SYMBOL(sock_no_getsockopt); -EXPORT_SYMBOL(sock_no_ioctl); -EXPORT_SYMBOL(sock_no_listen); -EXPORT_SYMBOL(sock_no_mmap); -EXPORT_SYMBOL(sock_no_poll); -EXPORT_SYMBOL(sock_no_recvmsg); -EXPORT_SYMBOL(sock_no_sendmsg); -EXPORT_SYMBOL(sock_no_sendpage); -EXPORT_SYMBOL(sock_no_setsockopt); -EXPORT_SYMBOL(sock_no_shutdown); -EXPORT_SYMBOL(sock_no_socketpair); -EXPORT_SYMBOL(sock_rfree); -EXPORT_SYMBOL(sock_setsockopt); -EXPORT_SYMBOL(sock_wfree); -EXPORT_SYMBOL(sock_wmalloc); -EXPORT_SYMBOL(sock_i_uid); -EXPORT_SYMBOL(sock_i_ino); -EXPORT_SYMBOL(sysctl_optmem_max); -- cgit v1.2.3-70-g09d2 From fcb94e422479da52ed90bab230c59617a0462416 Mon Sep 17 00:00:00 2001 From: Sergey Lapin Date: Mon, 8 Jun 2009 12:18:47 +0000 Subject: Add constants for the ieee 802.15.4 stack IEEE 802.15.4 stack requires several constants to be defined/adjusted. Signed-off-by: Dmitry Eremin-Solenikov Signed-off-by: Sergey Lapin Signed-off-by: David S. Miller --- include/linux/if_arp.h | 2 ++ include/linux/if_ether.h | 1 + include/linux/socket.h | 4 +++- net/core/dev.c | 6 ++++-- net/core/sock.c | 3 +++ 5 files changed, 13 insertions(+), 3 deletions(-) (limited to 'net/core/sock.c') diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index 5ff89809a58..b554300ef8b 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -86,6 +86,8 @@ #define ARPHRD_IEEE80211 801 /* IEEE 802.11 */ #define ARPHRD_IEEE80211_PRISM 802 /* IEEE 802.11 + Prism2 header */ #define ARPHRD_IEEE80211_RADIOTAP 803 /* IEEE 802.11 + radiotap header */ +#define ARPHRD_IEEE802154 804 +#define ARPHRD_IEEE802154_PHY 805 #define ARPHRD_PHONET 820 /* PhoNet media type */ #define ARPHRD_PHONET_PIPE 821 /* PhoNet pipe header */ diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index cfe4fe1b713..11a60e4f0a6 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -106,6 +106,7 @@ #define ETH_P_DSA 0x001B /* Distributed Switch Arch. */ #define ETH_P_TRAILER 0x001C /* Trailer switch tagging */ #define ETH_P_PHONET 0x00F5 /* Nokia Phonet frames */ +#define ETH_P_IEEE802154 0x00F6 /* IEEE802.15.4 frame */ /* * This is an Ethernet frame header. diff --git a/include/linux/socket.h b/include/linux/socket.h index d2310cb45d2..3b461dffe24 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -194,7 +194,8 @@ struct ucred { #define AF_RXRPC 33 /* RxRPC sockets */ #define AF_ISDN 34 /* mISDN sockets */ #define AF_PHONET 35 /* Phonet sockets */ -#define AF_MAX 36 /* For now.. */ +#define AF_IEEE802154 36 /* IEEE802154 sockets */ +#define AF_MAX 37 /* For now.. */ /* Protocol families, same as address families. */ #define PF_UNSPEC AF_UNSPEC @@ -233,6 +234,7 @@ struct ucred { #define PF_RXRPC AF_RXRPC #define PF_ISDN AF_ISDN #define PF_PHONET AF_PHONET +#define PF_IEEE802154 AF_IEEE802154 #define PF_MAX AF_MAX /* Maximum queue length specifiable by listen. */ diff --git a/net/core/dev.c b/net/core/dev.c index 81b392ef511..11560e3258b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -269,7 +269,8 @@ static const unsigned short netdev_lock_type[] = ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, - ARPHRD_PHONET_PIPE, ARPHRD_VOID, ARPHRD_NONE}; + ARPHRD_PHONET_PIPE, ARPHRD_IEEE802154, ARPHRD_IEEE802154_PHY, + ARPHRD_VOID, ARPHRD_NONE}; static const char *netdev_lock_name[] = {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25", @@ -286,7 +287,8 @@ static const char *netdev_lock_name[] = "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL", "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", - "_xmit_PHONET_PIPE", "_xmit_VOID", "_xmit_NONE"}; + "_xmit_PHONET_PIPE", "_xmit_IEEE802154", "_xmit_IEEE802154_PHY", + "_xmit_VOID", "_xmit_NONE"}; static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)]; diff --git a/net/core/sock.c b/net/core/sock.c index 58dec9dff99..04e35eb2e73 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -155,6 +155,7 @@ static const char *af_family_key_strings[AF_MAX+1] = { "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" , "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , + "sk_lock-AF_IEEE802154", "sk_lock-AF_MAX" }; static const char *af_family_slock_key_strings[AF_MAX+1] = { @@ -170,6 +171,7 @@ static const char *af_family_slock_key_strings[AF_MAX+1] = { "slock-27" , "slock-28" , "slock-AF_CAN" , "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , + "slock-AF_IEEE802154", "slock-AF_MAX" }; static const char *af_family_clock_key_strings[AF_MAX+1] = { @@ -185,6 +187,7 @@ static const char *af_family_clock_key_strings[AF_MAX+1] = { "clock-27" , "clock-28" , "clock-AF_CAN" , "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , + "clock-AF_IEEE802154", "clock-AF_MAX" }; -- cgit v1.2.3-70-g09d2 From 2b85a34e911bf483c27cfdd124aeb1605145dc80 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 11 Jun 2009 02:55:43 -0700 Subject: net: No more expensive sock_hold()/sock_put() on each tx One of the problem with sock memory accounting is it uses a pair of sock_hold()/sock_put() for each transmitted packet. This slows down bidirectional flows because the receive path also needs to take a refcount on socket and might use a different cpu than transmit path or transmit completion path. So these two atomic operations also trigger cache line bounces. We can see this in tx or tx/rx workloads (media gateways for example), where sock_wfree() can be in top five functions in profiles. We use this sock_hold()/sock_put() so that sock freeing is delayed until all tx packets are completed. As we also update sk_wmem_alloc, we could offset sk_wmem_alloc by one unit at init time, until sk_free() is called. Once sk_free() is called, we atomic_dec_and_test(sk_wmem_alloc) to decrement initial offset and atomicaly check if any packets are in flight. skb_set_owner_w() doesnt call sock_hold() anymore sock_wfree() doesnt call sock_put() anymore, but check if sk_wmem_alloc reached 0 to perform the final freeing. Drawback is that a skb->truesize error could lead to unfreeable sockets, or even worse, prematurely calling __sk_free() on a live socket. Nice speedups on SMP. tbench for example, going from 2691 MB/s to 2711 MB/s on my 8 cpu dev machine, even if tbench was not really hitting sk_refcnt contention point. 5 % speedup on a UDP transmit workload (depends on number of flows), lowering TX completion cpu usage. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 6 +++++- net/core/sock.c | 29 +++++++++++++++++++++++++---- net/ipv4/ip_output.c | 1 - net/ipv6/ip6_output.c | 1 - 4 files changed, 30 insertions(+), 7 deletions(-) (limited to 'net/core/sock.c') diff --git a/include/net/sock.h b/include/net/sock.h index 4bb1ff9fd15..010e14a93c9 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1217,9 +1217,13 @@ static inline int skb_copy_to_page(struct sock *sk, char __user *from, static inline void skb_set_owner_w(struct sk_buff *skb, struct sock *sk) { - sock_hold(sk); skb->sk = sk; skb->destructor = sock_wfree; + /* + * We used to take a refcount on sk, but following operation + * is enough to guarantee sk_free() wont free this sock until + * all in-flight packets are completed + */ atomic_add(skb->truesize, &sk->sk_wmem_alloc); } diff --git a/net/core/sock.c b/net/core/sock.c index 04e35eb2e73..06e26b77ad9 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1008,7 +1008,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, } EXPORT_SYMBOL(sk_alloc); -void sk_free(struct sock *sk) +static void __sk_free(struct sock *sk) { struct sk_filter *filter; @@ -1031,6 +1031,17 @@ void sk_free(struct sock *sk) put_net(sock_net(sk)); sk_prot_free(sk->sk_prot_creator, sk); } + +void sk_free(struct sock *sk) +{ + /* + * We substract one from sk_wmem_alloc and can know if + * some packets are still in some tx queue. + * If not null, sock_wfree() will call __sk_free(sk) later + */ + if (atomic_dec_and_test(&sk->sk_wmem_alloc)) + __sk_free(sk); +} EXPORT_SYMBOL(sk_free); /* @@ -1071,7 +1082,10 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL; atomic_set(&newsk->sk_rmem_alloc, 0); - atomic_set(&newsk->sk_wmem_alloc, 0); + /* + * sk_wmem_alloc set to one (see sk_free() and sock_wfree()) + */ + atomic_set(&newsk->sk_wmem_alloc, 1); atomic_set(&newsk->sk_omem_alloc, 0); skb_queue_head_init(&newsk->sk_receive_queue); skb_queue_head_init(&newsk->sk_write_queue); @@ -1175,12 +1189,18 @@ void __init sk_init(void) void sock_wfree(struct sk_buff *skb) { struct sock *sk = skb->sk; + int res; /* In case it might be waiting for more memory. */ - atomic_sub(skb->truesize, &sk->sk_wmem_alloc); + res = atomic_sub_return(skb->truesize, &sk->sk_wmem_alloc); if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) sk->sk_write_space(sk); - sock_put(sk); + /* + * if sk_wmem_alloc reached 0, we are last user and should + * free this sock, as sk_free() call could not do it. + */ + if (res == 0) + __sk_free(sk); } EXPORT_SYMBOL(sock_wfree); @@ -1819,6 +1839,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_stamp = ktime_set(-1L, 0); atomic_set(&sk->sk_refcnt, 1); + atomic_set(&sk->sk_wmem_alloc, 1); atomic_set(&sk->sk_drops, 0); } EXPORT_SYMBOL(sock_init_data); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 9248d2807ba..24702628266 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -498,7 +498,6 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) BUG_ON(frag->sk); if (skb->sk) { - sock_hold(skb->sk); frag->sk = skb->sk; frag->destructor = sock_wfree; truesizes += frag->truesize; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index db6c7224a86..7c76e3d1821 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -680,7 +680,6 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) BUG_ON(frag->sk); if (skb->sk) { - sock_hold(skb->sk); frag->sk = skb->sk; frag->destructor = sock_wfree; truesizes += frag->truesize; -- cgit v1.2.3-70-g09d2 From a98b65a3ad71e702e760bc63f57684301628e837 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Thu, 26 Feb 2009 14:46:57 +0100 Subject: net: annotate struct sock bitfield 2009/2/24 Ingo Molnar : > ok, this is the last warning i have from today's overnight -tip > testruns - a 32-bit system warning in sock_init_data(): > > [ 2.610389] NET: Registered protocol family 16 > [ 2.616138] initcall netlink_proto_init+0x0/0x170 returned 0 after 7812 usecs > [ 2.620010] WARNING: kmemcheck: Caught 32-bit read from uninitialized memory (f642c184) > [ 2.624002] 010000000200000000000000604990c000000000000000000000000000000000 > [ 2.634076] i i i i i i u u i i i i i i i i i i i i i i i i i i i i i i i i > [ 2.641038] ^ > [ 2.643376] > [ 2.644004] Pid: 1, comm: swapper Not tainted (2.6.29-rc6-tip-01751-g4d1c22c-dirty #885) > [ 2.648003] EIP: 0060:[] EFLAGS: 00010282 CPU: 0 > [ 2.652008] EIP is at sock_init_data+0xa1/0x190 > [ 2.656003] EAX: 0001a800 EBX: f6836c00 ECX: 00463000 EDX: c0e46fe0 > [ 2.660003] ESI: f642c180 EDI: c0b83088 EBP: f6863ed8 ESP: c0c412ec > [ 2.664003] DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 > [ 2.668003] CR0: 8005003b CR2: f682c400 CR3: 00b91000 CR4: 000006f0 > [ 2.672003] DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000 > [ 2.676003] DR6: ffff4ff0 DR7: 00000400 > [ 2.680002] [] __netlink_create+0x35/0xa0 > [ 2.684002] [] netlink_kernel_create+0x4c/0x140 > [ 2.688002] [] rtnetlink_net_init+0x1e/0x40 > [ 2.696002] [] register_pernet_operations+0x11/0x30 > [ 2.700002] [] register_pernet_subsys+0x1c/0x30 > [ 2.704002] [] rtnetlink_init+0x4c/0x100 > [ 2.708002] [] netlink_proto_init+0x159/0x170 > [ 2.712002] [] do_one_initcall+0x24/0x150 > [ 2.716002] [] do_initcalls+0x27/0x40 > [ 2.723201] [] do_basic_setup+0x1c/0x20 > [ 2.728002] [] kernel_init+0x5a/0xa0 > [ 2.732002] [] kernel_thread_helper+0x7/0x10 > [ 2.736002] [] 0xffffffff We fix this false positive by annotating the bitfield in struct sock. Reported-by: Ingo Molnar Signed-off-by: Vegard Nossum --- include/net/sock.h | 2 ++ net/core/sock.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'net/core/sock.c') diff --git a/include/net/sock.h b/include/net/sock.h index 4bb1ff9fd15..d933da00d50 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -218,9 +218,11 @@ struct sock { #define sk_hash __sk_common.skc_hash #define sk_prot __sk_common.skc_prot #define sk_net __sk_common.skc_net + kmemcheck_bitfield_begin(flags); unsigned char sk_shutdown : 2, sk_no_check : 2, sk_userlocks : 4; + kmemcheck_bitfield_end(flags); unsigned char sk_protocol; unsigned short sk_type; int sk_rcvbuf; diff --git a/net/core/sock.c b/net/core/sock.c index 7dbf3ffb35c..ce72c0ae424 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -941,6 +941,8 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, sk = kmalloc(prot->obj_size, priority); if (sk != NULL) { + kmemcheck_annotate_bitfield(sk, flags); + if (security_sk_alloc(sk, family, priority)) goto out_free; -- cgit v1.2.3-70-g09d2