diff options
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/datagram.c | 241 | ||||
-rw-r--r-- | net/core/dev.c | 688 | ||||
-rw-r--r-- | net/core/drop_monitor.c | 137 | ||||
-rw-r--r-- | net/core/fib_rules.c | 4 | ||||
-rw-r--r-- | net/core/gen_estimator.c | 15 | ||||
-rw-r--r-- | net/core/iovec.c | 33 | ||||
-rw-r--r-- | net/core/neighbour.c | 57 | ||||
-rw-r--r-- | net/core/net-sysfs.c | 9 | ||||
-rw-r--r-- | net/core/net-traces.c | 3 | ||||
-rw-r--r-- | net/core/net_namespace.c | 54 | ||||
-rw-r--r-- | net/core/netpoll.c | 15 | ||||
-rw-r--r-- | net/core/pktgen.c | 9 | ||||
-rw-r--r-- | net/core/skb_dma_map.c | 13 | ||||
-rw-r--r-- | net/core/skbuff.c | 317 | ||||
-rw-r--r-- | net/core/sock.c | 137 | ||||
-rw-r--r-- | net/core/stream.c | 3 | ||||
-rw-r--r-- | net/core/user_dma.c | 46 |
17 files changed, 1190 insertions, 591 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c index b01a76abe1d..58abee1f1df 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -260,7 +260,9 @@ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags) spin_unlock_bh(&sk->sk_receive_queue.lock); } - skb_free_datagram(sk, skb); + kfree_skb(skb); + sk_mem_reclaim_partial(sk); + return err; } @@ -280,6 +282,7 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset, { int start = skb_headlen(skb); int i, copy = start - offset; + struct sk_buff *frag_iter; /* Copy header. */ if (copy > 0) { @@ -320,28 +323,24 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset, start = end; } - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; - - for (; list; list = list->next) { - int end; - - WARN_ON(start > offset + len); - - end = start + list->len; - if ((copy = end - offset) > 0) { - if (copy > len) - copy = len; - if (skb_copy_datagram_iovec(list, - offset - start, - to, copy)) - goto fault; - if ((len -= copy) == 0) - return 0; - offset += copy; - } - start = end; + skb_walk_frags(skb, frag_iter) { + int end; + + WARN_ON(start > offset + len); + + end = start + frag_iter->len; + if ((copy = end - offset) > 0) { + if (copy > len) + copy = len; + if (skb_copy_datagram_iovec(frag_iter, + offset - start, + to, copy)) + goto fault; + if ((len -= copy) == 0) + return 0; + offset += copy; } + start = end; } if (!len) return 0; @@ -351,30 +350,124 @@ fault: } /** + * skb_copy_datagram_const_iovec - Copy a datagram to an iovec. + * @skb: buffer to copy + * @offset: offset in the buffer to start copying from + * @to: io vector to copy to + * @to_offset: offset in the io vector to start copying to + * @len: amount of data to copy from buffer to iovec + * + * Returns 0 or -EFAULT. + * Note: the iovec is not modified during the copy. + */ +int skb_copy_datagram_const_iovec(const struct sk_buff *skb, int offset, + const struct iovec *to, int to_offset, + int len) +{ + int start = skb_headlen(skb); + int i, copy = start - offset; + struct sk_buff *frag_iter; + + /* Copy header. */ + if (copy > 0) { + if (copy > len) + copy = len; + if (memcpy_toiovecend(to, skb->data + offset, to_offset, copy)) + goto fault; + if ((len -= copy) == 0) + return 0; + offset += copy; + to_offset += copy; + } + + /* Copy paged appendix. Hmm... why does this look so complicated? */ + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + int end; + + WARN_ON(start > offset + len); + + end = start + skb_shinfo(skb)->frags[i].size; + if ((copy = end - offset) > 0) { + int err; + u8 *vaddr; + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + struct page *page = frag->page; + + if (copy > len) + copy = len; + vaddr = kmap(page); + err = memcpy_toiovecend(to, vaddr + frag->page_offset + + offset - start, to_offset, copy); + kunmap(page); + if (err) + goto fault; + if (!(len -= copy)) + return 0; + offset += copy; + to_offset += copy; + } + start = end; + } + + skb_walk_frags(skb, frag_iter) { + int end; + + WARN_ON(start > offset + len); + + end = start + frag_iter->len; + if ((copy = end - offset) > 0) { + if (copy > len) + copy = len; + if (skb_copy_datagram_const_iovec(frag_iter, + offset - start, + to, to_offset, + copy)) + goto fault; + if ((len -= copy) == 0) + return 0; + offset += copy; + to_offset += copy; + } + start = end; + } + if (!len) + return 0; + +fault: + return -EFAULT; +} +EXPORT_SYMBOL(skb_copy_datagram_const_iovec); + +/** * skb_copy_datagram_from_iovec - Copy a datagram from an iovec. * @skb: buffer to copy * @offset: offset in the buffer to start copying to * @from: io vector to copy to + * @from_offset: offset in the io vector to start copying from * @len: amount of data to copy to buffer from iovec * * Returns 0 or -EFAULT. - * Note: the iovec is modified during the copy. + * Note: the iovec is not modified during the copy. */ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, - struct iovec *from, int len) + const struct iovec *from, int from_offset, + int len) { int start = skb_headlen(skb); int i, copy = start - offset; + struct sk_buff *frag_iter; /* Copy header. */ if (copy > 0) { if (copy > len) copy = len; - if (memcpy_fromiovec(skb->data + offset, from, copy)) + if (memcpy_fromiovecend(skb->data + offset, from, from_offset, + copy)) goto fault; if ((len -= copy) == 0) return 0; offset += copy; + from_offset += copy; } /* Copy paged appendix. Hmm... why does this look so complicated? */ @@ -393,8 +486,9 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, if (copy > len) copy = len; vaddr = kmap(page); - err = memcpy_fromiovec(vaddr + frag->page_offset + - offset - start, from, copy); + err = memcpy_fromiovecend(vaddr + frag->page_offset + + offset - start, + from, from_offset, copy); kunmap(page); if (err) goto fault; @@ -402,32 +496,32 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, if (!(len -= copy)) return 0; offset += copy; + from_offset += copy; } start = end; } - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; - - for (; list; list = list->next) { - int end; - - WARN_ON(start > offset + len); - - end = start + list->len; - if ((copy = end - offset) > 0) { - if (copy > len) - copy = len; - if (skb_copy_datagram_from_iovec(list, - offset - start, - from, copy)) - goto fault; - if ((len -= copy) == 0) - return 0; - offset += copy; - } - start = end; + skb_walk_frags(skb, frag_iter) { + int end; + + WARN_ON(start > offset + len); + + end = start + frag_iter->len; + if ((copy = end - offset) > 0) { + if (copy > len) + copy = len; + if (skb_copy_datagram_from_iovec(frag_iter, + offset - start, + from, + from_offset, + copy)) + goto fault; + if ((len -= copy) == 0) + return 0; + offset += copy; + from_offset += copy; } + start = end; } if (!len) return 0; @@ -442,8 +536,9 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, __wsum *csump) { int start = skb_headlen(skb); - int pos = 0; int i, copy = start - offset; + struct sk_buff *frag_iter; + int pos = 0; /* Copy header. */ if (copy > 0) { @@ -494,33 +589,29 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, start = end; } - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; - - for (; list; list=list->next) { - int end; - - WARN_ON(start > offset + len); - - end = start + list->len; - if ((copy = end - offset) > 0) { - __wsum csum2 = 0; - if (copy > len) - copy = len; - if (skb_copy_and_csum_datagram(list, - offset - start, - to, copy, - &csum2)) - goto fault; - *csump = csum_block_add(*csump, csum2, pos); - if ((len -= copy) == 0) - return 0; - offset += copy; - to += copy; - pos += copy; - } - start = end; + skb_walk_frags(skb, frag_iter) { + int end; + + WARN_ON(start > offset + len); + + end = start + frag_iter->len; + if ((copy = end - offset) > 0) { + __wsum csum2 = 0; + if (copy > len) + copy = len; + if (skb_copy_and_csum_datagram(frag_iter, + offset - start, + to, copy, + &csum2)) + goto fault; + *csump = csum_block_add(*csump, csum2, pos); + if ((len -= copy) == 0) + return 0; + offset += copy; + to += copy; + pos += copy; } + start = end; } if (!len) return 0; diff --git a/net/core/dev.c b/net/core/dev.c index e2e9e4af3ac..576a61574a9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -126,6 +126,7 @@ #include <linux/in.h> #include <linux/jhash.h> #include <linux/random.h> +#include <trace/events/napi.h> #include "net-sysfs.h" @@ -268,7 +269,8 @@ static const unsigned short netdev_lock_type[] = ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, - ARPHRD_PHONET_PIPE, ARPHRD_VOID, ARPHRD_NONE}; + ARPHRD_PHONET_PIPE, ARPHRD_IEEE802154, ARPHRD_IEEE802154_PHY, + ARPHRD_VOID, ARPHRD_NONE}; static const char *netdev_lock_name[] = {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25", @@ -285,7 +287,8 @@ static const char *netdev_lock_name[] = "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL", "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", - "_xmit_PHONET_PIPE", "_xmit_VOID", "_xmit_NONE"}; + "_xmit_PHONET_PIPE", "_xmit_IEEE802154", "_xmit_IEEE802154_PHY", + "_xmit_VOID", "_xmit_NONE"}; static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)]; @@ -1047,7 +1050,7 @@ void dev_load(struct net *net, const char *name) int dev_open(struct net_device *dev) { const struct net_device_ops *ops = dev->netdev_ops; - int ret = 0; + int ret; ASSERT_RTNL(); @@ -1064,6 +1067,11 @@ int dev_open(struct net_device *dev) if (!netif_device_present(dev)) return -ENODEV; + ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev); + ret = notifier_to_errno(ret); + if (ret) + return ret; + /* * Call device private open method */ @@ -1688,7 +1696,16 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, goto gso; } + /* + * If device doesnt need skb->dst, release it right now while + * its hot in this cpu cache + */ + if (dev->priv_flags & IFF_XMIT_DST_RELEASE) + skb_dst_drop(skb); + rc = ops->ndo_start_xmit(skb, dev); + if (rc == 0) + txq_trans_update(txq); /* * TODO: if skb_orphan() was called by * dev->hard_start_xmit() (for example, the unmodified @@ -1718,6 +1735,7 @@ gso: skb->next = nskb; return rc; } + txq_trans_update(txq); if (unlikely(netif_tx_queue_stopped(txq) && skb->next)) return NETDEV_TX_BUSY; } while (skb->next); @@ -1735,8 +1753,12 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) { u32 hash; - if (skb_rx_queue_recorded(skb)) - return skb_get_rx_queue(skb) % dev->real_num_tx_queues; + if (skb_rx_queue_recorded(skb)) { + hash = skb_get_rx_queue(skb); + while (unlikely (hash >= dev->real_num_tx_queues)) + hash -= dev->real_num_tx_queues; + return hash; + } if (skb->sk && skb->sk->sk_hash) hash = skb->sk->sk_hash; @@ -1800,7 +1822,7 @@ int dev_queue_xmit(struct sk_buff *skb) if (netif_needs_gso(dev, skb)) goto gso; - if (skb_shinfo(skb)->frag_list && + if (skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST) && __skb_linearize(skb)) goto out_kfree_skb; @@ -2049,11 +2071,13 @@ static inline int deliver_skb(struct sk_buff *skb, } #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE) -/* These hooks defined here for ATM */ -struct net_bridge; -struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br, - unsigned char *addr); -void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly; + +#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE) +/* This hook is defined here for ATM LANE */ +int (*br_fdb_test_addr_hook)(struct net_device *dev, + unsigned char *addr) __read_mostly; +EXPORT_SYMBOL(br_fdb_test_addr_hook); +#endif /* * If bridge module is loaded call bridging hook. @@ -2061,6 +2085,8 @@ void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly; */ struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff *skb) __read_mostly; +EXPORT_SYMBOL(br_handle_frame_hook); + static inline struct sk_buff *handle_bridge(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, struct net_device *orig_dev) @@ -2374,26 +2400,6 @@ void napi_gro_flush(struct napi_struct *napi) } EXPORT_SYMBOL(napi_gro_flush); -void *skb_gro_header(struct sk_buff *skb, unsigned int hlen) -{ - unsigned int offset = skb_gro_offset(skb); - - hlen += offset; - if (hlen <= skb_headlen(skb)) - return skb->data + offset; - - if (unlikely(!skb_shinfo(skb)->nr_frags || - skb_shinfo(skb)->frags[0].size <= - hlen - skb_headlen(skb) || - PageHighMem(skb_shinfo(skb)->frags[0].page))) - return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL; - - return page_address(skb_shinfo(skb)->frags[0].page) + - skb_shinfo(skb)->frags[0].page_offset + - offset - skb_headlen(skb); -} -EXPORT_SYMBOL(skb_gro_header); - int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff **pp = NULL; @@ -2407,7 +2413,7 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) if (!(skb->dev->features & NETIF_F_GRO)) goto normal; - if (skb_is_gso(skb) || skb_shinfo(skb)->frag_list) + if (skb_is_gso(skb) || skb_has_frags(skb)) goto normal; rcu_read_lock(); @@ -2456,10 +2462,25 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) ret = GRO_HELD; pull: - if (unlikely(!pskb_may_pull(skb, skb_gro_offset(skb)))) { - if (napi->gro_list == skb) - napi->gro_list = skb->next; - ret = GRO_DROP; + if (skb_headlen(skb) < skb_gro_offset(skb)) { + int grow = skb_gro_offset(skb) - skb_headlen(skb); + + BUG_ON(skb->end - skb->tail < grow); + + memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow); + + skb->tail += grow; + skb->data_len -= grow; + + skb_shinfo(skb)->frags[0].page_offset += grow; + skb_shinfo(skb)->frags[0].size -= grow; + + if (unlikely(!skb_shinfo(skb)->frags[0].size)) { + put_page(skb_shinfo(skb)->frags[0].page); + memmove(skb_shinfo(skb)->frags, + skb_shinfo(skb)->frags + 1, + --skb_shinfo(skb)->nr_frags); + } } ok: @@ -2509,6 +2530,22 @@ int napi_skb_finish(int ret, struct sk_buff *skb) } EXPORT_SYMBOL(napi_skb_finish); +void skb_gro_reset_offset(struct sk_buff *skb) +{ + NAPI_GRO_CB(skb)->data_offset = 0; + NAPI_GRO_CB(skb)->frag0 = NULL; + NAPI_GRO_CB(skb)->frag0_len = 0; + + if (skb->mac_header == skb->tail && + !PageHighMem(skb_shinfo(skb)->frags[0].page)) { + NAPI_GRO_CB(skb)->frag0 = + page_address(skb_shinfo(skb)->frags[0].page) + + skb_shinfo(skb)->frags[0].page_offset; + NAPI_GRO_CB(skb)->frag0_len = skb_shinfo(skb)->frags[0].size; + } +} +EXPORT_SYMBOL(skb_gro_reset_offset); + int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { skb_gro_reset_offset(skb); @@ -2526,16 +2563,10 @@ void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) } EXPORT_SYMBOL(napi_reuse_skb); -struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi, - struct napi_gro_fraginfo *info) +struct sk_buff *napi_get_frags(struct napi_struct *napi) { struct net_device *dev = napi->dev; struct sk_buff *skb = napi->skb; - struct ethhdr *eth; - skb_frag_t *frag; - int i; - - napi->skb = NULL; if (!skb) { skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN); @@ -2543,47 +2574,14 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi, goto out; skb_reserve(skb, NET_IP_ALIGN); - } - BUG_ON(info->nr_frags > MAX_SKB_FRAGS); - frag = info->frags; - - for (i = 0; i < info->nr_frags; i++) { - skb_fill_page_desc(skb, i, frag->page, frag->page_offset, - frag->size); - frag++; - } - skb_shinfo(skb)->nr_frags = info->nr_frags; - - skb->data_len = info->len; - skb->len += info->len; - skb->truesize += info->len; - - skb_reset_mac_header(skb); - skb_gro_reset_offset(skb); - - eth = skb_gro_header(skb, sizeof(*eth)); - if (!eth) { - napi_reuse_skb(napi, skb); - skb = NULL; - goto out; + napi->skb = skb; } - skb_gro_pull(skb, sizeof(*eth)); - - /* - * This works because the only protocols we care about don't require - * special handling. We'll fix it up properly at the end. - */ - skb->protocol = eth->h_proto; - - skb->ip_summed = info->ip_summed; - skb->csum = info->csum; - out: return skb; } -EXPORT_SYMBOL(napi_fraginfo_skb); +EXPORT_SYMBOL(napi_get_frags); int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret) { @@ -2613,9 +2611,46 @@ int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret) } EXPORT_SYMBOL(napi_frags_finish); -int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info) +struct sk_buff *napi_frags_skb(struct napi_struct *napi) { - struct sk_buff *skb = napi_fraginfo_skb(napi, info); + struct sk_buff *skb = napi->skb; + struct ethhdr *eth; + unsigned int hlen; + unsigned int off; + + napi->skb = NULL; + + skb_reset_mac_header(skb); + skb_gro_reset_offset(skb); + + off = skb_gro_offset(skb); + hlen = off + sizeof(*eth); + eth = skb_gro_header_fast(skb, off); + if (skb_gro_header_hard(skb, hlen)) { + eth = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!eth)) { + napi_reuse_skb(napi, skb); + skb = NULL; + goto out; + } + } + + skb_gro_pull(skb, sizeof(*eth)); + + /* + * This works because the only protocols we care about don't require + * special handling. We'll fix it up properly at the end. + */ + skb->protocol = eth->h_proto; + +out: + return skb; +} +EXPORT_SYMBOL(napi_frags_skb); + +int napi_gro_frags(struct napi_struct *napi) +{ + struct sk_buff *skb = napi_frags_skb(napi); if (!skb) return NET_RX_DROP; @@ -2719,7 +2754,7 @@ void netif_napi_del(struct napi_struct *napi) struct sk_buff *skb, *next; list_del_init(&napi->dev_list); - kfree_skb(napi->skb); + napi_free_frags(napi); for (skb = napi->gro_list; skb; skb = next) { next = skb->next; @@ -2773,8 +2808,10 @@ static void net_rx_action(struct softirq_action *h) * accidently calling ->poll() when NAPI is not scheduled. */ work = 0; - if (test_bit(NAPI_STATE_SCHED, &n->state)) + if (test_bit(NAPI_STATE_SCHED, &n->state)) { work = n->poll(n, weight); + trace_napi_poll(n); + } WARN_ON_ONCE(work > weight); @@ -3444,6 +3481,319 @@ void dev_set_rx_mode(struct net_device *dev) netif_addr_unlock_bh(dev); } +/* hw addresses list handling functions */ + +static int __hw_addr_add(struct list_head *list, int *delta, + unsigned char *addr, int addr_len, + unsigned char addr_type) +{ + struct netdev_hw_addr *ha; + int alloc_size; + + if (addr_len > MAX_ADDR_LEN) + return -EINVAL; + + list_for_each_entry(ha, list, list) { + if (!memcmp(ha->addr, addr, addr_len) && + ha->type == addr_type) { + ha->refcount++; + return 0; + } + } + + + alloc_size = sizeof(*ha); + if (alloc_size < L1_CACHE_BYTES) + alloc_size = L1_CACHE_BYTES; + ha = kmalloc(alloc_size, GFP_ATOMIC); + if (!ha) + return -ENOMEM; + memcpy(ha->addr, addr, addr_len); + ha->type = addr_type; + ha->refcount = 1; + ha->synced = false; + list_add_tail_rcu(&ha->list, list); + if (delta) + (*delta)++; + return 0; +} + +static void ha_rcu_free(struct rcu_head *head) +{ + struct netdev_hw_addr *ha; + + ha = container_of(head, struct netdev_hw_addr, rcu_head); + kfree(ha); +} + +static int __hw_addr_del(struct list_head *list, int *delta, + unsigned char *addr, int addr_len, + unsigned char addr_type) +{ + struct netdev_hw_addr *ha; + + list_for_each_entry(ha, list, list) { + if (!memcmp(ha->addr, addr, addr_len) && + (ha->type == addr_type || !addr_type)) { + if (--ha->refcount) + return 0; + list_del_rcu(&ha->list); + call_rcu(&ha->rcu_head, ha_rcu_free); + if (delta) + (*delta)--; + return 0; + } + } + return -ENOENT; +} + +static int __hw_addr_add_multiple(struct list_head *to_list, int *to_delta, + struct list_head *from_list, int addr_len, + unsigned char addr_type) +{ + int err; + struct netdev_hw_addr *ha, *ha2; + unsigned char type; + + list_for_each_entry(ha, from_list, list) { + type = addr_type ? addr_type : ha->type; + err = __hw_addr_add(to_list, to_delta, ha->addr, + addr_len, type); + if (err) + goto unroll; + } + return 0; + +unroll: + list_for_each_entry(ha2, from_list, list) { + if (ha2 == ha) + break; + type = addr_type ? addr_type : ha2->type; + __hw_addr_del(to_list, to_delta, ha2->addr, + addr_len, type); + } + return err; +} + +static void __hw_addr_del_multiple(struct list_head *to_list, int *to_delta, + struct list_head *from_list, int addr_len, + unsigned char addr_type) +{ + struct netdev_hw_addr *ha; + unsigned char type; + + list_for_each_entry(ha, from_list, list) { + type = addr_type ? addr_type : ha->type; + __hw_addr_del(to_list, to_delta, ha->addr, + addr_len, addr_type); + } +} + +static int __hw_addr_sync(struct list_head *to_list, int *to_delta, + struct list_head *from_list, int *from_delta, + int addr_len) +{ + int err = 0; + struct netdev_hw_addr *ha, *tmp; + + list_for_each_entry_safe(ha, tmp, from_list, list) { + if (!ha->synced) { + err = __hw_addr_add(to_list, to_delta, ha->addr, + addr_len, ha->type); + if (err) + break; + ha->synced = true; + ha->refcount++; + } else if (ha->refcount == 1) { + __hw_addr_del(to_list, to_delta, ha->addr, + addr_len, ha->type); + __hw_addr_del(from_list, from_delta, ha->addr, + addr_len, ha->type); + } + } + return err; +} + +static void __hw_addr_unsync(struct list_head *to_list, int *to_delta, + struct list_head *from_list, int *from_delta, + int addr_len) +{ + struct netdev_hw_addr *ha, *tmp; + + list_for_each_entry_safe(ha, tmp, from_list, list) { + if (ha->synced) { + __hw_addr_del(to_list, to_delta, ha->addr, + addr_len, ha->type); + ha->synced = false; + __hw_addr_del(from_list, from_delta, ha->addr, + addr_len, ha->type); + } + } +} + + +static void __hw_addr_flush(struct list_head *list) +{ + struct netdev_hw_addr *ha, *tmp; + + list_for_each_entry_safe(ha, tmp, list, list) { + list_del_rcu(&ha->list); + call_rcu(&ha->rcu_head, ha_rcu_free); + } +} + +/* Device addresses handling functions */ + +static void dev_addr_flush(struct net_device *dev) +{ + /* rtnl_mutex must be held here */ + + __hw_addr_flush(&dev->dev_addr_list); + dev->dev_addr = NULL; +} + +static int dev_addr_init(struct net_device *dev) +{ + unsigned char addr[MAX_ADDR_LEN]; + struct netdev_hw_addr *ha; + int err; + + /* rtnl_mutex must be held here */ + + INIT_LIST_HEAD(&dev->dev_addr_list); + memset(addr, 0, sizeof(addr)); + err = __hw_addr_add(&dev->dev_addr_list, NULL, addr, sizeof(addr), + NETDEV_HW_ADDR_T_LAN); + if (!err) { + /* + * Get the first (previously created) address from the list + * and set dev_addr pointer to this location. + */ + ha = list_first_entry(&dev->dev_addr_list, + struct netdev_hw_addr, list); + dev->dev_addr = ha->addr; + } + return err; +} + +/** + * dev_addr_add - Add a device address + * @dev: device + * @addr: address to add + * @addr_type: address type + * + * Add a device address to the device or increase the reference count if + * it already exists. + * + * The caller must hold the rtnl_mutex. + */ +int dev_addr_add(struct net_device *dev, unsigned char *addr, + unsigned char addr_type) +{ + int err; + + ASSERT_RTNL(); + + err = __hw_addr_add(&dev->dev_addr_list, NULL, addr, dev->addr_len, + addr_type); + if (!err) + call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); + return err; +} +EXPORT_SYMBOL(dev_addr_add); + +/** + * dev_addr_del - Release a device address. + * @dev: device + * @addr: address to delete + * @addr_type: address type + * + * Release reference to a device address and remove it from the device + * if the reference count drops to zero. + * + * The caller must hold the rtnl_mutex. + */ +int dev_addr_del(struct net_device *dev, unsigned char *addr, + unsigned char addr_type) +{ + int err; + struct netdev_hw_addr *ha; + + ASSERT_RTNL(); + + /* + * We can not remove the first address from the list because + * dev->dev_addr points to that. + */ + ha = list_first_entry(&dev->dev_addr_list, struct netdev_hw_addr, list); + if (ha->addr == dev->dev_addr && ha->refcount == 1) + return -ENOENT; + + err = __hw_addr_del(&dev->dev_addr_list, NULL, addr, dev->addr_len, + addr_type); + if (!err) + call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); + return err; +} +EXPORT_SYMBOL(dev_addr_del); + +/** + * dev_addr_add_multiple - Add device addresses from another device + * @to_dev: device to which addresses will be added + * @from_dev: device from which addresses will be added + * @addr_type: address type - 0 means type will be used from from_dev + * + * Add device addresses of the one device to another. + ** + * The caller must hold the rtnl_mutex. + */ +int dev_addr_add_multiple(struct net_device *to_dev, + struct net_device *from_dev, + unsigned char addr_type) +{ + int err; + + ASSERT_RTNL(); + + if (from_dev->addr_len != to_dev->addr_len) + return -EINVAL; + err = __hw_addr_add_multiple(&to_dev->dev_addr_list, NULL, + &from_dev->dev_addr_list, + to_dev->addr_len, addr_type); + if (!err) + call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev); + return err; +} +EXPORT_SYMBOL(dev_addr_add_multiple); + +/** + * dev_addr_del_multiple - Delete device addresses by another device + * @to_dev: device where the addresses will be deleted + * @from_dev: device by which addresses the addresses will be deleted + * @addr_type: address type - 0 means type will used from from_dev + * + * Deletes addresses in to device by the list of addresses in from device. + * + * The caller must hold the rtnl_mutex. + */ +int dev_addr_del_multiple(struct net_device *to_dev, + struct net_device *from_dev, + unsigned char addr_type) +{ + ASSERT_RTNL(); + + if (from_dev->addr_len != to_dev->addr_len) + return -EINVAL; + __hw_addr_del_multiple(&to_dev->dev_addr_list, NULL, + &from_dev->dev_addr_list, + to_dev->addr_len, addr_type); + call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev); + return 0; +} +EXPORT_SYMBOL(dev_addr_del_multiple); + +/* unicast and multicast addresses handling functions */ + int __dev_addr_delete(struct dev_addr_list **list, int *count, void *addr, int alen, int glbl) { @@ -3506,24 +3856,22 @@ int __dev_addr_add(struct dev_addr_list **list, int *count, * dev_unicast_delete - Release secondary unicast address. * @dev: device * @addr: address to delete - * @alen: length of @addr * * Release reference to a secondary unicast address and remove it * from the device if the reference count drops to zero. * * The caller must hold the rtnl_mutex. */ -int dev_unicast_delete(struct net_device *dev, void *addr, int alen) +int dev_unicast_delete(struct net_device *dev, void *addr) { int err; ASSERT_RTNL(); - netif_addr_lock_bh(dev); - err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0); + err = __hw_addr_del(&dev->uc_list, &dev->uc_count, addr, + dev->addr_len, NETDEV_HW_ADDR_T_UNICAST); if (!err) __dev_set_rx_mode(dev); - netif_addr_unlock_bh(dev); return err; } EXPORT_SYMBOL(dev_unicast_delete); @@ -3532,24 +3880,22 @@ EXPORT_SYMBOL(dev_unicast_delete); * dev_unicast_add - add a secondary unicast address * @dev: device * @addr: address to add - * @alen: length of @addr * * Add a secondary unicast address to the device or increase * the reference count if it already exists. * * The caller must hold the rtnl_mutex. */ -int dev_unicast_add(struct net_device *dev, void *addr, int alen) +int dev_unicast_add(struct net_device *dev, void *addr) { int err; ASSERT_RTNL(); - netif_addr_lock_bh(dev); - err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0); + err = __hw_addr_add(&dev->uc_list, &dev->uc_count, addr, + dev->addr_len, NETDEV_HW_ADDR_T_UNICAST); if (!err) __dev_set_rx_mode(dev); - netif_addr_unlock_bh(dev); return err; } EXPORT_SYMBOL(dev_unicast_add); @@ -3606,8 +3952,7 @@ void __dev_addr_unsync(struct dev_addr_list **to, int *to_count, * @from: source device * * Add newly added addresses to the destination device and release - * addresses that have no users left. The source device must be - * locked by netif_tx_lock_bh. + * addresses that have no users left. * * This function is intended to be called from the dev->set_rx_mode * function of layered software devices. @@ -3616,12 +3961,15 @@ int dev_unicast_sync(struct net_device *to, struct net_device *from) { int err = 0; - netif_addr_lock_bh(to); - err = __dev_addr_sync(&to->uc_list, &to->uc_count, - &from->uc_list, &from->uc_count); + ASSERT_RTNL(); + + if (to->addr_len != from->addr_len) + return -EINVAL; + + err = __hw_addr_sync(&to->uc_list, &to->uc_count, + &from->uc_list, &from->uc_count, to->addr_len); if (!err) __dev_set_rx_mode(to); - netif_addr_unlock_bh(to); return err; } EXPORT_SYMBOL(dev_unicast_sync); @@ -3637,18 +3985,33 @@ EXPORT_SYMBOL(dev_unicast_sync); */ void dev_unicast_unsync(struct net_device *to, struct net_device *from) { - netif_addr_lock_bh(from); - netif_addr_lock(to); + ASSERT_RTNL(); - __dev_addr_unsync(&to->uc_list, &to->uc_count, - &from->uc_list, &from->uc_count); - __dev_set_rx_mode(to); + if (to->addr_len != from->addr_len) + return; - netif_addr_unlock(to); - netif_addr_unlock_bh(from); + __hw_addr_unsync(&to->uc_list, &to->uc_count, + &from->uc_list, &from->uc_count, to->addr_len); + __dev_set_rx_mode(to); } EXPORT_SYMBOL(dev_unicast_unsync); +static void dev_unicast_flush(struct net_device *dev) +{ + /* rtnl_mutex must be held here */ + + __hw_addr_flush(&dev->uc_list); + dev->uc_count = 0; +} + +static void dev_unicast_init(struct net_device *dev) +{ + /* rtnl_mutex must be held here */ + + INIT_LIST_HEAD(&dev->uc_list); +} + + static void __dev_addr_discard(struct dev_addr_list **list) { struct dev_addr_list *tmp; @@ -3667,9 +4030,6 @@ static void dev_addr_discard(struct net_device *dev) { netif_addr_lock_bh(dev); - __dev_addr_discard(&dev->uc_list); - dev->uc_count = 0; - __dev_addr_discard(&dev->mc_list); dev->mc_count = 0; @@ -3853,7 +4213,7 @@ static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cm switch (cmd) { case SIOCGIFFLAGS: /* Get interface flags */ - ifr->ifr_flags = dev_get_flags(dev); + ifr->ifr_flags = (short) dev_get_flags(dev); return 0; case SIOCGIFMETRIC: /* Get the metric on the interface @@ -4262,6 +4622,7 @@ static void rollback_registered(struct net_device *dev) /* * Flush the unicast and multicast chains */ + dev_unicast_flush(dev); dev_addr_discard(dev); if (dev->netdev_ops->ndo_uninit) @@ -4333,39 +4694,6 @@ unsigned long netdev_fix_features(unsigned long features, const char *name) } EXPORT_SYMBOL(netdev_fix_features); -/* Some devices need to (re-)set their netdev_ops inside - * ->init() or similar. If that happens, we have to setup - * the compat pointers again. - */ -void netdev_resync_ops(struct net_device *dev) -{ -#ifdef CONFIG_COMPAT_NET_DEV_OPS - const struct net_device_ops *ops = dev->netdev_ops; - - dev->init = ops->ndo_init; - dev->uninit = ops->ndo_uninit; - dev->open = ops->ndo_open; - dev->change_rx_flags = ops->ndo_change_rx_flags; - dev->set_rx_mode = ops->ndo_set_rx_mode; - dev->set_multicast_list = ops->ndo_set_multicast_list; - dev->set_mac_address = ops->ndo_set_mac_address; - dev->validate_addr = ops->ndo_validate_addr; - dev->do_ioctl = ops->ndo_do_ioctl; - dev->set_config = ops->ndo_set_config; - dev->change_mtu = ops->ndo_change_mtu; - dev->neigh_setup = ops->ndo_neigh_setup; - dev->tx_timeout = ops->ndo_tx_timeout; - dev->get_stats = ops->ndo_get_stats; - dev->vlan_rx_register = ops->ndo_vlan_rx_register; - dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid; - dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid; -#ifdef CONFIG_NET_POLL_CONTROLLER - dev->poll_controller = ops->ndo_poll_controller; -#endif -#endif -} -EXPORT_SYMBOL(netdev_resync_ops); - /** * register_netdevice - register a network device * @dev: device to register @@ -4405,23 +4733,6 @@ int register_netdevice(struct net_device *dev) dev->iflink = -1; -#ifdef CONFIG_COMPAT_NET_DEV_OPS - /* Netdevice_ops API compatibility support. - * This is temporary until all network devices are converted. - */ - if (dev->netdev_ops) { - netdev_resync_ops(dev); - } else { - char drivername[64]; - pr_info("%s (%s): not using net_device_ops yet\n", - dev->name, netdev_drivername(dev, drivername, 64)); - - /* This works only because net_device_ops and the - compatibility structure are the same. */ - dev->netdev_ops = (void *) &(dev->init); - } -#endif - /* Init, if this function is available */ if (dev->netdev_ops->ndo_init) { ret = dev->netdev_ops->ndo_init(dev); @@ -4707,13 +5018,30 @@ void netdev_run_todo(void) * the internal statistics structure is used. */ const struct net_device_stats *dev_get_stats(struct net_device *dev) - { +{ const struct net_device_ops *ops = dev->netdev_ops; if (ops->ndo_get_stats) return ops->ndo_get_stats(dev); - else - return &dev->stats; + else { + unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0; + struct net_device_stats *stats = &dev->stats; + unsigned int i; + struct netdev_queue *txq; + + for (i = 0; i < dev->num_tx_queues; i++) { + txq = netdev_get_tx_queue(dev, i); + tx_bytes += txq->tx_bytes; + tx_packets += txq->tx_packets; + tx_dropped += txq->tx_dropped; + } + if (tx_bytes || tx_packets || tx_dropped) { + stats->tx_bytes = tx_bytes; + stats->tx_packets = tx_packets; + stats->tx_dropped = tx_dropped; + } + return stats; + } } EXPORT_SYMBOL(dev_get_stats); @@ -4748,18 +5076,18 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, struct netdev_queue *tx; struct net_device *dev; size_t alloc_size; - void *p; + struct net_device *p; BUG_ON(strlen(name) >= sizeof(dev->name)); alloc_size = sizeof(struct net_device); if (sizeof_priv) { /* ensure 32-byte alignment of private area */ - alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST; + alloc_size = ALIGN(alloc_size, NETDEV_ALIGN); alloc_size += sizeof_priv; } /* ensure 32-byte alignment of whole construct */ - alloc_size += NETDEV_ALIGN_CONST; + alloc_size += NETDEV_ALIGN - 1; p = kzalloc(alloc_size, GFP_KERNEL); if (!p) { @@ -4771,13 +5099,17 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, if (!tx) { printk(KERN_ERR "alloc_netdev: Unable to allocate " "tx qdiscs.\n"); - kfree(p); - return NULL; + goto free_p; } - dev = (struct net_device *) - (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST); + dev = PTR_ALIGN(p, NETDEV_ALIGN); dev->padded = (char *)dev - (char *)p; + + if (dev_addr_init(dev)) + goto free_tx; + + dev_unicast_init(dev); + dev_net_set(dev, &init_net); dev->_tx = tx; @@ -4789,9 +5121,17 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, netdev_init_queues(dev); INIT_LIST_HEAD(&dev->napi_list); + dev->priv_flags = IFF_XMIT_DST_RELEASE; setup(dev); strcpy(dev->name, name); return dev; + +free_tx: + kfree(tx); + +free_p: + kfree(p); + return NULL; } EXPORT_SYMBOL(alloc_netdev_mq); @@ -4811,6 +5151,9 @@ void free_netdev(struct net_device *dev) kfree(dev->_tx); + /* Flush device addresses */ + dev_addr_flush(dev); + list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) netif_napi_del(p); @@ -4970,6 +5313,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char /* * Flush the unicast and multicast chains */ + dev_unicast_flush(dev); dev_addr_discard(dev); netdev_unregister_kobject(dev); @@ -5325,12 +5669,6 @@ EXPORT_SYMBOL(net_enable_timestamp); EXPORT_SYMBOL(net_disable_timestamp); EXPORT_SYMBOL(dev_get_flags); -#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) -EXPORT_SYMBOL(br_handle_frame_hook); -EXPORT_SYMBOL(br_fdb_get_hook); -EXPORT_SYMBOL(br_fdb_put_hook); -#endif - EXPORT_SYMBOL(dev_load); EXPORT_PER_CPU_SYMBOL(softnet_data); diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index b75b6cea49d..9d66fa953ab 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -22,8 +22,10 @@ #include <linux/timer.h> #include <linux/bitops.h> #include <net/genetlink.h> +#include <net/netevent.h> #include <trace/events/skb.h> +#include <trace/events/napi.h> #include <asm/unaligned.h> @@ -38,7 +40,8 @@ static void send_dm_alert(struct work_struct *unused); * and the work handle that will send up * netlink alerts */ -struct sock *dm_sock; +static int trace_state = TRACE_OFF; +static spinlock_t trace_state_lock = SPIN_LOCK_UNLOCKED; struct per_cpu_dm_data { struct work_struct dm_alert_work; @@ -47,11 +50,18 @@ struct per_cpu_dm_data { struct timer_list send_timer; }; +struct dm_hw_stat_delta { + struct net_device *dev; + struct list_head list; + struct rcu_head rcu; + unsigned long last_drop_val; +}; + static struct genl_family net_drop_monitor_family = { .id = GENL_ID_GENERATE, .hdrsize = 0, .name = "NET_DM", - .version = 1, + .version = 2, .maxattr = NET_DM_CMD_MAX, }; @@ -59,19 +69,24 @@ static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data); static int dm_hit_limit = 64; static int dm_delay = 1; - +static unsigned long dm_hw_check_delta = 2*HZ; +static LIST_HEAD(hw_stats_list); static void reset_per_cpu_data(struct per_cpu_dm_data *data) { size_t al; struct net_dm_alert_msg *msg; + struct nlattr *nla; al = sizeof(struct net_dm_alert_msg); al += dm_hit_limit * sizeof(struct net_dm_drop_point); + al += sizeof(struct nlattr); + data->skb = genlmsg_new(al, GFP_KERNEL); genlmsg_put(data->skb, 0, 0, &net_drop_monitor_family, 0, NET_DM_CMD_ALERT); - msg = __nla_reserve_nohdr(data->skb, sizeof(struct net_dm_alert_msg)); + nla = nla_reserve(data->skb, NLA_UNSPEC, sizeof(struct net_dm_alert_msg)); + msg = nla_data(nla); memset(msg, 0, al); atomic_set(&data->dm_hit_count, dm_hit_limit); } @@ -111,10 +126,11 @@ static void sched_send_work(unsigned long unused) schedule_work(&data->dm_alert_work); } -static void trace_kfree_skb_hit(struct sk_buff *skb, void *location) +static void trace_drop_common(struct sk_buff *skb, void *location) { struct net_dm_alert_msg *msg; struct nlmsghdr *nlh; + struct nlattr *nla; int i; struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); @@ -127,7 +143,8 @@ static void trace_kfree_skb_hit(struct sk_buff *skb, void *location) } nlh = (struct nlmsghdr *)data->skb->data; - msg = genlmsg_data(nlmsg_data(nlh)); + nla = genlmsg_data(nlmsg_data(nlh)); + msg = nla_data(nla); for (i = 0; i < msg->entries; i++) { if (!memcmp(&location, msg->points[i].pc, sizeof(void *))) { msg->points[i].count++; @@ -139,6 +156,7 @@ static void trace_kfree_skb_hit(struct sk_buff *skb, void *location) * We need to create a new entry */ __nla_reserve_nohdr(data->skb, sizeof(struct net_dm_drop_point)); + nla->nla_len += NLA_ALIGN(sizeof(struct net_dm_drop_point)); memcpy(msg->points[msg->entries].pc, &location, sizeof(void *)); msg->points[msg->entries].count = 1; msg->entries++; @@ -152,24 +170,80 @@ out: return; } +static void trace_kfree_skb_hit(struct sk_buff *skb, void *location) +{ + trace_drop_common(skb, location); +} + +static void trace_napi_poll_hit(struct napi_struct *napi) +{ + struct dm_hw_stat_delta *new_stat; + + /* + * Ratelimit our check time to dm_hw_check_delta jiffies + */ + if (!time_after(jiffies, napi->dev->last_rx + dm_hw_check_delta)) + return; + + rcu_read_lock(); + list_for_each_entry_rcu(new_stat, &hw_stats_list, list) { + if ((new_stat->dev == napi->dev) && + (napi->dev->stats.rx_dropped != new_stat->last_drop_val)) { + trace_drop_common(NULL, NULL); + new_stat->last_drop_val = napi->dev->stats.rx_dropped; + break; + } + } + rcu_read_unlock(); +} + + +static void free_dm_hw_stat(struct rcu_head *head) +{ + struct dm_hw_stat_delta *n; + n = container_of(head, struct dm_hw_stat_delta, rcu); + kfree(n); +} + static int set_all_monitor_traces(int state) { int rc = 0; + struct dm_hw_stat_delta *new_stat = NULL; + struct dm_hw_stat_delta *temp; + + spin_lock(&trace_state_lock); switch (state) { case TRACE_ON: rc |= register_trace_kfree_skb(trace_kfree_skb_hit); + rc |= register_trace_napi_poll(trace_napi_poll_hit); break; case TRACE_OFF: rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit); + rc |= unregister_trace_napi_poll(trace_napi_poll_hit); tracepoint_synchronize_unregister(); + + /* + * Clean the device list + */ + list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) { + if (new_stat->dev == NULL) { + list_del_rcu(&new_stat->list); + call_rcu(&new_stat->rcu, free_dm_hw_stat); + } + } break; default: rc = 1; break; } + if (!rc) + trace_state = state; + + spin_unlock(&trace_state_lock); + if (rc) return -EINPROGRESS; return rc; @@ -197,6 +271,44 @@ static int net_dm_cmd_trace(struct sk_buff *skb, return -ENOTSUPP; } +static int dropmon_net_event(struct notifier_block *ev_block, + unsigned long event, void *ptr) +{ + struct net_device *dev = ptr; + struct dm_hw_stat_delta *new_stat = NULL; + struct dm_hw_stat_delta *tmp; + + switch (event) { + case NETDEV_REGISTER: + new_stat = kzalloc(sizeof(struct dm_hw_stat_delta), GFP_KERNEL); + + if (!new_stat) + goto out; + + new_stat->dev = dev; + INIT_RCU_HEAD(&new_stat->rcu); + spin_lock(&trace_state_lock); + list_add_rcu(&new_stat->list, &hw_stats_list); + spin_unlock(&trace_state_lock); + break; + case NETDEV_UNREGISTER: + spin_lock(&trace_state_lock); + list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) { + if (new_stat->dev == dev) { + new_stat->dev = NULL; + if (trace_state == TRACE_OFF) { + list_del_rcu(&new_stat->list); + call_rcu(&new_stat->rcu, free_dm_hw_stat); + break; + } + } + } + spin_unlock(&trace_state_lock); + break; + } +out: + return NOTIFY_DONE; +} static struct genl_ops dropmon_ops[] = { { @@ -213,6 +325,10 @@ static struct genl_ops dropmon_ops[] = { }, }; +static struct notifier_block dropmon_net_notifier = { + .notifier_call = dropmon_net_event +}; + static int __init init_net_drop_monitor(void) { int cpu; @@ -236,12 +352,18 @@ static int __init init_net_drop_monitor(void) ret = genl_register_ops(&net_drop_monitor_family, &dropmon_ops[i]); if (ret) { - printk(KERN_CRIT "failed to register operation %d\n", + printk(KERN_CRIT "Failed to register operation %d\n", dropmon_ops[i].cmd); goto out_unreg; } } + rc = register_netdevice_notifier(&dropmon_net_notifier); + if (rc < 0) { + printk(KERN_CRIT "Failed to register netdevice notifier\n"); + goto out_unreg; + } + rc = 0; for_each_present_cpu(cpu) { @@ -252,6 +374,7 @@ static int __init init_net_drop_monitor(void) data->send_timer.data = cpu; data->send_timer.function = sched_send_work; } + goto out; out_unreg: diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 98691e1466b..bd309384f8b 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -299,7 +299,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) } else if (rule->action == FR_ACT_GOTO) goto errout_free; - err = ops->configure(rule, skb, nlh, frh, tb); + err = ops->configure(rule, skb, frh, tb); if (err < 0) goto errout_free; @@ -500,7 +500,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, if (rule->target) NLA_PUT_U32(skb, FRA_GOTO, rule->target); - if (ops->fill(rule, skb, nlh, frh) < 0) + if (ops->fill(rule, skb, frh) < 0) goto nla_put_failure; return nlmsg_end(skb, nlh); diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 9cc9f95b109..78e5bfc454a 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -66,9 +66,9 @@ NOTES. - * The stored value for avbps is scaled by 2^5, so that maximal - rate is ~1Gbit, avpps is scaled by 2^10. - + * avbps is scaled by 2^5, avpps is scaled by 2^10. + * both values are reported as 32 bit unsigned values. bps can + overflow for fast links : max speed being 34360Mbit/sec * Minimal interval is HZ/4=250msec (it is the greatest common divisor for HZ=100 and HZ=1024 8)), maximal interval is (HZ*2^EST_MAX_INTERVAL)/4 = 8sec. Shorter intervals @@ -86,9 +86,9 @@ struct gen_estimator spinlock_t *stats_lock; int ewma_log; u64 last_bytes; + u64 avbps; u32 last_packets; u32 avpps; - u32 avbps; struct rcu_head e_rcu; struct rb_node node; }; @@ -115,6 +115,7 @@ static void est_timer(unsigned long arg) rcu_read_lock(); list_for_each_entry_rcu(e, &elist[idx].list, list) { u64 nbytes; + u64 brate; u32 npackets; u32 rate; @@ -125,14 +126,14 @@ static void est_timer(unsigned long arg) nbytes = e->bstats->bytes; npackets = e->bstats->packets; - rate = (nbytes - e->last_bytes)<<(7 - idx); + brate = (nbytes - e->last_bytes)<<(7 - idx); e->last_bytes = nbytes; - e->avbps += ((long)rate - (long)e->avbps) >> e->ewma_log; + e->avbps += (brate >> e->ewma_log) - (e->avbps >> e->ewma_log); e->rate_est->bps = (e->avbps+0xF)>>5; rate = (npackets - e->last_packets)<<(12 - idx); e->last_packets = npackets; - e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log; + e->avpps += (rate >> e->ewma_log) - (e->avpps >> e->ewma_log); e->rate_est->pps = (e->avpps+0x1FF)>>10; skip: read_unlock(&est_lock); diff --git a/net/core/iovec.c b/net/core/iovec.c index 4c9c0121c9d..16ad45d4882 100644 --- a/net/core/iovec.c +++ b/net/core/iovec.c @@ -98,6 +98,31 @@ int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len) } /* + * Copy kernel to iovec. Returns -EFAULT on error. + */ + +int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata, + int offset, int len) +{ + int copy; + for (; len > 0; ++iov) { + /* Skip over the finished iovecs */ + if (unlikely(offset >= iov->iov_len)) { + offset -= iov->iov_len; + continue; + } + copy = min_t(unsigned int, iov->iov_len - offset, len); + if (copy_to_user(iov->iov_base + offset, kdata, copy)) + return -EFAULT; + offset = 0; + kdata += copy; + len -= copy; + } + + return 0; +} + +/* * Copy iovec to kernel. Returns -EFAULT on error. * * Note: this modifies the original iovec. @@ -122,10 +147,11 @@ int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len) } /* - * For use with ip_build_xmit + * Copy iovec from kernel. Returns -EFAULT on error. */ -int memcpy_fromiovecend(unsigned char *kdata, struct iovec *iov, int offset, - int len) + +int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov, + int offset, int len) { /* Skip over the finished iovecs */ while (offset >= iov->iov_len) { @@ -236,3 +262,4 @@ EXPORT_SYMBOL(csum_partial_copy_fromiovecend); EXPORT_SYMBOL(memcpy_fromiovec); EXPORT_SYMBOL(memcpy_fromiovecend); EXPORT_SYMBOL(memcpy_toiovec); +EXPORT_SYMBOL(memcpy_toiovecend); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index a1cbce7fdae..163b4f5b036 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -771,6 +771,28 @@ static __inline__ int neigh_max_probes(struct neighbour *n) p->ucast_probes + p->app_probes + p->mcast_probes); } +static void neigh_invalidate(struct neighbour *neigh) +{ + struct sk_buff *skb; + + NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed); + NEIGH_PRINTK2("neigh %p is failed.\n", neigh); + neigh->updated = jiffies; + + /* It is very thin place. report_unreachable is very complicated + routine. Particularly, it can hit the same neighbour entry! + + So that, we try to be accurate and avoid dead loop. --ANK + */ + while (neigh->nud_state == NUD_FAILED && + (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) { + write_unlock(&neigh->lock); + neigh->ops->error_report(neigh, skb); + write_lock(&neigh->lock); + } + skb_queue_purge(&neigh->arp_queue); +} + /* Called when a timer expires for a neighbour entry. */ static void neigh_timer_handler(unsigned long arg) @@ -835,26 +857,9 @@ static void neigh_timer_handler(unsigned long arg) if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) && atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) { - struct sk_buff *skb; - neigh->nud_state = NUD_FAILED; - neigh->updated = jiffies; notify = 1; - NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed); - NEIGH_PRINTK2("neigh %p is failed.\n", neigh); - - /* It is very thin place. report_unreachable is very complicated - routine. Particularly, it can hit the same neighbour entry! - - So that, we try to be accurate and avoid dead loop. --ANK - */ - while (neigh->nud_state == NUD_FAILED && - (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) { - write_unlock(&neigh->lock); - neigh->ops->error_report(neigh, skb); - write_lock(&neigh->lock); - } - skb_queue_purge(&neigh->arp_queue); + neigh_invalidate(neigh); } if (neigh->nud_state & NUD_IN_TIMER) { @@ -1001,6 +1006,11 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, neigh->nud_state = new; err = 0; notify = old & NUD_VALID; + if ((old & (NUD_INCOMPLETE | NUD_PROBE)) && + (new & NUD_FAILED)) { + neigh_invalidate(neigh); + notify = 1; + } goto out; } @@ -1088,8 +1098,8 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, struct neighbour *n1 = neigh; write_unlock_bh(&neigh->lock); /* On shaper/eql skb->dst->neighbour != neigh :( */ - if (skb->dst && skb->dst->neighbour) - n1 = skb->dst->neighbour; + if (skb_dst(skb) && skb_dst(skb)->neighbour) + n1 = skb_dst(skb)->neighbour; n1->output(skb); write_lock_bh(&neigh->lock); } @@ -1182,7 +1192,7 @@ EXPORT_SYMBOL(neigh_compat_output); int neigh_resolve_output(struct sk_buff *skb) { - struct dst_entry *dst = skb->dst; + struct dst_entry *dst = skb_dst(skb); struct neighbour *neigh; int rc = 0; @@ -1229,7 +1239,7 @@ EXPORT_SYMBOL(neigh_resolve_output); int neigh_connected_output(struct sk_buff *skb) { int err; - struct dst_entry *dst = skb->dst; + struct dst_entry *dst = skb_dst(skb); struct neighbour *neigh = dst->neighbour; struct net_device *dev = neigh->dev; @@ -1298,8 +1308,7 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, if (time_before(tbl->proxy_timer.expires, sched_next)) sched_next = tbl->proxy_timer.expires; } - dst_release(skb->dst); - skb->dst = NULL; + skb_dst_drop(skb); dev_hold(skb->dev); __skb_queue_tail(&tbl->proxy_queue, skb); mod_timer(&tbl->proxy_timer, sched_next); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 2da59a0ac4a..3994680c08b 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -78,7 +78,7 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr, goto err; if (!rtnl_trylock()) - return -ERESTARTSYS; + return restart_syscall(); if (dev_isalive(net)) { if ((ret = (*set)(net, new)) == 0) @@ -225,7 +225,8 @@ static ssize_t store_ifalias(struct device *dev, struct device_attribute *attr, if (len > 0 && buf[len - 1] == '\n') --count; - rtnl_lock(); + if (!rtnl_trylock()) + return restart_syscall(); ret = dev_set_alias(netdev, buf, count); rtnl_unlock(); @@ -238,7 +239,8 @@ static ssize_t show_ifalias(struct device *dev, const struct net_device *netdev = to_net_dev(dev); ssize_t ret = 0; - rtnl_lock(); + if (!rtnl_trylock()) + return restart_syscall(); if (netdev->ifalias) ret = sprintf(buf, "%s\n", netdev->ifalias); rtnl_unlock(); @@ -497,7 +499,6 @@ int netdev_register_kobject(struct net_device *net) dev->platform_data = net; dev->groups = groups; - BUILD_BUG_ON(BUS_ID_SIZE < IFNAMSIZ); dev_set_name(dev, "%s", net->name); #ifdef CONFIG_SYSFS diff --git a/net/core/net-traces.c b/net/core/net-traces.c index 499a67eaf3a..f1e982c508b 100644 --- a/net/core/net-traces.c +++ b/net/core/net-traces.c @@ -25,5 +25,8 @@ #define CREATE_TRACE_POINTS #include <trace/events/skb.h> +#include <trace/events/napi.h> EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); + +EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index e3bebd36f05..b7292a2719d 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -115,41 +115,34 @@ static void net_free(struct net *net) kmem_cache_free(net_cachep, net); } -struct net *copy_net_ns(unsigned long flags, struct net *old_net) +static struct net *net_create(void) { - struct net *new_net = NULL; - int err; - - get_net(old_net); - - if (!(flags & CLONE_NEWNET)) - return old_net; - - err = -ENOMEM; - new_net = net_alloc(); - if (!new_net) - goto out_err; + struct net *net; + int rv; + net = net_alloc(); + if (!net) + return ERR_PTR(-ENOMEM); mutex_lock(&net_mutex); - err = setup_net(new_net); - if (!err) { + rv = setup_net(net); + if (rv == 0) { rtnl_lock(); - list_add_tail(&new_net->list, &net_namespace_list); + list_add_tail(&net->list, &net_namespace_list); rtnl_unlock(); } mutex_unlock(&net_mutex); + if (rv < 0) { + net_free(net); + return ERR_PTR(rv); + } + return net; +} - if (err) - goto out_free; -out: - put_net(old_net); - return new_net; - -out_free: - net_free(new_net); -out_err: - new_net = ERR_PTR(err); - goto out; +struct net *copy_net_ns(unsigned long flags, struct net *old_net) +{ + if (!(flags & CLONE_NEWNET)) + return get_net(old_net); + return net_create(); } static void cleanup_net(struct work_struct *work) @@ -203,9 +196,7 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net) static int __init net_ns_init(void) { struct net_generic *ng; - int err; - printk(KERN_INFO "net_namespace: %zd bytes\n", sizeof(struct net)); #ifdef CONFIG_NET_NS net_cachep = kmem_cache_create("net_namespace", sizeof(struct net), SMP_CACHE_BYTES, @@ -224,15 +215,14 @@ static int __init net_ns_init(void) rcu_assign_pointer(init_net.gen, ng); mutex_lock(&net_mutex); - err = setup_net(&init_net); + if (setup_net(&init_net)) + panic("Could not setup the initial network namespace"); rtnl_lock(); list_add_tail(&init_net.list, &net_namespace_list); rtnl_unlock(); mutex_unlock(&net_mutex); - if (err) - panic("Could not setup the initial network namespace"); return 0; } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index b5873bdff61..9675f312830 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -24,6 +24,7 @@ #include <net/tcp.h> #include <net/udp.h> #include <asm/unaligned.h> +#include <trace/events/napi.h> /* * We maintain a small pool of fully-sized skbs, to make sure the @@ -137,6 +138,7 @@ static int poll_one_napi(struct netpoll_info *npinfo, set_bit(NAPI_STATE_NPSVC, &napi->state); work = napi->poll(napi, budget); + trace_napi_poll(napi); clear_bit(NAPI_STATE_NPSVC, &napi->state); atomic_dec(&trapped); @@ -175,9 +177,13 @@ static void service_arp_queue(struct netpoll_info *npi) void netpoll_poll(struct netpoll *np) { struct net_device *dev = np->dev; - const struct net_device_ops *ops = dev->netdev_ops; + const struct net_device_ops *ops; + + if (!dev || !netif_running(dev)) + return; - if (!dev || !netif_running(dev) || !ops->ndo_poll_controller) + ops = dev->netdev_ops; + if (!ops->ndo_poll_controller) return; /* Process pending work on NIC */ @@ -296,8 +302,11 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; tries > 0; --tries) { if (__netif_tx_trylock(txq)) { - if (!netif_tx_queue_stopped(txq)) + if (!netif_tx_queue_stopped(txq)) { status = ops->ndo_start_xmit(skb, dev); + if (status == NETDEV_TX_OK) + txq_trans_update(txq); + } __netif_tx_unlock(txq); if (status == NETDEV_TX_OK) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 3779c1438c1..19b8c20e98a 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2447,7 +2447,7 @@ static inline void free_SAs(struct pktgen_dev *pkt_dev) if (pkt_dev->cflows) { /* let go of the SAs if we have them */ int i = 0; - for (; i < pkt_dev->nflows; i++){ + for (; i < pkt_dev->cflows; i++) { struct xfrm_state *x = pkt_dev->flows[i].x; if (x) { xfrm_state_put(x); @@ -3438,6 +3438,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) retry_now: ret = (*xmit)(pkt_dev->skb, odev); if (likely(ret == NETDEV_TX_OK)) { + txq_trans_update(txq); pkt_dev->last_ok = 1; pkt_dev->sofar++; pkt_dev->seq_num++; @@ -3690,8 +3691,7 @@ out1: #ifdef CONFIG_XFRM free_SAs(pkt_dev); #endif - if (pkt_dev->flows) - vfree(pkt_dev->flows); + vfree(pkt_dev->flows); kfree(pkt_dev); return err; } @@ -3790,8 +3790,7 @@ static int pktgen_remove_device(struct pktgen_thread *t, #ifdef CONFIG_XFRM free_SAs(pkt_dev); #endif - if (pkt_dev->flows) - vfree(pkt_dev->flows); + vfree(pkt_dev->flows); kfree(pkt_dev); return 0; } diff --git a/net/core/skb_dma_map.c b/net/core/skb_dma_map.c index 86234923a3b..79687dfd695 100644 --- a/net/core/skb_dma_map.c +++ b/net/core/skb_dma_map.c @@ -20,7 +20,7 @@ int skb_dma_map(struct device *dev, struct sk_buff *skb, if (dma_mapping_error(dev, map)) goto out_err; - sp->dma_maps[0] = map; + sp->dma_head = map; for (i = 0; i < sp->nr_frags; i++) { skb_frag_t *fp = &sp->frags[i]; @@ -28,9 +28,8 @@ int skb_dma_map(struct device *dev, struct sk_buff *skb, fp->size, dir); if (dma_mapping_error(dev, map)) goto unwind; - sp->dma_maps[i + 1] = map; + sp->dma_maps[i] = map; } - sp->num_dma_maps = i + 1; return 0; @@ -38,10 +37,10 @@ unwind: while (--i >= 0) { skb_frag_t *fp = &sp->frags[i]; - dma_unmap_page(dev, sp->dma_maps[i + 1], + dma_unmap_page(dev, sp->dma_maps[i], fp->size, dir); } - dma_unmap_single(dev, sp->dma_maps[0], + dma_unmap_single(dev, sp->dma_head, skb_headlen(skb), dir); out_err: return -ENOMEM; @@ -54,12 +53,12 @@ void skb_dma_unmap(struct device *dev, struct sk_buff *skb, struct skb_shared_info *sp = skb_shinfo(skb); int i; - dma_unmap_single(dev, sp->dma_maps[0], + dma_unmap_single(dev, sp->dma_head, skb_headlen(skb), dir); for (i = 0; i < sp->nr_frags; i++) { skb_frag_t *fp = &sp->frags[i]; - dma_unmap_page(dev, sp->dma_maps[i + 1], + dma_unmap_page(dev, sp->dma_maps[i], fp->size, dir); } } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c4906bbcd60..5c93435b034 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -39,6 +39,7 @@ #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> +#include <linux/kmemcheck.h> #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/in.h> @@ -201,6 +202,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, skb->data = data; skb_reset_tail_pointer(skb); skb->end = skb->tail + size; + kmemcheck_annotate_bitfield(skb, flags1); + kmemcheck_annotate_bitfield(skb, flags2); /* make sure we initialize shinfo sequentially */ shinfo = skb_shinfo(skb); atomic_set(&shinfo->dataref, 1); @@ -210,13 +213,15 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, shinfo->gso_type = 0; shinfo->ip6_frag_id = 0; shinfo->tx_flags.flags = 0; - shinfo->frag_list = NULL; + skb_frag_list_init(skb); memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps)); if (fclone) { struct sk_buff *child = skb + 1; atomic_t *fclone_ref = (atomic_t *) (child + 1); + kmemcheck_annotate_bitfield(child, flags1); + kmemcheck_annotate_bitfield(child, flags2); skb->fclone = SKB_FCLONE_ORIG; atomic_set(fclone_ref, 1); @@ -323,7 +328,7 @@ static void skb_clone_fraglist(struct sk_buff *skb) { struct sk_buff *list; - for (list = skb_shinfo(skb)->frag_list; list; list = list->next) + skb_walk_frags(skb, list) skb_get(list); } @@ -338,7 +343,7 @@ static void skb_release_data(struct sk_buff *skb) put_page(skb_shinfo(skb)->frags[i].page); } - if (skb_shinfo(skb)->frag_list) + if (skb_has_frags(skb)) skb_drop_fraglist(skb); kfree(skb->head); @@ -381,7 +386,7 @@ static void kfree_skbmem(struct sk_buff *skb) static void skb_release_head_state(struct sk_buff *skb) { - dst_release(skb->dst); + skb_dst_drop(skb); #ifdef CONFIG_XFRM secpath_put(skb->sp); #endif @@ -503,7 +508,7 @@ int skb_recycle_check(struct sk_buff *skb, int skb_size) shinfo->gso_type = 0; shinfo->ip6_frag_id = 0; shinfo->tx_flags.flags = 0; - shinfo->frag_list = NULL; + skb_frag_list_init(skb); memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps)); memset(skb, 0, offsetof(struct sk_buff, tail)); @@ -521,13 +526,12 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->transport_header = old->transport_header; new->network_header = old->network_header; new->mac_header = old->mac_header; - new->dst = dst_clone(old->dst); + skb_dst_set(new, dst_clone(skb_dst(old))); #ifdef CONFIG_XFRM new->sp = secpath_get(old->sp); #endif memcpy(new->cb, old->cb, sizeof(old->cb)); - new->csum_start = old->csum_start; - new->csum_offset = old->csum_offset; + new->csum = old->csum; new->local_df = old->local_df; new->pkt_type = old->pkt_type; new->ip_summed = old->ip_summed; @@ -538,6 +542,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) #endif new->protocol = old->protocol; new->mark = old->mark; + new->iif = old->iif; __nf_copy(new, old); #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) @@ -550,10 +555,17 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) #endif #endif new->vlan_tci = old->vlan_tci; +#if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE) + new->do_not_encrypt = old->do_not_encrypt; +#endif skb_copy_secmark(new, old); } +/* + * You should not add any new code to this function. Add it to + * __copy_skb_header above instead. + */ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) { #define C(x) n->x = skb->x @@ -569,16 +581,11 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) n->cloned = 1; n->nohdr = 0; n->destructor = NULL; - C(iif); C(tail); C(end); C(head); C(data); C(truesize); -#if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE) - C(do_not_encrypt); - C(requeue); -#endif atomic_set(&n->users, 1); atomic_inc(&(skb_shinfo(skb)->dataref)); @@ -633,6 +640,9 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); if (!n) return NULL; + + kmemcheck_annotate_bitfield(n, flags1); + kmemcheck_annotate_bitfield(n, flags2); n->fclone = SKB_FCLONE_UNAVAILABLE; } @@ -755,7 +765,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) skb_shinfo(n)->nr_frags = i; } - if (skb_shinfo(skb)->frag_list) { + if (skb_has_frags(skb)) { skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list; skb_clone_fraglist(n); } @@ -818,7 +828,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) get_page(skb_shinfo(skb)->frags[i].page); - if (skb_shinfo(skb)->frag_list) + if (skb_has_frags(skb)) skb_clone_fraglist(skb); skb_release_data(skb); @@ -1090,7 +1100,7 @@ drop_pages: for (; i < nfrags; i++) put_page(skb_shinfo(skb)->frags[i].page); - if (skb_shinfo(skb)->frag_list) + if (skb_has_frags(skb)) skb_drop_fraglist(skb); goto done; } @@ -1185,7 +1195,7 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta) /* Optimization: no fragments, no reasons to preestimate * size of pulled pages. Superb. */ - if (!skb_shinfo(skb)->frag_list) + if (!skb_has_frags(skb)) goto pull_pages; /* Estimate size of pulled pages. */ @@ -1282,8 +1292,9 @@ EXPORT_SYMBOL(__pskb_pull_tail); int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) { - int i, copy; int start = skb_headlen(skb); + struct sk_buff *frag_iter; + int i, copy; if (offset > (int)skb->len - len) goto fault; @@ -1325,28 +1336,23 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) start = end; } - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; + skb_walk_frags(skb, frag_iter) { + int end; - for (; list; list = list->next) { - int end; - - WARN_ON(start > offset + len); - - end = start + list->len; - if ((copy = end - offset) > 0) { - if (copy > len) - copy = len; - if (skb_copy_bits(list, offset - start, - to, copy)) - goto fault; - if ((len -= copy) == 0) - return 0; - offset += copy; - to += copy; - } - start = end; + WARN_ON(start > offset + len); + + end = start + frag_iter->len; + if ((copy = end - offset) > 0) { + if (copy > len) + copy = len; + if (skb_copy_bits(frag_iter, offset - start, to, copy)) + goto fault; + if ((len -= copy) == 0) + return 0; + offset += copy; + to += copy; } + start = end; } if (!len) return 0; @@ -1531,6 +1537,7 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, .ops = &sock_pipe_buf_ops, .spd_release = sock_spd_release, }; + struct sk_buff *frag_iter; struct sock *sk = skb->sk; /* @@ -1545,13 +1552,11 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, /* * now see if we have a frag_list to map */ - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; - - for (; list && tlen; list = list->next) { - if (__skb_splice_bits(list, &offset, &tlen, &spd, sk)) - break; - } + skb_walk_frags(skb, frag_iter) { + if (!tlen) + break; + if (__skb_splice_bits(frag_iter, &offset, &tlen, &spd, sk)) + break; } done: @@ -1590,8 +1595,9 @@ done: int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len) { - int i, copy; int start = skb_headlen(skb); + struct sk_buff *frag_iter; + int i, copy; if (offset > (int)skb->len - len) goto fault; @@ -1632,28 +1638,24 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len) start = end; } - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; + skb_walk_frags(skb, frag_iter) { + int end; - for (; list; list = list->next) { - int end; - - WARN_ON(start > offset + len); - - end = start + list->len; - if ((copy = end - offset) > 0) { - if (copy > len) - copy = len; - if (skb_store_bits(list, offset - start, - from, copy)) - goto fault; - if ((len -= copy) == 0) - return 0; - offset += copy; - from += copy; - } - start = end; + WARN_ON(start > offset + len); + + end = start + frag_iter->len; + if ((copy = end - offset) > 0) { + if (copy > len) + copy = len; + if (skb_store_bits(frag_iter, offset - start, + from, copy)) + goto fault; + if ((len -= copy) == 0) + return 0; + offset += copy; + from += copy; } + start = end; } if (!len) return 0; @@ -1670,6 +1672,7 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset, { int start = skb_headlen(skb); int i, copy = start - offset; + struct sk_buff *frag_iter; int pos = 0; /* Checksum header. */ @@ -1709,29 +1712,25 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset, start = end; } - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; + skb_walk_frags(skb, frag_iter) { + int end; - for (; list; list = list->next) { - int end; - - WARN_ON(start > offset + len); - - end = start + list->len; - if ((copy = end - offset) > 0) { - __wsum csum2; - if (copy > len) - copy = len; - csum2 = skb_checksum(list, offset - start, - copy, 0); - csum = csum_block_add(csum, csum2, pos); - if ((len -= copy) == 0) - return csum; - offset += copy; - pos += copy; - } - start = end; + WARN_ON(start > offset + len); + + end = start + frag_iter->len; + if ((copy = end - offset) > 0) { + __wsum csum2; + if (copy > len) + copy = len; + csum2 = skb_checksum(frag_iter, offset - start, + copy, 0); + csum = csum_block_add(csum, csum2, pos); + if ((len -= copy) == 0) + return csum; + offset += copy; + pos += copy; } + start = end; } BUG_ON(len); @@ -1746,6 +1745,7 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, { int start = skb_headlen(skb); int i, copy = start - offset; + struct sk_buff *frag_iter; int pos = 0; /* Copy header. */ @@ -1790,31 +1790,27 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, start = end; } - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; + skb_walk_frags(skb, frag_iter) { + __wsum csum2; + int end; - for (; list; list = list->next) { - __wsum csum2; - int end; - - WARN_ON(start > offset + len); - - end = start + list->len; - if ((copy = end - offset) > 0) { - if (copy > len) - copy = len; - csum2 = skb_copy_and_csum_bits(list, - offset - start, - to, copy, 0); - csum = csum_block_add(csum, csum2, pos); - if ((len -= copy) == 0) - return csum; - offset += copy; - to += copy; - pos += copy; - } - start = end; + WARN_ON(start > offset + len); + + end = start + frag_iter->len; + if ((copy = end - offset) > 0) { + if (copy > len) + copy = len; + csum2 = skb_copy_and_csum_bits(frag_iter, + offset - start, + to, copy, 0); + csum = csum_block_add(csum, csum2, pos); + if ((len -= copy) == 0) + return csum; + offset += copy; + to += copy; + pos += copy; } + start = end; } BUG_ON(len); return csum; @@ -2288,7 +2284,7 @@ unsigned int skb_seq_read(unsigned int consumed, const u8 **data, next_skb: block_limit = skb_headlen(st->cur_skb) + st->stepped_offset; - if (abs_offset < block_limit) { + if (abs_offset < block_limit && !st->frag_data) { *data = st->cur_skb->data + (abs_offset - st->stepped_offset); return block_limit - abs_offset; } @@ -2324,8 +2320,7 @@ next_skb: st->frag_data = NULL; } - if (st->root_skb == st->cur_skb && - skb_shinfo(st->root_skb)->frag_list) { + if (st->root_skb == st->cur_skb && skb_has_frags(st->root_skb)) { st->cur_skb = skb_shinfo(st->root_skb)->frag_list; st->frag_idx = 0; goto next_skb; @@ -2636,7 +2631,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) } else skb_get(fskb2); - BUG_ON(skb_shinfo(nskb)->frag_list); + SKB_FRAG_ASSERT(nskb); skb_shinfo(nskb)->frag_list = fskb2; } @@ -2661,30 +2656,40 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) { struct sk_buff *p = *head; struct sk_buff *nskb; + struct skb_shared_info *skbinfo = skb_shinfo(skb); + struct skb_shared_info *pinfo = skb_shinfo(p); unsigned int headroom; unsigned int len = skb_gro_len(skb); + unsigned int offset = skb_gro_offset(skb); + unsigned int headlen = skb_headlen(skb); if (p->len + len >= 65536) return -E2BIG; - if (skb_shinfo(p)->frag_list) + if (pinfo->frag_list) goto merge; - else if (skb_headlen(skb) <= skb_gro_offset(skb)) { - if (skb_shinfo(p)->nr_frags + skb_shinfo(skb)->nr_frags > - MAX_SKB_FRAGS) + else if (headlen <= offset) { + skb_frag_t *frag; + skb_frag_t *frag2; + int i = skbinfo->nr_frags; + int nr_frags = pinfo->nr_frags + i; + + offset -= headlen; + + if (nr_frags > MAX_SKB_FRAGS) return -E2BIG; - skb_shinfo(skb)->frags[0].page_offset += - skb_gro_offset(skb) - skb_headlen(skb); - skb_shinfo(skb)->frags[0].size -= - skb_gro_offset(skb) - skb_headlen(skb); + pinfo->nr_frags = nr_frags; + skbinfo->nr_frags = 0; - memcpy(skb_shinfo(p)->frags + skb_shinfo(p)->nr_frags, - skb_shinfo(skb)->frags, - skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t)); + frag = pinfo->frags + nr_frags; + frag2 = skbinfo->frags + i; + do { + *--frag = *--frag2; + } while (--i); - skb_shinfo(p)->nr_frags += skb_shinfo(skb)->nr_frags; - skb_shinfo(skb)->nr_frags = 0; + frag->page_offset += offset; + frag->size -= offset; skb->truesize -= skb->data_len; skb->len -= skb->data_len; @@ -2715,7 +2720,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) *NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p); skb_shinfo(nskb)->frag_list = p; - skb_shinfo(nskb)->gso_size = skb_shinfo(p)->gso_size; + skb_shinfo(nskb)->gso_size = pinfo->gso_size; skb_header_release(p); nskb->prev = p; @@ -2730,16 +2735,13 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) p = nskb; merge: - if (skb_gro_offset(skb) > skb_headlen(skb)) { - skb_shinfo(skb)->frags[0].page_offset += - skb_gro_offset(skb) - skb_headlen(skb); - skb_shinfo(skb)->frags[0].size -= - skb_gro_offset(skb) - skb_headlen(skb); - skb_gro_reset_offset(skb); - skb_gro_pull(skb, skb_headlen(skb)); + if (offset > headlen) { + skbinfo->frags[0].page_offset += offset - headlen; + skbinfo->frags[0].size -= offset - headlen; + offset = headlen; } - __skb_pull(skb, skb_gro_offset(skb)); + __skb_pull(skb, offset); p->prev->next = skb; p->prev = skb; @@ -2786,6 +2788,7 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) { int start = skb_headlen(skb); int i, copy = start - offset; + struct sk_buff *frag_iter; int elt = 0; if (copy > 0) { @@ -2819,26 +2822,22 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) start = end; } - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; - - for (; list; list = list->next) { - int end; + skb_walk_frags(skb, frag_iter) { + int end; - WARN_ON(start > offset + len); + WARN_ON(start > offset + len); - end = start + list->len; - if ((copy = end - offset) > 0) { - if (copy > len) - copy = len; - elt += __skb_to_sgvec(list, sg+elt, offset - start, - copy); - if ((len -= copy) == 0) - return elt; - offset += copy; - } - start = end; + end = start + frag_iter->len; + if ((copy = end - offset) > 0) { + if (copy > len) + copy = len; + elt += __skb_to_sgvec(frag_iter, sg+elt, offset - start, + copy); + if ((len -= copy) == 0) + return elt; + offset += copy; } + start = end; } BUG_ON(len); return elt; @@ -2886,7 +2885,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) return -ENOMEM; /* Easy case. Most of packets will go this way. */ - if (!skb_shinfo(skb)->frag_list) { + if (!skb_has_frags(skb)) { /* A little of trouble, not enough of space for trailer. * This should not happen, when stack is tuned to generate * good frames. OK, on miss we reallocate and reserve even more @@ -2921,7 +2920,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) if (skb1->next == NULL && tailbits) { if (skb_shinfo(skb1)->nr_frags || - skb_shinfo(skb1)->frag_list || + skb_has_frags(skb1) || skb_tailroom(skb1) < tailbits) ntail = tailbits + 128; } @@ -2930,7 +2929,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) skb_cloned(skb1) || ntail || skb_shinfo(skb1)->nr_frags || - skb_shinfo(skb1)->frag_list) { + skb_has_frags(skb1)) { struct sk_buff *skb2; /* Fuck, we are miserable poor guys... */ @@ -3016,12 +3015,12 @@ EXPORT_SYMBOL_GPL(skb_tstamp_tx); */ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off) { - if (unlikely(start > skb->len - 2) || - unlikely((int)start + off > skb->len - 2)) { + if (unlikely(start > skb_headlen(skb)) || + unlikely((int)start + off > skb_headlen(skb) - 2)) { if (net_ratelimit()) printk(KERN_WARNING "bad partial csum: csum=%u/%u len=%u\n", - start, off, skb->len); + start, off, skb_headlen(skb)); return false; } skb->ip_summed = CHECKSUM_PARTIAL; diff --git a/net/core/sock.c b/net/core/sock.c index 7dbf3ffb35c..b0ba569bc97 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -155,6 +155,7 @@ static const char *af_family_key_strings[AF_MAX+1] = { "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" , "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , + "sk_lock-AF_IEEE802154", "sk_lock-AF_MAX" }; static const char *af_family_slock_key_strings[AF_MAX+1] = { @@ -170,6 +171,7 @@ static const char *af_family_slock_key_strings[AF_MAX+1] = { "slock-27" , "slock-28" , "slock-AF_CAN" , "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , + "slock-AF_IEEE802154", "slock-AF_MAX" }; static const char *af_family_clock_key_strings[AF_MAX+1] = { @@ -185,6 +187,7 @@ static const char *af_family_clock_key_strings[AF_MAX+1] = { "clock-27" , "clock-28" , "clock-AF_CAN" , "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , + "clock-AF_IEEE802154", "clock-AF_MAX" }; @@ -212,6 +215,7 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; /* Maximal space eaten by iovec or ancilliary data plus some space */ int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); +EXPORT_SYMBOL(sysctl_optmem_max); static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) { @@ -444,7 +448,7 @@ static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool) int sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen) { - struct sock *sk=sock->sk; + struct sock *sk = sock->sk; int val; int valbool; struct linger ling; @@ -463,15 +467,15 @@ int sock_setsockopt(struct socket *sock, int level, int optname, if (get_user(val, (int __user *)optval)) return -EFAULT; - valbool = val?1:0; + valbool = val ? 1 : 0; lock_sock(sk); - switch(optname) { + switch (optname) { case SO_DEBUG: - if (val && !capable(CAP_NET_ADMIN)) { + if (val && !capable(CAP_NET_ADMIN)) ret = -EACCES; - } else + else sock_valbool_flag(sk, SOCK_DBG, valbool); break; case SO_REUSEADDR: @@ -582,7 +586,7 @@ set_rcvbuf: ret = -EINVAL; /* 1003.1g */ break; } - if (copy_from_user(&ling,optval,sizeof(ling))) { + if (copy_from_user(&ling, optval, sizeof(ling))) { ret = -EFAULT; break; } @@ -690,9 +694,8 @@ set_rcvbuf: case SO_MARK: if (!capable(CAP_NET_ADMIN)) ret = -EPERM; - else { + else sk->sk_mark = val; - } break; /* We implement the SO_SNDLOWAT etc to @@ -704,6 +707,7 @@ set_rcvbuf: release_sock(sk); return ret; } +EXPORT_SYMBOL(sock_setsockopt); int sock_getsockopt(struct socket *sock, int level, int optname, @@ -727,7 +731,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, memset(&v, 0, sizeof(v)); - switch(optname) { + switch (optname) { case SO_DEBUG: v.val = sock_flag(sk, SOCK_DBG); break; @@ -762,7 +766,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, case SO_ERROR: v.val = -sock_error(sk); - if (v.val==0) + if (v.val == 0) v.val = xchg(&sk->sk_err_soft, 0); break; @@ -816,7 +820,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; case SO_RCVTIMEO: - lv=sizeof(struct timeval); + lv = sizeof(struct timeval); if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) { v.tm.tv_sec = 0; v.tm.tv_usec = 0; @@ -827,7 +831,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; case SO_SNDTIMEO: - lv=sizeof(struct timeval); + lv = sizeof(struct timeval); if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) { v.tm.tv_sec = 0; v.tm.tv_usec = 0; @@ -842,7 +846,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; case SO_SNDLOWAT: - v.val=1; + v.val = 1; break; case SO_PASSCRED: @@ -941,6 +945,8 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, sk = kmalloc(prot->obj_size, priority); if (sk != NULL) { + kmemcheck_annotate_bitfield(sk, flags); + if (security_sk_alloc(sk, family, priority)) goto out_free; @@ -1002,8 +1008,9 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, return sk; } +EXPORT_SYMBOL(sk_alloc); -void sk_free(struct sock *sk) +static void __sk_free(struct sock *sk) { struct sk_filter *filter; @@ -1027,6 +1034,18 @@ void sk_free(struct sock *sk) sk_prot_free(sk->sk_prot_creator, sk); } +void sk_free(struct sock *sk) +{ + /* + * We substract one from sk_wmem_alloc and can know if + * some packets are still in some tx queue. + * If not null, sock_wfree() will call __sk_free(sk) later + */ + if (atomic_dec_and_test(&sk->sk_wmem_alloc)) + __sk_free(sk); +} +EXPORT_SYMBOL(sk_free); + /* * Last sock_put should drop referrence to sk->sk_net. It has already * been dropped in sk_change_net. Taking referrence to stopping namespace @@ -1065,7 +1084,10 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL; atomic_set(&newsk->sk_rmem_alloc, 0); - atomic_set(&newsk->sk_wmem_alloc, 0); + /* + * sk_wmem_alloc set to one (see sk_free() and sock_wfree()) + */ + atomic_set(&newsk->sk_wmem_alloc, 1); atomic_set(&newsk->sk_omem_alloc, 0); skb_queue_head_init(&newsk->sk_receive_queue); skb_queue_head_init(&newsk->sk_write_queue); @@ -1126,7 +1148,6 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) out: return newsk; } - EXPORT_SYMBOL_GPL(sk_clone); void sk_setup_caps(struct sock *sk, struct dst_entry *dst) @@ -1170,13 +1191,20 @@ void __init sk_init(void) void sock_wfree(struct sk_buff *skb) { struct sock *sk = skb->sk; + int res; /* In case it might be waiting for more memory. */ - atomic_sub(skb->truesize, &sk->sk_wmem_alloc); + res = atomic_sub_return(skb->truesize, &sk->sk_wmem_alloc); if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) sk->sk_write_space(sk); - sock_put(sk); + /* + * if sk_wmem_alloc reached 0, we are last user and should + * free this sock, as sk_free() call could not do it. + */ + if (res == 0) + __sk_free(sk); } +EXPORT_SYMBOL(sock_wfree); /* * Read buffer destructor automatically called from kfree_skb. @@ -1188,6 +1216,7 @@ void sock_rfree(struct sk_buff *skb) atomic_sub(skb->truesize, &sk->sk_rmem_alloc); sk_mem_uncharge(skb->sk, skb->truesize); } +EXPORT_SYMBOL(sock_rfree); int sock_i_uid(struct sock *sk) @@ -1199,6 +1228,7 @@ int sock_i_uid(struct sock *sk) read_unlock(&sk->sk_callback_lock); return uid; } +EXPORT_SYMBOL(sock_i_uid); unsigned long sock_i_ino(struct sock *sk) { @@ -1209,6 +1239,7 @@ unsigned long sock_i_ino(struct sock *sk) read_unlock(&sk->sk_callback_lock); return ino; } +EXPORT_SYMBOL(sock_i_ino); /* * Allocate a skb from the socket's send buffer. @@ -1217,7 +1248,7 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, gfp_t priority) { if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { - struct sk_buff * skb = alloc_skb(size, priority); + struct sk_buff *skb = alloc_skb(size, priority); if (skb) { skb_set_owner_w(skb, sk); return skb; @@ -1225,6 +1256,7 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, } return NULL; } +EXPORT_SYMBOL(sock_wmalloc); /* * Allocate a skb from the socket's receive buffer. @@ -1261,6 +1293,7 @@ void *sock_kmalloc(struct sock *sk, int size, gfp_t priority) } return NULL; } +EXPORT_SYMBOL(sock_kmalloc); /* * Free an option memory block. @@ -1270,11 +1303,12 @@ void sock_kfree_s(struct sock *sk, void *mem, int size) kfree(mem); atomic_sub(size, &sk->sk_omem_alloc); } +EXPORT_SYMBOL(sock_kfree_s); /* It is almost wait_for_tcp_memory minus release_sock/lock_sock. I think, these locks should be removed for datagram sockets. */ -static long sock_wait_for_wmem(struct sock * sk, long timeo) +static long sock_wait_for_wmem(struct sock *sk, long timeo) { DEFINE_WAIT(wait); @@ -1392,6 +1426,7 @@ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, { return sock_alloc_send_pskb(sk, size, 0, noblock, errcode); } +EXPORT_SYMBOL(sock_alloc_send_skb); static void __lock_sock(struct sock *sk) { @@ -1460,7 +1495,6 @@ int sk_wait_data(struct sock *sk, long *timeo) finish_wait(sk->sk_sleep, &wait); return rc; } - EXPORT_SYMBOL(sk_wait_data); /** @@ -1541,7 +1575,6 @@ suppress_allocation: atomic_sub(amt, prot->memory_allocated); return 0; } - EXPORT_SYMBOL(__sk_mem_schedule); /** @@ -1560,7 +1593,6 @@ void __sk_mem_reclaim(struct sock *sk) (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0])) *prot->memory_pressure = 0; } - EXPORT_SYMBOL(__sk_mem_reclaim); @@ -1575,78 +1607,92 @@ int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len) { return -EOPNOTSUPP; } +EXPORT_SYMBOL(sock_no_bind); int sock_no_connect(struct socket *sock, struct sockaddr *saddr, int len, int flags) { return -EOPNOTSUPP; } +EXPORT_SYMBOL(sock_no_connect); int sock_no_socketpair(struct socket *sock1, struct socket *sock2) { return -EOPNOTSUPP; } +EXPORT_SYMBOL(sock_no_socketpair); int sock_no_accept(struct socket *sock, struct socket *newsock, int flags) { return -EOPNOTSUPP; } +EXPORT_SYMBOL(sock_no_accept); int sock_no_getname(struct socket *sock, struct sockaddr *saddr, int *len, int peer) { return -EOPNOTSUPP; } +EXPORT_SYMBOL(sock_no_getname); -unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt) +unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt) { return 0; } +EXPORT_SYMBOL(sock_no_poll); int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { return -EOPNOTSUPP; } +EXPORT_SYMBOL(sock_no_ioctl); int sock_no_listen(struct socket *sock, int backlog) { return -EOPNOTSUPP; } +EXPORT_SYMBOL(sock_no_listen); int sock_no_shutdown(struct socket *sock, int how) { return -EOPNOTSUPP; } +EXPORT_SYMBOL(sock_no_shutdown); int sock_no_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen) { return -EOPNOTSUPP; } +EXPORT_SYMBOL(sock_no_setsockopt); int sock_no_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { return -EOPNOTSUPP; } +EXPORT_SYMBOL(sock_no_getsockopt); int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t len) { return -EOPNOTSUPP; } +EXPORT_SYMBOL(sock_no_sendmsg); int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t len, int flags) { return -EOPNOTSUPP; } +EXPORT_SYMBOL(sock_no_recvmsg); int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma) { /* Mirror missing mmap method error code */ return -ENODEV; } +EXPORT_SYMBOL(sock_no_mmap); ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) { @@ -1660,6 +1706,7 @@ ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, siz kunmap(page); return res; } +EXPORT_SYMBOL(sock_no_sendpage); /* * Default Socket Callbacks @@ -1723,6 +1770,7 @@ void sk_send_sigurg(struct sock *sk) if (send_sigurg(&sk->sk_socket->file->f_owner)) sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI); } +EXPORT_SYMBOL(sk_send_sigurg); void sk_reset_timer(struct sock *sk, struct timer_list* timer, unsigned long expires) @@ -1730,7 +1778,6 @@ void sk_reset_timer(struct sock *sk, struct timer_list* timer, if (!mod_timer(timer, expires)) sock_hold(sk); } - EXPORT_SYMBOL(sk_reset_timer); void sk_stop_timer(struct sock *sk, struct timer_list* timer) @@ -1738,7 +1785,6 @@ void sk_stop_timer(struct sock *sk, struct timer_list* timer) if (timer_pending(timer) && del_timer(timer)) __sock_put(sk); } - EXPORT_SYMBOL(sk_stop_timer); void sock_init_data(struct socket *sock, struct sock *sk) @@ -1795,8 +1841,10 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_stamp = ktime_set(-1L, 0); atomic_set(&sk->sk_refcnt, 1); + atomic_set(&sk->sk_wmem_alloc, 1); atomic_set(&sk->sk_drops, 0); } +EXPORT_SYMBOL(sock_init_data); void lock_sock_nested(struct sock *sk, int subclass) { @@ -1812,7 +1860,6 @@ void lock_sock_nested(struct sock *sk, int subclass) mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_); local_bh_enable(); } - EXPORT_SYMBOL(lock_sock_nested); void release_sock(struct sock *sk) @@ -1895,7 +1942,6 @@ int sock_common_getsockopt(struct socket *sock, int level, int optname, return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen); } - EXPORT_SYMBOL(sock_common_getsockopt); #ifdef CONFIG_COMPAT @@ -1925,7 +1971,6 @@ int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock, msg->msg_namelen = addr_len; return err; } - EXPORT_SYMBOL(sock_common_recvmsg); /* @@ -1938,7 +1983,6 @@ int sock_common_setsockopt(struct socket *sock, int level, int optname, return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen); } - EXPORT_SYMBOL(sock_common_setsockopt); #ifdef CONFIG_COMPAT @@ -1989,7 +2033,6 @@ void sk_common_release(struct sock *sk) sk_refcnt_debug_release(sk); sock_put(sk); } - EXPORT_SYMBOL(sk_common_release); static DEFINE_RWLOCK(proto_list_lock); @@ -2171,7 +2214,6 @@ out_free_sock_slab: out: return -ENOBUFS; } - EXPORT_SYMBOL(proto_register); void proto_unregister(struct proto *prot) @@ -2198,7 +2240,6 @@ void proto_unregister(struct proto *prot) prot->twsk_prot->twsk_slab = NULL; } } - EXPORT_SYMBOL(proto_unregister); #ifdef CONFIG_PROC_FS @@ -2324,33 +2365,3 @@ static int __init proto_init(void) subsys_initcall(proto_init); #endif /* PROC_FS */ - -EXPORT_SYMBOL(sk_alloc); -EXPORT_SYMBOL(sk_free); -EXPORT_SYMBOL(sk_send_sigurg); -EXPORT_SYMBOL(sock_alloc_send_skb); -EXPORT_SYMBOL(sock_init_data); -EXPORT_SYMBOL(sock_kfree_s); -EXPORT_SYMBOL(sock_kmalloc); -EXPORT_SYMBOL(sock_no_accept); -EXPORT_SYMBOL(sock_no_bind); -EXPORT_SYMBOL(sock_no_connect); -EXPORT_SYMBOL(sock_no_getname); -EXPORT_SYMBOL(sock_no_getsockopt); -EXPORT_SYMBOL(sock_no_ioctl); -EXPORT_SYMBOL(sock_no_listen); -EXPORT_SYMBOL(sock_no_mmap); -EXPORT_SYMBOL(sock_no_poll); -EXPORT_SYMBOL(sock_no_recvmsg); -EXPORT_SYMBOL(sock_no_sendmsg); -EXPORT_SYMBOL(sock_no_sendpage); -EXPORT_SYMBOL(sock_no_setsockopt); -EXPORT_SYMBOL(sock_no_shutdown); -EXPORT_SYMBOL(sock_no_socketpair); -EXPORT_SYMBOL(sock_rfree); -EXPORT_SYMBOL(sock_setsockopt); -EXPORT_SYMBOL(sock_wfree); -EXPORT_SYMBOL(sock_wmalloc); -EXPORT_SYMBOL(sock_i_uid); -EXPORT_SYMBOL(sock_i_ino); -EXPORT_SYMBOL(sysctl_optmem_max); diff --git a/net/core/stream.c b/net/core/stream.c index 8727cead64a..a37debfeb1b 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -33,7 +33,8 @@ void sk_stream_write_space(struct sock *sk) clear_bit(SOCK_NOSPACE, &sock->flags); if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible(sk->sk_sleep); + wake_up_interruptible_poll(sk->sk_sleep, POLLOUT | + POLLWRNORM | POLLWRBAND); if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT); } diff --git a/net/core/user_dma.c b/net/core/user_dma.c index 164b090d5ac..25d717ebc92 100644 --- a/net/core/user_dma.c +++ b/net/core/user_dma.c @@ -51,6 +51,7 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan, { int start = skb_headlen(skb); int i, copy = start - offset; + struct sk_buff *frag_iter; dma_cookie_t cookie = 0; /* Copy header. */ @@ -94,31 +95,28 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan, start = end; } - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; - - for (; list; list = list->next) { - int end; - - WARN_ON(start > offset + len); - - end = start + list->len; - copy = end - offset; - if (copy > 0) { - if (copy > len) - copy = len; - cookie = dma_skb_copy_datagram_iovec(chan, list, - offset - start, to, copy, - pinned_list); - if (cookie < 0) - goto fault; - len -= copy; - if (len == 0) - goto end; - offset += copy; - } - start = end; + skb_walk_frags(skb, frag_iter) { + int end; + + WARN_ON(start > offset + len); + + end = start + frag_iter->len; + copy = end - offset; + if (copy > 0) { + if (copy > len) + copy = len; + cookie = dma_skb_copy_datagram_iovec(chan, frag_iter, + offset - start, + to, copy, + pinned_list); + if (cookie < 0) + goto fault; + len -= copy; + if (len == 0) + goto end; + offset += copy; } + start = end; } end: |