Diffstat (limited to 'net/core')
-rw-r--r--  net/core/datagram.c       | 241
-rw-r--r--  net/core/dev.c            | 688
-rw-r--r--  net/core/drop_monitor.c   | 137
-rw-r--r--  net/core/fib_rules.c      |   4
-rw-r--r--  net/core/gen_estimator.c  |  15
-rw-r--r--  net/core/iovec.c          |  33
-rw-r--r--  net/core/neighbour.c      |  57
-rw-r--r--  net/core/net-sysfs.c      |   9
-rw-r--r--  net/core/net-traces.c     |   3
-rw-r--r--  net/core/net_namespace.c  |  54
-rw-r--r--  net/core/netpoll.c        |  15
-rw-r--r--  net/core/pktgen.c         |   9
-rw-r--r--  net/core/skb_dma_map.c    |  13
-rw-r--r--  net/core/skbuff.c         | 317
-rw-r--r--  net/core/sock.c           | 137
-rw-r--r--  net/core/stream.c         |   3
-rw-r--r--  net/core/user_dma.c       |  46
17 files changed, 1190 insertions(+), 591 deletions(-)
diff --git a/net/core/datagram.c b/net/core/datagram.c
index b01a76abe1d..58abee1f1df 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -260,7 +260,9 @@ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
spin_unlock_bh(&sk->sk_receive_queue.lock);
}
- skb_free_datagram(sk, skb);
+ kfree_skb(skb);
+ sk_mem_reclaim_partial(sk);
+
return err;
}
@@ -280,6 +282,7 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
{
int start = skb_headlen(skb);
int i, copy = start - offset;
+ struct sk_buff *frag_iter;
/* Copy header. */
if (copy > 0) {
@@ -320,28 +323,24 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
start = end;
}
- if (skb_shinfo(skb)->frag_list) {
- struct sk_buff *list = skb_shinfo(skb)->frag_list;
-
- for (; list; list = list->next) {
- int end;
-
- WARN_ON(start > offset + len);
-
- end = start + list->len;
- if ((copy = end - offset) > 0) {
- if (copy > len)
- copy = len;
- if (skb_copy_datagram_iovec(list,
- offset - start,
- to, copy))
- goto fault;
- if ((len -= copy) == 0)
- return 0;
- offset += copy;
- }
- start = end;
+ skb_walk_frags(skb, frag_iter) {
+ int end;
+
+ WARN_ON(start > offset + len);
+
+ end = start + frag_iter->len;
+ if ((copy = end - offset) > 0) {
+ if (copy > len)
+ copy = len;
+ if (skb_copy_datagram_iovec(frag_iter,
+ offset - start,
+ to, copy))
+ goto fault;
+ if ((len -= copy) == 0)
+ return 0;
+ offset += copy;
}
+ start = end;
}
if (!len)
return 0;
@@ -351,30 +350,124 @@ fault:
}
/**
+ * skb_copy_datagram_const_iovec - Copy a datagram to an iovec.
+ * @skb: buffer to copy
+ * @offset: offset in the buffer to start copying from
+ * @to: io vector to copy to
+ * @to_offset: offset in the io vector to start copying to
+ * @len: amount of data to copy from buffer to iovec
+ *
+ * Returns 0 or -EFAULT.
+ * Note: the iovec is not modified during the copy.
+ */
+int skb_copy_datagram_const_iovec(const struct sk_buff *skb, int offset,
+ const struct iovec *to, int to_offset,
+ int len)
+{
+ int start = skb_headlen(skb);
+ int i, copy = start - offset;
+ struct sk_buff *frag_iter;
+
+ /* Copy header. */
+ if (copy > 0) {
+ if (copy > len)
+ copy = len;
+ if (memcpy_toiovecend(to, skb->data + offset, to_offset, copy))
+ goto fault;
+ if ((len -= copy) == 0)
+ return 0;
+ offset += copy;
+ to_offset += copy;
+ }
+
+ /* Copy paged appendix. Hmm... why does this look so complicated? */
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ int end;
+
+ WARN_ON(start > offset + len);
+
+ end = start + skb_shinfo(skb)->frags[i].size;
+ if ((copy = end - offset) > 0) {
+ int err;
+ u8 *vaddr;
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ struct page *page = frag->page;
+
+ if (copy > len)
+ copy = len;
+ vaddr = kmap(page);
+ err = memcpy_toiovecend(to, vaddr + frag->page_offset +
+ offset - start, to_offset, copy);
+ kunmap(page);
+ if (err)
+ goto fault;
+ if (!(len -= copy))
+ return 0;
+ offset += copy;
+ to_offset += copy;
+ }
+ start = end;
+ }
+
+ skb_walk_frags(skb, frag_iter) {
+ int end;
+
+ WARN_ON(start > offset + len);
+
+ end = start + frag_iter->len;
+ if ((copy = end - offset) > 0) {
+ if (copy > len)
+ copy = len;
+ if (skb_copy_datagram_const_iovec(frag_iter,
+ offset - start,
+ to, to_offset,
+ copy))
+ goto fault;
+ if ((len -= copy) == 0)
+ return 0;
+ offset += copy;
+ to_offset += copy;
+ }
+ start = end;
+ }
+ if (!len)
+ return 0;
+
+fault:
+ return -EFAULT;
+}
+EXPORT_SYMBOL(skb_copy_datagram_const_iovec);
+
+/**
* skb_copy_datagram_from_iovec - Copy a datagram from an iovec.
* @skb: buffer to copy
* @offset: offset in the buffer to start copying to
* @from: io vector to copy from
+ * @from_offset: offset in the io vector to start copying from
* @len: amount of data to copy to buffer from iovec
*
* Returns 0 or -EFAULT.
- * Note: the iovec is modified during the copy.
+ * Note: the iovec is not modified during the copy.
*/
int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
- struct iovec *from, int len)
+ const struct iovec *from, int from_offset,
+ int len)
{
int start = skb_headlen(skb);
int i, copy = start - offset;
+ struct sk_buff *frag_iter;
/* Copy header. */
if (copy > 0) {
if (copy > len)
copy = len;
- if (memcpy_fromiovec(skb->data + offset, from, copy))
+ if (memcpy_fromiovecend(skb->data + offset, from, from_offset,
+ copy))
goto fault;
if ((len -= copy) == 0)
return 0;
offset += copy;
+ from_offset += copy;
}
/* Copy paged appendix. Hmm... why does this look so complicated? */
@@ -393,8 +486,9 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
if (copy > len)
copy = len;
vaddr = kmap(page);
- err = memcpy_fromiovec(vaddr + frag->page_offset +
- offset - start, from, copy);
+ err = memcpy_fromiovecend(vaddr + frag->page_offset +
+ offset - start,
+ from, from_offset, copy);
kunmap(page);
if (err)
goto fault;
@@ -402,32 +496,32 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
if (!(len -= copy))
return 0;
offset += copy;
+ from_offset += copy;
}
start = end;
}
- if (skb_shinfo(skb)->frag_list) {
- struct sk_buff *list = skb_shinfo(skb)->frag_list;
-
- for (; list; list = list->next) {
- int end;
-
- WARN_ON(start > offset + len);
-
- end = start + list->len;
- if ((copy = end - offset) > 0) {
- if (copy > len)
- copy = len;
- if (skb_copy_datagram_from_iovec(list,
- offset - start,
- from, copy))
- goto fault;
- if ((len -= copy) == 0)
- return 0;
- offset += copy;
- }
- start = end;
+ skb_walk_frags(skb, frag_iter) {
+ int end;
+
+ WARN_ON(start > offset + len);
+
+ end = start + frag_iter->len;
+ if ((copy = end - offset) > 0) {
+ if (copy > len)
+ copy = len;
+ if (skb_copy_datagram_from_iovec(frag_iter,
+ offset - start,
+ from,
+ from_offset,
+ copy))
+ goto fault;
+ if ((len -= copy) == 0)
+ return 0;
+ offset += copy;
+ from_offset += copy;
}
+ start = end;
}
if (!len)
return 0;
@@ -442,8 +536,9 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
__wsum *csump)
{
int start = skb_headlen(skb);
- int pos = 0;
int i, copy = start - offset;
+ struct sk_buff *frag_iter;
+ int pos = 0;
/* Copy header. */
if (copy > 0) {
@@ -494,33 +589,29 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
start = end;
}
- if (skb_shinfo(skb)->frag_list) {
- struct sk_buff *list = skb_shinfo(skb)->frag_list;
-
- for (; list; list=list->next) {
- int end;
-
- WARN_ON(start > offset + len);
-
- end = start + list->len;
- if ((copy = end - offset) > 0) {
- __wsum csum2 = 0;
- if (copy > len)
- copy = len;
- if (skb_copy_and_csum_datagram(list,
- offset - start,
- to, copy,
- &csum2))
- goto fault;
- *csump = csum_block_add(*csump, csum2, pos);
- if ((len -= copy) == 0)
- return 0;
- offset += copy;
- to += copy;
- pos += copy;
- }
- start = end;
+ skb_walk_frags(skb, frag_iter) {
+ int end;
+
+ WARN_ON(start > offset + len);
+
+ end = start + frag_iter->len;
+ if ((copy = end - offset) > 0) {
+ __wsum csum2 = 0;
+ if (copy > len)
+ copy = len;
+ if (skb_copy_and_csum_datagram(frag_iter,
+ offset - start,
+ to, copy,
+ &csum2))
+ goto fault;
+ *csump = csum_block_add(*csump, csum2, pos);
+ if ((len -= copy) == 0)
+ return 0;
+ offset += copy;
+ to += copy;
+ pos += copy;
}
+ start = end;
}
if (!len)
return 0;
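The three rewritten walkers above share one traversal idiom: a running start/end window over the skb head, the page frags, and the fragment list, with copy = end - offset deciding how much of each region overlaps the requested range. A minimal userspace sketch of that accounting, with a hypothetical buf_chain type standing in for the skb fragment list (an illustration of the arithmetic, not the kernel API):

#include <string.h>

/* Hypothetical stand-in for an skb fragment list: a chain of buffers. */
struct buf_chain {
	const unsigned char *data;
	int len;
	struct buf_chain *next;
};

/* Copy 'len' bytes starting at logical 'offset' out of the chain,
 * using the same start/end window arithmetic as the skb walkers. */
static int chain_copy(const struct buf_chain *c, int offset,
		      unsigned char *to, int len)
{
	int start = 0;

	for (; c; c = c->next) {
		int end = start + c->len;
		int copy = end - offset;

		if (copy > 0) {
			if (copy > len)
				copy = len;
			memcpy(to, c->data + (offset - start), copy);
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			to += copy;
		}
		start = end;
	}
	return len ? -1 : 0;	/* -1: range ran past the end of the chain */
}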
diff --git a/net/core/dev.c b/net/core/dev.c
index e2e9e4af3ac..576a61574a9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -126,6 +126,7 @@
#include <linux/in.h>
#include <linux/jhash.h>
#include <linux/random.h>
+#include <trace/events/napi.h>
#include "net-sysfs.h"
@@ -268,7 +269,8 @@ static const unsigned short netdev_lock_type[] =
ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
- ARPHRD_PHONET_PIPE, ARPHRD_VOID, ARPHRD_NONE};
+ ARPHRD_PHONET_PIPE, ARPHRD_IEEE802154, ARPHRD_IEEE802154_PHY,
+ ARPHRD_VOID, ARPHRD_NONE};
static const char *netdev_lock_name[] =
{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
@@ -285,7 +287,8 @@ static const char *netdev_lock_name[] =
"_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
"_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
"_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
- "_xmit_PHONET_PIPE", "_xmit_VOID", "_xmit_NONE"};
+ "_xmit_PHONET_PIPE", "_xmit_IEEE802154", "_xmit_IEEE802154_PHY",
+ "_xmit_VOID", "_xmit_NONE"};
static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
@@ -1047,7 +1050,7 @@ void dev_load(struct net *net, const char *name)
int dev_open(struct net_device *dev)
{
const struct net_device_ops *ops = dev->netdev_ops;
- int ret = 0;
+ int ret;
ASSERT_RTNL();
@@ -1064,6 +1067,11 @@ int dev_open(struct net_device *dev)
if (!netif_device_present(dev))
return -ENODEV;
+ ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
+ ret = notifier_to_errno(ret);
+ if (ret)
+ return ret;
+
/*
* Call device private open method
*/
@@ -1688,7 +1696,16 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
goto gso;
}
+ /*
+ * If the device doesn't need skb->dst, release it right now while
+ * it's still hot in this CPU's cache
+ */
+ if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
+ skb_dst_drop(skb);
+
rc = ops->ndo_start_xmit(skb, dev);
+ if (rc == 0)
+ txq_trans_update(txq);
/*
* TODO: if skb_orphan() was called by
* dev->hard_start_xmit() (for example, the unmodified
@@ -1718,6 +1735,7 @@ gso:
skb->next = nskb;
return rc;
}
+ txq_trans_update(txq);
if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
return NETDEV_TX_BUSY;
} while (skb->next);
@@ -1735,8 +1753,12 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
{
u32 hash;
- if (skb_rx_queue_recorded(skb))
- return skb_get_rx_queue(skb) % dev->real_num_tx_queues;
+ if (skb_rx_queue_recorded(skb)) {
+ hash = skb_get_rx_queue(skb);
+ while (unlikely(hash >= dev->real_num_tx_queues))
+ hash -= dev->real_num_tx_queues;
+ return hash;
+ }
if (skb->sk && skb->sk->sk_hash)
hash = skb->sk->sk_hash;
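The skb_tx_hash() change above replaces a modulo with a subtraction loop: a recorded rx queue number is expected to already be below real_num_tx_queues, so the loop body almost never runs and the common case avoids an integer divide. The reduction in isolation, as a small sketch:

/* Reduce a recorded queue index into [0, nqueues); in the expected
 * case the index is already in range and the loop never iterates. */
static unsigned int reduce_queue(unsigned int hash, unsigned int nqueues)
{
	while (hash >= nqueues)
		hash -= nqueues;
	return hash;
}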
@@ -1800,7 +1822,7 @@ int dev_queue_xmit(struct sk_buff *skb)
if (netif_needs_gso(dev, skb))
goto gso;
- if (skb_shinfo(skb)->frag_list &&
+ if (skb_has_frags(skb) &&
!(dev->features & NETIF_F_FRAGLIST) &&
__skb_linearize(skb))
goto out_kfree_skb;
@@ -2049,11 +2071,13 @@ static inline int deliver_skb(struct sk_buff *skb,
}
#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
-/* These hooks defined here for ATM */
-struct net_bridge;
-struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
- unsigned char *addr);
-void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;
+
+#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
+/* This hook is defined here for ATM LANE */
+int (*br_fdb_test_addr_hook)(struct net_device *dev,
+ unsigned char *addr) __read_mostly;
+EXPORT_SYMBOL(br_fdb_test_addr_hook);
+#endif
/*
* If bridge module is loaded call bridging hook.
@@ -2061,6 +2085,8 @@ void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;
*/
struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
struct sk_buff *skb) __read_mostly;
+EXPORT_SYMBOL(br_handle_frame_hook);
+
static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
struct packet_type **pt_prev, int *ret,
struct net_device *orig_dev)
@@ -2374,26 +2400,6 @@ void napi_gro_flush(struct napi_struct *napi)
}
EXPORT_SYMBOL(napi_gro_flush);
-void *skb_gro_header(struct sk_buff *skb, unsigned int hlen)
-{
- unsigned int offset = skb_gro_offset(skb);
-
- hlen += offset;
- if (hlen <= skb_headlen(skb))
- return skb->data + offset;
-
- if (unlikely(!skb_shinfo(skb)->nr_frags ||
- skb_shinfo(skb)->frags[0].size <=
- hlen - skb_headlen(skb) ||
- PageHighMem(skb_shinfo(skb)->frags[0].page)))
- return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL;
-
- return page_address(skb_shinfo(skb)->frags[0].page) +
- skb_shinfo(skb)->frags[0].page_offset +
- offset - skb_headlen(skb);
-}
-EXPORT_SYMBOL(skb_gro_header);
-
int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
struct sk_buff **pp = NULL;
@@ -2407,7 +2413,7 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
if (!(skb->dev->features & NETIF_F_GRO))
goto normal;
- if (skb_is_gso(skb) || skb_shinfo(skb)->frag_list)
+ if (skb_is_gso(skb) || skb_has_frags(skb))
goto normal;
rcu_read_lock();
@@ -2456,10 +2462,25 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
ret = GRO_HELD;
pull:
- if (unlikely(!pskb_may_pull(skb, skb_gro_offset(skb)))) {
- if (napi->gro_list == skb)
- napi->gro_list = skb->next;
- ret = GRO_DROP;
+ if (skb_headlen(skb) < skb_gro_offset(skb)) {
+ int grow = skb_gro_offset(skb) - skb_headlen(skb);
+
+ BUG_ON(skb->end - skb->tail < grow);
+
+ memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
+
+ skb->tail += grow;
+ skb->data_len -= grow;
+
+ skb_shinfo(skb)->frags[0].page_offset += grow;
+ skb_shinfo(skb)->frags[0].size -= grow;
+
+ if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
+ put_page(skb_shinfo(skb)->frags[0].page);
+ memmove(skb_shinfo(skb)->frags,
+ skb_shinfo(skb)->frags + 1,
+ --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
+ }
}
ok:
@@ -2509,6 +2530,22 @@ int napi_skb_finish(int ret, struct sk_buff *skb)
}
EXPORT_SYMBOL(napi_skb_finish);
+void skb_gro_reset_offset(struct sk_buff *skb)
+{
+ NAPI_GRO_CB(skb)->data_offset = 0;
+ NAPI_GRO_CB(skb)->frag0 = NULL;
+ NAPI_GRO_CB(skb)->frag0_len = 0;
+
+ if (skb->mac_header == skb->tail &&
+ !PageHighMem(skb_shinfo(skb)->frags[0].page)) {
+ NAPI_GRO_CB(skb)->frag0 =
+ page_address(skb_shinfo(skb)->frags[0].page) +
+ skb_shinfo(skb)->frags[0].page_offset;
+ NAPI_GRO_CB(skb)->frag0_len = skb_shinfo(skb)->frags[0].size;
+ }
+}
+EXPORT_SYMBOL(skb_gro_reset_offset);
+
int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
skb_gro_reset_offset(skb);
@@ -2526,16 +2563,10 @@ void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
}
EXPORT_SYMBOL(napi_reuse_skb);
-struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi,
- struct napi_gro_fraginfo *info)
+struct sk_buff *napi_get_frags(struct napi_struct *napi)
{
struct net_device *dev = napi->dev;
struct sk_buff *skb = napi->skb;
- struct ethhdr *eth;
- skb_frag_t *frag;
- int i;
-
- napi->skb = NULL;
if (!skb) {
skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN);
@@ -2543,47 +2574,14 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi,
goto out;
skb_reserve(skb, NET_IP_ALIGN);
- }
- BUG_ON(info->nr_frags > MAX_SKB_FRAGS);
- frag = info->frags;
-
- for (i = 0; i < info->nr_frags; i++) {
- skb_fill_page_desc(skb, i, frag->page, frag->page_offset,
- frag->size);
- frag++;
- }
- skb_shinfo(skb)->nr_frags = info->nr_frags;
-
- skb->data_len = info->len;
- skb->len += info->len;
- skb->truesize += info->len;
-
- skb_reset_mac_header(skb);
- skb_gro_reset_offset(skb);
-
- eth = skb_gro_header(skb, sizeof(*eth));
- if (!eth) {
- napi_reuse_skb(napi, skb);
- skb = NULL;
- goto out;
+ napi->skb = skb;
}
- skb_gro_pull(skb, sizeof(*eth));
-
- /*
- * This works because the only protocols we care about don't require
- * special handling. We'll fix it up properly at the end.
- */
- skb->protocol = eth->h_proto;
-
- skb->ip_summed = info->ip_summed;
- skb->csum = info->csum;
-
out:
return skb;
}
-EXPORT_SYMBOL(napi_fraginfo_skb);
+EXPORT_SYMBOL(napi_get_frags);
int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
{
@@ -2613,9 +2611,46 @@ int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
}
EXPORT_SYMBOL(napi_frags_finish);
-int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info)
+struct sk_buff *napi_frags_skb(struct napi_struct *napi)
{
- struct sk_buff *skb = napi_fraginfo_skb(napi, info);
+ struct sk_buff *skb = napi->skb;
+ struct ethhdr *eth;
+ unsigned int hlen;
+ unsigned int off;
+
+ napi->skb = NULL;
+
+ skb_reset_mac_header(skb);
+ skb_gro_reset_offset(skb);
+
+ off = skb_gro_offset(skb);
+ hlen = off + sizeof(*eth);
+ eth = skb_gro_header_fast(skb, off);
+ if (skb_gro_header_hard(skb, hlen)) {
+ eth = skb_gro_header_slow(skb, hlen, off);
+ if (unlikely(!eth)) {
+ napi_reuse_skb(napi, skb);
+ skb = NULL;
+ goto out;
+ }
+ }
+
+ skb_gro_pull(skb, sizeof(*eth));
+
+ /*
+ * This works because the only protocols we care about don't require
+ * special handling. We'll fix it up properly at the end.
+ */
+ skb->protocol = eth->h_proto;
+
+out:
+ return skb;
+}
+EXPORT_SYMBOL(napi_frags_skb);
+
+int napi_gro_frags(struct napi_struct *napi)
+{
+ struct sk_buff *skb = napi_frags_skb(napi);
if (!skb)
return NET_RX_DROP;
@@ -2719,7 +2754,7 @@ void netif_napi_del(struct napi_struct *napi)
struct sk_buff *skb, *next;
list_del_init(&napi->dev_list);
- kfree_skb(napi->skb);
+ napi_free_frags(napi);
for (skb = napi->gro_list; skb; skb = next) {
next = skb->next;
@@ -2773,8 +2808,10 @@ static void net_rx_action(struct softirq_action *h)
* accidently calling ->poll() when NAPI is not scheduled.
*/
work = 0;
- if (test_bit(NAPI_STATE_SCHED, &n->state))
+ if (test_bit(NAPI_STATE_SCHED, &n->state)) {
work = n->poll(n, weight);
+ trace_napi_poll(n);
+ }
WARN_ON_ONCE(work > weight);
@@ -3444,6 +3481,319 @@ void dev_set_rx_mode(struct net_device *dev)
netif_addr_unlock_bh(dev);
}
+/* hw addresses list handling functions */
+
+static int __hw_addr_add(struct list_head *list, int *delta,
+ unsigned char *addr, int addr_len,
+ unsigned char addr_type)
+{
+ struct netdev_hw_addr *ha;
+ int alloc_size;
+
+ if (addr_len > MAX_ADDR_LEN)
+ return -EINVAL;
+
+ list_for_each_entry(ha, list, list) {
+ if (!memcmp(ha->addr, addr, addr_len) &&
+ ha->type == addr_type) {
+ ha->refcount++;
+ return 0;
+ }
+ }
+
+ alloc_size = sizeof(*ha);
+ if (alloc_size < L1_CACHE_BYTES)
+ alloc_size = L1_CACHE_BYTES;
+ ha = kmalloc(alloc_size, GFP_ATOMIC);
+ if (!ha)
+ return -ENOMEM;
+ memcpy(ha->addr, addr, addr_len);
+ ha->type = addr_type;
+ ha->refcount = 1;
+ ha->synced = false;
+ list_add_tail_rcu(&ha->list, list);
+ if (delta)
+ (*delta)++;
+ return 0;
+}
+
+static void ha_rcu_free(struct rcu_head *head)
+{
+ struct netdev_hw_addr *ha;
+
+ ha = container_of(head, struct netdev_hw_addr, rcu_head);
+ kfree(ha);
+}
+
+static int __hw_addr_del(struct list_head *list, int *delta,
+ unsigned char *addr, int addr_len,
+ unsigned char addr_type)
+{
+ struct netdev_hw_addr *ha;
+
+ list_for_each_entry(ha, list, list) {
+ if (!memcmp(ha->addr, addr, addr_len) &&
+ (ha->type == addr_type || !addr_type)) {
+ if (--ha->refcount)
+ return 0;
+ list_del_rcu(&ha->list);
+ call_rcu(&ha->rcu_head, ha_rcu_free);
+ if (delta)
+ (*delta)--;
+ return 0;
+ }
+ }
+ return -ENOENT;
+}
+
+static int __hw_addr_add_multiple(struct list_head *to_list, int *to_delta,
+ struct list_head *from_list, int addr_len,
+ unsigned char addr_type)
+{
+ int err;
+ struct netdev_hw_addr *ha, *ha2;
+ unsigned char type;
+
+ list_for_each_entry(ha, from_list, list) {
+ type = addr_type ? addr_type : ha->type;
+ err = __hw_addr_add(to_list, to_delta, ha->addr,
+ addr_len, type);
+ if (err)
+ goto unroll;
+ }
+ return 0;
+
+unroll:
+ list_for_each_entry(ha2, from_list, list) {
+ if (ha2 == ha)
+ break;
+ type = addr_type ? addr_type : ha2->type;
+ __hw_addr_del(to_list, to_delta, ha2->addr,
+ addr_len, type);
+ }
+ return err;
+}
+
+static void __hw_addr_del_multiple(struct list_head *to_list, int *to_delta,
+ struct list_head *from_list, int addr_len,
+ unsigned char addr_type)
+{
+ struct netdev_hw_addr *ha;
+ unsigned char type;
+
+ list_for_each_entry(ha, from_list, list) {
+ type = addr_type ? addr_type : ha->type;
+ __hw_addr_del(to_list, to_delta, ha->addr,
+ addr_len, type);
+ }
+}
+
+static int __hw_addr_sync(struct list_head *to_list, int *to_delta,
+ struct list_head *from_list, int *from_delta,
+ int addr_len)
+{
+ int err = 0;
+ struct netdev_hw_addr *ha, *tmp;
+
+ list_for_each_entry_safe(ha, tmp, from_list, list) {
+ if (!ha->synced) {
+ err = __hw_addr_add(to_list, to_delta, ha->addr,
+ addr_len, ha->type);
+ if (err)
+ break;
+ ha->synced = true;
+ ha->refcount++;
+ } else if (ha->refcount == 1) {
+ __hw_addr_del(to_list, to_delta, ha->addr,
+ addr_len, ha->type);
+ __hw_addr_del(from_list, from_delta, ha->addr,
+ addr_len, ha->type);
+ }
+ }
+ return err;
+}
+
+static void __hw_addr_unsync(struct list_head *to_list, int *to_delta,
+ struct list_head *from_list, int *from_delta,
+ int addr_len)
+{
+ struct netdev_hw_addr *ha, *tmp;
+
+ list_for_each_entry_safe(ha, tmp, from_list, list) {
+ if (ha->synced) {
+ __hw_addr_del(to_list, to_delta, ha->addr,
+ addr_len, ha->type);
+ ha->synced = false;
+ __hw_addr_del(from_list, from_delta, ha->addr,
+ addr_len, ha->type);
+ }
+ }
+}
+
+static void __hw_addr_flush(struct list_head *list)
+{
+ struct netdev_hw_addr *ha, *tmp;
+
+ list_for_each_entry_safe(ha, tmp, list, list) {
+ list_del_rcu(&ha->list);
+ call_rcu(&ha->rcu_head, ha_rcu_free);
+ }
+}
+
+/* Device addresses handling functions */
+
+static void dev_addr_flush(struct net_device *dev)
+{
+ /* rtnl_mutex must be held here */
+
+ __hw_addr_flush(&dev->dev_addr_list);
+ dev->dev_addr = NULL;
+}
+
+static int dev_addr_init(struct net_device *dev)
+{
+ unsigned char addr[MAX_ADDR_LEN];
+ struct netdev_hw_addr *ha;
+ int err;
+
+ /* rtnl_mutex must be held here */
+
+ INIT_LIST_HEAD(&dev->dev_addr_list);
+ memset(addr, 0, sizeof(addr));
+ err = __hw_addr_add(&dev->dev_addr_list, NULL, addr, sizeof(addr),
+ NETDEV_HW_ADDR_T_LAN);
+ if (!err) {
+ /*
+ * Get the first (previously created) address from the list
+ * and set dev_addr pointer to this location.
+ */
+ ha = list_first_entry(&dev->dev_addr_list,
+ struct netdev_hw_addr, list);
+ dev->dev_addr = ha->addr;
+ }
+ return err;
+}
+
+/**
+ * dev_addr_add - Add a device address
+ * @dev: device
+ * @addr: address to add
+ * @addr_type: address type
+ *
+ * Add a device address to the device or increase the reference count if
+ * it already exists.
+ *
+ * The caller must hold the rtnl_mutex.
+ */
+int dev_addr_add(struct net_device *dev, unsigned char *addr,
+ unsigned char addr_type)
+{
+ int err;
+
+ ASSERT_RTNL();
+
+ err = __hw_addr_add(&dev->dev_addr_list, NULL, addr, dev->addr_len,
+ addr_type);
+ if (!err)
+ call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+ return err;
+}
+EXPORT_SYMBOL(dev_addr_add);
+
+/**
+ * dev_addr_del - Release a device address.
+ * @dev: device
+ * @addr: address to delete
+ * @addr_type: address type
+ *
+ * Release reference to a device address and remove it from the device
+ * if the reference count drops to zero.
+ *
+ * The caller must hold the rtnl_mutex.
+ */
+int dev_addr_del(struct net_device *dev, unsigned char *addr,
+ unsigned char addr_type)
+{
+ int err;
+ struct netdev_hw_addr *ha;
+
+ ASSERT_RTNL();
+
+ /*
+ * We can not remove the first address from the list because
+ * dev->dev_addr points to that.
+ */
+ ha = list_first_entry(&dev->dev_addr_list, struct netdev_hw_addr, list);
+ if (ha->addr == dev->dev_addr && ha->refcount == 1)
+ return -ENOENT;
+
+ err = __hw_addr_del(&dev->dev_addr_list, NULL, addr, dev->addr_len,
+ addr_type);
+ if (!err)
+ call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+ return err;
+}
+EXPORT_SYMBOL(dev_addr_del);
+
+/**
+ * dev_addr_add_multiple - Add device addresses from another device
+ * @to_dev: device to which addresses will be added
+ * @from_dev: device from which addresses will be added
+ * @addr_type: address type - 0 means the type will be taken from from_dev
+ *
+ * Add the device addresses of one device to another.
+ *
+ * The caller must hold the rtnl_mutex.
+ */
+int dev_addr_add_multiple(struct net_device *to_dev,
+ struct net_device *from_dev,
+ unsigned char addr_type)
+{
+ int err;
+
+ ASSERT_RTNL();
+
+ if (from_dev->addr_len != to_dev->addr_len)
+ return -EINVAL;
+ err = __hw_addr_add_multiple(&to_dev->dev_addr_list, NULL,
+ &from_dev->dev_addr_list,
+ to_dev->addr_len, addr_type);
+ if (!err)
+ call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
+ return err;
+}
+EXPORT_SYMBOL(dev_addr_add_multiple);
+
+/**
+ * dev_addr_del_multiple - Delete device addresses by another device
+ * @to_dev: device where the addresses will be deleted
+ * @from_dev: device whose addresses will be deleted from to_dev
+ * @addr_type: address type - 0 means the type will be taken from from_dev
+ *
+ * Delete the addresses in to_dev that appear in from_dev's address list.
+ *
+ * The caller must hold the rtnl_mutex.
+ */
+int dev_addr_del_multiple(struct net_device *to_dev,
+ struct net_device *from_dev,
+ unsigned char addr_type)
+{
+ ASSERT_RTNL();
+
+ if (from_dev->addr_len != to_dev->addr_len)
+ return -EINVAL;
+ __hw_addr_del_multiple(&to_dev->dev_addr_list, NULL,
+ &from_dev->dev_addr_list,
+ to_dev->addr_len, addr_type);
+ call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
+ return 0;
+}
+EXPORT_SYMBOL(dev_addr_del_multiple);
+
+/* unicast and multicast addresses handling functions */
+
int __dev_addr_delete(struct dev_addr_list **list, int *count,
void *addr, int alen, int glbl)
{
@@ -3506,24 +3856,22 @@ int __dev_addr_add(struct dev_addr_list **list, int *count,
* dev_unicast_delete - Release secondary unicast address.
* @dev: device
* @addr: address to delete
- * @alen: length of @addr
*
* Release reference to a secondary unicast address and remove it
* from the device if the reference count drops to zero.
*
* The caller must hold the rtnl_mutex.
*/
-int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
+int dev_unicast_delete(struct net_device *dev, void *addr)
{
int err;
ASSERT_RTNL();
- netif_addr_lock_bh(dev);
- err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
+ err = __hw_addr_del(&dev->uc_list, &dev->uc_count, addr,
+ dev->addr_len, NETDEV_HW_ADDR_T_UNICAST);
if (!err)
__dev_set_rx_mode(dev);
- netif_addr_unlock_bh(dev);
return err;
}
EXPORT_SYMBOL(dev_unicast_delete);
@@ -3532,24 +3880,22 @@ EXPORT_SYMBOL(dev_unicast_delete);
* dev_unicast_add - add a secondary unicast address
* @dev: device
* @addr: address to add
- * @alen: length of @addr
*
* Add a secondary unicast address to the device or increase
* the reference count if it already exists.
*
* The caller must hold the rtnl_mutex.
*/
-int dev_unicast_add(struct net_device *dev, void *addr, int alen)
+int dev_unicast_add(struct net_device *dev, void *addr)
{
int err;
ASSERT_RTNL();
- netif_addr_lock_bh(dev);
- err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
+ err = __hw_addr_add(&dev->uc_list, &dev->uc_count, addr,
+ dev->addr_len, NETDEV_HW_ADDR_T_UNICAST);
if (!err)
__dev_set_rx_mode(dev);
- netif_addr_unlock_bh(dev);
return err;
}
EXPORT_SYMBOL(dev_unicast_add);
@@ -3606,8 +3952,7 @@ void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
* @from: source device
*
* Add newly added addresses to the destination device and release
- * addresses that have no users left. The source device must be
- * locked by netif_tx_lock_bh.
+ * addresses that have no users left.
*
* This function is intended to be called from the dev->set_rx_mode
* function of layered software devices.
@@ -3616,12 +3961,15 @@ int dev_unicast_sync(struct net_device *to, struct net_device *from)
{
int err = 0;
- netif_addr_lock_bh(to);
- err = __dev_addr_sync(&to->uc_list, &to->uc_count,
- &from->uc_list, &from->uc_count);
+ ASSERT_RTNL();
+
+ if (to->addr_len != from->addr_len)
+ return -EINVAL;
+
+ err = __hw_addr_sync(&to->uc_list, &to->uc_count,
+ &from->uc_list, &from->uc_count, to->addr_len);
if (!err)
__dev_set_rx_mode(to);
- netif_addr_unlock_bh(to);
return err;
}
EXPORT_SYMBOL(dev_unicast_sync);
@@ -3637,18 +3985,33 @@ EXPORT_SYMBOL(dev_unicast_sync);
*/
void dev_unicast_unsync(struct net_device *to, struct net_device *from)
{
- netif_addr_lock_bh(from);
- netif_addr_lock(to);
+ ASSERT_RTNL();
- __dev_addr_unsync(&to->uc_list, &to->uc_count,
- &from->uc_list, &from->uc_count);
- __dev_set_rx_mode(to);
+ if (to->addr_len != from->addr_len)
+ return;
- netif_addr_unlock(to);
- netif_addr_unlock_bh(from);
+ __hw_addr_unsync(&to->uc_list, &to->uc_count,
+ &from->uc_list, &from->uc_count, to->addr_len);
+ __dev_set_rx_mode(to);
}
EXPORT_SYMBOL(dev_unicast_unsync);
+static void dev_unicast_flush(struct net_device *dev)
+{
+ /* rtnl_mutex must be held here */
+
+ __hw_addr_flush(&dev->uc_list);
+ dev->uc_count = 0;
+}
+
+static void dev_unicast_init(struct net_device *dev)
+{
+ /* rtnl_mutex must be held here */
+
+ INIT_LIST_HEAD(&dev->uc_list);
+}
+
static void __dev_addr_discard(struct dev_addr_list **list)
{
struct dev_addr_list *tmp;
@@ -3667,9 +4030,6 @@ static void dev_addr_discard(struct net_device *dev)
{
netif_addr_lock_bh(dev);
- __dev_addr_discard(&dev->uc_list);
- dev->uc_count = 0;
-
__dev_addr_discard(&dev->mc_list);
dev->mc_count = 0;
@@ -3853,7 +4213,7 @@ static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cm
switch (cmd) {
case SIOCGIFFLAGS: /* Get interface flags */
- ifr->ifr_flags = dev_get_flags(dev);
+ ifr->ifr_flags = (short) dev_get_flags(dev);
return 0;
case SIOCGIFMETRIC: /* Get the metric on the interface
@@ -4262,6 +4622,7 @@ static void rollback_registered(struct net_device *dev)
/*
* Flush the unicast and multicast chains
*/
+ dev_unicast_flush(dev);
dev_addr_discard(dev);
if (dev->netdev_ops->ndo_uninit)
@@ -4333,39 +4694,6 @@ unsigned long netdev_fix_features(unsigned long features, const char *name)
}
EXPORT_SYMBOL(netdev_fix_features);
-/* Some devices need to (re-)set their netdev_ops inside
- * ->init() or similar. If that happens, we have to setup
- * the compat pointers again.
- */
-void netdev_resync_ops(struct net_device *dev)
-{
-#ifdef CONFIG_COMPAT_NET_DEV_OPS
- const struct net_device_ops *ops = dev->netdev_ops;
-
- dev->init = ops->ndo_init;
- dev->uninit = ops->ndo_uninit;
- dev->open = ops->ndo_open;
- dev->change_rx_flags = ops->ndo_change_rx_flags;
- dev->set_rx_mode = ops->ndo_set_rx_mode;
- dev->set_multicast_list = ops->ndo_set_multicast_list;
- dev->set_mac_address = ops->ndo_set_mac_address;
- dev->validate_addr = ops->ndo_validate_addr;
- dev->do_ioctl = ops->ndo_do_ioctl;
- dev->set_config = ops->ndo_set_config;
- dev->change_mtu = ops->ndo_change_mtu;
- dev->neigh_setup = ops->ndo_neigh_setup;
- dev->tx_timeout = ops->ndo_tx_timeout;
- dev->get_stats = ops->ndo_get_stats;
- dev->vlan_rx_register = ops->ndo_vlan_rx_register;
- dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid;
- dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid;
-#ifdef CONFIG_NET_POLL_CONTROLLER
- dev->poll_controller = ops->ndo_poll_controller;
-#endif
-#endif
-}
-EXPORT_SYMBOL(netdev_resync_ops);
-
/**
* register_netdevice - register a network device
* @dev: device to register
@@ -4405,23 +4733,6 @@ int register_netdevice(struct net_device *dev)
dev->iflink = -1;
-#ifdef CONFIG_COMPAT_NET_DEV_OPS
- /* Netdevice_ops API compatibility support.
- * This is temporary until all network devices are converted.
- */
- if (dev->netdev_ops) {
- netdev_resync_ops(dev);
- } else {
- char drivername[64];
- pr_info("%s (%s): not using net_device_ops yet\n",
- dev->name, netdev_drivername(dev, drivername, 64));
-
- /* This works only because net_device_ops and the
- compatibility structure are the same. */
- dev->netdev_ops = (void *) &(dev->init);
- }
-#endif
-
/* Init, if this function is available */
if (dev->netdev_ops->ndo_init) {
ret = dev->netdev_ops->ndo_init(dev);
@@ -4707,13 +5018,30 @@ void netdev_run_todo(void)
* the internal statistics structure is used.
*/
const struct net_device_stats *dev_get_stats(struct net_device *dev)
- {
+{
const struct net_device_ops *ops = dev->netdev_ops;
if (ops->ndo_get_stats)
return ops->ndo_get_stats(dev);
- else
- return &dev->stats;
+ else {
+ unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
+ struct net_device_stats *stats = &dev->stats;
+ unsigned int i;
+ struct netdev_queue *txq;
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ txq = netdev_get_tx_queue(dev, i);
+ tx_bytes += txq->tx_bytes;
+ tx_packets += txq->tx_packets;
+ tx_dropped += txq->tx_dropped;
+ }
+ if (tx_bytes || tx_packets || tx_dropped) {
+ stats->tx_bytes = tx_bytes;
+ stats->tx_packets = tx_packets;
+ stats->tx_dropped = tx_dropped;
+ }
+ return stats;
+ }
}
EXPORT_SYMBOL(dev_get_stats);
@@ -4748,18 +5076,18 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
struct netdev_queue *tx;
struct net_device *dev;
size_t alloc_size;
- void *p;
+ struct net_device *p;
BUG_ON(strlen(name) >= sizeof(dev->name));
alloc_size = sizeof(struct net_device);
if (sizeof_priv) {
/* ensure 32-byte alignment of private area */
- alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
+ alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
alloc_size += sizeof_priv;
}
/* ensure 32-byte alignment of whole construct */
- alloc_size += NETDEV_ALIGN_CONST;
+ alloc_size += NETDEV_ALIGN - 1;
p = kzalloc(alloc_size, GFP_KERNEL);
if (!p) {
@@ -4771,13 +5099,17 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
if (!tx) {
printk(KERN_ERR "alloc_netdev: Unable to allocate "
"tx qdiscs.\n");
- kfree(p);
- return NULL;
+ goto free_p;
}
- dev = (struct net_device *)
- (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
+ dev = PTR_ALIGN(p, NETDEV_ALIGN);
dev->padded = (char *)dev - (char *)p;
+
+ if (dev_addr_init(dev))
+ goto free_tx;
+
+ dev_unicast_init(dev);
+
dev_net_set(dev, &init_net);
dev->_tx = tx;
@@ -4789,9 +5121,17 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
netdev_init_queues(dev);
INIT_LIST_HEAD(&dev->napi_list);
+ dev->priv_flags = IFF_XMIT_DST_RELEASE;
setup(dev);
strcpy(dev->name, name);
return dev;
+
+free_tx:
+ kfree(tx);
+
+free_p:
+ kfree(p);
+ return NULL;
}
EXPORT_SYMBOL(alloc_netdev_mq);
@@ -4811,6 +5151,9 @@ void free_netdev(struct net_device *dev)
kfree(dev->_tx);
+ /* Flush device addresses */
+ dev_addr_flush(dev);
+
list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
netif_napi_del(p);
@@ -4970,6 +5313,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
/*
* Flush the unicast and multicast chains
*/
+ dev_unicast_flush(dev);
dev_addr_discard(dev);
netdev_unregister_kobject(dev);
@@ -5325,12 +5669,6 @@ EXPORT_SYMBOL(net_enable_timestamp);
EXPORT_SYMBOL(net_disable_timestamp);
EXPORT_SYMBOL(dev_get_flags);
-#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
-EXPORT_SYMBOL(br_handle_frame_hook);
-EXPORT_SYMBOL(br_fdb_get_hook);
-EXPORT_SYMBOL(br_fdb_put_hook);
-#endif
-
EXPORT_SYMBOL(dev_load);
EXPORT_PER_CPU_SYMBOL(softnet_data);
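The new __hw_addr_add()/__hw_addr_del() pair above keeps one refcounted entry per (address, type): re-adding an existing pair only bumps the refcount, and the entry is unlinked and freed (via RCU in the kernel) only when the count drops to zero. A minimal non-RCU sketch of that bookkeeping, with a hypothetical hw_addr type and fixed-size addresses:

#include <stdlib.h>
#include <string.h>

#define ADDR_LEN 6

struct hw_addr {			/* hypothetical userspace analog */
	unsigned char addr[ADDR_LEN];
	int refcount;
	struct hw_addr *next;
};

static int hw_addr_add(struct hw_addr **list, const unsigned char *addr)
{
	struct hw_addr *ha;

	for (ha = *list; ha; ha = ha->next)
		if (!memcmp(ha->addr, addr, ADDR_LEN)) {
			ha->refcount++;	/* already present: just take a ref */
			return 0;
		}

	ha = malloc(sizeof(*ha));
	if (!ha)
		return -1;		/* -ENOMEM in the kernel version */
	memcpy(ha->addr, addr, ADDR_LEN);
	ha->refcount = 1;
	ha->next = *list;
	*list = ha;
	return 0;
}

static int hw_addr_del(struct hw_addr **list, const unsigned char *addr)
{
	struct hw_addr **pp, *ha;

	for (pp = list; (ha = *pp) != NULL; pp = &ha->next)
		if (!memcmp(ha->addr, addr, ADDR_LEN)) {
			if (--ha->refcount)
				return 0;	/* other users remain */
			*pp = ha->next;		/* kernel defers the free via RCU */
			free(ha);
			return 0;
		}
	return -1;			/* -ENOENT in the kernel version */
}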
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index b75b6cea49d..9d66fa953ab 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -22,8 +22,10 @@
#include <linux/timer.h>
#include <linux/bitops.h>
#include <net/genetlink.h>
+#include <net/netevent.h>
#include <trace/events/skb.h>
+#include <trace/events/napi.h>
#include <asm/unaligned.h>
@@ -38,7 +40,8 @@ static void send_dm_alert(struct work_struct *unused);
* and the work handle that will send up
* netlink alerts
*/
-struct sock *dm_sock;
+static int trace_state = TRACE_OFF;
+static DEFINE_SPINLOCK(trace_state_lock);
struct per_cpu_dm_data {
struct work_struct dm_alert_work;
@@ -47,11 +50,18 @@ struct per_cpu_dm_data {
struct timer_list send_timer;
};
+struct dm_hw_stat_delta {
+ struct net_device *dev;
+ struct list_head list;
+ struct rcu_head rcu;
+ unsigned long last_drop_val;
+};
+
static struct genl_family net_drop_monitor_family = {
.id = GENL_ID_GENERATE,
.hdrsize = 0,
.name = "NET_DM",
- .version = 1,
+ .version = 2,
.maxattr = NET_DM_CMD_MAX,
};
@@ -59,19 +69,24 @@ static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data);
static int dm_hit_limit = 64;
static int dm_delay = 1;
-
+static unsigned long dm_hw_check_delta = 2*HZ;
+static LIST_HEAD(hw_stats_list);
static void reset_per_cpu_data(struct per_cpu_dm_data *data)
{
size_t al;
struct net_dm_alert_msg *msg;
+ struct nlattr *nla;
al = sizeof(struct net_dm_alert_msg);
al += dm_hit_limit * sizeof(struct net_dm_drop_point);
+ al += sizeof(struct nlattr);
+
data->skb = genlmsg_new(al, GFP_KERNEL);
genlmsg_put(data->skb, 0, 0, &net_drop_monitor_family,
0, NET_DM_CMD_ALERT);
- msg = __nla_reserve_nohdr(data->skb, sizeof(struct net_dm_alert_msg));
+ nla = nla_reserve(data->skb, NLA_UNSPEC, sizeof(struct net_dm_alert_msg));
+ msg = nla_data(nla);
memset(msg, 0, al);
atomic_set(&data->dm_hit_count, dm_hit_limit);
}
@@ -111,10 +126,11 @@ static void sched_send_work(unsigned long unused)
schedule_work(&data->dm_alert_work);
}
-static void trace_kfree_skb_hit(struct sk_buff *skb, void *location)
+static void trace_drop_common(struct sk_buff *skb, void *location)
{
struct net_dm_alert_msg *msg;
struct nlmsghdr *nlh;
+ struct nlattr *nla;
int i;
struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data);
@@ -127,7 +143,8 @@ static void trace_kfree_skb_hit(struct sk_buff *skb, void *location)
}
nlh = (struct nlmsghdr *)data->skb->data;
- msg = genlmsg_data(nlmsg_data(nlh));
+ nla = genlmsg_data(nlmsg_data(nlh));
+ msg = nla_data(nla);
for (i = 0; i < msg->entries; i++) {
if (!memcmp(&location, msg->points[i].pc, sizeof(void *))) {
msg->points[i].count++;
@@ -139,6 +156,7 @@ static void trace_kfree_skb_hit(struct sk_buff *skb, void *location)
* We need to create a new entry
*/
__nla_reserve_nohdr(data->skb, sizeof(struct net_dm_drop_point));
+ nla->nla_len += NLA_ALIGN(sizeof(struct net_dm_drop_point));
memcpy(msg->points[msg->entries].pc, &location, sizeof(void *));
msg->points[msg->entries].count = 1;
msg->entries++;
@@ -152,24 +170,80 @@ out:
return;
}
+static void trace_kfree_skb_hit(struct sk_buff *skb, void *location)
+{
+ trace_drop_common(skb, location);
+}
+
+static void trace_napi_poll_hit(struct napi_struct *napi)
+{
+ struct dm_hw_stat_delta *new_stat;
+
+ /*
+ * Ratelimit our check time to dm_hw_check_delta jiffies
+ */
+ if (!time_after(jiffies, napi->dev->last_rx + dm_hw_check_delta))
+ return;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(new_stat, &hw_stats_list, list) {
+ if ((new_stat->dev == napi->dev) &&
+ (napi->dev->stats.rx_dropped != new_stat->last_drop_val)) {
+ trace_drop_common(NULL, NULL);
+ new_stat->last_drop_val = napi->dev->stats.rx_dropped;
+ break;
+ }
+ }
+ rcu_read_unlock();
+}
+
+static void free_dm_hw_stat(struct rcu_head *head)
+{
+ struct dm_hw_stat_delta *n;
+ n = container_of(head, struct dm_hw_stat_delta, rcu);
+ kfree(n);
+}
+
static int set_all_monitor_traces(int state)
{
int rc = 0;
+ struct dm_hw_stat_delta *new_stat = NULL;
+ struct dm_hw_stat_delta *temp;
+
+ spin_lock(&trace_state_lock);
switch (state) {
case TRACE_ON:
rc |= register_trace_kfree_skb(trace_kfree_skb_hit);
+ rc |= register_trace_napi_poll(trace_napi_poll_hit);
break;
case TRACE_OFF:
rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit);
+ rc |= unregister_trace_napi_poll(trace_napi_poll_hit);
tracepoint_synchronize_unregister();
+
+ /*
+ * Clean the device list
+ */
+ list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) {
+ if (new_stat->dev == NULL) {
+ list_del_rcu(&new_stat->list);
+ call_rcu(&new_stat->rcu, free_dm_hw_stat);
+ }
+ }
break;
default:
rc = 1;
break;
}
+ if (!rc)
+ trace_state = state;
+
+ spin_unlock(&trace_state_lock);
+
if (rc)
return -EINPROGRESS;
return rc;
@@ -197,6 +271,44 @@ static int net_dm_cmd_trace(struct sk_buff *skb,
return -ENOTSUPP;
}
+static int dropmon_net_event(struct notifier_block *ev_block,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = ptr;
+ struct dm_hw_stat_delta *new_stat = NULL;
+ struct dm_hw_stat_delta *tmp;
+
+ switch (event) {
+ case NETDEV_REGISTER:
+ new_stat = kzalloc(sizeof(struct dm_hw_stat_delta), GFP_KERNEL);
+
+ if (!new_stat)
+ goto out;
+
+ new_stat->dev = dev;
+ INIT_RCU_HEAD(&new_stat->rcu);
+ spin_lock(&trace_state_lock);
+ list_add_rcu(&new_stat->list, &hw_stats_list);
+ spin_unlock(&trace_state_lock);
+ break;
+ case NETDEV_UNREGISTER:
+ spin_lock(&trace_state_lock);
+ list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) {
+ if (new_stat->dev == dev) {
+ new_stat->dev = NULL;
+ if (trace_state == TRACE_OFF) {
+ list_del_rcu(&new_stat->list);
+ call_rcu(&new_stat->rcu, free_dm_hw_stat);
+ break;
+ }
+ }
+ }
+ spin_unlock(&trace_state_lock);
+ break;
+ }
+out:
+ return NOTIFY_DONE;
+}
static struct genl_ops dropmon_ops[] = {
{
@@ -213,6 +325,10 @@ static struct genl_ops dropmon_ops[] = {
},
};
+static struct notifier_block dropmon_net_notifier = {
+ .notifier_call = dropmon_net_event
+};
+
static int __init init_net_drop_monitor(void)
{
int cpu;
@@ -236,12 +352,18 @@ static int __init init_net_drop_monitor(void)
ret = genl_register_ops(&net_drop_monitor_family,
&dropmon_ops[i]);
if (ret) {
- printk(KERN_CRIT "failed to register operation %d\n",
+ printk(KERN_CRIT "Failed to register operation %d\n",
dropmon_ops[i].cmd);
goto out_unreg;
}
}
+ rc = register_netdevice_notifier(&dropmon_net_notifier);
+ if (rc < 0) {
+ printk(KERN_CRIT "Failed to register netdevice notifier\n");
+ goto out_unreg;
+ }
+
rc = 0;
for_each_present_cpu(cpu) {
@@ -252,6 +374,7 @@ static int __init init_net_drop_monitor(void)
data->send_timer.data = cpu;
data->send_timer.function = sched_send_work;
}
+
goto out;
out_unreg:
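trace_napi_poll_hit() above turns the raw rx_dropped counter into discrete events with the usual delta-plus-ratelimit trick: skip the check entirely unless an interval has elapsed, then report only if the counter moved since the cached value. A self-contained sketch of the pattern, with hypothetical names and wall-clock seconds standing in for jiffies:

#include <time.h>

struct drop_delta {		/* hypothetical analog of dm_hw_stat_delta */
	unsigned long last_drop_val;
	time_t last_check;
};

#define CHECK_DELTA 2		/* seconds; the kernel uses 2*HZ jiffies */

/* Return 1 if a new drop event should be reported. */
static int check_drops(struct drop_delta *d, unsigned long rx_dropped)
{
	time_t now = time(NULL);

	if (now - d->last_check < CHECK_DELTA)
		return 0;	/* ratelimited: too soon to look again */
	d->last_check = now;

	if (rx_dropped == d->last_drop_val)
		return 0;	/* counter did not move */

	d->last_drop_val = rx_dropped;
	return 1;
}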
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 98691e1466b..bd309384f8b 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -299,7 +299,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
} else if (rule->action == FR_ACT_GOTO)
goto errout_free;
- err = ops->configure(rule, skb, nlh, frh, tb);
+ err = ops->configure(rule, skb, frh, tb);
if (err < 0)
goto errout_free;
@@ -500,7 +500,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
if (rule->target)
NLA_PUT_U32(skb, FRA_GOTO, rule->target);
- if (ops->fill(rule, skb, nlh, frh) < 0)
+ if (ops->fill(rule, skb, frh) < 0)
goto nla_put_failure;
return nlmsg_end(skb, nlh);
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 9cc9f95b109..78e5bfc454a 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -66,9 +66,9 @@
NOTES.
- * The stored value for avbps is scaled by 2^5, so that maximal
- rate is ~1Gbit, avpps is scaled by 2^10.
-
+ * avbps is scaled by 2^5, avpps is scaled by 2^10.
+ * Both values are reported as 32-bit unsigned values. bps can
+ overflow for fast links: the maximum representable speed is 34360 Mbit/sec.
* Minimal interval is HZ/4=250msec (it is the greatest common divisor
for HZ=100 and HZ=1024 8)), maximal interval
is (HZ*2^EST_MAX_INTERVAL)/4 = 8sec. Shorter intervals
@@ -86,9 +86,9 @@ struct gen_estimator
spinlock_t *stats_lock;
int ewma_log;
u64 last_bytes;
+ u64 avbps;
u32 last_packets;
u32 avpps;
- u32 avbps;
struct rcu_head e_rcu;
struct rb_node node;
};
@@ -115,6 +115,7 @@ static void est_timer(unsigned long arg)
rcu_read_lock();
list_for_each_entry_rcu(e, &elist[idx].list, list) {
u64 nbytes;
+ u64 brate;
u32 npackets;
u32 rate;
@@ -125,14 +126,14 @@ static void est_timer(unsigned long arg)
nbytes = e->bstats->bytes;
npackets = e->bstats->packets;
- rate = (nbytes - e->last_bytes)<<(7 - idx);
+ brate = (nbytes - e->last_bytes)<<(7 - idx);
e->last_bytes = nbytes;
- e->avbps += ((long)rate - (long)e->avbps) >> e->ewma_log;
+ e->avbps += (brate >> e->ewma_log) - (e->avbps >> e->ewma_log);
e->rate_est->bps = (e->avbps+0xF)>>5;
rate = (npackets - e->last_packets)<<(12 - idx);
e->last_packets = npackets;
- e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log;
+ e->avpps += (rate >> e->ewma_log) - (e->avpps >> e->ewma_log);
e->rate_est->pps = (e->avpps+0x1FF)>>10;
skip:
read_unlock(&est_lock);
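The estimator change above does two things: avbps becomes a u64 so byte rates above ~1 Gbit no longer wrap, and the EWMA update is rewritten from avg += ((long)rate - (long)avg) >> log to avg += (rate >> log) - (avg >> log), which shifts each term before subtracting and so never forms an overflowing signed difference. A standalone illustration of the rewritten step, with arbitrary sample values:

#include <stdint.h>
#include <stdio.h>

/* EWMA step as rewritten in est_timer(): shift each term separately
 * so no wide signed subtraction is needed. */
static uint64_t ewma_step(uint64_t avg, uint64_t rate, int ewma_log)
{
	return avg + (rate >> ewma_log) - (avg >> ewma_log);
}

int main(void)
{
	uint64_t avbps = 0;
	int i;

	/* Feed a constant scaled rate; the average converges toward it. */
	for (i = 0; i < 256; i++)
		avbps = ewma_step(avbps, 1ULL << 40, 5);

	printf("avbps ~ %llu (target %llu)\n",
	       (unsigned long long)avbps, 1ULL << 40);
	return 0;
}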
diff --git a/net/core/iovec.c b/net/core/iovec.c
index 4c9c0121c9d..16ad45d4882 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -98,6 +98,31 @@ int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len)
}
/*
+ * Copy kernel to iovec. Returns -EFAULT on error.
+ */
+
+int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata,
+ int offset, int len)
+{
+ int copy;
+ for (; len > 0; ++iov) {
+ /* Skip over the finished iovecs */
+ if (unlikely(offset >= iov->iov_len)) {
+ offset -= iov->iov_len;
+ continue;
+ }
+ copy = min_t(unsigned int, iov->iov_len - offset, len);
+ if (copy_to_user(iov->iov_base + offset, kdata, copy))
+ return -EFAULT;
+ offset = 0;
+ kdata += copy;
+ len -= copy;
+ }
+
+ return 0;
+}
+
+/*
* Copy iovec to kernel. Returns -EFAULT on error.
*
* Note: this modifies the original iovec.
@@ -122,10 +147,11 @@ int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len)
}
/*
- * For use with ip_build_xmit
+ * Copy iovec from kernel. Returns -EFAULT on error.
*/
-int memcpy_fromiovecend(unsigned char *kdata, struct iovec *iov, int offset,
- int len)
+
+int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov,
+ int offset, int len)
{
/* Skip over the finished iovecs */
while (offset >= iov->iov_len) {
@@ -236,3 +262,4 @@ EXPORT_SYMBOL(csum_partial_copy_fromiovecend);
EXPORT_SYMBOL(memcpy_fromiovec);
EXPORT_SYMBOL(memcpy_fromiovecend);
EXPORT_SYMBOL(memcpy_toiovec);
+EXPORT_SYMBOL(memcpy_toiovecend);
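memcpy_toiovecend() above is the const counterpart of memcpy_toiovec(): instead of consuming the iovec array in place, it carries an explicit offset and skips whole elements until the offset lands inside one. A userspace sketch of the same walk, with memcpy standing in for copy_to_user (offset logic only, not the kernel API):

#include <string.h>
#include <sys/uio.h>

static int copy_to_iovec_end(const struct iovec *iov,
			     const unsigned char *kdata,
			     size_t offset, size_t len)
{
	for (; len > 0; ++iov) {
		size_t copy;

		/* Skip whole elements that the offset steps over. */
		if (offset >= iov->iov_len) {
			offset -= iov->iov_len;
			continue;
		}
		copy = iov->iov_len - offset;
		if (copy > len)
			copy = len;
		memcpy((unsigned char *)iov->iov_base + offset, kdata, copy);
		offset = 0;	/* only the first element is entered mid-way */
		kdata += copy;
		len -= copy;
	}
	return 0;
}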
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index a1cbce7fdae..163b4f5b036 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -771,6 +771,28 @@ static __inline__ int neigh_max_probes(struct neighbour *n)
p->ucast_probes + p->app_probes + p->mcast_probes);
}
+static void neigh_invalidate(struct neighbour *neigh)
+{
+ struct sk_buff *skb;
+
+ NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
+ NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
+ neigh->updated = jiffies;
+
+ /* It is very thin place. report_unreachable is very complicated
+ routine. Particularly, it can hit the same neighbour entry!
+
+ So that, we try to be accurate and avoid dead loop. --ANK
+ */
+ while (neigh->nud_state == NUD_FAILED &&
+ (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
+ write_unlock(&neigh->lock);
+ neigh->ops->error_report(neigh, skb);
+ write_lock(&neigh->lock);
+ }
+ skb_queue_purge(&neigh->arp_queue);
+}
+
/* Called when a timer expires for a neighbour entry. */
static void neigh_timer_handler(unsigned long arg)
@@ -835,26 +857,9 @@ static void neigh_timer_handler(unsigned long arg)
if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
- struct sk_buff *skb;
-
neigh->nud_state = NUD_FAILED;
- neigh->updated = jiffies;
notify = 1;
- NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
- NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
-
- /* It is very thin place. report_unreachable is very complicated
- routine. Particularly, it can hit the same neighbour entry!
-
- So that, we try to be accurate and avoid dead loop. --ANK
- */
- while (neigh->nud_state == NUD_FAILED &&
- (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
- write_unlock(&neigh->lock);
- neigh->ops->error_report(neigh, skb);
- write_lock(&neigh->lock);
- }
- skb_queue_purge(&neigh->arp_queue);
+ neigh_invalidate(neigh);
}
if (neigh->nud_state & NUD_IN_TIMER) {
@@ -1001,6 +1006,11 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
neigh->nud_state = new;
err = 0;
notify = old & NUD_VALID;
+ if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
+ (new & NUD_FAILED)) {
+ neigh_invalidate(neigh);
+ notify = 1;
+ }
goto out;
}
@@ -1088,8 +1098,8 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
struct neighbour *n1 = neigh;
write_unlock_bh(&neigh->lock);
/* On shaper/eql skb->dst->neighbour != neigh :( */
- if (skb->dst && skb->dst->neighbour)
- n1 = skb->dst->neighbour;
+ if (skb_dst(skb) && skb_dst(skb)->neighbour)
+ n1 = skb_dst(skb)->neighbour;
n1->output(skb);
write_lock_bh(&neigh->lock);
}
@@ -1182,7 +1192,7 @@ EXPORT_SYMBOL(neigh_compat_output);
int neigh_resolve_output(struct sk_buff *skb)
{
- struct dst_entry *dst = skb->dst;
+ struct dst_entry *dst = skb_dst(skb);
struct neighbour *neigh;
int rc = 0;
@@ -1229,7 +1239,7 @@ EXPORT_SYMBOL(neigh_resolve_output);
int neigh_connected_output(struct sk_buff *skb)
{
int err;
- struct dst_entry *dst = skb->dst;
+ struct dst_entry *dst = skb_dst(skb);
struct neighbour *neigh = dst->neighbour;
struct net_device *dev = neigh->dev;
@@ -1298,8 +1308,7 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
if (time_before(tbl->proxy_timer.expires, sched_next))
sched_next = tbl->proxy_timer.expires;
}
- dst_release(skb->dst);
- skb->dst = NULL;
+ skb_dst_drop(skb);
dev_hold(skb->dev);
__skb_queue_tail(&tbl->proxy_queue, skb);
mod_timer(&tbl->proxy_timer, sched_next);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 2da59a0ac4a..3994680c08b 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -78,7 +78,7 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
goto err;
if (!rtnl_trylock())
- return -ERESTARTSYS;
+ return restart_syscall();
if (dev_isalive(net)) {
if ((ret = (*set)(net, new)) == 0)
@@ -225,7 +225,8 @@ static ssize_t store_ifalias(struct device *dev, struct device_attribute *attr,
if (len > 0 && buf[len - 1] == '\n')
--count;
- rtnl_lock();
+ if (!rtnl_trylock())
+ return restart_syscall();
ret = dev_set_alias(netdev, buf, count);
rtnl_unlock();
@@ -238,7 +239,8 @@ static ssize_t show_ifalias(struct device *dev,
const struct net_device *netdev = to_net_dev(dev);
ssize_t ret = 0;
- rtnl_lock();
+ if (!rtnl_trylock())
+ return restart_syscall();
if (netdev->ifalias)
ret = sprintf(buf, "%s\n", netdev->ifalias);
rtnl_unlock();
@@ -497,7 +499,6 @@ int netdev_register_kobject(struct net_device *net)
dev->platform_data = net;
dev->groups = groups;
- BUILD_BUG_ON(BUS_ID_SIZE < IFNAMSIZ);
dev_set_name(dev, "%s", net->name);
#ifdef CONFIG_SYSFS
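The net-sysfs hunks above move from blocking rtnl_lock() (or a bare -ERESTARTSYS) to the trylock-and-restart idiom: if the lock is contended, return through restart_syscall() so the syscall is transparently retried instead of sleeping inside a sysfs handler. The shape of the pattern, sketched with a pthread mutex and -EAGAIN playing the role of the restart (hypothetical handler name):

#include <errno.h>
#include <pthread.h>

static pthread_mutex_t cfg_lock = PTHREAD_MUTEX_INITIALIZER;

/* Hypothetical attribute-store handler: rather than blocking on a
 * contended lock, fail fast and let the caller retry - the userspace
 * analog of "if (!rtnl_trylock()) return restart_syscall();". */
static int store_attr(int new_value, int *attr)
{
	if (pthread_mutex_trylock(&cfg_lock))
		return -EAGAIN;

	*attr = new_value;
	pthread_mutex_unlock(&cfg_lock);
	return 0;
}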
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index 499a67eaf3a..f1e982c508b 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -25,5 +25,8 @@
#define CREATE_TRACE_POINTS
#include <trace/events/skb.h>
+#include <trace/events/napi.h>
EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index e3bebd36f05..b7292a2719d 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -115,41 +115,34 @@ static void net_free(struct net *net)
kmem_cache_free(net_cachep, net);
}
-struct net *copy_net_ns(unsigned long flags, struct net *old_net)
+static struct net *net_create(void)
{
- struct net *new_net = NULL;
- int err;
-
- get_net(old_net);
-
- if (!(flags & CLONE_NEWNET))
- return old_net;
-
- err = -ENOMEM;
- new_net = net_alloc();
- if (!new_net)
- goto out_err;
+ struct net *net;
+ int rv;
+ net = net_alloc();
+ if (!net)
+ return ERR_PTR(-ENOMEM);
mutex_lock(&net_mutex);
- err = setup_net(new_net);
- if (!err) {
+ rv = setup_net(net);
+ if (rv == 0) {
rtnl_lock();
- list_add_tail(&new_net->list, &net_namespace_list);
+ list_add_tail(&net->list, &net_namespace_list);
rtnl_unlock();
}
mutex_unlock(&net_mutex);
+ if (rv < 0) {
+ net_free(net);
+ return ERR_PTR(rv);
+ }
+ return net;
+}
- if (err)
- goto out_free;
-out:
- put_net(old_net);
- return new_net;
-
-out_free:
- net_free(new_net);
-out_err:
- new_net = ERR_PTR(err);
- goto out;
+struct net *copy_net_ns(unsigned long flags, struct net *old_net)
+{
+ if (!(flags & CLONE_NEWNET))
+ return get_net(old_net);
+ return net_create();
}
static void cleanup_net(struct work_struct *work)
@@ -203,9 +196,7 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net)
static int __init net_ns_init(void)
{
struct net_generic *ng;
- int err;
- printk(KERN_INFO "net_namespace: %zd bytes\n", sizeof(struct net));
#ifdef CONFIG_NET_NS
net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
SMP_CACHE_BYTES,
@@ -224,15 +215,14 @@ static int __init net_ns_init(void)
rcu_assign_pointer(init_net.gen, ng);
mutex_lock(&net_mutex);
- err = setup_net(&init_net);
+ if (setup_net(&init_net))
+ panic("Could not setup the initial network namespace");
rtnl_lock();
list_add_tail(&init_net.list, &net_namespace_list);
rtnl_unlock();
mutex_unlock(&net_mutex);
- if (err)
- panic("Could not setup the initial network namespace");
return 0;
}
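net_create() above folds the old goto ladder into the kernel's ERR_PTR convention: one pointer return carries either a valid object or a negative errno. A minimal userspace sketch of producing and consuming such a return, with stand-ins for the <linux/err.h> helpers:

#include <errno.h>
#include <stdlib.h>

/* Userspace stand-ins for the <linux/err.h> helpers. */
#define MAX_ERRNO	4095
#define ERR_PTR(err)	((void *)(long)(err))
#define PTR_ERR(ptr)	((long)(ptr))
#define IS_ERR(ptr)	((unsigned long)(ptr) >= (unsigned long)-MAX_ERRNO)

struct net { int dummy; };

static struct net *net_create(void)
{
	struct net *net = malloc(sizeof(*net));

	if (!net)
		return ERR_PTR(-ENOMEM);	/* errno travels in the pointer */
	return net;
}

static int use_net(void)
{
	struct net *net = net_create();

	if (IS_ERR(net))
		return (int)PTR_ERR(net);	/* unpack the errno */
	/* ... use net ... */
	free(net);
	return 0;
}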
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index b5873bdff61..9675f312830 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -24,6 +24,7 @@
#include <net/tcp.h>
#include <net/udp.h>
#include <asm/unaligned.h>
+#include <trace/events/napi.h>
/*
* We maintain a small pool of fully-sized skbs, to make sure the
@@ -137,6 +138,7 @@ static int poll_one_napi(struct netpoll_info *npinfo,
set_bit(NAPI_STATE_NPSVC, &napi->state);
work = napi->poll(napi, budget);
+ trace_napi_poll(napi);
clear_bit(NAPI_STATE_NPSVC, &napi->state);
atomic_dec(&trapped);
@@ -175,9 +177,13 @@ static void service_arp_queue(struct netpoll_info *npi)
void netpoll_poll(struct netpoll *np)
{
struct net_device *dev = np->dev;
- const struct net_device_ops *ops = dev->netdev_ops;
+ const struct net_device_ops *ops;
+
+ if (!dev || !netif_running(dev))
+ return;
- if (!dev || !netif_running(dev) || !ops->ndo_poll_controller)
+ ops = dev->netdev_ops;
+ if (!ops->ndo_poll_controller)
return;
/* Process pending work on NIC */
@@ -296,8 +302,11 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
tries > 0; --tries) {
if (__netif_tx_trylock(txq)) {
- if (!netif_tx_queue_stopped(txq))
+ if (!netif_tx_queue_stopped(txq)) {
status = ops->ndo_start_xmit(skb, dev);
+ if (status == NETDEV_TX_OK)
+ txq_trans_update(txq);
+ }
__netif_tx_unlock(txq);
if (status == NETDEV_TX_OK)
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 3779c1438c1..19b8c20e98a 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2447,7 +2447,7 @@ static inline void free_SAs(struct pktgen_dev *pkt_dev)
if (pkt_dev->cflows) {
/* let go of the SAs if we have them */
int i = 0;
- for (; i < pkt_dev->nflows; i++){
+ for (; i < pkt_dev->cflows; i++) {
struct xfrm_state *x = pkt_dev->flows[i].x;
if (x) {
xfrm_state_put(x);
@@ -3438,6 +3438,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
retry_now:
ret = (*xmit)(pkt_dev->skb, odev);
if (likely(ret == NETDEV_TX_OK)) {
+ txq_trans_update(txq);
pkt_dev->last_ok = 1;
pkt_dev->sofar++;
pkt_dev->seq_num++;
@@ -3690,8 +3691,7 @@ out1:
#ifdef CONFIG_XFRM
free_SAs(pkt_dev);
#endif
- if (pkt_dev->flows)
- vfree(pkt_dev->flows);
+ vfree(pkt_dev->flows);
kfree(pkt_dev);
return err;
}
@@ -3790,8 +3790,7 @@ static int pktgen_remove_device(struct pktgen_thread *t,
#ifdef CONFIG_XFRM
free_SAs(pkt_dev);
#endif
- if (pkt_dev->flows)
- vfree(pkt_dev->flows);
+ vfree(pkt_dev->flows);
kfree(pkt_dev);
return 0;
}
diff --git a/net/core/skb_dma_map.c b/net/core/skb_dma_map.c
index 86234923a3b..79687dfd695 100644
--- a/net/core/skb_dma_map.c
+++ b/net/core/skb_dma_map.c
@@ -20,7 +20,7 @@ int skb_dma_map(struct device *dev, struct sk_buff *skb,
if (dma_mapping_error(dev, map))
goto out_err;
- sp->dma_maps[0] = map;
+ sp->dma_head = map;
for (i = 0; i < sp->nr_frags; i++) {
skb_frag_t *fp = &sp->frags[i];
@@ -28,9 +28,8 @@ int skb_dma_map(struct device *dev, struct sk_buff *skb,
fp->size, dir);
if (dma_mapping_error(dev, map))
goto unwind;
- sp->dma_maps[i + 1] = map;
+ sp->dma_maps[i] = map;
}
- sp->num_dma_maps = i + 1;
return 0;
@@ -38,10 +37,10 @@ unwind:
while (--i >= 0) {
skb_frag_t *fp = &sp->frags[i];
- dma_unmap_page(dev, sp->dma_maps[i + 1],
+ dma_unmap_page(dev, sp->dma_maps[i],
fp->size, dir);
}
- dma_unmap_single(dev, sp->dma_maps[0],
+ dma_unmap_single(dev, sp->dma_head,
skb_headlen(skb), dir);
out_err:
return -ENOMEM;
@@ -54,12 +53,12 @@ void skb_dma_unmap(struct device *dev, struct sk_buff *skb,
struct skb_shared_info *sp = skb_shinfo(skb);
int i;
- dma_unmap_single(dev, sp->dma_maps[0],
+ dma_unmap_single(dev, sp->dma_head,
skb_headlen(skb), dir);
for (i = 0; i < sp->nr_frags; i++) {
skb_frag_t *fp = &sp->frags[i];
- dma_unmap_page(dev, sp->dma_maps[i + 1],
+ dma_unmap_page(dev, sp->dma_maps[i],
fp->size, dir);
}
}
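skb_dma_map() above keeps the classic partial-failure unwind: map the head, then each fragment in order, and on the first mapping error walk back and undo exactly the mappings already made, in reverse. The pattern in the abstract, with hypothetical map_one/unmap_one callbacks:

typedef int  (*map_one_fn)(int i);
typedef void (*unmap_one_fn)(int i);

/* Map n resources; on failure, unwind the ones already mapped in
 * reverse order and report the error, mirroring skb_dma_map(). */
static int map_all(int n, map_one_fn map_one, unmap_one_fn unmap_one)
{
	int i;

	for (i = 0; i < n; i++)
		if (map_one(i))
			goto unwind;
	return 0;

unwind:
	while (--i >= 0)
		unmap_one(i);
	return -1;	/* -ENOMEM in the kernel version */
}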
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c4906bbcd60..5c93435b034 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -39,6 +39,7 @@
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
+#include <linux/kmemcheck.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/in.h>
@@ -201,6 +202,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
skb->data = data;
skb_reset_tail_pointer(skb);
skb->end = skb->tail + size;
+ kmemcheck_annotate_bitfield(skb, flags1);
+ kmemcheck_annotate_bitfield(skb, flags2);
/* make sure we initialize shinfo sequentially */
shinfo = skb_shinfo(skb);
atomic_set(&shinfo->dataref, 1);
@@ -210,13 +213,15 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
shinfo->gso_type = 0;
shinfo->ip6_frag_id = 0;
shinfo->tx_flags.flags = 0;
- shinfo->frag_list = NULL;
+ skb_frag_list_init(skb);
memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps));
if (fclone) {
struct sk_buff *child = skb + 1;
atomic_t *fclone_ref = (atomic_t *) (child + 1);
+ kmemcheck_annotate_bitfield(child, flags1);
+ kmemcheck_annotate_bitfield(child, flags2);
skb->fclone = SKB_FCLONE_ORIG;
atomic_set(fclone_ref, 1);
@@ -323,7 +328,7 @@ static void skb_clone_fraglist(struct sk_buff *skb)
{
struct sk_buff *list;
- for (list = skb_shinfo(skb)->frag_list; list; list = list->next)
+ skb_walk_frags(skb, list)
skb_get(list);
}
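
All of the open-coded frag_list walks in this file are converted to skb_walk_frags() and skb_has_frags(). Judging from the conversions, the new helpers in <linux/skbuff.h> are presumably equivalent to the following, reproduced here for reference only:

#define skb_walk_frags(skb, iter) \
	for (iter = skb_shinfo(skb)->frag_list; iter; iter = iter->next)

static inline bool skb_has_frags(const struct sk_buff *skb)
{
	return skb_shinfo(skb)->frag_list != NULL;
}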
@@ -338,7 +343,7 @@ static void skb_release_data(struct sk_buff *skb)
put_page(skb_shinfo(skb)->frags[i].page);
}
- if (skb_shinfo(skb)->frag_list)
+ if (skb_has_frags(skb))
skb_drop_fraglist(skb);
kfree(skb->head);
@@ -381,7 +386,7 @@ static void kfree_skbmem(struct sk_buff *skb)
static void skb_release_head_state(struct sk_buff *skb)
{
- dst_release(skb->dst);
+ skb_dst_drop(skb);
#ifdef CONFIG_XFRM
secpath_put(skb->sp);
#endif
@@ -503,7 +508,7 @@ int skb_recycle_check(struct sk_buff *skb, int skb_size)
shinfo->gso_type = 0;
shinfo->ip6_frag_id = 0;
shinfo->tx_flags.flags = 0;
- shinfo->frag_list = NULL;
+ skb_frag_list_init(skb);
memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps));
memset(skb, 0, offsetof(struct sk_buff, tail));
@@ -521,13 +526,12 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
new->transport_header = old->transport_header;
new->network_header = old->network_header;
new->mac_header = old->mac_header;
- new->dst = dst_clone(old->dst);
+ skb_dst_set(new, dst_clone(skb_dst(old)));
#ifdef CONFIG_XFRM
new->sp = secpath_get(old->sp);
#endif
memcpy(new->cb, old->cb, sizeof(old->cb));
- new->csum_start = old->csum_start;
- new->csum_offset = old->csum_offset;
+ new->csum = old->csum;
new->local_df = old->local_df;
new->pkt_type = old->pkt_type;
new->ip_summed = old->ip_summed;
@@ -538,6 +542,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
#endif
new->protocol = old->protocol;
new->mark = old->mark;
+ new->iif = old->iif;
__nf_copy(new, old);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
@@ -550,10 +555,17 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
#endif
#endif
new->vlan_tci = old->vlan_tci;
+#if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE)
+ new->do_not_encrypt = old->do_not_encrypt;
+#endif
skb_copy_secmark(new, old);
}
+/*
+ * You should not add any new code to this function. Add it to
+ * __copy_skb_header above instead.
+ */
static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
{
#define C(x) n->x = skb->x
@@ -569,16 +581,11 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
n->cloned = 1;
n->nohdr = 0;
n->destructor = NULL;
- C(iif);
C(tail);
C(end);
C(head);
C(data);
C(truesize);
-#if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE)
- C(do_not_encrypt);
- C(requeue);
-#endif
atomic_set(&n->users, 1);
atomic_inc(&(skb_shinfo(skb)->dataref));
@@ -633,6 +640,9 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
if (!n)
return NULL;
+
+ kmemcheck_annotate_bitfield(n, flags1);
+ kmemcheck_annotate_bitfield(n, flags2);
n->fclone = SKB_FCLONE_UNAVAILABLE;
}
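
kmemcheck flags reads of bytes that were never written, and bitfields sharing a storage unit defeat that tracking: writing one bit leaves the rest of the word formally uninitialized. Annotating flags1/flags2 right after each allocation marks those words as initialized. A generic sketch of the idiom with a hypothetical object and cache:

#include <linux/kmemcheck.h>
#include <linux/slab.h>

struct example_obj {
	kmemcheck_bitfield_begin(flags1);
	unsigned int	active:1,
			pinned:1;
	kmemcheck_bitfield_end(flags1);
};

static struct kmem_cache *example_cache;

static struct example_obj *example_alloc(gfp_t gfp)
{
	struct example_obj *obj = kmem_cache_alloc(example_cache, gfp);

	if (!obj)
		return NULL;
	/* tell kmemcheck the bitfield word is considered initialized */
	kmemcheck_annotate_bitfield(obj, flags1);
	return obj;
}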
@@ -755,7 +765,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
skb_shinfo(n)->nr_frags = i;
}
- if (skb_shinfo(skb)->frag_list) {
+ if (skb_has_frags(skb)) {
skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
skb_clone_fraglist(n);
}
@@ -818,7 +828,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
get_page(skb_shinfo(skb)->frags[i].page);
- if (skb_shinfo(skb)->frag_list)
+ if (skb_has_frags(skb))
skb_clone_fraglist(skb);
skb_release_data(skb);
@@ -1090,7 +1100,7 @@ drop_pages:
for (; i < nfrags; i++)
put_page(skb_shinfo(skb)->frags[i].page);
- if (skb_shinfo(skb)->frag_list)
+ if (skb_has_frags(skb))
skb_drop_fraglist(skb);
goto done;
}
@@ -1185,7 +1195,7 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
/* Optimization: no fragments, no reasons to preestimate
* size of pulled pages. Superb.
*/
- if (!skb_shinfo(skb)->frag_list)
+ if (!skb_has_frags(skb))
goto pull_pages;
/* Estimate size of pulled pages. */
@@ -1282,8 +1292,9 @@ EXPORT_SYMBOL(__pskb_pull_tail);
int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
{
- int i, copy;
int start = skb_headlen(skb);
+ struct sk_buff *frag_iter;
+ int i, copy;
if (offset > (int)skb->len - len)
goto fault;
@@ -1325,28 +1336,23 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
start = end;
}
- if (skb_shinfo(skb)->frag_list) {
- struct sk_buff *list = skb_shinfo(skb)->frag_list;
+ skb_walk_frags(skb, frag_iter) {
+ int end;
- for (; list; list = list->next) {
- int end;
-
- WARN_ON(start > offset + len);
-
- end = start + list->len;
- if ((copy = end - offset) > 0) {
- if (copy > len)
- copy = len;
- if (skb_copy_bits(list, offset - start,
- to, copy))
- goto fault;
- if ((len -= copy) == 0)
- return 0;
- offset += copy;
- to += copy;
- }
- start = end;
+ WARN_ON(start > offset + len);
+
+ end = start + frag_iter->len;
+ if ((copy = end - offset) > 0) {
+ if (copy > len)
+ copy = len;
+ if (skb_copy_bits(frag_iter, offset - start, to, copy))
+ goto fault;
+ if ((len -= copy) == 0)
+ return 0;
+ offset += copy;
+ to += copy;
}
+ start = end;
}
if (!len)
return 0;
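
skb_copy_bits() and the siblings converted below all share one traversal: clip the request against the linear header, then each page frag, then each frag_list child, keeping a running [start, end) window per area. A condensed sketch of that skeleton, with the per-chunk action left as comments:

static int skb_for_each_area(const struct sk_buff *skb, int offset, int len)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;

	if (copy > 0) {			/* 1: linear header */
		if (copy > len)
			copy = len;
		/* ... act on skb->data + offset, copy bytes ... */
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end = start + skb_shinfo(skb)->frags[i].size;

		if ((copy = end - offset) > 0) {	/* 2: page frags */
			if (copy > len)
				copy = len;
			/* ... kmap() the frag page and act on it ... */
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end = start + frag_iter->len;

		if ((copy = end - offset) > 0) {	/* 3: frag_list */
			if (copy > len)
				copy = len;
			/* ... recurse with offset - start ... */
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
		}
		start = end;
	}
	return len ? -EFAULT : 0;
}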
@@ -1531,6 +1537,7 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
.ops = &sock_pipe_buf_ops,
.spd_release = sock_spd_release,
};
+ struct sk_buff *frag_iter;
struct sock *sk = skb->sk;
/*
@@ -1545,13 +1552,11 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
/*
* now see if we have a frag_list to map
*/
- if (skb_shinfo(skb)->frag_list) {
- struct sk_buff *list = skb_shinfo(skb)->frag_list;
-
- for (; list && tlen; list = list->next) {
- if (__skb_splice_bits(list, &offset, &tlen, &spd, sk))
- break;
- }
+ skb_walk_frags(skb, frag_iter) {
+ if (!tlen)
+ break;
+ if (__skb_splice_bits(frag_iter, &offset, &tlen, &spd, sk))
+ break;
}
done:
@@ -1590,8 +1595,9 @@ done:
int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
{
- int i, copy;
int start = skb_headlen(skb);
+ struct sk_buff *frag_iter;
+ int i, copy;
if (offset > (int)skb->len - len)
goto fault;
@@ -1632,28 +1638,24 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
start = end;
}
- if (skb_shinfo(skb)->frag_list) {
- struct sk_buff *list = skb_shinfo(skb)->frag_list;
+ skb_walk_frags(skb, frag_iter) {
+ int end;
- for (; list; list = list->next) {
- int end;
-
- WARN_ON(start > offset + len);
-
- end = start + list->len;
- if ((copy = end - offset) > 0) {
- if (copy > len)
- copy = len;
- if (skb_store_bits(list, offset - start,
- from, copy))
- goto fault;
- if ((len -= copy) == 0)
- return 0;
- offset += copy;
- from += copy;
- }
- start = end;
+ WARN_ON(start > offset + len);
+
+ end = start + frag_iter->len;
+ if ((copy = end - offset) > 0) {
+ if (copy > len)
+ copy = len;
+ if (skb_store_bits(frag_iter, offset - start,
+ from, copy))
+ goto fault;
+ if ((len -= copy) == 0)
+ return 0;
+ offset += copy;
+ from += copy;
}
+ start = end;
}
if (!len)
return 0;
@@ -1670,6 +1672,7 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
{
int start = skb_headlen(skb);
int i, copy = start - offset;
+ struct sk_buff *frag_iter;
int pos = 0;
/* Checksum header. */
@@ -1709,29 +1712,25 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
start = end;
}
- if (skb_shinfo(skb)->frag_list) {
- struct sk_buff *list = skb_shinfo(skb)->frag_list;
+ skb_walk_frags(skb, frag_iter) {
+ int end;
- for (; list; list = list->next) {
- int end;
-
- WARN_ON(start > offset + len);
-
- end = start + list->len;
- if ((copy = end - offset) > 0) {
- __wsum csum2;
- if (copy > len)
- copy = len;
- csum2 = skb_checksum(list, offset - start,
- copy, 0);
- csum = csum_block_add(csum, csum2, pos);
- if ((len -= copy) == 0)
- return csum;
- offset += copy;
- pos += copy;
- }
- start = end;
+ WARN_ON(start > offset + len);
+
+ end = start + frag_iter->len;
+ if ((copy = end - offset) > 0) {
+ __wsum csum2;
+ if (copy > len)
+ copy = len;
+ csum2 = skb_checksum(frag_iter, offset - start,
+ copy, 0);
+ csum = csum_block_add(csum, csum2, pos);
+ if ((len -= copy) == 0)
+ return csum;
+ offset += copy;
+ pos += copy;
}
+ start = end;
}
BUG_ON(len);
@@ -1746,6 +1745,7 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
{
int start = skb_headlen(skb);
int i, copy = start - offset;
+ struct sk_buff *frag_iter;
int pos = 0;
/* Copy header. */
@@ -1790,31 +1790,27 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
start = end;
}
- if (skb_shinfo(skb)->frag_list) {
- struct sk_buff *list = skb_shinfo(skb)->frag_list;
+ skb_walk_frags(skb, frag_iter) {
+ __wsum csum2;
+ int end;
- for (; list; list = list->next) {
- __wsum csum2;
- int end;
-
- WARN_ON(start > offset + len);
-
- end = start + list->len;
- if ((copy = end - offset) > 0) {
- if (copy > len)
- copy = len;
- csum2 = skb_copy_and_csum_bits(list,
- offset - start,
- to, copy, 0);
- csum = csum_block_add(csum, csum2, pos);
- if ((len -= copy) == 0)
- return csum;
- offset += copy;
- to += copy;
- pos += copy;
- }
- start = end;
+ WARN_ON(start > offset + len);
+
+ end = start + frag_iter->len;
+ if ((copy = end - offset) > 0) {
+ if (copy > len)
+ copy = len;
+ csum2 = skb_copy_and_csum_bits(frag_iter,
+ offset - start,
+ to, copy, 0);
+ csum = csum_block_add(csum, csum2, pos);
+ if ((len -= copy) == 0)
+ return csum;
+ offset += copy;
+ to += copy;
+ pos += copy;
}
+ start = end;
}
BUG_ON(len);
return csum;
@@ -2288,7 +2284,7 @@ unsigned int skb_seq_read(unsigned int consumed, const u8 **data,
next_skb:
block_limit = skb_headlen(st->cur_skb) + st->stepped_offset;
- if (abs_offset < block_limit) {
+ if (abs_offset < block_limit && !st->frag_data) {
*data = st->cur_skb->data + (abs_offset - st->stepped_offset);
return block_limit - abs_offset;
}
@@ -2324,8 +2320,7 @@ next_skb:
st->frag_data = NULL;
}
- if (st->root_skb == st->cur_skb &&
- skb_shinfo(st->root_skb)->frag_list) {
+ if (st->root_skb == st->cur_skb && skb_has_frags(st->root_skb)) {
st->cur_skb = skb_shinfo(st->root_skb)->frag_list;
st->frag_idx = 0;
goto next_skb;
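
The added !st->frag_data test closes a window where skb_seq_read() could return linear-area data while the fragment page from a previous call was still kmapped, leaking the mapping. For reference, a typical caller looks like this sketch:

static void dump_skb(struct sk_buff *skb)
{
	struct skb_seq_state st;
	const u8 *data;
	unsigned int consumed = 0, len;

	skb_prepare_seq_read(skb, 0, skb->len, &st);
	while ((len = skb_seq_read(consumed, &data, &st)) != 0) {
		/* process 'len' bytes at 'data'; on an early exit,
		 * call skb_abort_seq_read(&st) to drop any mapping
		 */
		consumed += len;
	}
}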
@@ -2636,7 +2631,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
} else
skb_get(fskb2);
- BUG_ON(skb_shinfo(nskb)->frag_list);
+ SKB_FRAG_ASSERT(nskb);
skb_shinfo(nskb)->frag_list = fskb2;
}
@@ -2661,30 +2656,40 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
{
struct sk_buff *p = *head;
struct sk_buff *nskb;
+ struct skb_shared_info *skbinfo = skb_shinfo(skb);
+ struct skb_shared_info *pinfo = skb_shinfo(p);
unsigned int headroom;
unsigned int len = skb_gro_len(skb);
+ unsigned int offset = skb_gro_offset(skb);
+ unsigned int headlen = skb_headlen(skb);
if (p->len + len >= 65536)
return -E2BIG;
- if (skb_shinfo(p)->frag_list)
+ if (pinfo->frag_list)
goto merge;
- else if (skb_headlen(skb) <= skb_gro_offset(skb)) {
- if (skb_shinfo(p)->nr_frags + skb_shinfo(skb)->nr_frags >
- MAX_SKB_FRAGS)
+ else if (headlen <= offset) {
+ skb_frag_t *frag;
+ skb_frag_t *frag2;
+ int i = skbinfo->nr_frags;
+ int nr_frags = pinfo->nr_frags + i;
+
+ offset -= headlen;
+
+ if (nr_frags > MAX_SKB_FRAGS)
return -E2BIG;
- skb_shinfo(skb)->frags[0].page_offset +=
- skb_gro_offset(skb) - skb_headlen(skb);
- skb_shinfo(skb)->frags[0].size -=
- skb_gro_offset(skb) - skb_headlen(skb);
+ pinfo->nr_frags = nr_frags;
+ skbinfo->nr_frags = 0;
- memcpy(skb_shinfo(p)->frags + skb_shinfo(p)->nr_frags,
- skb_shinfo(skb)->frags,
- skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
+ frag = pinfo->frags + nr_frags;
+ frag2 = skbinfo->frags + i;
+ do {
+ *--frag = *--frag2;
+ } while (--i);
- skb_shinfo(p)->nr_frags += skb_shinfo(skb)->nr_frags;
- skb_shinfo(skb)->nr_frags = 0;
+ frag->page_offset += offset;
+ frag->size -= offset;
skb->truesize -= skb->data_len;
skb->len -= skb->data_len;
@@ -2715,7 +2720,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
*NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p);
skb_shinfo(nskb)->frag_list = p;
- skb_shinfo(nskb)->gso_size = skb_shinfo(p)->gso_size;
+ skb_shinfo(nskb)->gso_size = pinfo->gso_size;
skb_header_release(p);
nskb->prev = p;
@@ -2730,16 +2735,13 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
p = nskb;
merge:
- if (skb_gro_offset(skb) > skb_headlen(skb)) {
- skb_shinfo(skb)->frags[0].page_offset +=
- skb_gro_offset(skb) - skb_headlen(skb);
- skb_shinfo(skb)->frags[0].size -=
- skb_gro_offset(skb) - skb_headlen(skb);
- skb_gro_reset_offset(skb);
- skb_gro_pull(skb, skb_headlen(skb));
+ if (offset > headlen) {
+ skbinfo->frags[0].page_offset += offset - headlen;
+ skbinfo->frags[0].size -= offset - headlen;
+ offset = headlen;
}
- __skb_pull(skb, skb_gro_offset(skb));
+ __skb_pull(skb, offset);
p->prev->next = skb;
p->prev = skb;
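
In the frags-only fast path above, skb's descriptors are appended to p's array by walking both arrays backwards from their tails; when the loop ends, frag points at the first descriptor copied, so the header-overlap trim of offset bytes is applied exactly once. The move in isolation, as a sketch (at least one source frag is guaranteed by the headlen <= offset test):

static void gro_merge_frags(struct skb_shared_info *pinfo,
			    struct skb_shared_info *skbinfo,
			    unsigned int offset)
{
	int i = skbinfo->nr_frags;
	int nr_frags = pinfo->nr_frags + i;
	skb_frag_t *frag = pinfo->frags + nr_frags;
	skb_frag_t *frag2 = skbinfo->frags + i;

	pinfo->nr_frags = nr_frags;
	skbinfo->nr_frags = 0;	/* ownership moves to the head skb */

	do {
		*--frag = *--frag2;	/* copy from the tail backwards */
	} while (--i);

	/* 'frag' now points at the first copied descriptor; skip the
	 * bytes that were already consumed as protocol headers
	 */
	frag->page_offset += offset;
	frag->size -= offset;
}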
@@ -2786,6 +2788,7 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
{
int start = skb_headlen(skb);
int i, copy = start - offset;
+ struct sk_buff *frag_iter;
int elt = 0;
if (copy > 0) {
@@ -2819,26 +2822,22 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
start = end;
}
- if (skb_shinfo(skb)->frag_list) {
- struct sk_buff *list = skb_shinfo(skb)->frag_list;
-
- for (; list; list = list->next) {
- int end;
+ skb_walk_frags(skb, frag_iter) {
+ int end;
- WARN_ON(start > offset + len);
+ WARN_ON(start > offset + len);
- end = start + list->len;
- if ((copy = end - offset) > 0) {
- if (copy > len)
- copy = len;
- elt += __skb_to_sgvec(list, sg+elt, offset - start,
- copy);
- if ((len -= copy) == 0)
- return elt;
- offset += copy;
- }
- start = end;
+ end = start + frag_iter->len;
+ if ((copy = end - offset) > 0) {
+ if (copy > len)
+ copy = len;
+ elt += __skb_to_sgvec(frag_iter, sg+elt, offset - start,
+ copy);
+ if ((len -= copy) == 0)
+ return elt;
+ offset += copy;
}
+ start = end;
}
BUG_ON(len);
return elt;
@@ -2886,7 +2885,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
return -ENOMEM;
/* Easy case. Most of packets will go this way. */
- if (!skb_shinfo(skb)->frag_list) {
+ if (!skb_has_frags(skb)) {
/* A little trouble here: not enough space for the trailer.
* This should not happen when the stack is tuned to generate
* good frames. OK, on a miss we reallocate and reserve even more
@@ -2921,7 +2920,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
if (skb1->next == NULL && tailbits) {
if (skb_shinfo(skb1)->nr_frags ||
- skb_shinfo(skb1)->frag_list ||
+ skb_has_frags(skb1) ||
skb_tailroom(skb1) < tailbits)
ntail = tailbits + 128;
}
@@ -2930,7 +2929,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
skb_cloned(skb1) ||
ntail ||
skb_shinfo(skb1)->nr_frags ||
- skb_shinfo(skb1)->frag_list) {
+ skb_has_frags(skb1)) {
struct sk_buff *skb2;
/* Fuck, we are miserable poor guys... */
@@ -3016,12 +3015,12 @@ EXPORT_SYMBOL_GPL(skb_tstamp_tx);
*/
bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off)
{
- if (unlikely(start > skb->len - 2) ||
- unlikely((int)start + off > skb->len - 2)) {
+ if (unlikely(start > skb_headlen(skb)) ||
+ unlikely((int)start + off > skb_headlen(skb) - 2)) {
if (net_ratelimit())
printk(KERN_WARNING
"bad partial csum: csum=%u/%u len=%u\n",
- start, off, skb->len);
+ start, off, skb_headlen(skb));
return false;
}
skb->ip_summed = CHECKSUM_PARTIAL;
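
The tightened test reflects what csum_start/csum_offset actually mean: the device (or skb_checksum_help()) reads from the start point and writes the 16-bit result inside the linear header, so both must fit within skb_headlen(), not merely skb->len. A hypothetical caller for an Ethernet + IPv4 + UDP packet whose headers sit in the linear area:

#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/udp.h>

static bool mark_udp_partial_csum(struct sk_buff *skb)
{
	u16 start = ETH_HLEN + sizeof(struct iphdr);	/* 14 + 20 = 34 */
	u16 off = offsetof(struct udphdr, check);	/* 6 */

	/* succeeds only if 'start' and the 2-byte checksum field at
	 * start + off both land inside skb_headlen(skb)
	 */
	return skb_partial_csum_set(skb, start, off);
}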
diff --git a/net/core/sock.c b/net/core/sock.c
index 7dbf3ffb35c..b0ba569bc97 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -155,6 +155,7 @@ static const char *af_family_key_strings[AF_MAX+1] = {
"sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" ,
"sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" ,
"sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" ,
+ "sk_lock-AF_IEEE802154",
"sk_lock-AF_MAX"
};
static const char *af_family_slock_key_strings[AF_MAX+1] = {
@@ -170,6 +171,7 @@ static const char *af_family_slock_key_strings[AF_MAX+1] = {
"slock-27" , "slock-28" , "slock-AF_CAN" ,
"slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" ,
"slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" ,
+ "slock-AF_IEEE802154",
"slock-AF_MAX"
};
static const char *af_family_clock_key_strings[AF_MAX+1] = {
@@ -185,6 +187,7 @@ static const char *af_family_clock_key_strings[AF_MAX+1] = {
"clock-27" , "clock-28" , "clock-AF_CAN" ,
"clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" ,
"clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" ,
+ "clock-AF_IEEE802154",
"clock-AF_MAX"
};
@@ -212,6 +215,7 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
/* Maximal space eaten by iovec or ancillary data plus some space */
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
+EXPORT_SYMBOL(sysctl_optmem_max);
static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
{
@@ -444,7 +448,7 @@ static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
int sock_setsockopt(struct socket *sock, int level, int optname,
char __user *optval, int optlen)
{
- struct sock *sk=sock->sk;
+ struct sock *sk = sock->sk;
int val;
int valbool;
struct linger ling;
@@ -463,15 +467,15 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
if (get_user(val, (int __user *)optval))
return -EFAULT;
- valbool = val?1:0;
+ valbool = val ? 1 : 0;
lock_sock(sk);
- switch(optname) {
+ switch (optname) {
case SO_DEBUG:
- if (val && !capable(CAP_NET_ADMIN)) {
+ if (val && !capable(CAP_NET_ADMIN))
ret = -EACCES;
- } else
+ else
sock_valbool_flag(sk, SOCK_DBG, valbool);
break;
case SO_REUSEADDR:
@@ -582,7 +586,7 @@ set_rcvbuf:
ret = -EINVAL; /* 1003.1g */
break;
}
- if (copy_from_user(&ling,optval,sizeof(ling))) {
+ if (copy_from_user(&ling, optval, sizeof(ling))) {
ret = -EFAULT;
break;
}
@@ -690,9 +694,8 @@ set_rcvbuf:
case SO_MARK:
if (!capable(CAP_NET_ADMIN))
ret = -EPERM;
- else {
+ else
sk->sk_mark = val;
- }
break;
/* We implement the SO_SNDLOWAT etc to
@@ -704,6 +707,7 @@ set_rcvbuf:
release_sock(sk);
return ret;
}
+EXPORT_SYMBOL(sock_setsockopt);
int sock_getsockopt(struct socket *sock, int level, int optname,
@@ -727,7 +731,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
memset(&v, 0, sizeof(v));
- switch(optname) {
+ switch (optname) {
case SO_DEBUG:
v.val = sock_flag(sk, SOCK_DBG);
break;
@@ -762,7 +766,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
case SO_ERROR:
v.val = -sock_error(sk);
- if (v.val==0)
+ if (v.val == 0)
v.val = xchg(&sk->sk_err_soft, 0);
break;
@@ -816,7 +820,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
break;
case SO_RCVTIMEO:
- lv=sizeof(struct timeval);
+ lv = sizeof(struct timeval);
if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
v.tm.tv_sec = 0;
v.tm.tv_usec = 0;
@@ -827,7 +831,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
break;
case SO_SNDTIMEO:
- lv=sizeof(struct timeval);
+ lv = sizeof(struct timeval);
if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
v.tm.tv_sec = 0;
v.tm.tv_usec = 0;
@@ -842,7 +846,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
break;
case SO_SNDLOWAT:
- v.val=1;
+ v.val = 1;
break;
case SO_PASSCRED:
@@ -941,6 +945,8 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
sk = kmalloc(prot->obj_size, priority);
if (sk != NULL) {
+ kmemcheck_annotate_bitfield(sk, flags);
+
if (security_sk_alloc(sk, family, priority))
goto out_free;
@@ -1002,8 +1008,9 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
return sk;
}
+EXPORT_SYMBOL(sk_alloc);
-void sk_free(struct sock *sk)
+static void __sk_free(struct sock *sk)
{
struct sk_filter *filter;
@@ -1027,6 +1034,18 @@ void sk_free(struct sock *sk)
sk_prot_free(sk->sk_prot_creator, sk);
}
+void sk_free(struct sock *sk)
+{
+ /*
+ * We subtract one from sk_wmem_alloc and can tell whether
+ * some packets are still in some tx queue.
+ * If not zero, sock_wfree() will call __sk_free(sk) later.
+ */
+ if (atomic_dec_and_test(&sk->sk_wmem_alloc))
+ __sk_free(sk);
+}
+EXPORT_SYMBOL(sk_free);
+
/*
* Last sock_put should drop reference to sk->sk_net. It has already
* been dropped in sk_change_net. Taking reference to stopping namespace
@@ -1065,7 +1084,10 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
atomic_set(&newsk->sk_rmem_alloc, 0);
- atomic_set(&newsk->sk_wmem_alloc, 0);
+ /*
+ * sk_wmem_alloc set to one (see sk_free() and sock_wfree())
+ */
+ atomic_set(&newsk->sk_wmem_alloc, 1);
atomic_set(&newsk->sk_omem_alloc, 0);
skb_queue_head_init(&newsk->sk_receive_queue);
skb_queue_head_init(&newsk->sk_write_queue);
@@ -1126,7 +1148,6 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
out:
return newsk;
}
-
EXPORT_SYMBOL_GPL(sk_clone);
void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
@@ -1170,13 +1191,20 @@ void __init sk_init(void)
void sock_wfree(struct sk_buff *skb)
{
struct sock *sk = skb->sk;
+ int res;
/* In case it might be waiting for more memory. */
- atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
+ res = atomic_sub_return(skb->truesize, &sk->sk_wmem_alloc);
if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
sk->sk_write_space(sk);
- sock_put(sk);
+ /*
+ * If sk_wmem_alloc reached 0, we are the last user and should
+ * free this sock, as sk_free() could not do it.
+ */
+ if (res == 0)
+ __sk_free(sk);
}
+EXPORT_SYMBOL(sock_wfree);
/*
* Read buffer destructor automatically called from kfree_skb.
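
Taken together with the atomic_set(..., 1) in sk_clone() and sock_init_data(), sk_wmem_alloc now doubles as the write-side reference count: the initial 1 belongs to the socket owner, each queued skb adds its truesize, and whichever of sk_free() or the final sock_wfree() drops the count to zero does the real teardown. This saves the sock_put()/sock_hold() pair the old code paid per transmitted packet. A condensed sketch of the protocol (__sk_free() stands for the private teardown in sock.c):

static void owner_releases_socket(struct sock *sk)
{
	/* drop the initial reference taken at init time */
	if (atomic_dec_and_test(&sk->sk_wmem_alloc))
		__sk_free(sk);	/* no tx buffers left in flight */
	/* otherwise the last sock_wfree() frees the sock */
}

static void tx_buffer_destructor(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	if (atomic_sub_return(skb->truesize, &sk->sk_wmem_alloc) == 0)
		__sk_free(sk);	/* owner already called sk_free() */
}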
@@ -1188,6 +1216,7 @@ void sock_rfree(struct sk_buff *skb)
atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
sk_mem_uncharge(skb->sk, skb->truesize);
}
+EXPORT_SYMBOL(sock_rfree);
int sock_i_uid(struct sock *sk)
@@ -1199,6 +1228,7 @@ int sock_i_uid(struct sock *sk)
read_unlock(&sk->sk_callback_lock);
return uid;
}
+EXPORT_SYMBOL(sock_i_uid);
unsigned long sock_i_ino(struct sock *sk)
{
@@ -1209,6 +1239,7 @@ unsigned long sock_i_ino(struct sock *sk)
read_unlock(&sk->sk_callback_lock);
return ino;
}
+EXPORT_SYMBOL(sock_i_ino);
/*
* Allocate a skb from the socket's send buffer.
@@ -1217,7 +1248,7 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
gfp_t priority)
{
if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
- struct sk_buff * skb = alloc_skb(size, priority);
+ struct sk_buff *skb = alloc_skb(size, priority);
if (skb) {
skb_set_owner_w(skb, sk);
return skb;
@@ -1225,6 +1256,7 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
}
return NULL;
}
+EXPORT_SYMBOL(sock_wmalloc);
/*
* Allocate a skb from the socket's receive buffer.
@@ -1261,6 +1293,7 @@ void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
}
return NULL;
}
+EXPORT_SYMBOL(sock_kmalloc);
/*
* Free an option memory block.
@@ -1270,11 +1303,12 @@ void sock_kfree_s(struct sock *sk, void *mem, int size)
kfree(mem);
atomic_sub(size, &sk->sk_omem_alloc);
}
+EXPORT_SYMBOL(sock_kfree_s);
/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
I think, these locks should be removed for datagram sockets.
*/
-static long sock_wait_for_wmem(struct sock * sk, long timeo)
+static long sock_wait_for_wmem(struct sock *sk, long timeo)
{
DEFINE_WAIT(wait);
@@ -1392,6 +1426,7 @@ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
{
return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
}
+EXPORT_SYMBOL(sock_alloc_send_skb);
static void __lock_sock(struct sock *sk)
{
@@ -1460,7 +1495,6 @@ int sk_wait_data(struct sock *sk, long *timeo)
finish_wait(sk->sk_sleep, &wait);
return rc;
}
-
EXPORT_SYMBOL(sk_wait_data);
/**
@@ -1541,7 +1575,6 @@ suppress_allocation:
atomic_sub(amt, prot->memory_allocated);
return 0;
}
-
EXPORT_SYMBOL(__sk_mem_schedule);
/**
@@ -1560,7 +1593,6 @@ void __sk_mem_reclaim(struct sock *sk)
(atomic_read(prot->memory_allocated) < prot->sysctl_mem[0]))
*prot->memory_pressure = 0;
}
-
EXPORT_SYMBOL(__sk_mem_reclaim);
@@ -1575,78 +1607,92 @@ int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
{
return -EOPNOTSUPP;
}
+EXPORT_SYMBOL(sock_no_bind);
int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
int len, int flags)
{
return -EOPNOTSUPP;
}
+EXPORT_SYMBOL(sock_no_connect);
int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
{
return -EOPNOTSUPP;
}
+EXPORT_SYMBOL(sock_no_socketpair);
int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
{
return -EOPNOTSUPP;
}
+EXPORT_SYMBOL(sock_no_accept);
int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
int *len, int peer)
{
return -EOPNOTSUPP;
}
+EXPORT_SYMBOL(sock_no_getname);
-unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt)
+unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
{
return 0;
}
+EXPORT_SYMBOL(sock_no_poll);
int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
return -EOPNOTSUPP;
}
+EXPORT_SYMBOL(sock_no_ioctl);
int sock_no_listen(struct socket *sock, int backlog)
{
return -EOPNOTSUPP;
}
+EXPORT_SYMBOL(sock_no_listen);
int sock_no_shutdown(struct socket *sock, int how)
{
return -EOPNOTSUPP;
}
+EXPORT_SYMBOL(sock_no_shutdown);
int sock_no_setsockopt(struct socket *sock, int level, int optname,
char __user *optval, int optlen)
{
return -EOPNOTSUPP;
}
+EXPORT_SYMBOL(sock_no_setsockopt);
int sock_no_getsockopt(struct socket *sock, int level, int optname,
char __user *optval, int __user *optlen)
{
return -EOPNOTSUPP;
}
+EXPORT_SYMBOL(sock_no_getsockopt);
int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
size_t len)
{
return -EOPNOTSUPP;
}
+EXPORT_SYMBOL(sock_no_sendmsg);
int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
size_t len, int flags)
{
return -EOPNOTSUPP;
}
+EXPORT_SYMBOL(sock_no_recvmsg);
int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
/* Mirror missing mmap method error code */
return -ENODEV;
}
+EXPORT_SYMBOL(sock_no_mmap);
ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
{
@@ -1660,6 +1706,7 @@ ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, siz
kunmap(page);
return res;
}
+EXPORT_SYMBOL(sock_no_sendpage);
/*
* Default Socket Callbacks
@@ -1723,6 +1770,7 @@ void sk_send_sigurg(struct sock *sk)
if (send_sigurg(&sk->sk_socket->file->f_owner))
sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
}
+EXPORT_SYMBOL(sk_send_sigurg);
void sk_reset_timer(struct sock *sk, struct timer_list* timer,
unsigned long expires)
@@ -1730,7 +1778,6 @@ void sk_reset_timer(struct sock *sk, struct timer_list* timer,
if (!mod_timer(timer, expires))
sock_hold(sk);
}
-
EXPORT_SYMBOL(sk_reset_timer);
void sk_stop_timer(struct sock *sk, struct timer_list* timer)
@@ -1738,7 +1785,6 @@ void sk_stop_timer(struct sock *sk, struct timer_list* timer)
if (timer_pending(timer) && del_timer(timer))
__sock_put(sk);
}
-
EXPORT_SYMBOL(sk_stop_timer);
void sock_init_data(struct socket *sock, struct sock *sk)
@@ -1795,8 +1841,10 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_stamp = ktime_set(-1L, 0);
atomic_set(&sk->sk_refcnt, 1);
+ atomic_set(&sk->sk_wmem_alloc, 1);
atomic_set(&sk->sk_drops, 0);
}
+EXPORT_SYMBOL(sock_init_data);
void lock_sock_nested(struct sock *sk, int subclass)
{
@@ -1812,7 +1860,6 @@ void lock_sock_nested(struct sock *sk, int subclass)
mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
local_bh_enable();
}
-
EXPORT_SYMBOL(lock_sock_nested);
void release_sock(struct sock *sk)
@@ -1895,7 +1942,6 @@ int sock_common_getsockopt(struct socket *sock, int level, int optname,
return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}
-
EXPORT_SYMBOL(sock_common_getsockopt);
#ifdef CONFIG_COMPAT
@@ -1925,7 +1971,6 @@ int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
msg->msg_namelen = addr_len;
return err;
}
-
EXPORT_SYMBOL(sock_common_recvmsg);
/*
@@ -1938,7 +1983,6 @@ int sock_common_setsockopt(struct socket *sock, int level, int optname,
return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}
-
EXPORT_SYMBOL(sock_common_setsockopt);
#ifdef CONFIG_COMPAT
@@ -1989,7 +2033,6 @@ void sk_common_release(struct sock *sk)
sk_refcnt_debug_release(sk);
sock_put(sk);
}
-
EXPORT_SYMBOL(sk_common_release);
static DEFINE_RWLOCK(proto_list_lock);
@@ -2171,7 +2214,6 @@ out_free_sock_slab:
out:
return -ENOBUFS;
}
-
EXPORT_SYMBOL(proto_register);
void proto_unregister(struct proto *prot)
@@ -2198,7 +2240,6 @@ void proto_unregister(struct proto *prot)
prot->twsk_prot->twsk_slab = NULL;
}
}
-
EXPORT_SYMBOL(proto_unregister);
#ifdef CONFIG_PROC_FS
@@ -2324,33 +2365,3 @@ static int __init proto_init(void)
subsys_initcall(proto_init);
#endif /* PROC_FS */
-
-EXPORT_SYMBOL(sk_alloc);
-EXPORT_SYMBOL(sk_free);
-EXPORT_SYMBOL(sk_send_sigurg);
-EXPORT_SYMBOL(sock_alloc_send_skb);
-EXPORT_SYMBOL(sock_init_data);
-EXPORT_SYMBOL(sock_kfree_s);
-EXPORT_SYMBOL(sock_kmalloc);
-EXPORT_SYMBOL(sock_no_accept);
-EXPORT_SYMBOL(sock_no_bind);
-EXPORT_SYMBOL(sock_no_connect);
-EXPORT_SYMBOL(sock_no_getname);
-EXPORT_SYMBOL(sock_no_getsockopt);
-EXPORT_SYMBOL(sock_no_ioctl);
-EXPORT_SYMBOL(sock_no_listen);
-EXPORT_SYMBOL(sock_no_mmap);
-EXPORT_SYMBOL(sock_no_poll);
-EXPORT_SYMBOL(sock_no_recvmsg);
-EXPORT_SYMBOL(sock_no_sendmsg);
-EXPORT_SYMBOL(sock_no_sendpage);
-EXPORT_SYMBOL(sock_no_setsockopt);
-EXPORT_SYMBOL(sock_no_shutdown);
-EXPORT_SYMBOL(sock_no_socketpair);
-EXPORT_SYMBOL(sock_rfree);
-EXPORT_SYMBOL(sock_setsockopt);
-EXPORT_SYMBOL(sock_wfree);
-EXPORT_SYMBOL(sock_wmalloc);
-EXPORT_SYMBOL(sock_i_uid);
-EXPORT_SYMBOL(sock_i_ino);
-EXPORT_SYMBOL(sysctl_optmem_max);
diff --git a/net/core/stream.c b/net/core/stream.c
index 8727cead64a..a37debfeb1b 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -33,7 +33,8 @@ void sk_stream_write_space(struct sock *sk)
clear_bit(SOCK_NOSPACE, &sock->flags);
if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
- wake_up_interruptible(sk->sk_sleep);
+ wake_up_interruptible_poll(sk->sk_sleep, POLLOUT |
+ POLLWRNORM | POLLWRBAND);
if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT);
}
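
wake_up_interruptible_poll() carries the event mask as the wakeup key, so waiters that registered keyed wait entries (as epoll does) are woken only when the key intersects the events they asked for; pure readers are no longer scheduled just because write space opened up. The waker side in isolation, as a sketch:

#include <linux/poll.h>
#include <linux/wait.h>

static void notify_write_space(wait_queue_head_t *wq)
{
	/* sleepers keyed on POLLIN (e.g. epoll read watches) are
	 * skipped; only write-interested waiters run
	 */
	if (waitqueue_active(wq))
		wake_up_interruptible_poll(wq, POLLOUT | POLLWRNORM |
					   POLLWRBAND);
}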
diff --git a/net/core/user_dma.c b/net/core/user_dma.c
index 164b090d5ac..25d717ebc92 100644
--- a/net/core/user_dma.c
+++ b/net/core/user_dma.c
@@ -51,6 +51,7 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan,
{
int start = skb_headlen(skb);
int i, copy = start - offset;
+ struct sk_buff *frag_iter;
dma_cookie_t cookie = 0;
/* Copy header. */
@@ -94,31 +95,28 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan,
start = end;
}
- if (skb_shinfo(skb)->frag_list) {
- struct sk_buff *list = skb_shinfo(skb)->frag_list;
-
- for (; list; list = list->next) {
- int end;
-
- WARN_ON(start > offset + len);
-
- end = start + list->len;
- copy = end - offset;
- if (copy > 0) {
- if (copy > len)
- copy = len;
- cookie = dma_skb_copy_datagram_iovec(chan, list,
- offset - start, to, copy,
- pinned_list);
- if (cookie < 0)
- goto fault;
- len -= copy;
- if (len == 0)
- goto end;
- offset += copy;
- }
- start = end;
+ skb_walk_frags(skb, frag_iter) {
+ int end;
+
+ WARN_ON(start > offset + len);
+
+ end = start + frag_iter->len;
+ copy = end - offset;
+ if (copy > 0) {
+ if (copy > len)
+ copy = len;
+ cookie = dma_skb_copy_datagram_iovec(chan, frag_iter,
+ offset - start,
+ to, copy,
+ pinned_list);
+ if (cookie < 0)
+ goto fault;
+ len -= copy;
+ if (len == 0)
+ goto end;
+ offset += copy;
}
+ start = end;
}
end: