summaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/Makefile3
-rw-r--r--net/core/dev.c135
-rw-r--r--net/core/dst.c15
-rw-r--r--net/core/filter.c104
-rw-r--r--net/core/neighbour.c9
-rw-r--r--net/core/netpoll.c80
-rw-r--r--net/core/pktgen.c31
-rw-r--r--net/core/rtnetlink.c2
-rw-r--r--net/core/skbuff.c180
-rw-r--r--net/core/sock.c24
-rw-r--r--net/core/sysctl_net_core.c61
-rw-r--r--net/core/utils.c37
-rw-r--r--net/core/wireless.c1
13 files changed, 370 insertions, 312 deletions
diff --git a/net/core/Makefile b/net/core/Makefile
index 5e0c56b7f60..f5f5e58943e 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -7,9 +7,10 @@ obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
-obj-y += flow.o dev.o ethtool.o dev_mcast.o dst.o \
+obj-y += dev.o ethtool.o dev_mcast.o dst.o \
neighbour.o rtnetlink.o utils.o link_watch.o filter.o
+obj-$(CONFIG_XFRM) += flow.o
obj-$(CONFIG_SYSFS) += net-sysfs.o
obj-$(CONFIG_NETFILTER) += netfilter.o
obj-$(CONFIG_NET_DIVERT) += dv.o
diff --git a/net/core/dev.c b/net/core/dev.c
index ab935778ce8..52a3bf7ae17 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -115,18 +115,6 @@
#endif /* CONFIG_NET_RADIO */
#include <asm/current.h>
-/* This define, if set, will randomly drop a packet when congestion
- * is more than moderate. It helps fairness in the multi-interface
- * case when one of them is a hog, but it kills performance for the
- * single interface case so it is off now by default.
- */
-#undef RAND_LIE
-
-/* Setting this will sample the queue lengths and thus congestion
- * via a timer instead of as each packet is received.
- */
-#undef OFFLINE_SAMPLE
-
/*
* The list of packet types we will receive (as opposed to discard)
* and the routines to invoke.
@@ -159,11 +147,6 @@ static DEFINE_SPINLOCK(ptype_lock);
static struct list_head ptype_base[16]; /* 16 way hashed list */
static struct list_head ptype_all; /* Taps */
-#ifdef OFFLINE_SAMPLE
-static void sample_queue(unsigned long dummy);
-static struct timer_list samp_timer = TIMER_INITIALIZER(sample_queue, 0, 0);
-#endif
-
/*
* The @dev_base list is protected by @dev_base_lock and the rtln
* semaphore.
@@ -215,7 +198,7 @@ static struct notifier_block *netdev_chain;
* Device drivers call our routines to queue packets here. We empty the
* queue in the local softnet handler.
*/
-DEFINE_PER_CPU(struct softnet_data, softnet_data) = { 0, };
+DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL };
#ifdef CONFIG_SYSFS
extern int netdev_sysfs_init(void);
@@ -918,8 +901,7 @@ int dev_close(struct net_device *dev)
smp_mb__after_clear_bit(); /* Commit netif_running(). */
while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
/* No hurry. */
- current->state = TASK_INTERRUPTIBLE;
- schedule_timeout(1);
+ msleep(1);
}
/*
@@ -1144,7 +1126,7 @@ static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
extern void skb_release_data(struct sk_buff *);
/* Keep head the same: replace data */
-int __skb_linearize(struct sk_buff *skb, int gfp_mask)
+int __skb_linearize(struct sk_buff *skb, unsigned int __nocast gfp_mask)
{
unsigned int size;
u8 *data;
@@ -1363,71 +1345,13 @@ out:
Receiver routines
=======================================================================*/
-int netdev_max_backlog = 300;
+int netdev_max_backlog = 1000;
+int netdev_budget = 300;
int weight_p = 64; /* old backlog weight */
-/* These numbers are selected based on intuition and some
- * experimentatiom, if you have more scientific way of doing this
- * please go ahead and fix things.
- */
-int no_cong_thresh = 10;
-int no_cong = 20;
-int lo_cong = 100;
-int mod_cong = 290;
DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
-static void get_sample_stats(int cpu)
-{
-#ifdef RAND_LIE
- unsigned long rd;
- int rq;
-#endif
- struct softnet_data *sd = &per_cpu(softnet_data, cpu);
- int blog = sd->input_pkt_queue.qlen;
- int avg_blog = sd->avg_blog;
-
- avg_blog = (avg_blog >> 1) + (blog >> 1);
-
- if (avg_blog > mod_cong) {
- /* Above moderate congestion levels. */
- sd->cng_level = NET_RX_CN_HIGH;
-#ifdef RAND_LIE
- rd = net_random();
- rq = rd % netdev_max_backlog;
- if (rq < avg_blog) /* unlucky bastard */
- sd->cng_level = NET_RX_DROP;
-#endif
- } else if (avg_blog > lo_cong) {
- sd->cng_level = NET_RX_CN_MOD;
-#ifdef RAND_LIE
- rd = net_random();
- rq = rd % netdev_max_backlog;
- if (rq < avg_blog) /* unlucky bastard */
- sd->cng_level = NET_RX_CN_HIGH;
-#endif
- } else if (avg_blog > no_cong)
- sd->cng_level = NET_RX_CN_LOW;
- else /* no congestion */
- sd->cng_level = NET_RX_SUCCESS;
-
- sd->avg_blog = avg_blog;
-}
-
-#ifdef OFFLINE_SAMPLE
-static void sample_queue(unsigned long dummy)
-{
-/* 10 ms 0r 1ms -- i don't care -- JHS */
- int next_tick = 1;
- int cpu = smp_processor_id();
-
- get_sample_stats(cpu);
- next_tick += jiffies;
- mod_timer(&samp_timer, next_tick);
-}
-#endif
-
-
/**
* netif_rx - post buffer to the network code
* @skb: buffer to post
@@ -1448,7 +1372,6 @@ static void sample_queue(unsigned long dummy)
int netif_rx(struct sk_buff *skb)
{
- int this_cpu;
struct softnet_data *queue;
unsigned long flags;
@@ -1464,38 +1387,22 @@ int netif_rx(struct sk_buff *skb)
* short when CPU is congested, but is still operating.
*/
local_irq_save(flags);
- this_cpu = smp_processor_id();
queue = &__get_cpu_var(softnet_data);
__get_cpu_var(netdev_rx_stat).total++;
if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
if (queue->input_pkt_queue.qlen) {
- if (queue->throttle)
- goto drop;
-
enqueue:
dev_hold(skb->dev);
__skb_queue_tail(&queue->input_pkt_queue, skb);
-#ifndef OFFLINE_SAMPLE
- get_sample_stats(this_cpu);
-#endif
local_irq_restore(flags);
- return queue->cng_level;
+ return NET_RX_SUCCESS;
}
- if (queue->throttle)
- queue->throttle = 0;
-
netif_rx_schedule(&queue->backlog_dev);
goto enqueue;
}
- if (!queue->throttle) {
- queue->throttle = 1;
- __get_cpu_var(netdev_rx_stat).throttled++;
- }
-
-drop:
__get_cpu_var(netdev_rx_stat).dropped++;
local_irq_restore(flags);
@@ -1780,8 +1687,6 @@ job_done:
smp_mb__before_clear_bit();
netif_poll_enable(backlog_dev);
- if (queue->throttle)
- queue->throttle = 0;
local_irq_enable();
return 0;
}
@@ -1790,8 +1695,7 @@ static void net_rx_action(struct softirq_action *h)
{
struct softnet_data *queue = &__get_cpu_var(softnet_data);
unsigned long start_time = jiffies;
- int budget = netdev_max_backlog;
-
+ int budget = netdev_budget;
local_irq_disable();
@@ -2055,15 +1959,9 @@ static int softnet_seq_show(struct seq_file *seq, void *v)
struct netif_rx_stats *s = v;
seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
- s->total, s->dropped, s->time_squeeze, s->throttled,
- s->fastroute_hit, s->fastroute_success, s->fastroute_defer,
- s->fastroute_deferred_out,
-#if 0
- s->fastroute_latency_reduction
-#else
- s->cpu_collision
-#endif
- );
+ s->total, s->dropped, s->time_squeeze, 0,
+ 0, 0, 0, 0, /* was fastroute */
+ s->cpu_collision );
return 0;
}
@@ -2190,10 +2088,11 @@ void dev_set_promiscuity(struct net_device *dev, int inc)
{
unsigned short old_flags = dev->flags;
- dev->flags |= IFF_PROMISC;
if ((dev->promiscuity += inc) == 0)
dev->flags &= ~IFF_PROMISC;
- if (dev->flags ^ old_flags) {
+ else
+ dev->flags |= IFF_PROMISC;
+ if (dev->flags != old_flags) {
dev_mc_upload(dev);
printk(KERN_INFO "device %s %s promiscuous mode\n",
dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
@@ -3305,9 +3204,6 @@ static int __init net_dev_init(void)
queue = &per_cpu(softnet_data, i);
skb_queue_head_init(&queue->input_pkt_queue);
- queue->throttle = 0;
- queue->cng_level = 0;
- queue->avg_blog = 10; /* arbitrary non-zero */
queue->completion_queue = NULL;
INIT_LIST_HEAD(&queue->poll_list);
set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
@@ -3316,11 +3212,6 @@ static int __init net_dev_init(void)
atomic_set(&queue->backlog_dev.refcnt, 1);
}
-#ifdef OFFLINE_SAMPLE
- samp_timer.expires = jiffies + (10 * HZ);
- add_timer(&samp_timer);
-#endif
-
dev_boot_phase = 0;
open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
diff --git a/net/core/dst.c b/net/core/dst.c
index fc434ade527..334790da9f1 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -45,6 +45,7 @@ static struct timer_list dst_gc_timer =
static void dst_run_gc(unsigned long dummy)
{
int delayed = 0;
+ int work_performed;
struct dst_entry * dst, **dstp;
if (!spin_trylock(&dst_lock)) {
@@ -52,9 +53,9 @@ static void dst_run_gc(unsigned long dummy)
return;
}
-
del_timer(&dst_gc_timer);
dstp = &dst_garbage_list;
+ work_performed = 0;
while ((dst = *dstp) != NULL) {
if (atomic_read(&dst->__refcnt)) {
dstp = &dst->next;
@@ -62,6 +63,7 @@ static void dst_run_gc(unsigned long dummy)
continue;
}
*dstp = dst->next;
+ work_performed = 1;
dst = dst_destroy(dst);
if (dst) {
@@ -86,9 +88,14 @@ static void dst_run_gc(unsigned long dummy)
dst_gc_timer_inc = DST_GC_MAX;
goto out;
}
- if ((dst_gc_timer_expires += dst_gc_timer_inc) > DST_GC_MAX)
- dst_gc_timer_expires = DST_GC_MAX;
- dst_gc_timer_inc += DST_GC_INC;
+ if (!work_performed) {
+ if ((dst_gc_timer_expires += dst_gc_timer_inc) > DST_GC_MAX)
+ dst_gc_timer_expires = DST_GC_MAX;
+ dst_gc_timer_inc += DST_GC_INC;
+ } else {
+ dst_gc_timer_inc = DST_GC_INC;
+ dst_gc_timer_expires = DST_GC_MIN;
+ }
dst_gc_timer.expires = jiffies + dst_gc_timer_expires;
#if RT_CACHE_DEBUG >= 2
printk("dst_total: %d/%d %ld\n",
diff --git a/net/core/filter.c b/net/core/filter.c
index f3b88205ace..cd91a24f972 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -36,7 +36,7 @@
#include <linux/filter.h>
/* No hurry in this branch */
-static u8 *load_pointer(struct sk_buff *skb, int k)
+static void *__load_pointer(struct sk_buff *skb, int k)
{
u8 *ptr = NULL;
@@ -50,6 +50,18 @@ static u8 *load_pointer(struct sk_buff *skb, int k)
return NULL;
}
+static inline void *load_pointer(struct sk_buff *skb, int k,
+ unsigned int size, void *buffer)
+{
+ if (k >= 0)
+ return skb_header_pointer(skb, k, size, buffer);
+ else {
+ if (k >= SKF_AD_OFF)
+ return NULL;
+ return __load_pointer(skb, k);
+ }
+}
+
/**
* sk_run_filter - run a filter on a socket
* @skb: buffer to run the filter on
@@ -64,15 +76,12 @@ static u8 *load_pointer(struct sk_buff *skb, int k)
int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
{
- unsigned char *data = skb->data;
- /* len is UNSIGNED. Byte wide insns relies only on implicit
- type casts to prevent reading arbitrary memory locations.
- */
- unsigned int len = skb->len-skb->data_len;
struct sock_filter *fentry; /* We walk down these */
+ void *ptr;
u32 A = 0; /* Accumulator */
u32 X = 0; /* Index Register */
u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */
+ u32 tmp;
int k;
int pc;
@@ -168,86 +177,35 @@ int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
case BPF_LD|BPF_W|BPF_ABS:
k = fentry->k;
load_w:
- if (k >= 0 && (unsigned int)(k+sizeof(u32)) <= len) {
- A = ntohl(*(u32*)&data[k]);
+ ptr = load_pointer(skb, k, 4, &tmp);
+ if (ptr != NULL) {
+ A = ntohl(*(u32 *)ptr);
continue;
}
- if (k < 0) {
- u8 *ptr;
-
- if (k >= SKF_AD_OFF)
- break;
- ptr = load_pointer(skb, k);
- if (ptr) {
- A = ntohl(*(u32*)ptr);
- continue;
- }
- } else {
- u32 _tmp, *p;
- p = skb_header_pointer(skb, k, 4, &_tmp);
- if (p != NULL) {
- A = ntohl(*p);
- continue;
- }
- }
return 0;
case BPF_LD|BPF_H|BPF_ABS:
k = fentry->k;
load_h:
- if (k >= 0 && (unsigned int)(k + sizeof(u16)) <= len) {
- A = ntohs(*(u16*)&data[k]);
+ ptr = load_pointer(skb, k, 2, &tmp);
+ if (ptr != NULL) {
+ A = ntohs(*(u16 *)ptr);
continue;
}
- if (k < 0) {
- u8 *ptr;
-
- if (k >= SKF_AD_OFF)
- break;
- ptr = load_pointer(skb, k);
- if (ptr) {
- A = ntohs(*(u16*)ptr);
- continue;
- }
- } else {
- u16 _tmp, *p;
- p = skb_header_pointer(skb, k, 2, &_tmp);
- if (p != NULL) {
- A = ntohs(*p);
- continue;
- }
- }
return 0;
case BPF_LD|BPF_B|BPF_ABS:
k = fentry->k;
load_b:
- if (k >= 0 && (unsigned int)k < len) {
- A = data[k];
+ ptr = load_pointer(skb, k, 1, &tmp);
+ if (ptr != NULL) {
+ A = *(u8 *)ptr;
continue;
}
- if (k < 0) {
- u8 *ptr;
-
- if (k >= SKF_AD_OFF)
- break;
- ptr = load_pointer(skb, k);
- if (ptr) {
- A = *ptr;
- continue;
- }
- } else {
- u8 _tmp, *p;
- p = skb_header_pointer(skb, k, 1, &_tmp);
- if (p != NULL) {
- A = *p;
- continue;
- }
- }
return 0;
case BPF_LD|BPF_W|BPF_LEN:
- A = len;
+ A = skb->len;
continue;
case BPF_LDX|BPF_W|BPF_LEN:
- X = len;
+ X = skb->len;
continue;
case BPF_LD|BPF_W|BPF_IND:
k = X + fentry->k;
@@ -259,10 +217,12 @@ load_b:
k = X + fentry->k;
goto load_b;
case BPF_LDX|BPF_B|BPF_MSH:
- if (fentry->k >= len)
- return 0;
- X = (data[fentry->k] & 0xf) << 2;
- continue;
+ ptr = load_pointer(skb, fentry->k, 1, &tmp);
+ if (ptr != NULL) {
+ X = (*(u8 *)ptr & 0xf) << 2;
+ continue;
+ }
+ return 0;
case BPF_LD|BPF_IMM:
A = fentry->k;
continue;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index f6bdcad47da..1beb782ac41 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -32,6 +32,7 @@
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/random.h>
+#include <linux/string.h>
#define NEIGH_DEBUG 1
@@ -1597,6 +1598,8 @@ static int neightbl_fill_info(struct neigh_table *tbl, struct sk_buff *skb,
read_lock_bh(&tbl->lock);
ndtmsg->ndtm_family = tbl->family;
+ ndtmsg->ndtm_pad1 = 0;
+ ndtmsg->ndtm_pad2 = 0;
RTA_PUT_STRING(skb, NDTA_NAME, tbl->id);
RTA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
@@ -1682,6 +1685,8 @@ static int neightbl_fill_param_info(struct neigh_table *tbl,
read_lock_bh(&tbl->lock);
ndtmsg->ndtm_family = tbl->family;
+ ndtmsg->ndtm_pad1 = 0;
+ ndtmsg->ndtm_pad2 = 0;
RTA_PUT_STRING(skb, NDTA_NAME, tbl->id);
if (neightbl_fill_parms(skb, parms) < 0)
@@ -1871,6 +1876,8 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n,
struct ndmsg *ndm = NLMSG_DATA(nlh);
ndm->ndm_family = n->ops->family;
+ ndm->ndm_pad1 = 0;
+ ndm->ndm_pad2 = 0;
ndm->ndm_flags = n->flags;
ndm->ndm_type = n->type;
ndm->ndm_ifindex = n->dev->ifindex;
@@ -2592,7 +2599,7 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
t->neigh_vars[17].extra1 = dev;
}
- dev_name = net_sysctl_strdup(dev_name_source);
+ dev_name = kstrdup(dev_name_source, GFP_KERNEL);
if (!dev_name) {
err = -ENOBUFS;
goto free;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index a119696d552..c327c9edadc 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -130,19 +130,20 @@ static int checksum_udp(struct sk_buff *skb, struct udphdr *uh,
*/
static void poll_napi(struct netpoll *np)
{
+ struct netpoll_info *npinfo = np->dev->npinfo;
int budget = 16;
if (test_bit(__LINK_STATE_RX_SCHED, &np->dev->state) &&
- np->poll_owner != smp_processor_id() &&
- spin_trylock(&np->poll_lock)) {
- np->rx_flags |= NETPOLL_RX_DROP;
+ npinfo->poll_owner != smp_processor_id() &&
+ spin_trylock(&npinfo->poll_lock)) {
+ npinfo->rx_flags |= NETPOLL_RX_DROP;
atomic_inc(&trapped);
np->dev->poll(np->dev, &budget);
atomic_dec(&trapped);
- np->rx_flags &= ~NETPOLL_RX_DROP;
- spin_unlock(&np->poll_lock);
+ npinfo->rx_flags &= ~NETPOLL_RX_DROP;
+ spin_unlock(&npinfo->poll_lock);
}
}
@@ -245,6 +246,7 @@ repeat:
static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
{
int status;
+ struct netpoll_info *npinfo;
repeat:
if(!np || !np->dev || !netif_running(np->dev)) {
@@ -253,8 +255,9 @@ repeat:
}
/* avoid recursion */
- if(np->poll_owner == smp_processor_id() ||
- np->dev->xmit_lock_owner == smp_processor_id()) {
+ npinfo = np->dev->npinfo;
+ if (npinfo->poll_owner == smp_processor_id() ||
+ np->dev->xmit_lock_owner == smp_processor_id()) {
if (np->drop)
np->drop(skb);
else
@@ -341,14 +344,22 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
static void arp_reply(struct sk_buff *skb)
{
+ struct netpoll_info *npinfo = skb->dev->npinfo;
struct arphdr *arp;
unsigned char *arp_ptr;
int size, type = ARPOP_REPLY, ptype = ETH_P_ARP;
u32 sip, tip;
+ unsigned long flags;
struct sk_buff *send_skb;
- struct netpoll *np = skb->dev->np;
+ struct netpoll *np = NULL;
+
+ spin_lock_irqsave(&npinfo->rx_lock, flags);
+ if (npinfo->rx_np && npinfo->rx_np->dev == skb->dev)
+ np = npinfo->rx_np;
+ spin_unlock_irqrestore(&npinfo->rx_lock, flags);
- if (!np) return;
+ if (!np)
+ return;
/* No arp on this interface */
if (skb->dev->flags & IFF_NOARP)
@@ -429,9 +440,9 @@ int __netpoll_rx(struct sk_buff *skb)
int proto, len, ulen;
struct iphdr *iph;
struct udphdr *uh;
- struct netpoll *np = skb->dev->np;
+ struct netpoll *np = skb->dev->npinfo->rx_np;
- if (!np->rx_hook)
+ if (!np)
goto out;
if (skb->dev->type != ARPHRD_ETHER)
goto out;
@@ -611,9 +622,8 @@ int netpoll_setup(struct netpoll *np)
{
struct net_device *ndev = NULL;
struct in_device *in_dev;
-
- np->poll_lock = SPIN_LOCK_UNLOCKED;
- np->poll_owner = -1;
+ struct netpoll_info *npinfo;
+ unsigned long flags;
if (np->dev_name)
ndev = dev_get_by_name(np->dev_name);
@@ -624,7 +634,17 @@ int netpoll_setup(struct netpoll *np)
}
np->dev = ndev;
- ndev->np = np;
+ if (!ndev->npinfo) {
+ npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
+ if (!npinfo)
+ goto release;
+
+ npinfo->rx_np = NULL;
+ npinfo->poll_lock = SPIN_LOCK_UNLOCKED;
+ npinfo->poll_owner = -1;
+ npinfo->rx_lock = SPIN_LOCK_UNLOCKED;
+ } else
+ npinfo = ndev->npinfo;
if (!ndev->poll_controller) {
printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
@@ -692,13 +712,20 @@ int netpoll_setup(struct netpoll *np)
np->name, HIPQUAD(np->local_ip));
}
- if(np->rx_hook)
- np->rx_flags = NETPOLL_RX_ENABLED;
+ if (np->rx_hook) {
+ spin_lock_irqsave(&npinfo->rx_lock, flags);
+ npinfo->rx_flags |= NETPOLL_RX_ENABLED;
+ npinfo->rx_np = np;
+ spin_unlock_irqrestore(&npinfo->rx_lock, flags);
+ }
+ /* last thing to do is link it to the net device structure */
+ ndev->npinfo = npinfo;
return 0;
release:
- ndev->np = NULL;
+ if (!ndev->npinfo)
+ kfree(npinfo);
np->dev = NULL;
dev_put(ndev);
return -1;
@@ -706,9 +733,20 @@ int netpoll_setup(struct netpoll *np)
void netpoll_cleanup(struct netpoll *np)
{
- if (np->dev)
- np->dev->np = NULL;
- dev_put(np->dev);
+ struct netpoll_info *npinfo;
+ unsigned long flags;
+
+ if (np->dev) {
+ npinfo = np->dev->npinfo;
+ if (npinfo && npinfo->rx_np == np) {
+ spin_lock_irqsave(&npinfo->rx_lock, flags);
+ npinfo->rx_np = NULL;
+ npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
+ spin_unlock_irqrestore(&npinfo->rx_lock, flags);
+ }
+ dev_put(np->dev);
+ }
+
np->dev = NULL;
}
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index c57b06bc79f..8eb083b6041 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -151,7 +151,7 @@
#include <asm/timex.h>
-#define VERSION "pktgen v2.61: Packet Generator for packet performance testing.\n"
+#define VERSION "pktgen v2.62: Packet Generator for packet performance testing.\n"
/* #define PG_DEBUG(a) a */
#define PG_DEBUG(a)
@@ -363,7 +363,7 @@ struct pktgen_thread {
* All Rights Reserved.
*
*/
-inline static s64 divremdi3(s64 x, s64 y, int type)
+static inline s64 divremdi3(s64 x, s64 y, int type)
{
u64 a = (x < 0) ? -x : x;
u64 b = (y < 0) ? -y : y;
@@ -1921,6 +1921,11 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
struct iphdr *iph;
struct pktgen_hdr *pgh = NULL;
+ /* Update any of the values, used when we're incrementing various
+ * fields.
+ */
+ mod_cur_headers(pkt_dev);
+
skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + 16, GFP_ATOMIC);
if (!skb) {
sprintf(pkt_dev->result, "No memory");
@@ -1934,11 +1939,6 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
iph = (struct iphdr *)skb_put(skb, sizeof(struct iphdr));
udph = (struct udphdr *)skb_put(skb, sizeof(struct udphdr));
- /* Update any of the values, used when we're incrementing various
- * fields.
- */
- mod_cur_headers(pkt_dev);
-
memcpy(eth, pkt_dev->hh, 12);
*(u16*)&eth[12] = __constant_htons(ETH_P_IP);
@@ -2192,7 +2192,12 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
int datalen;
struct ipv6hdr *iph;
struct pktgen_hdr *pgh = NULL;
-
+
+ /* Update any of the values, used when we're incrementing various
+ * fields.
+ */
+ mod_cur_headers(pkt_dev);
+
skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + 16, GFP_ATOMIC);
if (!skb) {
sprintf(pkt_dev->result, "No memory");
@@ -2206,17 +2211,9 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
iph = (struct ipv6hdr *)skb_put(skb, sizeof(struct ipv6hdr));
udph = (struct udphdr *)skb_put(skb, sizeof(struct udphdr));
-
- /* Update any of the values, used when we're incrementing various
- * fields.
- */
- mod_cur_headers(pkt_dev);
-
-
memcpy(eth, pkt_dev->hh, 12);
*(u16*)&eth[12] = __constant_htons(ETH_P_IPV6);
-
-
+
datalen = pkt_dev->cur_pkt_size-14-
sizeof(struct ipv6hdr)-sizeof(struct udphdr); /* Eth + IPh + UDPh */
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index e013d836a7a..4b1bb30e638 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -126,6 +126,7 @@ void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data
rta->rta_type = attrtype;
rta->rta_len = size;
memcpy(RTA_DATA(rta), data, attrlen);
+ memset(RTA_DATA(rta) + attrlen, 0, RTA_ALIGN(size) - size);
}
size_t rtattr_strlcpy(char *dest, const struct rtattr *rta, size_t size)
@@ -188,6 +189,7 @@ static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*r), flags);
r = NLMSG_DATA(nlh);
r->ifi_family = AF_UNSPEC;
+ r->__ifi_pad = 0;
r->ifi_type = dev->type;
r->ifi_index = dev->ifindex;
r->ifi_flags = dev_get_flags(dev);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 6d68c03bc05..7eab867ede5 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -129,7 +129,7 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
* Buffers may only be allocated from interrupts using a @gfp_mask of
* %GFP_ATOMIC.
*/
-struct sk_buff *alloc_skb(unsigned int size, int gfp_mask)
+struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask)
{
struct sk_buff *skb;
u8 *data;
@@ -182,7 +182,8 @@ nodata:
* %GFP_ATOMIC.
*/
struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
- unsigned int size, int gfp_mask)
+ unsigned int size,
+ unsigned int __nocast gfp_mask)
{
struct sk_buff *skb;
u8 *data;
@@ -322,7 +323,7 @@ void __kfree_skb(struct sk_buff *skb)
* %GFP_ATOMIC.
*/
-struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
+struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
{
struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
@@ -357,7 +358,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
C(ip_summed);
C(priority);
C(protocol);
- C(security);
n->destructor = NULL;
#ifdef CONFIG_NETFILTER
C(nfmark);
@@ -377,8 +377,8 @@ struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
C(tc_index);
#ifdef CONFIG_NET_CLS_ACT
n->tc_verd = SET_TC_VERD(skb->tc_verd,0);
- n->tc_verd = CLR_TC_OK2MUNGE(skb->tc_verd);
- n->tc_verd = CLR_TC_MUNGED(skb->tc_verd);
+ n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd);
+ n->tc_verd = CLR_TC_MUNGED(n->tc_verd);
C(input_dev);
C(tc_classid);
#endif
@@ -422,7 +422,6 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
new->pkt_type = old->pkt_type;
new->stamp = old->stamp;
new->destructor = NULL;
- new->security = old->security;
#ifdef CONFIG_NETFILTER
new->nfmark = old->nfmark;
new->nfcache = old->nfcache;
@@ -462,7 +461,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
* header is going to be modified. Use pskb_copy() instead.
*/
-struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
+struct sk_buff *skb_copy(const struct sk_buff *skb, unsigned int __nocast gfp_mask)
{
int headerlen = skb->data - skb->head;
/*
@@ -501,7 +500,7 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
* The returned buffer has a reference count of 1.
*/
-struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask)
+struct sk_buff *pskb_copy(struct sk_buff *skb, unsigned int __nocast gfp_mask)
{
/*
* Allocate the copy buffer
@@ -559,7 +558,8 @@ out:
* reloaded after call to this function.
*/
-int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask)
+int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
+ unsigned int __nocast gfp_mask)
{
int i;
u8 *data;
@@ -649,7 +649,8 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
* only by netfilter in the cases when checksum is recalculated? --ANK
*/
struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
- int newheadroom, int newtailroom, int gfp_mask)
+ int newheadroom, int newtailroom,
+ unsigned int __nocast gfp_mask)
{
/*
* Allocate the copy buffer
@@ -1500,6 +1501,159 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
skb_split_no_header(skb, skb1, len, pos);
}
+/**
+ * skb_prepare_seq_read - Prepare a sequential read of skb data
+ * @skb: the buffer to read
+ * @from: lower offset of data to be read
+ * @to: upper offset of data to be read
+ * @st: state variable
+ *
+ * Initializes the specified state variable. Must be called before
+ * invoking skb_seq_read() for the first time.
+ */
+void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from,
+ unsigned int to, struct skb_seq_state *st)
+{
+ st->lower_offset = from;
+ st->upper_offset = to;
+ st->root_skb = st->cur_skb = skb;
+ st->frag_idx = st->stepped_offset = 0;
+ st->frag_data = NULL;
+}
+
+/**
+ * skb_seq_read - Sequentially read skb data
+ * @consumed: number of bytes consumed by the caller so far
+ * @data: destination pointer for data to be returned
+ * @st: state variable
+ *
+ * Reads a block of skb data at &consumed relative to the
+ * lower offset specified to skb_prepare_seq_read(). Assigns
+ * the head of the data block to &data and returns the length
+ * of the block or 0 if the end of the skb data or the upper
+ * offset has been reached.
+ *
+ * The caller is not required to consume all of the data
+ * returned, i.e. &consumed is typically set to the number
+ * of bytes already consumed and the next call to
+ * skb_seq_read() will return the remaining part of the block.
+ *
+ * Note: The size of each block of data returned can be arbitary,
+ * this limitation is the cost for zerocopy seqeuental
+ * reads of potentially non linear data.
+ *
+ * Note: Fragment lists within fragments are not implemented
+ * at the moment, state->root_skb could be replaced with
+ * a stack for this purpose.
+ */
+unsigned int skb_seq_read(unsigned int consumed, const u8 **data,
+ struct skb_seq_state *st)
+{
+ unsigned int block_limit, abs_offset = consumed + st->lower_offset;
+ skb_frag_t *frag;
+
+ if (unlikely(abs_offset >= st->upper_offset))
+ return 0;
+
+next_skb:
+ block_limit = skb_headlen(st->cur_skb);
+
+ if (abs_offset < block_limit) {
+ *data = st->cur_skb->data + abs_offset;
+ return block_limit - abs_offset;
+ }
+
+ if (st->frag_idx == 0 && !st->frag_data)
+ st->stepped_offset += skb_headlen(st->cur_skb);
+
+ while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) {
+ frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx];
+ block_limit = frag->size + st->stepped_offset;
+
+ if (abs_offset < block_limit) {
+ if (!st->frag_data)
+ st->frag_data = kmap_skb_frag(frag);
+
+ *data = (u8 *) st->frag_data + frag->page_offset +
+ (abs_offset - st->stepped_offset);
+
+ return block_limit - abs_offset;
+ }
+
+ if (st->frag_data) {
+ kunmap_skb_frag(st->frag_data);
+ st->frag_data = NULL;
+ }
+
+ st->frag_idx++;
+ st->stepped_offset += frag->size;
+ }
+
+ if (st->cur_skb->next) {
+ st->cur_skb = st->cur_skb->next;
+ st->frag_idx = 0;
+ goto next_skb;
+ } else if (st->root_skb == st->cur_skb &&
+ skb_shinfo(st->root_skb)->frag_list) {
+ st->cur_skb = skb_shinfo(st->root_skb)->frag_list;
+ goto next_skb;
+ }
+
+ return 0;
+}
+
+/**
+ * skb_abort_seq_read - Abort a sequential read of skb data
+ * @st: state variable
+ *
+ * Must be called if skb_seq_read() was not called until it
+ * returned 0.
+ */
+void skb_abort_seq_read(struct skb_seq_state *st)
+{
+ if (st->frag_data)
+ kunmap_skb_frag(st->frag_data);
+}
+
+#define TS_SKB_CB(state) ((struct skb_seq_state *) &((state)->cb))
+
+static unsigned int skb_ts_get_next_block(unsigned int offset, const u8 **text,
+ struct ts_config *conf,
+ struct ts_state *state)
+{
+ return skb_seq_read(offset, text, TS_SKB_CB(state));
+}
+
+static void skb_ts_finish(struct ts_config *conf, struct ts_state *state)
+{
+ skb_abort_seq_read(TS_SKB_CB(state));
+}
+
+/**
+ * skb_find_text - Find a text pattern in skb data
+ * @skb: the buffer to look in
+ * @from: search offset
+ * @to: search limit
+ * @config: textsearch configuration
+ * @state: uninitialized textsearch state variable
+ *
+ * Finds a pattern in the skb data according to the specified
+ * textsearch configuration. Use textsearch_next() to retrieve
+ * subsequent occurrences of the pattern. Returns the offset
+ * to the first occurrence or UINT_MAX if no match was found.
+ */
+unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
+ unsigned int to, struct ts_config *config,
+ struct ts_state *state)
+{
+ config->get_next_block = skb_ts_get_next_block;
+ config->finish = skb_ts_finish;
+
+ skb_prepare_seq_read(skb, from, to, TS_SKB_CB(state));
+
+ return textsearch_find(config, state);
+}
+
void __init skb_init(void)
{
skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
@@ -1538,3 +1692,7 @@ EXPORT_SYMBOL(skb_queue_tail);
EXPORT_SYMBOL(skb_unlink);
EXPORT_SYMBOL(skb_append);
EXPORT_SYMBOL(skb_split);
+EXPORT_SYMBOL(skb_prepare_seq_read);
+EXPORT_SYMBOL(skb_seq_read);
+EXPORT_SYMBOL(skb_abort_seq_read);
+EXPORT_SYMBOL(skb_find_text);
diff --git a/net/core/sock.c b/net/core/sock.c
index a6ec3ada7f9..12f6d9a2a52 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -206,13 +206,14 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
*/
#ifdef SO_DONTLINGER /* Compatibility item... */
- switch (optname) {
- case SO_DONTLINGER:
- sock_reset_flag(sk, SOCK_LINGER);
- return 0;
+ if (optname == SO_DONTLINGER) {
+ lock_sock(sk);
+ sock_reset_flag(sk, SOCK_LINGER);
+ release_sock(sk);
+ return 0;
}
-#endif
-
+#endif
+
if(optlen<sizeof(int))
return(-EINVAL);
@@ -622,7 +623,8 @@ lenout:
* @prot: struct proto associated with this new sock instance
* @zero_it: if we should zero the newly allocated sock
*/
-struct sock *sk_alloc(int family, int priority, struct proto *prot, int zero_it)
+struct sock *sk_alloc(int family, unsigned int __nocast priority,
+ struct proto *prot, int zero_it)
{
struct sock *sk = NULL;
kmem_cache_t *slab = prot->slab;
@@ -750,7 +752,8 @@ unsigned long sock_i_ino(struct sock *sk)
/*
* Allocate a skb from the socket's send buffer.
*/
-struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int priority)
+struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
+ unsigned int __nocast priority)
{
if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
struct sk_buff * skb = alloc_skb(size, priority);
@@ -765,7 +768,8 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int
/*
* Allocate a skb from the socket's receive buffer.
*/
-struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int priority)
+struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
+ unsigned int __nocast priority)
{
if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
struct sk_buff *skb = alloc_skb(size, priority);
@@ -780,7 +784,7 @@ struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int
/*
* Allocate a memory block from the socket's option memory buffer.
*/
-void *sock_kmalloc(struct sock *sk, int size, int priority)
+void *sock_kmalloc(struct sock *sk, int size, unsigned int __nocast priority)
{
if ((unsigned)size <= sysctl_optmem_max &&
atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index c8be646cb19..8f817ad9f54 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -13,12 +13,8 @@
#ifdef CONFIG_SYSCTL
extern int netdev_max_backlog;
+extern int netdev_budget;
extern int weight_p;
-extern int no_cong_thresh;
-extern int no_cong;
-extern int lo_cong;
-extern int mod_cong;
-extern int netdev_fastroute;
extern int net_msg_cost;
extern int net_msg_burst;
@@ -35,19 +31,6 @@ extern int sysctl_somaxconn;
extern char sysctl_divert_version[];
#endif /* CONFIG_NET_DIVERT */
-/*
- * This strdup() is used for creating copies of network
- * device names to be handed over to sysctl.
- */
-
-char *net_sysctl_strdup(const char *s)
-{
- char *rv = kmalloc(strlen(s)+1, GFP_KERNEL);
- if (rv)
- strcpy(rv, s);
- return rv;
-}
-
ctl_table core_table[] = {
#ifdef CONFIG_NET
{
@@ -99,38 +82,6 @@ ctl_table core_table[] = {
.proc_handler = &proc_dointvec
},
{
- .ctl_name = NET_CORE_NO_CONG_THRESH,
- .procname = "no_cong_thresh",
- .data = &no_cong_thresh,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = NET_CORE_NO_CONG,
- .procname = "no_cong",
- .data = &no_cong,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = NET_CORE_LO_CONG,
- .procname = "lo_cong",
- .data = &lo_cong,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = NET_CORE_MOD_CONG,
- .procname = "mod_cong",
- .data = &mod_cong,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
.ctl_name = NET_CORE_MSG_COST,
.procname = "message_cost",
.data = &net_msg_cost,
@@ -174,9 +125,15 @@ ctl_table core_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec
},
+ {
+ .ctl_name = NET_CORE_BUDGET,
+ .procname = "netdev_budget",
+ .data = &netdev_budget,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
{ .ctl_name = 0 }
};
-EXPORT_SYMBOL(net_sysctl_strdup);
-
#endif
diff --git a/net/core/utils.c b/net/core/utils.c
index e11a8654f36..88eb8b68e26 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -23,10 +23,10 @@
#include <linux/percpu.h>
#include <linux/init.h>
+#include <asm/byteorder.h>
#include <asm/system.h>
#include <asm/uaccess.h>
-
/*
This is a maximally equidistributed combined Tausworthe generator
based on code from GNU Scientific Library 1.5 (30 Jun 2004)
@@ -153,3 +153,38 @@ int net_ratelimit(void)
EXPORT_SYMBOL(net_random);
EXPORT_SYMBOL(net_ratelimit);
EXPORT_SYMBOL(net_srandom);
+
+/*
+ * Convert an ASCII string to binary IP.
+ * This is outside of net/ipv4/ because various code that uses IP addresses
+ * is otherwise not dependent on the TCP/IP stack.
+ */
+
+__u32 in_aton(const char *str)
+{
+ unsigned long l;
+ unsigned int val;
+ int i;
+
+ l = 0;
+ for (i = 0; i < 4; i++)
+ {
+ l <<= 8;
+ if (*str != '\0')
+ {
+ val = 0;
+ while (*str != '\0' && *str != '.')
+ {
+ val *= 10;
+ val += *str - '0';
+ str++;
+ }
+ l |= val;
+ if (*str != '\0')
+ str++;
+ }
+ }
+ return(htonl(l));
+}
+
+EXPORT_SYMBOL(in_aton);
diff --git a/net/core/wireless.c b/net/core/wireless.c
index b2fe378dfbf..3ff5639c0b7 100644
--- a/net/core/wireless.c
+++ b/net/core/wireless.c
@@ -1102,6 +1102,7 @@ static inline int rtnetlink_fill_iwinfo(struct sk_buff * skb,
nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(*r));
r = NLMSG_DATA(nlh);
r->ifi_family = AF_UNSPEC;
+ r->__ifi_pad = 0;
r->ifi_type = dev->type;
r->ifi_index = dev->ifindex;
r->ifi_flags = dev->flags;