diff options
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/Makefile | 3 | ||||
-rw-r--r-- | net/core/dev.c | 135 | ||||
-rw-r--r-- | net/core/dst.c | 15 | ||||
-rw-r--r-- | net/core/filter.c | 104 | ||||
-rw-r--r-- | net/core/neighbour.c | 9 | ||||
-rw-r--r-- | net/core/netpoll.c | 80 | ||||
-rw-r--r-- | net/core/pktgen.c | 31 | ||||
-rw-r--r-- | net/core/rtnetlink.c | 2 | ||||
-rw-r--r-- | net/core/skbuff.c | 180 | ||||
-rw-r--r-- | net/core/sock.c | 24 | ||||
-rw-r--r-- | net/core/sysctl_net_core.c | 61 | ||||
-rw-r--r-- | net/core/utils.c | 37 | ||||
-rw-r--r-- | net/core/wireless.c | 1 |
13 files changed, 370 insertions, 312 deletions
diff --git a/net/core/Makefile b/net/core/Makefile index 5e0c56b7f60..f5f5e58943e 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -7,9 +7,10 @@ obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o -obj-y += flow.o dev.o ethtool.o dev_mcast.o dst.o \ +obj-y += dev.o ethtool.o dev_mcast.o dst.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o +obj-$(CONFIG_XFRM) += flow.o obj-$(CONFIG_SYSFS) += net-sysfs.o obj-$(CONFIG_NETFILTER) += netfilter.o obj-$(CONFIG_NET_DIVERT) += dv.o diff --git a/net/core/dev.c b/net/core/dev.c index ab935778ce8..52a3bf7ae17 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -115,18 +115,6 @@ #endif /* CONFIG_NET_RADIO */ #include <asm/current.h> -/* This define, if set, will randomly drop a packet when congestion - * is more than moderate. It helps fairness in the multi-interface - * case when one of them is a hog, but it kills performance for the - * single interface case so it is off now by default. - */ -#undef RAND_LIE - -/* Setting this will sample the queue lengths and thus congestion - * via a timer instead of as each packet is received. - */ -#undef OFFLINE_SAMPLE - /* * The list of packet types we will receive (as opposed to discard) * and the routines to invoke. @@ -159,11 +147,6 @@ static DEFINE_SPINLOCK(ptype_lock); static struct list_head ptype_base[16]; /* 16 way hashed list */ static struct list_head ptype_all; /* Taps */ -#ifdef OFFLINE_SAMPLE -static void sample_queue(unsigned long dummy); -static struct timer_list samp_timer = TIMER_INITIALIZER(sample_queue, 0, 0); -#endif - /* * The @dev_base list is protected by @dev_base_lock and the rtln * semaphore. @@ -215,7 +198,7 @@ static struct notifier_block *netdev_chain; * Device drivers call our routines to queue packets here. We empty the * queue in the local softnet handler. */ -DEFINE_PER_CPU(struct softnet_data, softnet_data) = { 0, }; +DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL }; #ifdef CONFIG_SYSFS extern int netdev_sysfs_init(void); @@ -918,8 +901,7 @@ int dev_close(struct net_device *dev) smp_mb__after_clear_bit(); /* Commit netif_running(). */ while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) { /* No hurry. */ - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(1); + msleep(1); } /* @@ -1144,7 +1126,7 @@ static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) extern void skb_release_data(struct sk_buff *); /* Keep head the same: replace data */ -int __skb_linearize(struct sk_buff *skb, int gfp_mask) +int __skb_linearize(struct sk_buff *skb, unsigned int __nocast gfp_mask) { unsigned int size; u8 *data; @@ -1363,71 +1345,13 @@ out: Receiver routines =======================================================================*/ -int netdev_max_backlog = 300; +int netdev_max_backlog = 1000; +int netdev_budget = 300; int weight_p = 64; /* old backlog weight */ -/* These numbers are selected based on intuition and some - * experimentatiom, if you have more scientific way of doing this - * please go ahead and fix things. - */ -int no_cong_thresh = 10; -int no_cong = 20; -int lo_cong = 100; -int mod_cong = 290; DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; -static void get_sample_stats(int cpu) -{ -#ifdef RAND_LIE - unsigned long rd; - int rq; -#endif - struct softnet_data *sd = &per_cpu(softnet_data, cpu); - int blog = sd->input_pkt_queue.qlen; - int avg_blog = sd->avg_blog; - - avg_blog = (avg_blog >> 1) + (blog >> 1); - - if (avg_blog > mod_cong) { - /* Above moderate congestion levels. */ - sd->cng_level = NET_RX_CN_HIGH; -#ifdef RAND_LIE - rd = net_random(); - rq = rd % netdev_max_backlog; - if (rq < avg_blog) /* unlucky bastard */ - sd->cng_level = NET_RX_DROP; -#endif - } else if (avg_blog > lo_cong) { - sd->cng_level = NET_RX_CN_MOD; -#ifdef RAND_LIE - rd = net_random(); - rq = rd % netdev_max_backlog; - if (rq < avg_blog) /* unlucky bastard */ - sd->cng_level = NET_RX_CN_HIGH; -#endif - } else if (avg_blog > no_cong) - sd->cng_level = NET_RX_CN_LOW; - else /* no congestion */ - sd->cng_level = NET_RX_SUCCESS; - - sd->avg_blog = avg_blog; -} - -#ifdef OFFLINE_SAMPLE -static void sample_queue(unsigned long dummy) -{ -/* 10 ms 0r 1ms -- i don't care -- JHS */ - int next_tick = 1; - int cpu = smp_processor_id(); - - get_sample_stats(cpu); - next_tick += jiffies; - mod_timer(&samp_timer, next_tick); -} -#endif - - /** * netif_rx - post buffer to the network code * @skb: buffer to post @@ -1448,7 +1372,6 @@ static void sample_queue(unsigned long dummy) int netif_rx(struct sk_buff *skb) { - int this_cpu; struct softnet_data *queue; unsigned long flags; @@ -1464,38 +1387,22 @@ int netif_rx(struct sk_buff *skb) * short when CPU is congested, but is still operating. */ local_irq_save(flags); - this_cpu = smp_processor_id(); queue = &__get_cpu_var(softnet_data); __get_cpu_var(netdev_rx_stat).total++; if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { if (queue->input_pkt_queue.qlen) { - if (queue->throttle) - goto drop; - enqueue: dev_hold(skb->dev); __skb_queue_tail(&queue->input_pkt_queue, skb); -#ifndef OFFLINE_SAMPLE - get_sample_stats(this_cpu); -#endif local_irq_restore(flags); - return queue->cng_level; + return NET_RX_SUCCESS; } - if (queue->throttle) - queue->throttle = 0; - netif_rx_schedule(&queue->backlog_dev); goto enqueue; } - if (!queue->throttle) { - queue->throttle = 1; - __get_cpu_var(netdev_rx_stat).throttled++; - } - -drop: __get_cpu_var(netdev_rx_stat).dropped++; local_irq_restore(flags); @@ -1780,8 +1687,6 @@ job_done: smp_mb__before_clear_bit(); netif_poll_enable(backlog_dev); - if (queue->throttle) - queue->throttle = 0; local_irq_enable(); return 0; } @@ -1790,8 +1695,7 @@ static void net_rx_action(struct softirq_action *h) { struct softnet_data *queue = &__get_cpu_var(softnet_data); unsigned long start_time = jiffies; - int budget = netdev_max_backlog; - + int budget = netdev_budget; local_irq_disable(); @@ -2055,15 +1959,9 @@ static int softnet_seq_show(struct seq_file *seq, void *v) struct netif_rx_stats *s = v; seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", - s->total, s->dropped, s->time_squeeze, s->throttled, - s->fastroute_hit, s->fastroute_success, s->fastroute_defer, - s->fastroute_deferred_out, -#if 0 - s->fastroute_latency_reduction -#else - s->cpu_collision -#endif - ); + s->total, s->dropped, s->time_squeeze, 0, + 0, 0, 0, 0, /* was fastroute */ + s->cpu_collision ); return 0; } @@ -2190,10 +2088,11 @@ void dev_set_promiscuity(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; - dev->flags |= IFF_PROMISC; if ((dev->promiscuity += inc) == 0) dev->flags &= ~IFF_PROMISC; - if (dev->flags ^ old_flags) { + else + dev->flags |= IFF_PROMISC; + if (dev->flags != old_flags) { dev_mc_upload(dev); printk(KERN_INFO "device %s %s promiscuous mode\n", dev->name, (dev->flags & IFF_PROMISC) ? "entered" : @@ -3305,9 +3204,6 @@ static int __init net_dev_init(void) queue = &per_cpu(softnet_data, i); skb_queue_head_init(&queue->input_pkt_queue); - queue->throttle = 0; - queue->cng_level = 0; - queue->avg_blog = 10; /* arbitrary non-zero */ queue->completion_queue = NULL; INIT_LIST_HEAD(&queue->poll_list); set_bit(__LINK_STATE_START, &queue->backlog_dev.state); @@ -3316,11 +3212,6 @@ static int __init net_dev_init(void) atomic_set(&queue->backlog_dev.refcnt, 1); } -#ifdef OFFLINE_SAMPLE - samp_timer.expires = jiffies + (10 * HZ); - add_timer(&samp_timer); -#endif - dev_boot_phase = 0; open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL); diff --git a/net/core/dst.c b/net/core/dst.c index fc434ade527..334790da9f1 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -45,6 +45,7 @@ static struct timer_list dst_gc_timer = static void dst_run_gc(unsigned long dummy) { int delayed = 0; + int work_performed; struct dst_entry * dst, **dstp; if (!spin_trylock(&dst_lock)) { @@ -52,9 +53,9 @@ static void dst_run_gc(unsigned long dummy) return; } - del_timer(&dst_gc_timer); dstp = &dst_garbage_list; + work_performed = 0; while ((dst = *dstp) != NULL) { if (atomic_read(&dst->__refcnt)) { dstp = &dst->next; @@ -62,6 +63,7 @@ static void dst_run_gc(unsigned long dummy) continue; } *dstp = dst->next; + work_performed = 1; dst = dst_destroy(dst); if (dst) { @@ -86,9 +88,14 @@ static void dst_run_gc(unsigned long dummy) dst_gc_timer_inc = DST_GC_MAX; goto out; } - if ((dst_gc_timer_expires += dst_gc_timer_inc) > DST_GC_MAX) - dst_gc_timer_expires = DST_GC_MAX; - dst_gc_timer_inc += DST_GC_INC; + if (!work_performed) { + if ((dst_gc_timer_expires += dst_gc_timer_inc) > DST_GC_MAX) + dst_gc_timer_expires = DST_GC_MAX; + dst_gc_timer_inc += DST_GC_INC; + } else { + dst_gc_timer_inc = DST_GC_INC; + dst_gc_timer_expires = DST_GC_MIN; + } dst_gc_timer.expires = jiffies + dst_gc_timer_expires; #if RT_CACHE_DEBUG >= 2 printk("dst_total: %d/%d %ld\n", diff --git a/net/core/filter.c b/net/core/filter.c index f3b88205ace..cd91a24f972 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -36,7 +36,7 @@ #include <linux/filter.h> /* No hurry in this branch */ -static u8 *load_pointer(struct sk_buff *skb, int k) +static void *__load_pointer(struct sk_buff *skb, int k) { u8 *ptr = NULL; @@ -50,6 +50,18 @@ static u8 *load_pointer(struct sk_buff *skb, int k) return NULL; } +static inline void *load_pointer(struct sk_buff *skb, int k, + unsigned int size, void *buffer) +{ + if (k >= 0) + return skb_header_pointer(skb, k, size, buffer); + else { + if (k >= SKF_AD_OFF) + return NULL; + return __load_pointer(skb, k); + } +} + /** * sk_run_filter - run a filter on a socket * @skb: buffer to run the filter on @@ -64,15 +76,12 @@ static u8 *load_pointer(struct sk_buff *skb, int k) int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen) { - unsigned char *data = skb->data; - /* len is UNSIGNED. Byte wide insns relies only on implicit - type casts to prevent reading arbitrary memory locations. - */ - unsigned int len = skb->len-skb->data_len; struct sock_filter *fentry; /* We walk down these */ + void *ptr; u32 A = 0; /* Accumulator */ u32 X = 0; /* Index Register */ u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */ + u32 tmp; int k; int pc; @@ -168,86 +177,35 @@ int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen) case BPF_LD|BPF_W|BPF_ABS: k = fentry->k; load_w: - if (k >= 0 && (unsigned int)(k+sizeof(u32)) <= len) { - A = ntohl(*(u32*)&data[k]); + ptr = load_pointer(skb, k, 4, &tmp); + if (ptr != NULL) { + A = ntohl(*(u32 *)ptr); continue; } - if (k < 0) { - u8 *ptr; - - if (k >= SKF_AD_OFF) - break; - ptr = load_pointer(skb, k); - if (ptr) { - A = ntohl(*(u32*)ptr); - continue; - } - } else { - u32 _tmp, *p; - p = skb_header_pointer(skb, k, 4, &_tmp); - if (p != NULL) { - A = ntohl(*p); - continue; - } - } return 0; case BPF_LD|BPF_H|BPF_ABS: k = fentry->k; load_h: - if (k >= 0 && (unsigned int)(k + sizeof(u16)) <= len) { - A = ntohs(*(u16*)&data[k]); + ptr = load_pointer(skb, k, 2, &tmp); + if (ptr != NULL) { + A = ntohs(*(u16 *)ptr); continue; } - if (k < 0) { - u8 *ptr; - - if (k >= SKF_AD_OFF) - break; - ptr = load_pointer(skb, k); - if (ptr) { - A = ntohs(*(u16*)ptr); - continue; - } - } else { - u16 _tmp, *p; - p = skb_header_pointer(skb, k, 2, &_tmp); - if (p != NULL) { - A = ntohs(*p); - continue; - } - } return 0; case BPF_LD|BPF_B|BPF_ABS: k = fentry->k; load_b: - if (k >= 0 && (unsigned int)k < len) { - A = data[k]; + ptr = load_pointer(skb, k, 1, &tmp); + if (ptr != NULL) { + A = *(u8 *)ptr; continue; } - if (k < 0) { - u8 *ptr; - - if (k >= SKF_AD_OFF) - break; - ptr = load_pointer(skb, k); - if (ptr) { - A = *ptr; - continue; - } - } else { - u8 _tmp, *p; - p = skb_header_pointer(skb, k, 1, &_tmp); - if (p != NULL) { - A = *p; - continue; - } - } return 0; case BPF_LD|BPF_W|BPF_LEN: - A = len; + A = skb->len; continue; case BPF_LDX|BPF_W|BPF_LEN: - X = len; + X = skb->len; continue; case BPF_LD|BPF_W|BPF_IND: k = X + fentry->k; @@ -259,10 +217,12 @@ load_b: k = X + fentry->k; goto load_b; case BPF_LDX|BPF_B|BPF_MSH: - if (fentry->k >= len) - return 0; - X = (data[fentry->k] & 0xf) << 2; - continue; + ptr = load_pointer(skb, fentry->k, 1, &tmp); + if (ptr != NULL) { + X = (*(u8 *)ptr & 0xf) << 2; + continue; + } + return 0; case BPF_LD|BPF_IMM: A = fentry->k; continue; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index f6bdcad47da..1beb782ac41 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -32,6 +32,7 @@ #include <net/sock.h> #include <linux/rtnetlink.h> #include <linux/random.h> +#include <linux/string.h> #define NEIGH_DEBUG 1 @@ -1597,6 +1598,8 @@ static int neightbl_fill_info(struct neigh_table *tbl, struct sk_buff *skb, read_lock_bh(&tbl->lock); ndtmsg->ndtm_family = tbl->family; + ndtmsg->ndtm_pad1 = 0; + ndtmsg->ndtm_pad2 = 0; RTA_PUT_STRING(skb, NDTA_NAME, tbl->id); RTA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval); @@ -1682,6 +1685,8 @@ static int neightbl_fill_param_info(struct neigh_table *tbl, read_lock_bh(&tbl->lock); ndtmsg->ndtm_family = tbl->family; + ndtmsg->ndtm_pad1 = 0; + ndtmsg->ndtm_pad2 = 0; RTA_PUT_STRING(skb, NDTA_NAME, tbl->id); if (neightbl_fill_parms(skb, parms) < 0) @@ -1871,6 +1876,8 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n, struct ndmsg *ndm = NLMSG_DATA(nlh); ndm->ndm_family = n->ops->family; + ndm->ndm_pad1 = 0; + ndm->ndm_pad2 = 0; ndm->ndm_flags = n->flags; ndm->ndm_type = n->type; ndm->ndm_ifindex = n->dev->ifindex; @@ -2592,7 +2599,7 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, t->neigh_vars[17].extra1 = dev; } - dev_name = net_sysctl_strdup(dev_name_source); + dev_name = kstrdup(dev_name_source, GFP_KERNEL); if (!dev_name) { err = -ENOBUFS; goto free; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index a119696d552..c327c9edadc 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -130,19 +130,20 @@ static int checksum_udp(struct sk_buff *skb, struct udphdr *uh, */ static void poll_napi(struct netpoll *np) { + struct netpoll_info *npinfo = np->dev->npinfo; int budget = 16; if (test_bit(__LINK_STATE_RX_SCHED, &np->dev->state) && - np->poll_owner != smp_processor_id() && - spin_trylock(&np->poll_lock)) { - np->rx_flags |= NETPOLL_RX_DROP; + npinfo->poll_owner != smp_processor_id() && + spin_trylock(&npinfo->poll_lock)) { + npinfo->rx_flags |= NETPOLL_RX_DROP; atomic_inc(&trapped); np->dev->poll(np->dev, &budget); atomic_dec(&trapped); - np->rx_flags &= ~NETPOLL_RX_DROP; - spin_unlock(&np->poll_lock); + npinfo->rx_flags &= ~NETPOLL_RX_DROP; + spin_unlock(&npinfo->poll_lock); } } @@ -245,6 +246,7 @@ repeat: static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) { int status; + struct netpoll_info *npinfo; repeat: if(!np || !np->dev || !netif_running(np->dev)) { @@ -253,8 +255,9 @@ repeat: } /* avoid recursion */ - if(np->poll_owner == smp_processor_id() || - np->dev->xmit_lock_owner == smp_processor_id()) { + npinfo = np->dev->npinfo; + if (npinfo->poll_owner == smp_processor_id() || + np->dev->xmit_lock_owner == smp_processor_id()) { if (np->drop) np->drop(skb); else @@ -341,14 +344,22 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) static void arp_reply(struct sk_buff *skb) { + struct netpoll_info *npinfo = skb->dev->npinfo; struct arphdr *arp; unsigned char *arp_ptr; int size, type = ARPOP_REPLY, ptype = ETH_P_ARP; u32 sip, tip; + unsigned long flags; struct sk_buff *send_skb; - struct netpoll *np = skb->dev->np; + struct netpoll *np = NULL; + + spin_lock_irqsave(&npinfo->rx_lock, flags); + if (npinfo->rx_np && npinfo->rx_np->dev == skb->dev) + np = npinfo->rx_np; + spin_unlock_irqrestore(&npinfo->rx_lock, flags); - if (!np) return; + if (!np) + return; /* No arp on this interface */ if (skb->dev->flags & IFF_NOARP) @@ -429,9 +440,9 @@ int __netpoll_rx(struct sk_buff *skb) int proto, len, ulen; struct iphdr *iph; struct udphdr *uh; - struct netpoll *np = skb->dev->np; + struct netpoll *np = skb->dev->npinfo->rx_np; - if (!np->rx_hook) + if (!np) goto out; if (skb->dev->type != ARPHRD_ETHER) goto out; @@ -611,9 +622,8 @@ int netpoll_setup(struct netpoll *np) { struct net_device *ndev = NULL; struct in_device *in_dev; - - np->poll_lock = SPIN_LOCK_UNLOCKED; - np->poll_owner = -1; + struct netpoll_info *npinfo; + unsigned long flags; if (np->dev_name) ndev = dev_get_by_name(np->dev_name); @@ -624,7 +634,17 @@ int netpoll_setup(struct netpoll *np) } np->dev = ndev; - ndev->np = np; + if (!ndev->npinfo) { + npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL); + if (!npinfo) + goto release; + + npinfo->rx_np = NULL; + npinfo->poll_lock = SPIN_LOCK_UNLOCKED; + npinfo->poll_owner = -1; + npinfo->rx_lock = SPIN_LOCK_UNLOCKED; + } else + npinfo = ndev->npinfo; if (!ndev->poll_controller) { printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n", @@ -692,13 +712,20 @@ int netpoll_setup(struct netpoll *np) np->name, HIPQUAD(np->local_ip)); } - if(np->rx_hook) - np->rx_flags = NETPOLL_RX_ENABLED; + if (np->rx_hook) { + spin_lock_irqsave(&npinfo->rx_lock, flags); + npinfo->rx_flags |= NETPOLL_RX_ENABLED; + npinfo->rx_np = np; + spin_unlock_irqrestore(&npinfo->rx_lock, flags); + } + /* last thing to do is link it to the net device structure */ + ndev->npinfo = npinfo; return 0; release: - ndev->np = NULL; + if (!ndev->npinfo) + kfree(npinfo); np->dev = NULL; dev_put(ndev); return -1; @@ -706,9 +733,20 @@ int netpoll_setup(struct netpoll *np) void netpoll_cleanup(struct netpoll *np) { - if (np->dev) - np->dev->np = NULL; - dev_put(np->dev); + struct netpoll_info *npinfo; + unsigned long flags; + + if (np->dev) { + npinfo = np->dev->npinfo; + if (npinfo && npinfo->rx_np == np) { + spin_lock_irqsave(&npinfo->rx_lock, flags); + npinfo->rx_np = NULL; + npinfo->rx_flags &= ~NETPOLL_RX_ENABLED; + spin_unlock_irqrestore(&npinfo->rx_lock, flags); + } + dev_put(np->dev); + } + np->dev = NULL; } diff --git a/net/core/pktgen.c b/net/core/pktgen.c index c57b06bc79f..8eb083b6041 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -151,7 +151,7 @@ #include <asm/timex.h> -#define VERSION "pktgen v2.61: Packet Generator for packet performance testing.\n" +#define VERSION "pktgen v2.62: Packet Generator for packet performance testing.\n" /* #define PG_DEBUG(a) a */ #define PG_DEBUG(a) @@ -363,7 +363,7 @@ struct pktgen_thread { * All Rights Reserved. * */ -inline static s64 divremdi3(s64 x, s64 y, int type) +static inline s64 divremdi3(s64 x, s64 y, int type) { u64 a = (x < 0) ? -x : x; u64 b = (y < 0) ? -y : y; @@ -1921,6 +1921,11 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, struct iphdr *iph; struct pktgen_hdr *pgh = NULL; + /* Update any of the values, used when we're incrementing various + * fields. + */ + mod_cur_headers(pkt_dev); + skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + 16, GFP_ATOMIC); if (!skb) { sprintf(pkt_dev->result, "No memory"); @@ -1934,11 +1939,6 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, iph = (struct iphdr *)skb_put(skb, sizeof(struct iphdr)); udph = (struct udphdr *)skb_put(skb, sizeof(struct udphdr)); - /* Update any of the values, used when we're incrementing various - * fields. - */ - mod_cur_headers(pkt_dev); - memcpy(eth, pkt_dev->hh, 12); *(u16*)ð[12] = __constant_htons(ETH_P_IP); @@ -2192,7 +2192,12 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, int datalen; struct ipv6hdr *iph; struct pktgen_hdr *pgh = NULL; - + + /* Update any of the values, used when we're incrementing various + * fields. + */ + mod_cur_headers(pkt_dev); + skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + 16, GFP_ATOMIC); if (!skb) { sprintf(pkt_dev->result, "No memory"); @@ -2206,17 +2211,9 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, iph = (struct ipv6hdr *)skb_put(skb, sizeof(struct ipv6hdr)); udph = (struct udphdr *)skb_put(skb, sizeof(struct udphdr)); - - /* Update any of the values, used when we're incrementing various - * fields. - */ - mod_cur_headers(pkt_dev); - - memcpy(eth, pkt_dev->hh, 12); *(u16*)ð[12] = __constant_htons(ETH_P_IPV6); - - + datalen = pkt_dev->cur_pkt_size-14- sizeof(struct ipv6hdr)-sizeof(struct udphdr); /* Eth + IPh + UDPh */ diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index e013d836a7a..4b1bb30e638 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -126,6 +126,7 @@ void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data rta->rta_type = attrtype; rta->rta_len = size; memcpy(RTA_DATA(rta), data, attrlen); + memset(RTA_DATA(rta) + attrlen, 0, RTA_ALIGN(size) - size); } size_t rtattr_strlcpy(char *dest, const struct rtattr *rta, size_t size) @@ -188,6 +189,7 @@ static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*r), flags); r = NLMSG_DATA(nlh); r->ifi_family = AF_UNSPEC; + r->__ifi_pad = 0; r->ifi_type = dev->type; r->ifi_index = dev->ifindex; r->ifi_flags = dev_get_flags(dev); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 6d68c03bc05..7eab867ede5 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -129,7 +129,7 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here) * Buffers may only be allocated from interrupts using a @gfp_mask of * %GFP_ATOMIC. */ -struct sk_buff *alloc_skb(unsigned int size, int gfp_mask) +struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask) { struct sk_buff *skb; u8 *data; @@ -182,7 +182,8 @@ nodata: * %GFP_ATOMIC. */ struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, - unsigned int size, int gfp_mask) + unsigned int size, + unsigned int __nocast gfp_mask) { struct sk_buff *skb; u8 *data; @@ -322,7 +323,7 @@ void __kfree_skb(struct sk_buff *skb) * %GFP_ATOMIC. */ -struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask) +struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask) { struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); @@ -357,7 +358,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask) C(ip_summed); C(priority); C(protocol); - C(security); n->destructor = NULL; #ifdef CONFIG_NETFILTER C(nfmark); @@ -377,8 +377,8 @@ struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask) C(tc_index); #ifdef CONFIG_NET_CLS_ACT n->tc_verd = SET_TC_VERD(skb->tc_verd,0); - n->tc_verd = CLR_TC_OK2MUNGE(skb->tc_verd); - n->tc_verd = CLR_TC_MUNGED(skb->tc_verd); + n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd); + n->tc_verd = CLR_TC_MUNGED(n->tc_verd); C(input_dev); C(tc_classid); #endif @@ -422,7 +422,6 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->pkt_type = old->pkt_type; new->stamp = old->stamp; new->destructor = NULL; - new->security = old->security; #ifdef CONFIG_NETFILTER new->nfmark = old->nfmark; new->nfcache = old->nfcache; @@ -462,7 +461,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) * header is going to be modified. Use pskb_copy() instead. */ -struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask) +struct sk_buff *skb_copy(const struct sk_buff *skb, unsigned int __nocast gfp_mask) { int headerlen = skb->data - skb->head; /* @@ -501,7 +500,7 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask) * The returned buffer has a reference count of 1. */ -struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask) +struct sk_buff *pskb_copy(struct sk_buff *skb, unsigned int __nocast gfp_mask) { /* * Allocate the copy buffer @@ -559,7 +558,8 @@ out: * reloaded after call to this function. */ -int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask) +int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, + unsigned int __nocast gfp_mask) { int i; u8 *data; @@ -649,7 +649,8 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) * only by netfilter in the cases when checksum is recalculated? --ANK */ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, - int newheadroom, int newtailroom, int gfp_mask) + int newheadroom, int newtailroom, + unsigned int __nocast gfp_mask) { /* * Allocate the copy buffer @@ -1500,6 +1501,159 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) skb_split_no_header(skb, skb1, len, pos); } +/** + * skb_prepare_seq_read - Prepare a sequential read of skb data + * @skb: the buffer to read + * @from: lower offset of data to be read + * @to: upper offset of data to be read + * @st: state variable + * + * Initializes the specified state variable. Must be called before + * invoking skb_seq_read() for the first time. + */ +void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from, + unsigned int to, struct skb_seq_state *st) +{ + st->lower_offset = from; + st->upper_offset = to; + st->root_skb = st->cur_skb = skb; + st->frag_idx = st->stepped_offset = 0; + st->frag_data = NULL; +} + +/** + * skb_seq_read - Sequentially read skb data + * @consumed: number of bytes consumed by the caller so far + * @data: destination pointer for data to be returned + * @st: state variable + * + * Reads a block of skb data at &consumed relative to the + * lower offset specified to skb_prepare_seq_read(). Assigns + * the head of the data block to &data and returns the length + * of the block or 0 if the end of the skb data or the upper + * offset has been reached. + * + * The caller is not required to consume all of the data + * returned, i.e. &consumed is typically set to the number + * of bytes already consumed and the next call to + * skb_seq_read() will return the remaining part of the block. + * + * Note: The size of each block of data returned can be arbitary, + * this limitation is the cost for zerocopy seqeuental + * reads of potentially non linear data. + * + * Note: Fragment lists within fragments are not implemented + * at the moment, state->root_skb could be replaced with + * a stack for this purpose. + */ +unsigned int skb_seq_read(unsigned int consumed, const u8 **data, + struct skb_seq_state *st) +{ + unsigned int block_limit, abs_offset = consumed + st->lower_offset; + skb_frag_t *frag; + + if (unlikely(abs_offset >= st->upper_offset)) + return 0; + +next_skb: + block_limit = skb_headlen(st->cur_skb); + + if (abs_offset < block_limit) { + *data = st->cur_skb->data + abs_offset; + return block_limit - abs_offset; + } + + if (st->frag_idx == 0 && !st->frag_data) + st->stepped_offset += skb_headlen(st->cur_skb); + + while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) { + frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx]; + block_limit = frag->size + st->stepped_offset; + + if (abs_offset < block_limit) { + if (!st->frag_data) + st->frag_data = kmap_skb_frag(frag); + + *data = (u8 *) st->frag_data + frag->page_offset + + (abs_offset - st->stepped_offset); + + return block_limit - abs_offset; + } + + if (st->frag_data) { + kunmap_skb_frag(st->frag_data); + st->frag_data = NULL; + } + + st->frag_idx++; + st->stepped_offset += frag->size; + } + + if (st->cur_skb->next) { + st->cur_skb = st->cur_skb->next; + st->frag_idx = 0; + goto next_skb; + } else if (st->root_skb == st->cur_skb && + skb_shinfo(st->root_skb)->frag_list) { + st->cur_skb = skb_shinfo(st->root_skb)->frag_list; + goto next_skb; + } + + return 0; +} + +/** + * skb_abort_seq_read - Abort a sequential read of skb data + * @st: state variable + * + * Must be called if skb_seq_read() was not called until it + * returned 0. + */ +void skb_abort_seq_read(struct skb_seq_state *st) +{ + if (st->frag_data) + kunmap_skb_frag(st->frag_data); +} + +#define TS_SKB_CB(state) ((struct skb_seq_state *) &((state)->cb)) + +static unsigned int skb_ts_get_next_block(unsigned int offset, const u8 **text, + struct ts_config *conf, + struct ts_state *state) +{ + return skb_seq_read(offset, text, TS_SKB_CB(state)); +} + +static void skb_ts_finish(struct ts_config *conf, struct ts_state *state) +{ + skb_abort_seq_read(TS_SKB_CB(state)); +} + +/** + * skb_find_text - Find a text pattern in skb data + * @skb: the buffer to look in + * @from: search offset + * @to: search limit + * @config: textsearch configuration + * @state: uninitialized textsearch state variable + * + * Finds a pattern in the skb data according to the specified + * textsearch configuration. Use textsearch_next() to retrieve + * subsequent occurrences of the pattern. Returns the offset + * to the first occurrence or UINT_MAX if no match was found. + */ +unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, + unsigned int to, struct ts_config *config, + struct ts_state *state) +{ + config->get_next_block = skb_ts_get_next_block; + config->finish = skb_ts_finish; + + skb_prepare_seq_read(skb, from, to, TS_SKB_CB(state)); + + return textsearch_find(config, state); +} + void __init skb_init(void) { skbuff_head_cache = kmem_cache_create("skbuff_head_cache", @@ -1538,3 +1692,7 @@ EXPORT_SYMBOL(skb_queue_tail); EXPORT_SYMBOL(skb_unlink); EXPORT_SYMBOL(skb_append); EXPORT_SYMBOL(skb_split); +EXPORT_SYMBOL(skb_prepare_seq_read); +EXPORT_SYMBOL(skb_seq_read); +EXPORT_SYMBOL(skb_abort_seq_read); +EXPORT_SYMBOL(skb_find_text); diff --git a/net/core/sock.c b/net/core/sock.c index a6ec3ada7f9..12f6d9a2a52 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -206,13 +206,14 @@ int sock_setsockopt(struct socket *sock, int level, int optname, */ #ifdef SO_DONTLINGER /* Compatibility item... */ - switch (optname) { - case SO_DONTLINGER: - sock_reset_flag(sk, SOCK_LINGER); - return 0; + if (optname == SO_DONTLINGER) { + lock_sock(sk); + sock_reset_flag(sk, SOCK_LINGER); + release_sock(sk); + return 0; } -#endif - +#endif + if(optlen<sizeof(int)) return(-EINVAL); @@ -622,7 +623,8 @@ lenout: * @prot: struct proto associated with this new sock instance * @zero_it: if we should zero the newly allocated sock */ -struct sock *sk_alloc(int family, int priority, struct proto *prot, int zero_it) +struct sock *sk_alloc(int family, unsigned int __nocast priority, + struct proto *prot, int zero_it) { struct sock *sk = NULL; kmem_cache_t *slab = prot->slab; @@ -750,7 +752,8 @@ unsigned long sock_i_ino(struct sock *sk) /* * Allocate a skb from the socket's send buffer. */ -struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int priority) +struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, + unsigned int __nocast priority) { if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { struct sk_buff * skb = alloc_skb(size, priority); @@ -765,7 +768,8 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int /* * Allocate a skb from the socket's receive buffer. */ -struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int priority) +struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, + unsigned int __nocast priority) { if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) { struct sk_buff *skb = alloc_skb(size, priority); @@ -780,7 +784,7 @@ struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int /* * Allocate a memory block from the socket's option memory buffer. */ -void *sock_kmalloc(struct sock *sk, int size, int priority) +void *sock_kmalloc(struct sock *sk, int size, unsigned int __nocast priority) { if ((unsigned)size <= sysctl_optmem_max && atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) { diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index c8be646cb19..8f817ad9f54 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -13,12 +13,8 @@ #ifdef CONFIG_SYSCTL extern int netdev_max_backlog; +extern int netdev_budget; extern int weight_p; -extern int no_cong_thresh; -extern int no_cong; -extern int lo_cong; -extern int mod_cong; -extern int netdev_fastroute; extern int net_msg_cost; extern int net_msg_burst; @@ -35,19 +31,6 @@ extern int sysctl_somaxconn; extern char sysctl_divert_version[]; #endif /* CONFIG_NET_DIVERT */ -/* - * This strdup() is used for creating copies of network - * device names to be handed over to sysctl. - */ - -char *net_sysctl_strdup(const char *s) -{ - char *rv = kmalloc(strlen(s)+1, GFP_KERNEL); - if (rv) - strcpy(rv, s); - return rv; -} - ctl_table core_table[] = { #ifdef CONFIG_NET { @@ -99,38 +82,6 @@ ctl_table core_table[] = { .proc_handler = &proc_dointvec }, { - .ctl_name = NET_CORE_NO_CONG_THRESH, - .procname = "no_cong_thresh", - .data = &no_cong_thresh, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = NET_CORE_NO_CONG, - .procname = "no_cong", - .data = &no_cong, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = NET_CORE_LO_CONG, - .procname = "lo_cong", - .data = &lo_cong, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = NET_CORE_MOD_CONG, - .procname = "mod_cong", - .data = &mod_cong, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { .ctl_name = NET_CORE_MSG_COST, .procname = "message_cost", .data = &net_msg_cost, @@ -174,9 +125,15 @@ ctl_table core_table[] = { .mode = 0644, .proc_handler = &proc_dointvec }, + { + .ctl_name = NET_CORE_BUDGET, + .procname = "netdev_budget", + .data = &netdev_budget, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, { .ctl_name = 0 } }; -EXPORT_SYMBOL(net_sysctl_strdup); - #endif diff --git a/net/core/utils.c b/net/core/utils.c index e11a8654f36..88eb8b68e26 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -23,10 +23,10 @@ #include <linux/percpu.h> #include <linux/init.h> +#include <asm/byteorder.h> #include <asm/system.h> #include <asm/uaccess.h> - /* This is a maximally equidistributed combined Tausworthe generator based on code from GNU Scientific Library 1.5 (30 Jun 2004) @@ -153,3 +153,38 @@ int net_ratelimit(void) EXPORT_SYMBOL(net_random); EXPORT_SYMBOL(net_ratelimit); EXPORT_SYMBOL(net_srandom); + +/* + * Convert an ASCII string to binary IP. + * This is outside of net/ipv4/ because various code that uses IP addresses + * is otherwise not dependent on the TCP/IP stack. + */ + +__u32 in_aton(const char *str) +{ + unsigned long l; + unsigned int val; + int i; + + l = 0; + for (i = 0; i < 4; i++) + { + l <<= 8; + if (*str != '\0') + { + val = 0; + while (*str != '\0' && *str != '.') + { + val *= 10; + val += *str - '0'; + str++; + } + l |= val; + if (*str != '\0') + str++; + } + } + return(htonl(l)); +} + +EXPORT_SYMBOL(in_aton); diff --git a/net/core/wireless.c b/net/core/wireless.c index b2fe378dfbf..3ff5639c0b7 100644 --- a/net/core/wireless.c +++ b/net/core/wireless.c @@ -1102,6 +1102,7 @@ static inline int rtnetlink_fill_iwinfo(struct sk_buff * skb, nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(*r)); r = NLMSG_DATA(nlh); r->ifi_family = AF_UNSPEC; + r->__ifi_pad = 0; r->ifi_type = dev->type; r->ifi_index = dev->ifindex; r->ifi_flags = dev->flags; |