summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan.c6
-rw-r--r--net/8021q/vlan_dev.c3
-rw-r--r--net/bridge/br_netfilter.c19
-rw-r--r--net/core/dev.c3
-rw-r--r--net/core/filter.c4
-rw-r--r--net/core/skbuff.c2
-rw-r--r--net/dccp/ipv4.c2
-rw-r--r--net/decnet/af_decnet.c25
-rw-r--r--net/decnet/sysctl_net_decnet.c33
-rw-r--r--net/ieee80211/Kconfig2
-rw-r--r--net/ipv4/ip_gre.c2
-rw-r--r--net/ipv4/netfilter/Kconfig8
-rw-r--r--net/ipv4/netfilter/Makefile3
-rw-r--r--net/ipv4/netfilter/ip_conntrack_core.c20
-rw-r--r--net/ipv4/netfilter/ip_conntrack_netlink.c12
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_tcp.c3
-rw-r--r--net/ipv4/netfilter/ip_nat_tftp.c5
-rw-r--r--net/ipv4/tcp_output.c233
-rw-r--r--net/ipv4/tcp_vegas.c16
-rw-r--r--net/ipv4/xfrm4_policy.c1
-rw-r--r--net/ipv6/addrconf.c137
-rw-r--r--net/ipv6/esp6.c2
-rw-r--r--net/ipv6/icmp.c16
-rw-r--r--net/ipv6/mcast.c142
-rw-r--r--net/ipv6/netfilter/Kconfig2
-rw-r--r--net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c12
-rw-r--r--net/ipv6/route.c15
-rw-r--r--net/ipv6/tcp_ipv6.c19
-rw-r--r--net/ipv6/xfrm6_policy.c1
-rw-r--r--net/netfilter/Kconfig4
-rw-r--r--net/netfilter/nf_conntrack_core.c3
-rw-r--r--net/netfilter/nfnetlink.c5
-rw-r--r--net/netrom/nr_in.c6
-rw-r--r--net/packet/af_packet.c115
-rw-r--r--net/sched/act_api.c2
-rw-r--r--net/sctp/socket.c16
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c6
-rw-r--r--net/sunrpc/rpc_pipe.c4
-rw-r--r--net/sunrpc/xprtsock.c2
-rw-r--r--net/xfrm/xfrm_policy.c49
-rw-r--r--net/xfrm/xfrm_state.c5
41 files changed, 639 insertions, 326 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 91e412b0ab0..67465b65abe 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -753,6 +753,8 @@ static int vlan_ioctl_handler(void __user *arg)
break;
case GET_VLAN_REALDEV_NAME_CMD:
err = vlan_dev_get_realdev_name(args.device1, args.u.device2);
+ if (err)
+ goto out;
if (copy_to_user(arg, &args,
sizeof(struct vlan_ioctl_args))) {
err = -EFAULT;
@@ -761,6 +763,8 @@ static int vlan_ioctl_handler(void __user *arg)
case GET_VLAN_VID_CMD:
err = vlan_dev_get_vid(args.device1, &vid);
+ if (err)
+ goto out;
args.u.VID = vid;
if (copy_to_user(arg, &args,
sizeof(struct vlan_ioctl_args))) {
@@ -774,7 +778,7 @@ static int vlan_ioctl_handler(void __user *arg)
__FUNCTION__, args.cmd);
return -EINVAL;
};
-
+out:
return err;
}
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index b7486488967..f2a8750bbf1 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -165,6 +165,9 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
skb_pull(skb, VLAN_HLEN); /* take off the VLAN header (4 bytes currently) */
+ /* Need to correct hardware checksum */
+ skb_postpull_rcsum(skb, vhdr, VLAN_HLEN);
+
/* Ok, lets check to make sure the device (dev) we
* came in on is what this VLAN is attached to.
*/
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index d8e36b77512..23422bd53a5 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -295,7 +295,7 @@ static int check_hbh_len(struct sk_buff *skb)
len -= 2;
while (len > 0) {
- int optlen = raw[off+1]+2;
+ int optlen = skb->nh.raw[off+1]+2;
switch (skb->nh.raw[off]) {
case IPV6_TLV_PAD0:
@@ -308,18 +308,15 @@ static int check_hbh_len(struct sk_buff *skb)
case IPV6_TLV_JUMBO:
if (skb->nh.raw[off+1] != 4 || (off&3) != 2)
goto bad;
-
pkt_len = ntohl(*(u32*)(skb->nh.raw+off+2));
-
+ if (pkt_len <= IPV6_MAXPLEN ||
+ skb->nh.ipv6h->payload_len)
+ goto bad;
if (pkt_len > skb->len - sizeof(struct ipv6hdr))
goto bad;
- if (pkt_len + sizeof(struct ipv6hdr) < skb->len) {
- if (__pskb_trim(skb,
- pkt_len + sizeof(struct ipv6hdr)))
- goto bad;
- if (skb->ip_summed == CHECKSUM_HW)
- skb->ip_summed = CHECKSUM_NONE;
- }
+ if (pskb_trim_rcsum(skb,
+ pkt_len+sizeof(struct ipv6hdr)))
+ goto bad;
break;
default:
if (optlen > len)
@@ -372,6 +369,7 @@ static unsigned int br_nf_pre_routing_ipv6(unsigned int hook,
if (hdr->nexthdr == NEXTHDR_HOP && check_hbh_len(skb))
goto inhdr_error;
+ nf_bridge_put(skb->nf_bridge);
if ((nf_bridge = nf_bridge_alloc(skb)) == NULL)
return NF_DROP;
setup_pre_routing(skb);
@@ -455,6 +453,7 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
skb->ip_summed = CHECKSUM_NONE;
}
+ nf_bridge_put(skb->nf_bridge);
if ((nf_bridge = nf_bridge_alloc(skb)) == NULL)
return NF_DROP;
setup_pre_routing(skb);
diff --git a/net/core/dev.c b/net/core/dev.c
index 0b48e294aaf..a5efc9ae010 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1113,7 +1113,8 @@ out:
void netdev_rx_csum_fault(struct net_device *dev)
{
if (net_ratelimit()) {
- printk(KERN_ERR "%s: hw csum failure.\n", dev->name);
+ printk(KERN_ERR "%s: hw csum failure.\n",
+ dev ? dev->name : "<unknown>");
dump_stack();
}
}
diff --git a/net/core/filter.c b/net/core/filter.c
index 2841bfce29d..3a10e0bc90e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -293,7 +293,7 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
struct sock_filter *ftest;
int pc;
- if (((unsigned int)flen >= (~0U / sizeof(struct sock_filter))) || flen == 0)
+ if (flen == 0 || flen > BPF_MAXINSNS)
return -EINVAL;
/* check the filter code now */
@@ -360,7 +360,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
int err;
/* Make sure new filter is there and in the right amounts. */
- if (fprog->filter == NULL || fprog->len > BPF_MAXINSNS)
+ if (fprog->filter == NULL)
return -EINVAL;
fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b7d13a4fff4..83fee37de38 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1725,7 +1725,7 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
* of the skb if any page alloc fails user this procedure returns -ENOMEM
*/
int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
- int getfrag(void *from, char *to, int offset,
+ int (*getfrag)(void *from, char *to, int offset,
int len, int odd, struct sk_buff *skb),
void *from, int length)
{
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index ca03521112c..656e13e38cf 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -1251,7 +1251,7 @@ static int dccp_v4_destroy_sock(struct sock *sk)
struct dccp_sock *dp = dccp_sk(sk);
/*
- * DCCP doesn't use sk_qrite_queue, just sk_send_head
+ * DCCP doesn't use sk_write_queue, just sk_send_head
* for retransmissions
*/
if (sk->sk_send_head != NULL) {
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index f89e55f814d..d402e9020c6 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -153,6 +153,7 @@ static struct proto_ops dn_proto_ops;
static DEFINE_RWLOCK(dn_hash_lock);
static struct hlist_head dn_sk_hash[DN_SK_HASH_SIZE];
static struct hlist_head dn_wild_sk;
+static atomic_t decnet_memory_allocated;
static int __dn_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen, int flags);
static int __dn_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen, int flags);
@@ -446,10 +447,26 @@ static void dn_destruct(struct sock *sk)
dst_release(xchg(&sk->sk_dst_cache, NULL));
}
+static int dn_memory_pressure;
+
+static void dn_enter_memory_pressure(void)
+{
+ if (!dn_memory_pressure) {
+ dn_memory_pressure = 1;
+ }
+}
+
static struct proto dn_proto = {
- .name = "DECNET",
- .owner = THIS_MODULE,
- .obj_size = sizeof(struct dn_sock),
+ .name = "NSP",
+ .owner = THIS_MODULE,
+ .enter_memory_pressure = dn_enter_memory_pressure,
+ .memory_pressure = &dn_memory_pressure,
+ .memory_allocated = &decnet_memory_allocated,
+ .sysctl_mem = sysctl_decnet_mem,
+ .sysctl_wmem = sysctl_decnet_wmem,
+ .sysctl_rmem = sysctl_decnet_rmem,
+ .max_header = DN_MAX_NSP_DATA_HEADER + 64,
+ .obj_size = sizeof(struct dn_sock),
};
static struct sock *dn_alloc_sock(struct socket *sock, gfp_t gfp)
@@ -470,6 +487,8 @@ static struct sock *dn_alloc_sock(struct socket *sock, gfp_t gfp)
sk->sk_family = PF_DECnet;
sk->sk_protocol = 0;
sk->sk_allocation = gfp;
+ sk->sk_sndbuf = sysctl_decnet_wmem[1];
+ sk->sk_rcvbuf = sysctl_decnet_rmem[1];
/* Initialization of DECnet Session Control Port */
scp = DN_SK(sk);
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
index 02bca49cb50..0e9d2c57116 100644
--- a/net/decnet/sysctl_net_decnet.c
+++ b/net/decnet/sysctl_net_decnet.c
@@ -10,6 +10,7 @@
*
* Changes:
* Steve Whitehouse - C99 changes and default device handling
+ * Steve Whitehouse - Memory buffer settings, like the tcp ones
*
*/
#include <linux/config.h>
@@ -37,6 +38,11 @@ int decnet_dr_count = 3;
int decnet_log_martians = 1;
int decnet_no_fc_max_cwnd = NSP_MIN_WINDOW;
+/* Reasonable defaults, I hope, based on tcp's defaults */
+int sysctl_decnet_mem[3] = { 768 << 3, 1024 << 3, 1536 << 3 };
+int sysctl_decnet_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 };
+int sysctl_decnet_rmem[3] = { 4 * 1024, 87380, 87380 * 2 };
+
#ifdef CONFIG_SYSCTL
extern int decnet_dst_gc_interval;
static int min_decnet_time_wait[] = { 5 };
@@ -428,6 +434,33 @@ static ctl_table dn_table[] = {
.extra1 = &min_decnet_no_fc_max_cwnd,
.extra2 = &max_decnet_no_fc_max_cwnd
},
+ {
+ .ctl_name = NET_DECNET_MEM,
+ .procname = "decnet_mem",
+ .data = &sysctl_decnet_mem,
+ .maxlen = sizeof(sysctl_decnet_mem),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec,
+ },
+ {
+ .ctl_name = NET_DECNET_RMEM,
+ .procname = "decnet_rmem",
+ .data = &sysctl_decnet_rmem,
+ .maxlen = sizeof(sysctl_decnet_rmem),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec,
+ },
+ {
+ .ctl_name = NET_DECNET_WMEM,
+ .procname = "decnet_wmem",
+ .data = &sysctl_decnet_wmem,
+ .maxlen = sizeof(sysctl_decnet_wmem),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec,
+ },
{
.ctl_name = NET_DECNET_DEBUG_LEVEL,
.procname = "debug",
diff --git a/net/ieee80211/Kconfig b/net/ieee80211/Kconfig
index 91b16fbf91f..d18ccba3ea9 100644
--- a/net/ieee80211/Kconfig
+++ b/net/ieee80211/Kconfig
@@ -55,7 +55,7 @@ config IEEE80211_CRYPT_CCMP
config IEEE80211_CRYPT_TKIP
tristate "IEEE 802.11i TKIP encryption"
- depends on IEEE80211
+ depends on IEEE80211 && NET_RADIO
select CRYPTO
select CRYPTO_MICHAEL_MIC
---help---
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index a4c347c3b8e..46f9d9cf7a5 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -618,7 +618,7 @@ static int ipgre_rcv(struct sk_buff *skb)
skb->mac.raw = skb->nh.raw;
skb->nh.raw = __pskb_pull(skb, offset);
- skb_postpull_rcsum(skb, skb->mac.raw, offset);
+ skb_postpull_rcsum(skb, skb->h.raw, offset);
memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
skb->pkt_type = PACKET_HOST;
#ifdef CONFIG_NET_IPGRE_BROADCAST
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 0bc00528d88..88a60650e6b 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -56,8 +56,8 @@ config IP_NF_CONNTRACK_MARK
instead of the individual packets.
config IP_NF_CONNTRACK_EVENTS
- bool "Connection tracking events"
- depends on IP_NF_CONNTRACK
+ bool "Connection tracking events (EXPERIMENTAL)"
+ depends on EXPERIMENTAL && IP_NF_CONNTRACK
help
If this option is enabled, the connection tracking code will
provide a notifier chain that can be used by other kernel code
@@ -66,8 +66,8 @@ config IP_NF_CONNTRACK_EVENTS
IF unsure, say `N'.
config IP_NF_CONNTRACK_NETLINK
- tristate 'Connection tracking netlink interface'
- depends on IP_NF_CONNTRACK && NETFILTER_NETLINK
+ tristate 'Connection tracking netlink interface (EXPERIMENTAL)'
+ depends on EXPERIMENTAL && IP_NF_CONNTRACK && NETFILTER_NETLINK
depends on IP_NF_CONNTRACK!=y || NETFILTER_NETLINK!=m
help
This option enables support for a netlink-based userspace interface
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 058c48e258f..d0a447e520a 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -12,6 +12,7 @@ ip_nat_pptp-objs := ip_nat_helper_pptp.o ip_nat_proto_gre.o
# connection tracking
obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o
+obj-$(CONFIG_IP_NF_NAT) += ip_nat.o
# conntrack netlink interface
obj-$(CONFIG_IP_NF_CONNTRACK_NETLINK) += ip_conntrack_netlink.o
@@ -41,7 +42,7 @@ obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
# the three instances of ip_tables
obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o
obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o
-obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o ip_nat.o
+obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o
obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
# matches
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 7a4ecddd597..84c66dbfeda 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -1345,6 +1345,11 @@ static int kill_all(struct ip_conntrack *i, void *data)
return 1;
}
+void ip_conntrack_flush(void)
+{
+ ip_ct_iterate_cleanup(kill_all, NULL);
+}
+
static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size)
{
if (vmalloced)
@@ -1354,8 +1359,12 @@ static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size)
get_order(sizeof(struct list_head) * size));
}
-void ip_conntrack_flush(void)
+/* Mishearing the voices in his head, our hero wonders how he's
+ supposed to kill the mall. */
+void ip_conntrack_cleanup(void)
{
+ ip_ct_attach = NULL;
+
/* This makes sure all current packets have passed through
netfilter framework. Roll on, two-stage module
delete... */
@@ -1363,7 +1372,7 @@ void ip_conntrack_flush(void)
ip_ct_event_cache_flush();
i_see_dead_people:
- ip_ct_iterate_cleanup(kill_all, NULL);
+ ip_conntrack_flush();
if (atomic_read(&ip_conntrack_count) != 0) {
schedule();
goto i_see_dead_people;
@@ -1371,14 +1380,7 @@ void ip_conntrack_flush(void)
/* wait until all references to ip_conntrack_untracked are dropped */
while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1)
schedule();
-}
-/* Mishearing the voices in his head, our hero wonders how he's
- supposed to kill the mall. */
-void ip_conntrack_cleanup(void)
-{
- ip_ct_attach = NULL;
- ip_conntrack_flush();
kmem_cache_destroy(ip_conntrack_cachep);
kmem_cache_destroy(ip_conntrack_expect_cachep);
free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
index 3fce91bcc0b..91fe8f2e38f 100644
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -503,7 +503,7 @@ ctnetlink_parse_tuple_ip(struct nfattr *attr, struct ip_conntrack_tuple *tuple)
}
static const size_t cta_min_proto[CTA_PROTO_MAX] = {
- [CTA_PROTO_NUM-1] = sizeof(u_int16_t),
+ [CTA_PROTO_NUM-1] = sizeof(u_int8_t),
[CTA_PROTO_SRC_PORT-1] = sizeof(u_int16_t),
[CTA_PROTO_DST_PORT-1] = sizeof(u_int16_t),
[CTA_PROTO_ICMP_TYPE-1] = sizeof(u_int8_t),
@@ -528,7 +528,7 @@ ctnetlink_parse_tuple_proto(struct nfattr *attr,
if (!tb[CTA_PROTO_NUM-1])
return -EINVAL;
- tuple->dst.protonum = *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_NUM-1]);
+ tuple->dst.protonum = *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_NUM-1]);
proto = ip_conntrack_proto_find_get(tuple->dst.protonum);
@@ -728,11 +728,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
return -ENOENT;
}
}
- if (del_timer(&ct->timeout)) {
- ip_conntrack_put(ct);
+ if (del_timer(&ct->timeout))
ct->timeout.function((unsigned long)ct);
- return 0;
- }
+
ip_conntrack_put(ct);
DEBUGP("leaving\n");
@@ -877,7 +875,7 @@ ctnetlink_change_status(struct ip_conntrack *ct, struct nfattr *cda[])
DEBUGP("NAT status: %lu\n",
status & (IPS_NAT_MASK | IPS_NAT_DONE_MASK));
- if (ip_nat_initialized(ct, hooknum))
+ if (ip_nat_initialized(ct, HOOK2MANIP(hooknum)))
return -EEXIST;
ip_nat_setup_info(ct, &range, hooknum);
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index aeb7353d477..e7fa29e576d 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -341,9 +341,10 @@ static int tcp_print_conntrack(struct seq_file *s,
static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa,
const struct ip_conntrack *ct)
{
- struct nfattr *nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP);
+ struct nfattr *nest_parms;
read_lock_bh(&tcp_lock);
+ nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP);
NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t),
&ct->proto.tcp.state);
read_unlock_bh(&tcp_lock);
diff --git a/net/ipv4/netfilter/ip_nat_tftp.c b/net/ipv4/netfilter/ip_nat_tftp.c
index 2215317c76b..43c3bd7c118 100644
--- a/net/ipv4/netfilter/ip_nat_tftp.c
+++ b/net/ipv4/netfilter/ip_nat_tftp.c
@@ -42,7 +42,10 @@ static unsigned int help(struct sk_buff **pskb,
enum ip_conntrack_info ctinfo,
struct ip_conntrack_expect *exp)
{
- exp->saved_proto.udp.port = exp->tuple.dst.u.tcp.port;
+ struct ip_conntrack *ct = exp->master;
+
+ exp->saved_proto.udp.port
+ = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
exp->dir = IP_CT_DIR_REPLY;
exp->expectfn = ip_nat_follow_master;
if (ip_conntrack_expect_related(exp) != 0)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 029c70dfb58..b7325e0b406 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -262,122 +262,139 @@ static __inline__ u16 tcp_select_window(struct sock *sk)
* We are working here with either a clone of the original
* SKB, or a fresh unique copy made by the retransmit engine.
*/
-static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
+static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, gfp_t gfp_mask)
{
- if (skb != NULL) {
- const struct inet_connection_sock *icsk = inet_csk(sk);
- struct inet_sock *inet = inet_sk(sk);
- struct tcp_sock *tp = tcp_sk(sk);
- struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
- int tcp_header_size = tp->tcp_header_len;
- struct tcphdr *th;
- int sysctl_flags;
- int err;
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+ struct inet_sock *inet;
+ struct tcp_sock *tp;
+ struct tcp_skb_cb *tcb;
+ int tcp_header_size;
+ struct tcphdr *th;
+ int sysctl_flags;
+ int err;
+
+ BUG_ON(!skb || !tcp_skb_pcount(skb));
+
+ /* If congestion control is doing timestamping, we must
+ * take such a timestamp before we potentially clone/copy.
+ */
+ if (icsk->icsk_ca_ops->rtt_sample)
+ __net_timestamp(skb);
+
+ if (likely(clone_it)) {
+ if (unlikely(skb_cloned(skb)))
+ skb = pskb_copy(skb, gfp_mask);
+ else
+ skb = skb_clone(skb, gfp_mask);
+ if (unlikely(!skb))
+ return -ENOBUFS;
+ }
- BUG_ON(!tcp_skb_pcount(skb));
+ inet = inet_sk(sk);
+ tp = tcp_sk(sk);
+ tcb = TCP_SKB_CB(skb);
+ tcp_header_size = tp->tcp_header_len;
#define SYSCTL_FLAG_TSTAMPS 0x1
#define SYSCTL_FLAG_WSCALE 0x2
#define SYSCTL_FLAG_SACK 0x4
- /* If congestion control is doing timestamping */
- if (icsk->icsk_ca_ops->rtt_sample)
- __net_timestamp(skb);
-
- sysctl_flags = 0;
- if (tcb->flags & TCPCB_FLAG_SYN) {
- tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
- if(sysctl_tcp_timestamps) {
- tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
- sysctl_flags |= SYSCTL_FLAG_TSTAMPS;
- }
- if(sysctl_tcp_window_scaling) {
- tcp_header_size += TCPOLEN_WSCALE_ALIGNED;
- sysctl_flags |= SYSCTL_FLAG_WSCALE;
- }
- if(sysctl_tcp_sack) {
- sysctl_flags |= SYSCTL_FLAG_SACK;
- if(!(sysctl_flags & SYSCTL_FLAG_TSTAMPS))
- tcp_header_size += TCPOLEN_SACKPERM_ALIGNED;
- }
- } else if (tp->rx_opt.eff_sacks) {
- /* A SACK is 2 pad bytes, a 2 byte header, plus
- * 2 32-bit sequence numbers for each SACK block.
- */
- tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED +
- (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK));
+ sysctl_flags = 0;
+ if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
+ tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
+ if(sysctl_tcp_timestamps) {
+ tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
+ sysctl_flags |= SYSCTL_FLAG_TSTAMPS;
}
-
- if (tcp_packets_in_flight(tp) == 0)
- tcp_ca_event(sk, CA_EVENT_TX_START);
-
- th = (struct tcphdr *) skb_push(skb, tcp_header_size);
- skb->h.th = th;
- skb_set_owner_w(skb, sk);
-
- /* Build TCP header and checksum it. */
- th->source = inet->sport;
- th->dest = inet->dport;
- th->seq = htonl(tcb->seq);
- th->ack_seq = htonl(tp->rcv_nxt);
- *(((__u16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | tcb->flags);
- if (tcb->flags & TCPCB_FLAG_SYN) {
- /* RFC1323: The window in SYN & SYN/ACK segments
- * is never scaled.
- */
- th->window = htons(tp->rcv_wnd);
- } else {
- th->window = htons(tcp_select_window(sk));
+ if (sysctl_tcp_window_scaling) {
+ tcp_header_size += TCPOLEN_WSCALE_ALIGNED;
+ sysctl_flags |= SYSCTL_FLAG_WSCALE;
}
- th->check = 0;
- th->urg_ptr = 0;
-
- if (tp->urg_mode &&
- between(tp->snd_up, tcb->seq+1, tcb->seq+0xFFFF)) {
- th->urg_ptr = htons(tp->snd_up-tcb->seq);
- th->urg = 1;
+ if (sysctl_tcp_sack) {
+ sysctl_flags |= SYSCTL_FLAG_SACK;
+ if (!(sysctl_flags & SYSCTL_FLAG_TSTAMPS))
+ tcp_header_size += TCPOLEN_SACKPERM_ALIGNED;
}
+ } else if (unlikely(tp->rx_opt.eff_sacks)) {
+ /* A SACK is 2 pad bytes, a 2 byte header, plus
+ * 2 32-bit sequence numbers for each SACK block.
+ */
+ tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED +
+ (tp->rx_opt.eff_sacks *
+ TCPOLEN_SACK_PERBLOCK));
+ }
+
+ if (tcp_packets_in_flight(tp) == 0)
+ tcp_ca_event(sk, CA_EVENT_TX_START);
+
+ th = (struct tcphdr *) skb_push(skb, tcp_header_size);
+ skb->h.th = th;
+ skb_set_owner_w(skb, sk);
+
+ /* Build TCP header and checksum it. */
+ th->source = inet->sport;
+ th->dest = inet->dport;
+ th->seq = htonl(tcb->seq);
+ th->ack_seq = htonl(tp->rcv_nxt);
+ *(((__u16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
+ tcb->flags);
+
+ if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
+ /* RFC1323: The window in SYN & SYN/ACK segments
+ * is never scaled.
+ */
+ th->window = htons(tp->rcv_wnd);
+ } else {
+ th->window = htons(tcp_select_window(sk));
+ }
+ th->check = 0;
+ th->urg_ptr = 0;
- if (tcb->flags & TCPCB_FLAG_SYN) {
- tcp_syn_build_options((__u32 *)(th + 1),
- tcp_advertise_mss(sk),
- (sysctl_flags & SYSCTL_FLAG_TSTAMPS),
- (sysctl_flags & SYSCTL_FLAG_SACK),
- (sysctl_flags & SYSCTL_FLAG_WSCALE),
- tp->rx_opt.rcv_wscale,
- tcb->when,
- tp->rx_opt.ts_recent);
- } else {
- tcp_build_and_update_options((__u32 *)(th + 1),
- tp, tcb->when);
+ if (unlikely(tp->urg_mode &&
+ between(tp->snd_up, tcb->seq+1, tcb->seq+0xFFFF))) {
+ th->urg_ptr = htons(tp->snd_up-tcb->seq);
+ th->urg = 1;
+ }
- TCP_ECN_send(sk, tp, skb, tcp_header_size);
- }
- tp->af_specific->send_check(sk, th, skb->len, skb);
+ if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
+ tcp_syn_build_options((__u32 *)(th + 1),
+ tcp_advertise_mss(sk),
+ (sysctl_flags & SYSCTL_FLAG_TSTAMPS),
+ (sysctl_flags & SYSCTL_FLAG_SACK),
+ (sysctl_flags & SYSCTL_FLAG_WSCALE),
+ tp->rx_opt.rcv_wscale,
+ tcb->when,
+ tp->rx_opt.ts_recent);
+ } else {
+ tcp_build_and_update_options((__u32 *)(th + 1),
+ tp, tcb->when);
+ TCP_ECN_send(sk, tp, skb, tcp_header_size);
+ }
- if (tcb->flags & TCPCB_FLAG_ACK)
- tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
+ tp->af_specific->send_check(sk, th, skb->len, skb);
- if (skb->len != tcp_header_size)
- tcp_event_data_sent(tp, skb, sk);
+ if (likely(tcb->flags & TCPCB_FLAG_ACK))
+ tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
- TCP_INC_STATS(TCP_MIB_OUTSEGS);
+ if (skb->len != tcp_header_size)
+ tcp_event_data_sent(tp, skb, sk);
- err = tp->af_specific->queue_xmit(skb, 0);
- if (err <= 0)
- return err;
+ TCP_INC_STATS(TCP_MIB_OUTSEGS);
- tcp_enter_cwr(sk);
+ err = tp->af_specific->queue_xmit(skb, 0);
+ if (unlikely(err <= 0))
+ return err;
+
+ tcp_enter_cwr(sk);
+
+ /* NET_XMIT_CN is special. It does not guarantee,
+ * that this packet is lost. It tells that device
+ * is about to start to drop packets or already
+ * drops some packets of the same priority and
+ * invokes us to send less aggressively.
+ */
+ return err == NET_XMIT_CN ? 0 : err;
- /* NET_XMIT_CN is special. It does not guarantee,
- * that this packet is lost. It tells that device
- * is about to start to drop packets or already
- * drops some packets of the same priority and
- * invokes us to send less aggressively.
- */
- return err == NET_XMIT_CN ? 0 : err;
- }
- return -ENOBUFS;
#undef SYSCTL_FLAG_TSTAMPS
#undef SYSCTL_FLAG_WSCALE
#undef SYSCTL_FLAG_SACK
@@ -1036,7 +1053,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
TCP_SKB_CB(skb)->when = tcp_time_stamp;
- if (unlikely(tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC))))
+ if (unlikely(tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC)))
break;
/* Advance the send_head. This one is sent out.
@@ -1109,7 +1126,7 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
/* Send it out now. */
TCP_SKB_CB(skb)->when = tcp_time_stamp;
- if (likely(!tcp_transmit_skb(sk, skb_clone(skb, sk->sk_allocation)))) {
+ if (likely(!tcp_transmit_skb(sk, skb, 1, sk->sk_allocation))) {
update_send_head(sk, tp, skb);
tcp_cwnd_validate(sk, tp);
return;
@@ -1429,9 +1446,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
*/
TCP_SKB_CB(skb)->when = tcp_time_stamp;
- err = tcp_transmit_skb(sk, (skb_cloned(skb) ?
- pskb_copy(skb, GFP_ATOMIC):
- skb_clone(skb, GFP_ATOMIC)));
+ err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
if (err == 0) {
/* Update global TCP statistics. */
@@ -1665,7 +1680,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp);
TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
TCP_SKB_CB(skb)->when = tcp_time_stamp;
- if (tcp_transmit_skb(sk, skb))
+ if (tcp_transmit_skb(sk, skb, 0, priority))
NET_INC_STATS(LINUX_MIB_TCPABORTFAILED);
}
@@ -1700,7 +1715,7 @@ int tcp_send_synack(struct sock *sk)
TCP_ECN_send_synack(tcp_sk(sk), skb);
}
TCP_SKB_CB(skb)->when = tcp_time_stamp;
- return tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
+ return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
}
/*
@@ -1861,7 +1876,7 @@ int tcp_connect(struct sock *sk)
__skb_queue_tail(&sk->sk_write_queue, buff);
sk_charge_skb(sk, buff);
tp->packets_out += tcp_skb_pcount(buff);
- tcp_transmit_skb(sk, skb_clone(buff, GFP_KERNEL));
+ tcp_transmit_skb(sk, buff, 1, GFP_KERNEL);
TCP_INC_STATS(TCP_MIB_ACTIVEOPENS);
/* Timer for repeating the SYN until an answer. */
@@ -1957,7 +1972,7 @@ void tcp_send_ack(struct sock *sk)
/* Send it off, this clears delayed acks for us. */
TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp);
TCP_SKB_CB(buff)->when = tcp_time_stamp;
- tcp_transmit_skb(sk, buff);
+ tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC);
}
}
@@ -1997,7 +2012,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
TCP_SKB_CB(skb)->seq = urgent ? tp->snd_una : tp->snd_una - 1;
TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
TCP_SKB_CB(skb)->when = tcp_time_stamp;
- return tcp_transmit_skb(sk, skb);
+ return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
}
int tcp_write_wakeup(struct sock *sk)
@@ -2030,7 +2045,7 @@ int tcp_write_wakeup(struct sock *sk)
TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
TCP_SKB_CB(skb)->when = tcp_time_stamp;
- err = tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
+ err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
if (!err) {
update_send_head(sk, tp, skb);
}
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index b7d296a8ac6..13e7e6e8df1 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -215,14 +215,6 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
vegas->beg_snd_nxt = tp->snd_nxt;
vegas->beg_snd_cwnd = tp->snd_cwnd;
- /* Take into account the current RTT sample too, to
- * decrease the impact of delayed acks. This double counts
- * this sample since we count it for the next window as well,
- * but that's not too awful, since we're taking the min,
- * rather than averaging.
- */
- tcp_vegas_rtt_calc(sk, seq_rtt * 1000);
-
/* We do the Vegas calculations only if we got enough RTT
* samples that we can be reasonably sure that we got
* at least one RTT sample that wasn't from a delayed ACK.
@@ -333,11 +325,11 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
else if (tp->snd_cwnd > tp->snd_cwnd_clamp)
tp->snd_cwnd = tp->snd_cwnd_clamp;
}
- }
- /* Wipe the slate clean for the next RTT. */
- vegas->cntRTT = 0;
- vegas->minRTT = 0x7fffffff;
+ /* Wipe the slate clean for the next RTT. */
+ vegas->cntRTT = 0;
+ vegas->minRTT = 0x7fffffff;
+ }
}
/* Extract info for Tcp socket info provided via netlink. */
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index b2b60f3e9cd..42196ba3b0b 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -182,6 +182,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl)
case IPPROTO_UDP:
case IPPROTO_TCP:
case IPPROTO_SCTP:
+ case IPPROTO_DCCP:
if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
u16 *ports = (u16 *)xprth;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 76ff9f4fe89..a60585fd85a 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -137,6 +137,7 @@ static int addrconf_ifdown(struct net_device *dev, int how);
static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags);
static void addrconf_dad_timer(unsigned long data);
static void addrconf_dad_completed(struct inet6_ifaddr *ifp);
+static void addrconf_dad_run(struct inet6_dev *idev);
static void addrconf_rs_timer(unsigned long data);
static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
@@ -379,8 +380,8 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
dev->type == ARPHRD_NONE ||
dev->type == ARPHRD_SIT) {
printk(KERN_INFO
- "Disabled Privacy Extensions on device %p(%s)\n",
- dev, dev->name);
+ "%s: Disabled Privacy Extensions\n",
+ dev->name);
ndev->cnf.use_tempaddr = -1;
} else {
in6_dev_hold(ndev);
@@ -388,6 +389,9 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
}
#endif
+ if (netif_carrier_ok(dev))
+ ndev->if_flags |= IF_READY;
+
write_lock_bh(&addrconf_lock);
dev->ip6_ptr = ndev;
write_unlock_bh(&addrconf_lock);
@@ -415,6 +419,7 @@ static struct inet6_dev * ipv6_find_idev(struct net_device *dev)
if ((idev = ipv6_add_dev(dev)) == NULL)
return NULL;
}
+
if (dev->flags&IFF_UP)
ipv6_mc_up(idev);
return idev;
@@ -634,8 +639,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
}
#endif
- for (ifap = &idev->addr_list; (ifa=*ifap) != NULL;
- ifap = &ifa->if_next) {
+ for (ifap = &idev->addr_list; (ifa=*ifap) != NULL;) {
if (ifa == ifp) {
*ifap = ifa->if_next;
__in6_ifa_put(ifp);
@@ -643,6 +647,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
if (!(ifp->flags & IFA_F_PERMANENT) || onlink > 0)
break;
deleted = 1;
+ continue;
} else if (ifp->flags & IFA_F_PERMANENT) {
if (ipv6_prefix_equal(&ifa->addr, &ifp->addr,
ifp->prefix_len)) {
@@ -666,6 +671,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
}
}
}
+ ifap = &ifa->if_next;
}
write_unlock_bh(&idev->lock);
@@ -903,11 +909,18 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
score.addr_type = __ipv6_addr_type(&ifa->addr);
- /* Rule 0: Candidate Source Address (section 4)
+ /* Rule 0:
+ * - Tentative Address (RFC2462 section 5.4)
+ * - A tentative address is not considered
+ * "assigned to an interface" in the traditional
+ * sense.
+ * - Candidate Source Address (section 4)
* - In any case, anycast addresses, multicast
* addresses, and the unspecified address MUST
* NOT be included in a candidate set.
*/
+ if (ifa->flags & IFA_F_TENTATIVE)
+ continue;
if (unlikely(score.addr_type == IPV6_ADDR_ANY ||
score.addr_type & IPV6_ADDR_MULTICAST)) {
LIMIT_NETDEBUG(KERN_DEBUG
@@ -1215,10 +1228,8 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
/* Gets referenced address, destroys ifaddr */
-void addrconf_dad_failure(struct inet6_ifaddr *ifp)
+void addrconf_dad_stop(struct inet6_ifaddr *ifp)
{
- if (net_ratelimit())
- printk(KERN_INFO "%s: duplicate address detected!\n", ifp->idev->dev->name);
if (ifp->flags&IFA_F_PERMANENT) {
spin_lock_bh(&ifp->lock);
addrconf_del_timer(ifp);
@@ -1244,6 +1255,12 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
ipv6_del_addr(ifp);
}
+void addrconf_dad_failure(struct inet6_ifaddr *ifp)
+{
+ if (net_ratelimit())
+ printk(KERN_INFO "%s: duplicate address detected!\n", ifp->idev->dev->name);
+ addrconf_dad_stop(ifp);
+}
/* Join to solicited addr multicast group. */
@@ -1596,9 +1613,17 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
not good.
*/
if (valid_lft >= 0x7FFFFFFF/HZ)
- rt_expires = 0;
+ rt_expires = 0x7FFFFFFF - (0x7FFFFFFF % HZ);
else
- rt_expires = jiffies + valid_lft * HZ;
+ rt_expires = valid_lft * HZ;
+
+ /*
+ * We convert this (in jiffies) to clock_t later.
+ * Avoid arithmetic overflow there as well.
+ * Overflow can happen only if HZ < USER_HZ.
+ */
+ if (HZ < USER_HZ && rt_expires > 0x7FFFFFFF / USER_HZ)
+ rt_expires = 0x7FFFFFFF / USER_HZ;
if (pinfo->onlink) {
struct rt6_info *rt;
@@ -1610,12 +1635,12 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
ip6_del_rt(rt, NULL, NULL, NULL);
rt = NULL;
} else {
- rt->rt6i_expires = rt_expires;
+ rt->rt6i_expires = jiffies + rt_expires;
}
}
} else if (valid_lft) {
addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len,
- dev, rt_expires, RTF_ADDRCONF|RTF_EXPIRES|RTF_PREFIX_RT);
+ dev, jiffies_to_clock_t(rt_expires), RTF_ADDRCONF|RTF_EXPIRES|RTF_PREFIX_RT);
}
if (rt)
dst_release(&rt->u.dst);
@@ -2125,9 +2150,42 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
{
struct net_device *dev = (struct net_device *) data;
struct inet6_dev *idev = __in6_dev_get(dev);
+ int run_pending = 0;
switch(event) {
case NETDEV_UP:
+ case NETDEV_CHANGE:
+ if (event == NETDEV_UP) {
+ if (!netif_carrier_ok(dev)) {
+ /* device is not ready yet. */
+ printk(KERN_INFO
+ "ADDRCONF(NETDEV_UP): %s: "
+ "link is not ready\n",
+ dev->name);
+ break;
+ }
+ } else {
+ if (!netif_carrier_ok(dev)) {
+ /* device is still not ready. */
+ break;
+ }
+
+ if (idev) {
+ if (idev->if_flags & IF_READY) {
+ /* device is already configured. */
+ break;
+ }
+ idev->if_flags |= IF_READY;
+ }
+
+ printk(KERN_INFO
+ "ADDRCONF(NETDEV_CHANGE): %s: "
+ "link becomes ready\n",
+ dev->name);
+
+ run_pending = 1;
+ }
+
switch(dev->type) {
case ARPHRD_SIT:
addrconf_sit_config(dev);
@@ -2144,6 +2202,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
break;
};
if (idev) {
+ if (run_pending)
+ addrconf_dad_run(idev);
+
/* If the MTU changed during the interface down, when the
interface up, the changed MTU must be reflected in the
idev as well as routers.
@@ -2178,8 +2239,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
*/
addrconf_ifdown(dev, event != NETDEV_DOWN);
break;
- case NETDEV_CHANGE:
- break;
+
case NETDEV_CHANGENAME:
#ifdef CONFIG_SYSCTL
if (idev) {
@@ -2260,7 +2320,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
/* Step 3: clear flags for stateless addrconf */
if (how != 1)
- idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD);
+ idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY);
/* Step 4: clear address list */
#ifdef CONFIG_IPV6_PRIVACY
@@ -2369,11 +2429,20 @@ out:
/*
* Duplicate Address Detection
*/
+static void addrconf_dad_kick(struct inet6_ifaddr *ifp)
+{
+ unsigned long rand_num;
+ struct inet6_dev *idev = ifp->idev;
+
+ rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1);
+ ifp->probes = idev->cnf.dad_transmits;
+ addrconf_mod_timer(ifp, AC_DAD, rand_num);
+}
+
static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
{
struct inet6_dev *idev = ifp->idev;
struct net_device *dev = idev->dev;
- unsigned long rand_num;
addrconf_join_solict(dev, &ifp->addr);
@@ -2382,7 +2451,6 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
flags);
net_srandom(ifp->addr.s6_addr32[3]);
- rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1);
read_lock_bh(&idev->lock);
if (ifp->dead)
@@ -2399,9 +2467,19 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
return;
}
- ifp->probes = idev->cnf.dad_transmits;
- addrconf_mod_timer(ifp, AC_DAD, rand_num);
-
+ if (!(idev->if_flags & IF_READY)) {
+ spin_unlock_bh(&ifp->lock);
+ read_unlock_bh(&idev->lock);
+ /*
+ * If the defice is not ready:
+ * - keep it tentative if it is a permanent address.
+ * - otherwise, kill it.
+ */
+ in6_ifa_hold(ifp);
+ addrconf_dad_stop(ifp);
+ return;
+ }
+ addrconf_dad_kick(ifp);
spin_unlock_bh(&ifp->lock);
out:
read_unlock_bh(&idev->lock);
@@ -2484,6 +2562,22 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
}
}
+static void addrconf_dad_run(struct inet6_dev *idev) {
+ struct inet6_ifaddr *ifp;
+
+ read_lock_bh(&idev->lock);
+ for (ifp = idev->addr_list; ifp; ifp = ifp->if_next) {
+ spin_lock_bh(&ifp->lock);
+ if (!(ifp->flags & IFA_F_TENTATIVE)) {
+ spin_unlock_bh(&ifp->lock);
+ continue;
+ }
+ spin_unlock_bh(&ifp->lock);
+ addrconf_dad_kick(ifp);
+ }
+ read_unlock_bh(&idev->lock);
+}
+
#ifdef CONFIG_PROC_FS
struct if6_iter_state {
int bucket;
@@ -2689,6 +2783,9 @@ restart:
in6_ifa_hold(ifpub);
spin_unlock(&ifp->lock);
read_unlock(&addrconf_hash_lock);
+ spin_lock(&ifpub->lock);
+ ifpub->regen_count = 0;
+ spin_unlock(&ifpub->lock);
ipv6_create_tempaddr(ifpub, ifp);
in6_ifa_put(ifpub);
in6_ifa_put(ifp);
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 40d9a1935ab..8bfbe997079 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -248,7 +248,7 @@ static u32 esp6_get_max_size(struct xfrm_state *x, int mtu)
if (esp->conf.padlen)
mtu = ALIGN(mtu, esp->conf.padlen);
- return mtu + x->props.header_len + esp->auth.icv_full_len;
+ return mtu + x->props.header_len + esp->auth.icv_trunc_len;
}
static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 34a332225c1..6ec6a2b549b 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -328,8 +328,10 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
iif = skb->dev->ifindex;
/*
- * Must not send if we know that source is Anycast also.
- * for now we don't know that.
+ * Must not send error if the source does not uniquely
+ * identify a single node (RFC2463 Section 2.4).
+ * We check unspecified / multicast addresses here,
+ * and anycast addresses will be checked later.
*/
if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n");
@@ -373,6 +375,16 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
err = ip6_dst_lookup(sk, &dst, &fl);
if (err)
goto out;
+
+ /*
+ * We won't send icmp if the destination is known
+ * anycast.
+ */
+ if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
+ LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n");
+ goto out_dst_release;
+ }
+
if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
goto out;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index fd939da090c..f829a4ad3cc 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -170,7 +170,7 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
#define MLDV2_QQIC(value) MLDV2_EXP(0x80, 4, 3, value)
#define MLDV2_MRC(value) MLDV2_EXP(0x8000, 12, 3, value)
-#define IPV6_MLD_MAX_MSF 10
+#define IPV6_MLD_MAX_MSF 64
int sysctl_mld_max_msf = IPV6_MLD_MAX_MSF;
@@ -224,6 +224,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr)
mc_lst->ifindex = dev->ifindex;
mc_lst->sfmode = MCAST_EXCLUDE;
+ mc_lst->sflock = RW_LOCK_UNLOCKED;
mc_lst->sflist = NULL;
/*
@@ -360,6 +361,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
struct ip6_sf_socklist *psl;
int i, j, rv;
int leavegroup = 0;
+ int pmclocked = 0;
int err;
if (pgsr->gsr_group.ss_family != AF_INET6 ||
@@ -403,6 +405,9 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
pmc->sfmode = omode;
}
+ write_lock_bh(&pmc->sflock);
+ pmclocked = 1;
+
psl = pmc->sflist;
if (!add) {
if (!psl)
@@ -475,6 +480,8 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
/* update the interface list */
ip6_mc_add_src(idev, group, omode, 1, source, 1);
done:
+ if (pmclocked)
+ write_unlock_bh(&pmc->sflock);
read_unlock_bh(&ipv6_sk_mc_lock);
read_unlock_bh(&idev->lock);
in6_dev_put(idev);
@@ -510,6 +517,8 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
dev = idev->dev;
err = 0;
+ read_lock_bh(&ipv6_sk_mc_lock);
+
if (gsf->gf_fmode == MCAST_INCLUDE && gsf->gf_numsrc == 0) {
leavegroup = 1;
goto done;
@@ -549,6 +558,8 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
newpsl = NULL;
(void) ip6_mc_add_src(idev, group, gsf->gf_fmode, 0, NULL, 0);
}
+
+ write_lock_bh(&pmc->sflock);
psl = pmc->sflist;
if (psl) {
(void) ip6_mc_del_src(idev, group, pmc->sfmode,
@@ -558,8 +569,10 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
(void) ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0);
pmc->sflist = newpsl;
pmc->sfmode = gsf->gf_fmode;
+ write_unlock_bh(&pmc->sflock);
err = 0;
done:
+ read_unlock_bh(&ipv6_sk_mc_lock);
read_unlock_bh(&idev->lock);
in6_dev_put(idev);
dev_put(dev);
@@ -592,6 +605,11 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
dev = idev->dev;
err = -EADDRNOTAVAIL;
+ /*
+ * changes to the ipv6_mc_list require the socket lock and
+ * a read lock on ip6_sk_mc_lock. We have the socket lock,
+ * so reading the list is safe.
+ */
for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) {
if (pmc->ifindex != gsf->gf_interface)
@@ -614,6 +632,10 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
copy_to_user(optval, gsf, GROUP_FILTER_SIZE(0))) {
return -EFAULT;
}
+ /* changes to psl require the socket lock, a read lock on
+ * on ipv6_sk_mc_lock and a write lock on pmc->sflock. We
+ * have the socket lock, so reading here is safe.
+ */
for (i=0; i<copycount; i++) {
struct sockaddr_in6 *psin6;
struct sockaddr_storage ss;
@@ -650,6 +672,7 @@ int inet6_mc_check(struct sock *sk, struct in6_addr *mc_addr,
read_unlock(&ipv6_sk_mc_lock);
return 1;
}
+ read_lock(&mc->sflock);
psl = mc->sflist;
if (!psl) {
rv = mc->sfmode == MCAST_EXCLUDE;
@@ -665,6 +688,7 @@ int inet6_mc_check(struct sock *sk, struct in6_addr *mc_addr,
if (mc->sfmode == MCAST_EXCLUDE && i < psl->sl_count)
rv = 0;
}
+ read_unlock(&mc->sflock);
read_unlock(&ipv6_sk_mc_lock);
return rv;
@@ -1068,7 +1092,8 @@ static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime)
ma->mca_flags |= MAF_TIMER_RUNNING;
}
-static void mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
+/* mark EXCLUDE-mode sources */
+static int mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs,
struct in6_addr *srcs)
{
struct ip6_sf_list *psf;
@@ -1078,13 +1103,53 @@ static void mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
for (psf=pmc->mca_sources; psf; psf=psf->sf_next) {
if (scount == nsrcs)
break;
- for (i=0; i<nsrcs; i++)
+ for (i=0; i<nsrcs; i++) {
+ /* skip inactive filters */
+ if (pmc->mca_sfcount[MCAST_INCLUDE] ||
+ pmc->mca_sfcount[MCAST_EXCLUDE] !=
+ psf->sf_count[MCAST_EXCLUDE])
+ continue;
+ if (ipv6_addr_equal(&srcs[i], &psf->sf_addr)) {
+ scount++;
+ break;
+ }
+ }
+ }
+ pmc->mca_flags &= ~MAF_GSQUERY;
+ if (scount == nsrcs) /* all sources excluded */
+ return 0;
+ return 1;
+}
+
+static int mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
+ struct in6_addr *srcs)
+{
+ struct ip6_sf_list *psf;
+ int i, scount;
+
+ if (pmc->mca_sfmode == MCAST_EXCLUDE)
+ return mld_xmarksources(pmc, nsrcs, srcs);
+
+ /* mark INCLUDE-mode sources */
+
+ scount = 0;
+ for (psf=pmc->mca_sources; psf; psf=psf->sf_next) {
+ if (scount == nsrcs)
+ break;
+ for (i=0; i<nsrcs; i++) {
if (ipv6_addr_equal(&srcs[i], &psf->sf_addr)) {
psf->sf_gsresp = 1;
scount++;
break;
}
+ }
+ }
+ if (!scount) {
+ pmc->mca_flags &= ~MAF_GSQUERY;
+ return 0;
}
+ pmc->mca_flags |= MAF_GSQUERY;
+ return 1;
}
int igmp6_event_query(struct sk_buff *skb)
@@ -1167,7 +1232,7 @@ int igmp6_event_query(struct sk_buff *skb)
/* mark sources to include, if group & source-specific */
if (mlh2->nsrcs != 0) {
if (!pskb_may_pull(skb, srcs_offset +
- mlh2->nsrcs * sizeof(struct in6_addr))) {
+ ntohs(mlh2->nsrcs) * sizeof(struct in6_addr))) {
in6_dev_put(idev);
return -EINVAL;
}
@@ -1203,10 +1268,9 @@ int igmp6_event_query(struct sk_buff *skb)
else
ma->mca_flags &= ~MAF_GSQUERY;
}
- if (ma->mca_flags & MAF_GSQUERY)
- mld_marksources(ma, ntohs(mlh2->nsrcs),
- mlh2->srcs);
- igmp6_group_queried(ma, max_delay);
+ if (!(ma->mca_flags & MAF_GSQUERY) ||
+ mld_marksources(ma, ntohs(mlh2->nsrcs), mlh2->srcs))
+ igmp6_group_queried(ma, max_delay);
spin_unlock_bh(&ma->mca_lock);
if (group_type != IPV6_ADDR_ANY)
break;
@@ -1281,7 +1345,18 @@ static int is_in(struct ifmcaddr6 *pmc, struct ip6_sf_list *psf, int type,
case MLD2_MODE_IS_EXCLUDE:
if (gdeleted || sdeleted)
return 0;
- return !((pmc->mca_flags & MAF_GSQUERY) && !psf->sf_gsresp);
+ if (!((pmc->mca_flags & MAF_GSQUERY) && !psf->sf_gsresp)) {
+ if (pmc->mca_sfmode == MCAST_INCLUDE)
+ return 1;
+ /* don't include if this source is excluded
+ * in all filters
+ */
+ if (psf->sf_count[MCAST_INCLUDE])
+ return 0;
+ return pmc->mca_sfcount[MCAST_EXCLUDE] ==
+ psf->sf_count[MCAST_EXCLUDE];
+ }
+ return 0;
case MLD2_CHANGE_TO_INCLUDE:
if (gdeleted || sdeleted)
return 0;
@@ -1450,7 +1525,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
struct mld2_report *pmr;
struct mld2_grec *pgr = NULL;
struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list;
- int scount, first, isquery, truncate;
+ int scount, stotal, first, isquery, truncate;
if (pmc->mca_flags & MAF_NOREPORT)
return skb;
@@ -1460,25 +1535,13 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
truncate = type == MLD2_MODE_IS_EXCLUDE ||
type == MLD2_CHANGE_TO_EXCLUDE;
+ stotal = scount = 0;
+
psf_list = sdeleted ? &pmc->mca_tomb : &pmc->mca_sources;
- if (!*psf_list) {
- if (type == MLD2_ALLOW_NEW_SOURCES ||
- type == MLD2_BLOCK_OLD_SOURCES)
- return skb;
- if (pmc->mca_crcount || isquery) {
- /* make sure we have room for group header and at
- * least one source.
- */
- if (skb && AVAILABLE(skb) < sizeof(struct mld2_grec)+
- sizeof(struct in6_addr)) {
- mld_sendpack(skb);
- skb = NULL; /* add_grhead will get a new one */
- }
- skb = add_grhead(skb, pmc, type, &pgr);
- }
- return skb;
- }
+ if (!*psf_list)
+ goto empty_source;
+
pmr = skb ? (struct mld2_report *)skb->h.raw : NULL;
/* EX and TO_EX get a fresh packet, if needed */
@@ -1491,7 +1554,6 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
}
}
first = 1;
- scount = 0;
psf_prev = NULL;
for (psf=*psf_list; psf; psf=psf_next) {
struct in6_addr *psrc;
@@ -1525,7 +1587,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
}
psrc = (struct in6_addr *)skb_put(skb, sizeof(*psrc));
*psrc = psf->sf_addr;
- scount++;
+ scount++; stotal++;
if ((type == MLD2_ALLOW_NEW_SOURCES ||
type == MLD2_BLOCK_OLD_SOURCES) && psf->sf_crcount) {
psf->sf_crcount--;
@@ -1540,6 +1602,21 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
}
psf_prev = psf;
}
+
+empty_source:
+ if (!stotal) {
+ if (type == MLD2_ALLOW_NEW_SOURCES ||
+ type == MLD2_BLOCK_OLD_SOURCES)
+ return skb;
+ if (pmc->mca_crcount || isquery) {
+ /* make sure we have room for group header */
+ if (skb && AVAILABLE(skb) < sizeof(struct mld2_grec)) {
+ mld_sendpack(skb);
+ skb = NULL; /* add_grhead will get a new one */
+ }
+ skb = add_grhead(skb, pmc, type, &pgr);
+ }
+ }
if (pgr)
pgr->grec_nsrcs = htons(scount);
@@ -1621,11 +1698,11 @@ static void mld_send_cr(struct inet6_dev *idev)
skb = add_grec(skb, pmc, dtype, 1, 1);
}
if (pmc->mca_crcount) {
- pmc->mca_crcount--;
if (pmc->mca_sfmode == MCAST_EXCLUDE) {
type = MLD2_CHANGE_TO_INCLUDE;
skb = add_grec(skb, pmc, type, 1, 0);
}
+ pmc->mca_crcount--;
if (pmc->mca_crcount == 0) {
mld_clear_zeros(&pmc->mca_tomb);
mld_clear_zeros(&pmc->mca_sources);
@@ -1659,12 +1736,12 @@ static void mld_send_cr(struct inet6_dev *idev)
/* filter mode changes */
if (pmc->mca_crcount) {
- pmc->mca_crcount--;
if (pmc->mca_sfmode == MCAST_EXCLUDE)
type = MLD2_CHANGE_TO_EXCLUDE;
else
type = MLD2_CHANGE_TO_INCLUDE;
skb = add_grec(skb, pmc, type, 0, 0);
+ pmc->mca_crcount--;
}
spin_unlock_bh(&pmc->mca_lock);
}
@@ -2023,6 +2100,9 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
{
int err;
+ /* callers have the socket lock and a write lock on ipv6_sk_mc_lock,
+ * so no other readers or writers of iml or its sflist
+ */
if (iml->sflist == 0) {
/* any-source empty exclude case */
return ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0);
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 060d6120241..04912f9b35c 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -211,7 +211,7 @@ config IP6_NF_TARGET_REJECT
config IP6_NF_TARGET_NFQUEUE
tristate "NFQUEUE Target Support"
- depends on IP_NF_IPTABLES
+ depends on IP6_NF_IPTABLES
help
This Target replaced the old obsolete QUEUE target.
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index c0f1da5497a..a7e03cfacd0 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -68,8 +68,8 @@ static int icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple,
[ICMPV6_NI_REPLY - 128] = ICMPV6_NI_REPLY +1
};
- __u8 type = orig->dst.u.icmp.type - 128;
- if (type >= sizeof(invmap) || !invmap[type])
+ int type = orig->dst.u.icmp.type - 128;
+ if (type < 0 || type >= sizeof(invmap) || !invmap[type])
return 0;
tuple->src.u.icmp.id = orig->src.u.icmp.id;
@@ -129,12 +129,12 @@ static int icmpv6_new(struct nf_conn *conntrack,
[ICMPV6_ECHO_REQUEST - 128] = 1,
[ICMPV6_NI_QUERY - 128] = 1
};
+ int type = conntrack->tuplehash[0].tuple.dst.u.icmp.type - 128;
- if (conntrack->tuplehash[0].tuple.dst.u.icmp.type - 128 >= sizeof(valid_new)
- || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type - 128]) {
+ if (type < 0 || type >= sizeof(valid_new) || !valid_new[type]) {
/* Can't create a new ICMPv6 `conn' with this. */
- DEBUGP("icmp: can't create new conn with type %u\n",
- conntrack->tuplehash[0].tuple.dst.u.icmp.type);
+ DEBUGP("icmpv6: can't create new conn with type %u\n",
+ type + 128);
NF_CT_DUMP_TUPLE(&conntrack->tuplehash[0].tuple);
return 0;
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a7a537b5059..66140f13d11 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -413,11 +413,14 @@ static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
rt = ip6_rt_copy(ort);
if (rt) {
- ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
-
- if (!(rt->rt6i_flags&RTF_GATEWAY))
+ if (!(rt->rt6i_flags&RTF_GATEWAY)) {
+ if (rt->rt6i_dst.plen != 128 &&
+ ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
+ rt->rt6i_flags |= RTF_ANYCAST;
ipv6_addr_copy(&rt->rt6i_gateway, daddr);
+ }
+ ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
rt->rt6i_dst.plen = 128;
rt->rt6i_flags |= RTF_CACHE;
rt->u.dst.flags |= DST_HOST;
@@ -829,7 +832,7 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
}
rt->u.dst.obsolete = -1;
- rt->rt6i_expires = clock_t_to_jiffies(rtmsg->rtmsg_info);
+ rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
if (nlh && (r = NLMSG_DATA(nlh))) {
rt->rt6i_protocol = r->rtm_protocol;
} else {
@@ -1413,7 +1416,9 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
rt->u.dst.obsolete = -1;
rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
- if (!anycast)
+ if (anycast)
+ rt->rt6i_flags |= RTF_ANYCAST;
+ else
rt->rt6i_flags |= RTF_LOCAL;
rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
if (rt->rt6i_nexthop == NULL) {
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 62c0e5bd931..8827389abaf 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -992,13 +992,12 @@ static void tcp_v6_send_reset(struct sk_buff *skb)
/* sk = NULL, but it is safe for now. RST socket required. */
if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
- if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0)
+ if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
+ ip6_xmit(NULL, buff, &fl, NULL, 0);
+ TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
+ TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
return;
-
- ip6_xmit(NULL, buff, &fl, NULL, 0);
- TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
- TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
- return;
+ }
}
kfree_skb(buff);
@@ -1057,11 +1056,11 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32
fl.fl_ip_sport = t1->source;
if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
- if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0)
+ if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
+ ip6_xmit(NULL, buff, &fl, NULL, 0);
+ TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
return;
- ip6_xmit(NULL, buff, &fl, NULL, 0);
- TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
- return;
+ }
}
kfree_skb(buff);
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index cf1d91e74c8..69bd957380e 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -214,6 +214,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
case IPPROTO_UDP:
case IPPROTO_TCP:
case IPPROTO_SCTP:
+ case IPPROTO_DCCP:
if (pskb_may_pull(skb, skb->nh.raw + offset + 4 - skb->data)) {
u16 *ports = (u16 *)exthdr;
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index a84f9221e5f..794c41d19b2 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -61,8 +61,8 @@ config NF_CONNTRACK_MARK
instead of the individual packets.
config NF_CONNTRACK_EVENTS
- bool "Connection tracking events"
- depends on NF_CONNTRACK
+ bool "Connection tracking events (EXPERIMENTAL)"
+ depends on EXPERIMENTAL && NF_CONNTRACK
help
If this option is enabled, the connection tracking code will
provide a notifier chain that can be used by other kernel code
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 1da678303d7..a7c7b490cf2 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1383,6 +1383,9 @@ void nf_conntrack_cleanup(void)
schedule();
goto i_see_dead_people;
}
+ /* wait until all references to nf_conntrack_untracked are dropped */
+ while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1)
+ schedule();
for (i = 0; i < NF_CT_F_NUM; i++) {
if (nf_ct_cache[i].use == 0)
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index a60c59b9763..95fdf04f1d8 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -162,7 +162,7 @@ nfnetlink_check_attributes(struct nfnetlink_subsystem *subsys,
return -EINVAL;
}
- min_len = NLMSG_ALIGN(sizeof(struct nfgenmsg));
+ min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
if (unlikely(nlh->nlmsg_len < min_len))
return -EINVAL;
@@ -236,8 +236,7 @@ static inline int nfnetlink_rcv_msg(struct sk_buff *skb,
}
/* All the messages must at least contain nfgenmsg */
- if (nlh->nlmsg_len <
- NLMSG_LENGTH(NLMSG_ALIGN(sizeof(struct nfgenmsg)))) {
+ if (nlh->nlmsg_len < NLMSG_SPACE(sizeof(struct nfgenmsg))) {
DEBUGP("received message was too short\n");
return 0;
}
diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c
index 004e8599b8f..a7d88b5ad75 100644
--- a/net/netrom/nr_in.c
+++ b/net/netrom/nr_in.c
@@ -99,7 +99,7 @@ static int nr_state1_machine(struct sock *sk, struct sk_buff *skb,
break;
case NR_RESET:
- if (sysctl_netrom_reset_circuit);
+ if (sysctl_netrom_reset_circuit)
nr_disconnect(sk, ECONNRESET);
break;
@@ -130,7 +130,7 @@ static int nr_state2_machine(struct sock *sk, struct sk_buff *skb,
break;
case NR_RESET:
- if (sysctl_netrom_reset_circuit);
+ if (sysctl_netrom_reset_circuit)
nr_disconnect(sk, ECONNRESET);
break;
@@ -265,7 +265,7 @@ static int nr_state3_machine(struct sock *sk, struct sk_buff *skb, int frametype
break;
case NR_RESET:
- if (sysctl_netrom_reset_circuit);
+ if (sysctl_netrom_reset_circuit)
nr_disconnect(sk, ECONNRESET);
break;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 499ae3df4a4..3e246276041 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1587,23 +1587,47 @@ static inline struct page *pg_vec_endpage(char *one_pg_vec, unsigned int order)
return virt_to_page(one_pg_vec + (PAGE_SIZE << order) - 1);
}
-static void free_pg_vec(char **pg_vec, unsigned order, unsigned len)
+static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
{
int i;
- for (i=0; i<len; i++) {
- if (pg_vec[i]) {
- struct page *page, *pend;
-
- pend = pg_vec_endpage(pg_vec[i], order);
- for (page = virt_to_page(pg_vec[i]); page <= pend; page++)
- ClearPageReserved(page);
- free_pages((unsigned long)pg_vec[i], order);
- }
+ for (i = 0; i < len; i++) {
+ if (likely(pg_vec[i]))
+ free_pages((unsigned long) pg_vec[i], order);
}
kfree(pg_vec);
}
+static inline char *alloc_one_pg_vec_page(unsigned long order)
+{
+ return (char *) __get_free_pages(GFP_KERNEL | __GFP_COMP | __GFP_ZERO,
+ order);
+}
+
+static char **alloc_pg_vec(struct tpacket_req *req, int order)
+{
+ unsigned int block_nr = req->tp_block_nr;
+ char **pg_vec;
+ int i;
+
+ pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL);
+ if (unlikely(!pg_vec))
+ goto out;
+
+ for (i = 0; i < block_nr; i++) {
+ pg_vec[i] = alloc_one_pg_vec_page(order);
+ if (unlikely(!pg_vec[i]))
+ goto out_free_pgvec;
+ }
+
+out:
+ return pg_vec;
+
+out_free_pgvec:
+ free_pg_vec(pg_vec, order, block_nr);
+ pg_vec = NULL;
+ goto out;
+}
static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
{
@@ -1617,64 +1641,46 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing
/* Sanity tests and some calculations */
- if (po->pg_vec)
+ if (unlikely(po->pg_vec))
return -EBUSY;
- if ((int)req->tp_block_size <= 0)
+ if (unlikely((int)req->tp_block_size <= 0))
return -EINVAL;
- if (req->tp_block_size&(PAGE_SIZE-1))
+ if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
return -EINVAL;
- if (req->tp_frame_size < TPACKET_HDRLEN)
+ if (unlikely(req->tp_frame_size < TPACKET_HDRLEN))
return -EINVAL;
- if (req->tp_frame_size&(TPACKET_ALIGNMENT-1))
+ if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
return -EINVAL;
po->frames_per_block = req->tp_block_size/req->tp_frame_size;
- if (po->frames_per_block <= 0)
+ if (unlikely(po->frames_per_block <= 0))
return -EINVAL;
- if (po->frames_per_block*req->tp_block_nr != req->tp_frame_nr)
+ if (unlikely((po->frames_per_block * req->tp_block_nr) !=
+ req->tp_frame_nr))
return -EINVAL;
- /* OK! */
-
- /* Allocate page vector */
- while ((PAGE_SIZE<<order) < req->tp_block_size)
- order++;
err = -ENOMEM;
-
- pg_vec = kmalloc(req->tp_block_nr*sizeof(char *), GFP_KERNEL);
- if (pg_vec == NULL)
+ order = get_order(req->tp_block_size);
+ pg_vec = alloc_pg_vec(req, order);
+ if (unlikely(!pg_vec))
goto out;
- memset(pg_vec, 0, req->tp_block_nr*sizeof(char **));
-
- for (i=0; i<req->tp_block_nr; i++) {
- struct page *page, *pend;
- pg_vec[i] = (char *)__get_free_pages(GFP_KERNEL, order);
- if (!pg_vec[i])
- goto out_free_pgvec;
-
- pend = pg_vec_endpage(pg_vec[i], order);
- for (page = virt_to_page(pg_vec[i]); page <= pend; page++)
- SetPageReserved(page);
- }
- /* Page vector is allocated */
l = 0;
- for (i=0; i<req->tp_block_nr; i++) {
+ for (i = 0; i < req->tp_block_nr; i++) {
char *ptr = pg_vec[i];
struct tpacket_hdr *header;
int k;
- for (k=0; k<po->frames_per_block; k++) {
-
- header = (struct tpacket_hdr*)ptr;
+ for (k = 0; k < po->frames_per_block; k++) {
+ header = (struct tpacket_hdr *) ptr;
header->tp_status = TP_STATUS_KERNEL;
ptr += req->tp_frame_size;
}
}
/* Done */
} else {
- if (req->tp_frame_nr)
+ if (unlikely(req->tp_frame_nr))
return -EINVAL;
}
@@ -1701,7 +1707,7 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing
spin_lock_bh(&sk->sk_receive_queue.lock);
pg_vec = XC(po->pg_vec, pg_vec);
- po->frame_max = req->tp_frame_nr-1;
+ po->frame_max = (req->tp_frame_nr - 1);
po->head = 0;
po->frame_size = req->tp_frame_size;
spin_unlock_bh(&sk->sk_receive_queue.lock);
@@ -1728,7 +1734,6 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing
release_sock(sk);
-out_free_pgvec:
if (pg_vec)
free_pg_vec(pg_vec, order, req->tp_block_nr);
out:
@@ -1755,17 +1760,19 @@ static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_st
if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE)
goto out;
- atomic_inc(&po->mapped);
start = vma->vm_start;
- err = -EAGAIN;
- for (i=0; i<po->pg_vec_len; i++) {
- if (remap_pfn_range(vma, start,
- __pa(po->pg_vec[i]) >> PAGE_SHIFT,
- po->pg_vec_pages*PAGE_SIZE,
- vma->vm_page_prot))
- goto out;
- start += po->pg_vec_pages*PAGE_SIZE;
+ for (i = 0; i < po->pg_vec_len; i++) {
+ struct page *page = virt_to_page(po->pg_vec[i]);
+ int pg_num;
+
+ for (pg_num = 0; pg_num < po->pg_vec_pages; pg_num++, page++) {
+ err = vm_insert_page(vma, start, page);
+ if (unlikely(err))
+ goto out;
+ start += PAGE_SIZE;
+ }
}
+ atomic_inc(&po->mapped);
vma->vm_ops = &packet_mmap_ops;
err = 0;
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 8aebe8f6d27..2ce1cb2aa2e 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -34,7 +34,7 @@
#include <net/sch_generic.h>
#include <net/act_api.h>
-#if 1 /* control */
+#if 0 /* control */
#define DPRINTK(format, args...) printk(KERN_DEBUG format, ##args)
#else
#define DPRINTK(format, args...)
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index d890dfa8818..9df888e932c 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -156,10 +156,6 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk)
sizeof(struct sk_buff) +
sizeof(struct sctp_chunk);
- sk->sk_wmem_queued += SCTP_DATA_SNDSIZE(chunk) +
- sizeof(struct sk_buff) +
- sizeof(struct sctp_chunk);
-
atomic_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
}
@@ -3425,7 +3421,7 @@ static int sctp_copy_laddrs_to_user_old(struct sock *sk, __u16 port, int max_add
}
static int sctp_copy_laddrs_to_user(struct sock *sk, __u16 port,
- void * __user *to, size_t space_left)
+ void __user **to, size_t space_left)
{
struct list_head *pos;
struct sctp_sockaddr_entry *addr;
@@ -4426,7 +4422,7 @@ cleanup:
* tcp_poll(). Note that, based on these implementations, we don't
* lock the socket in this function, even though it seems that,
* ideally, locking or some other mechanisms can be used to ensure
- * the integrity of the counters (sndbuf and wmem_queued) used
+ * the integrity of the counters (sndbuf and wmem_alloc) used
* in this place. We assume that we don't need locks either until proven
* otherwise.
*
@@ -4833,10 +4829,6 @@ static void sctp_wfree(struct sk_buff *skb)
sizeof(struct sk_buff) +
sizeof(struct sctp_chunk);
- sk->sk_wmem_queued -= SCTP_DATA_SNDSIZE(chunk) +
- sizeof(struct sk_buff) +
- sizeof(struct sctp_chunk);
-
atomic_sub(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
sock_wfree(skb);
@@ -4920,7 +4912,7 @@ void sctp_write_space(struct sock *sk)
/* Is there any sndbuf space available on the socket?
*
- * Note that wmem_queued is the sum of the send buffers on all of the
+ * Note that sk_wmem_alloc is the sum of the send buffers on all of the
* associations on the same socket. For a UDP-style socket with
* multiple associations, it is possible for it to be "unwriteable"
* prematurely. I assume that this is acceptable because
@@ -4933,7 +4925,7 @@ static int sctp_writeable(struct sock *sk)
{
int amt = 0;
- amt = sk->sk_sndbuf - sk->sk_wmem_queued;
+ amt = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc);
if (amt < 0)
amt = 0;
return amt;
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index f44f46f1d8e..8d782282ec1 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -638,7 +638,7 @@ gss_pipe_destroy_msg(struct rpc_pipe_msg *msg)
gss_msg);
atomic_inc(&gss_msg->count);
gss_unhash_msg(gss_msg);
- if (msg->errno == -ETIMEDOUT || msg->errno == -EPIPE) {
+ if (msg->errno == -ETIMEDOUT) {
unsigned long now = jiffies;
if (time_after(now, ratelimit)) {
printk(KERN_WARNING "RPC: AUTH_GSS upcall timed out.\n"
@@ -786,7 +786,9 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int taskflags)
cred->gc_flags = 0;
cred->gc_base.cr_ops = &gss_credops;
cred->gc_service = gss_auth->service;
- err = gss_create_upcall(gss_auth, cred);
+ do {
+ err = gss_create_upcall(gss_auth, cred);
+ } while (err == -EAGAIN);
if (err < 0)
goto out_err;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index c76ea221798..16a2458f38f 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -174,7 +174,7 @@ rpc_pipe_release(struct inode *inode, struct file *filp)
goto out;
msg = (struct rpc_pipe_msg *)filp->private_data;
if (msg != NULL) {
- msg->errno = -EPIPE;
+ msg->errno = -EAGAIN;
list_del_init(&msg->list);
rpci->ops->destroy_msg(msg);
}
@@ -183,7 +183,7 @@ rpc_pipe_release(struct inode *inode, struct file *filp)
if (filp->f_mode & FMODE_READ)
rpci->nreaders --;
if (!rpci->nreaders)
- __rpc_purge_upcall(inode, -EPIPE);
+ __rpc_purge_upcall(inode, -EAGAIN);
if (rpci->ops->release_pipe)
rpci->ops->release_pipe(inode);
out:
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 0a51fd46a84..77e8800d412 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -990,6 +990,7 @@ static void xs_udp_connect_worker(void *args)
sk->sk_data_ready = xs_udp_data_ready;
sk->sk_write_space = xs_udp_write_space;
sk->sk_no_check = UDP_CSUM_NORCV;
+ sk->sk_allocation = GFP_ATOMIC;
xprt_set_connected(xprt);
@@ -1074,6 +1075,7 @@ static void xs_tcp_connect_worker(void *args)
sk->sk_data_ready = xs_tcp_data_ready;
sk->sk_state_change = xs_tcp_state_change;
sk->sk_write_space = xs_tcp_write_space;
+ sk->sk_allocation = GFP_ATOMIC;
/* socket options */
sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 0db9e57013f..d19e274b9c4 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -346,6 +346,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
struct xfrm_policy *pol, **p;
struct xfrm_policy *delpol = NULL;
struct xfrm_policy **newpos = NULL;
+ struct dst_entry *gc_list;
write_lock_bh(&xfrm_policy_lock);
for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL;) {
@@ -381,9 +382,36 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
xfrm_pol_hold(policy);
write_unlock_bh(&xfrm_policy_lock);
- if (delpol) {
+ if (delpol)
xfrm_policy_kill(delpol);
+
+ read_lock_bh(&xfrm_policy_lock);
+ gc_list = NULL;
+ for (policy = policy->next; policy; policy = policy->next) {
+ struct dst_entry *dst;
+
+ write_lock(&policy->lock);
+ dst = policy->bundles;
+ if (dst) {
+ struct dst_entry *tail = dst;
+ while (tail->next)
+ tail = tail->next;
+ tail->next = gc_list;
+ gc_list = dst;
+
+ policy->bundles = NULL;
+ }
+ write_unlock(&policy->lock);
+ }
+ read_unlock_bh(&xfrm_policy_lock);
+
+ while (gc_list) {
+ struct dst_entry *dst = gc_list;
+
+ gc_list = dst->next;
+ dst_free(dst);
}
+
return 0;
}
EXPORT_SYMBOL(xfrm_policy_insert);
@@ -1014,13 +1042,12 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
}
EXPORT_SYMBOL(__xfrm_route_forward);
-/* Optimize later using cookies and generation ids. */
-
static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
{
- if (!stale_bundle(dst))
- return dst;
-
+ /* If it is marked obsolete, which is how we even get here,
+ * then we have purged it from the policy bundle list and we
+ * did that for a good reason.
+ */
return NULL;
}
@@ -1104,6 +1131,16 @@ int xfrm_flush_bundles(void)
return 0;
}
+static int always_true(struct dst_entry *dst)
+{
+ return 1;
+}
+
+void xfrm_flush_all_bundles(void)
+{
+ xfrm_prune_bundles(always_true);
+}
+
void xfrm_init_pmtu(struct dst_entry *dst)
{
do {
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 7cf48aa6c95..479effc9766 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -431,6 +431,8 @@ void xfrm_state_insert(struct xfrm_state *x)
spin_lock_bh(&xfrm_state_lock);
__xfrm_state_insert(x);
spin_unlock_bh(&xfrm_state_lock);
+
+ xfrm_flush_all_bundles();
}
EXPORT_SYMBOL(xfrm_state_insert);
@@ -478,6 +480,9 @@ out:
spin_unlock_bh(&xfrm_state_lock);
xfrm_state_put_afinfo(afinfo);
+ if (!err)
+ xfrm_flush_all_bundles();
+
if (x1) {
xfrm_state_delete(x1);
xfrm_state_put(x1);