diff options
Diffstat (limited to 'net/ipv4')
51 files changed, 1325 insertions, 1428 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 7d12c6a9b19..33b7dffa773 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1385,7 +1385,7 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family, } EXPORT_SYMBOL_GPL(inet_ctl_sock_create); -unsigned long snmp_fold_field(void *mib[], int offt) +unsigned long snmp_fold_field(void __percpu *mib[], int offt) { unsigned long res = 0; int i; @@ -1398,7 +1398,7 @@ unsigned long snmp_fold_field(void *mib[], int offt) } EXPORT_SYMBOL_GPL(snmp_fold_field); -int snmp_mib_init(void *ptr[2], size_t mibsize) +int snmp_mib_init(void __percpu *ptr[2], size_t mibsize) { BUG_ON(ptr == NULL); ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long long)); @@ -1416,7 +1416,7 @@ err0: } EXPORT_SYMBOL_GPL(snmp_mib_init); -void snmp_mib_free(void *ptr[2]) +void snmp_mib_free(void __percpu *ptr[2]) { BUG_ON(ptr == NULL); free_percpu(ptr[0]); @@ -1460,25 +1460,25 @@ static const struct net_protocol icmp_protocol = { static __net_init int ipv4_mib_init_net(struct net *net) { - if (snmp_mib_init((void **)net->mib.tcp_statistics, + if (snmp_mib_init((void __percpu **)net->mib.tcp_statistics, sizeof(struct tcp_mib)) < 0) goto err_tcp_mib; - if (snmp_mib_init((void **)net->mib.ip_statistics, + if (snmp_mib_init((void __percpu **)net->mib.ip_statistics, sizeof(struct ipstats_mib)) < 0) goto err_ip_mib; - if (snmp_mib_init((void **)net->mib.net_statistics, + if (snmp_mib_init((void __percpu **)net->mib.net_statistics, sizeof(struct linux_mib)) < 0) goto err_net_mib; - if (snmp_mib_init((void **)net->mib.udp_statistics, + if (snmp_mib_init((void __percpu **)net->mib.udp_statistics, sizeof(struct udp_mib)) < 0) goto err_udp_mib; - if (snmp_mib_init((void **)net->mib.udplite_statistics, + if (snmp_mib_init((void __percpu **)net->mib.udplite_statistics, sizeof(struct udp_mib)) < 0) goto err_udplite_mib; - if (snmp_mib_init((void **)net->mib.icmp_statistics, + if (snmp_mib_init((void __percpu **)net->mib.icmp_statistics, sizeof(struct icmp_mib)) < 0) goto err_icmp_mib; - if (snmp_mib_init((void **)net->mib.icmpmsg_statistics, + if (snmp_mib_init((void __percpu **)net->mib.icmpmsg_statistics, sizeof(struct icmpmsg_mib)) < 0) goto err_icmpmsg_mib; @@ -1486,30 +1486,30 @@ static __net_init int ipv4_mib_init_net(struct net *net) return 0; err_icmpmsg_mib: - snmp_mib_free((void **)net->mib.icmp_statistics); + snmp_mib_free((void __percpu **)net->mib.icmp_statistics); err_icmp_mib: - snmp_mib_free((void **)net->mib.udplite_statistics); + snmp_mib_free((void __percpu **)net->mib.udplite_statistics); err_udplite_mib: - snmp_mib_free((void **)net->mib.udp_statistics); + snmp_mib_free((void __percpu **)net->mib.udp_statistics); err_udp_mib: - snmp_mib_free((void **)net->mib.net_statistics); + snmp_mib_free((void __percpu **)net->mib.net_statistics); err_net_mib: - snmp_mib_free((void **)net->mib.ip_statistics); + snmp_mib_free((void __percpu **)net->mib.ip_statistics); err_ip_mib: - snmp_mib_free((void **)net->mib.tcp_statistics); + snmp_mib_free((void __percpu **)net->mib.tcp_statistics); err_tcp_mib: return -ENOMEM; } static __net_exit void ipv4_mib_exit_net(struct net *net) { - snmp_mib_free((void **)net->mib.icmpmsg_statistics); - snmp_mib_free((void **)net->mib.icmp_statistics); - snmp_mib_free((void **)net->mib.udplite_statistics); - snmp_mib_free((void **)net->mib.udp_statistics); - snmp_mib_free((void **)net->mib.net_statistics); - snmp_mib_free((void **)net->mib.ip_statistics); - snmp_mib_free((void **)net->mib.tcp_statistics); + snmp_mib_free((void __percpu **)net->mib.icmpmsg_statistics); + snmp_mib_free((void __percpu **)net->mib.icmp_statistics); + snmp_mib_free((void __percpu **)net->mib.udplite_statistics); + snmp_mib_free((void __percpu **)net->mib.udp_statistics); + snmp_mib_free((void __percpu **)net->mib.net_statistics); + snmp_mib_free((void __percpu **)net->mib.ip_statistics); + snmp_mib_free((void __percpu **)net->mib.tcp_statistics); } static __net_initdata struct pernet_operations ipv4_mib_ops = { diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 7ed3e4ae93a..987b47dc69a 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -393,7 +393,7 @@ static void ah4_err(struct sk_buff *skb, u32 info) icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) return; - x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET); + x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET); if (!x) return; printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/%08x\n", diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index c95cd93acf2..c4dd1354280 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -70,6 +70,7 @@ * bonding can change the skb before * sending (e.g. insert 8021q tag). * Harald Welte : convert to make use of jenkins hash + * Jesper D. Brouer: Proxy ARP PVLAN RFC 3069 support. */ #include <linux/module.h> @@ -524,12 +525,15 @@ int arp_bind_neighbour(struct dst_entry *dst) /* * Check if we can use proxy ARP for this path */ - -static inline int arp_fwd_proxy(struct in_device *in_dev, struct rtable *rt) +static inline int arp_fwd_proxy(struct in_device *in_dev, + struct net_device *dev, struct rtable *rt) { struct in_device *out_dev; int imi, omi = -1; + if (rt->u.dst.dev == dev) + return 0; + if (!IN_DEV_PROXY_ARP(in_dev)) return 0; @@ -548,6 +552,43 @@ static inline int arp_fwd_proxy(struct in_device *in_dev, struct rtable *rt) } /* + * Check for RFC3069 proxy arp private VLAN (allow to send back to same dev) + * + * RFC3069 supports proxy arp replies back to the same interface. This + * is done to support (ethernet) switch features, like RFC 3069, where + * the individual ports are not allowed to communicate with each + * other, BUT they are allowed to talk to the upstream router. As + * described in RFC 3069, it is possible to allow these hosts to + * communicate through the upstream router, by proxy_arp'ing. + * + * RFC 3069: "VLAN Aggregation for Efficient IP Address Allocation" + * + * This technology is known by different names: + * In RFC 3069 it is called VLAN Aggregation. + * Cisco and Allied Telesyn call it Private VLAN. + * Hewlett-Packard call it Source-Port filtering or port-isolation. + * Ericsson call it MAC-Forced Forwarding (RFC Draft). + * + */ +static inline int arp_fwd_pvlan(struct in_device *in_dev, + struct net_device *dev, struct rtable *rt, + __be32 sip, __be32 tip) +{ + /* Private VLAN is only concerned about the same ethernet segment */ + if (rt->u.dst.dev != dev) + return 0; + + /* Don't reply on self probes (often done by windowz boxes)*/ + if (sip == tip) + return 0; + + if (IN_DEV_PROXY_ARP_PVLAN(in_dev)) + return 1; + else + return 0; +} + +/* * Interface to link layer: send routine and receive handler. */ @@ -833,8 +874,11 @@ static int arp_process(struct sk_buff *skb) } goto out; } else if (IN_DEV_FORWARD(in_dev)) { - if (addr_type == RTN_UNICAST && rt->u.dst.dev != dev && - (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) { + if (addr_type == RTN_UNICAST && + (arp_fwd_proxy(in_dev, dev, rt) || + arp_fwd_pvlan(in_dev, dev, rt, sip, tip) || + pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) + { n = neigh_event_ns(&arp_tbl, sha, &sip, dev); if (n) neigh_release(n); @@ -863,7 +907,8 @@ static int arp_process(struct sk_buff *skb) devices (strip is candidate) */ if (n == NULL && - arp->ar_op == htons(ARPOP_REPLY) && + (arp->ar_op == htons(ARPOP_REPLY) || + (arp->ar_op == htons(ARPOP_REQUEST) && tip == sip)) && inet_addr_type(net, sip) == RTN_UNICAST) n = __neigh_lookup(&arp_tbl, &sip, dev, 1); } @@ -1239,8 +1284,7 @@ void __init arp_init(void) dev_add_pack(&arp_packet_type); arp_proc_init(); #ifdef CONFIG_SYSCTL - neigh_sysctl_register(NULL, &arp_tbl.parms, NET_IPV4, - NET_IPV4_NEIGH, "ipv4", NULL); + neigh_sysctl_register(NULL, &arp_tbl.parms, "ipv4", NULL); #endif register_netdevice_notifier(&arp_netdev_notifier); } diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 5cdbc102a41..51ca946e339 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -64,20 +64,20 @@ static struct ipv4_devconf ipv4_devconf = { .data = { - [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1, - [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1, - [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1, - [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1, + [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1, + [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1, + [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1, + [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1, }, }; static struct ipv4_devconf ipv4_devconf_dflt = { .data = { - [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1, - [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1, - [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1, - [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1, - [NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1, + [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1, + [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1, + [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1, + [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1, + [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1, }, }; @@ -1317,14 +1317,19 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write, { int *valp = ctl->data; int val = *valp; + loff_t pos = *ppos; int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (write && *valp != val) { struct net *net = ctl->extra2; if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) { - if (!rtnl_trylock()) + if (!rtnl_trylock()) { + /* Restore the original values before restarting */ + *valp = val; + *ppos = pos; return restart_syscall(); + } if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) { inet_forward_change(net); } else if (*valp) { @@ -1360,7 +1365,7 @@ int ipv4_doint_and_flush(ctl_table *ctl, int write, { \ .procname = name, \ .data = ipv4_devconf.data + \ - NET_IPV4_CONF_ ## attr - 1, \ + IPV4_DEVCONF_ ## attr - 1, \ .maxlen = sizeof(int), \ .mode = mval, \ .proc_handler = proc, \ @@ -1381,7 +1386,7 @@ int ipv4_doint_and_flush(ctl_table *ctl, int write, static struct devinet_sysctl_table { struct ctl_table_header *sysctl_header; - struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX]; + struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX]; char *dev_name; } devinet_sysctl = { .devinet_vars = { @@ -1397,6 +1402,7 @@ static struct devinet_sysctl_table { DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE, "accept_source_route"), DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"), + DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"), DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"), DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"), DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"), @@ -1407,6 +1413,7 @@ static struct devinet_sysctl_table { DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"), DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"), DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"), + DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"), DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"), DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"), @@ -1485,8 +1492,7 @@ static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf) static void devinet_sysctl_register(struct in_device *idev) { - neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4, - NET_IPV4_NEIGH, "ipv4", NULL); + neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL); __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name, &idev->cnf); } @@ -1501,7 +1507,7 @@ static struct ctl_table ctl_forward_entry[] = { { .procname = "ip_forward", .data = &ipv4_devconf.data[ - NET_IPV4_CONF_FORWARDING - 1], + IPV4_DEVCONF_FORWARDING - 1], .maxlen = sizeof(int), .mode = 0644, .proc_handler = devinet_sysctl_forward, @@ -1545,7 +1551,7 @@ static __net_init int devinet_init_net(struct net *net) if (tbl == NULL) goto err_alloc_ctl; - tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1]; + tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1]; tbl[0].extra1 = all; tbl[0].extra2 = net; #endif diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 1948895beb6..14ca1f1c3fb 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -422,7 +422,7 @@ static void esp4_err(struct sk_buff *skb, u32 info) icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) return; - x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET); + x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET); if (!x) return; NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n", diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 3323168ee52..9b3e28ed524 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -252,6 +252,8 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, no_addr = in_dev->ifa_list == NULL; rpf = IN_DEV_RPFILTER(in_dev); accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); + if (mark && !IN_DEV_SRC_VMARK(in_dev)) + fl.mark = 0; } rcu_read_unlock(); @@ -881,7 +883,7 @@ static void nl_fib_input(struct sk_buff *skb) netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT); } -static int nl_fib_lookup_init(struct net *net) +static int __net_init nl_fib_lookup_init(struct net *net) { struct sock *sk; sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0, @@ -1002,7 +1004,7 @@ fail: return err; } -static void __net_exit ip_fib_net_exit(struct net *net) +static void ip_fib_net_exit(struct net *net) { unsigned int i; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index ed19aa6919c..1af0ea0fb6a 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -62,8 +62,8 @@ static DEFINE_SPINLOCK(fib_multipath_lock); #define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \ for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++) -#define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \ -for (nhsel=0, nh = (struct fib_nh *)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++) +#define change_nexthops(fi) { int nhsel; struct fib_nh *nexthop_nh; \ +for (nhsel=0, nexthop_nh = (struct fib_nh *)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nexthop_nh++, nhsel++) #else /* CONFIG_IP_ROUTE_MULTIPATH */ @@ -72,7 +72,7 @@ for (nhsel=0, nh = (struct fib_nh *)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, #define for_nexthops(fi) { int nhsel = 0; const struct fib_nh * nh = (fi)->fib_nh; \ for (nhsel=0; nhsel < 1; nhsel++) -#define change_nexthops(fi) { int nhsel = 0; struct fib_nh * nh = (struct fib_nh *)((fi)->fib_nh); \ +#define change_nexthops(fi) { int nhsel = 0; struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \ for (nhsel=0; nhsel < 1; nhsel++) #endif /* CONFIG_IP_ROUTE_MULTIPATH */ @@ -145,9 +145,9 @@ void free_fib_info(struct fib_info *fi) return; } change_nexthops(fi) { - if (nh->nh_dev) - dev_put(nh->nh_dev); - nh->nh_dev = NULL; + if (nexthop_nh->nh_dev) + dev_put(nexthop_nh->nh_dev); + nexthop_nh->nh_dev = NULL; } endfor_nexthops(fi); fib_info_cnt--; release_net(fi->fib_net); @@ -162,9 +162,9 @@ void fib_release_info(struct fib_info *fi) if (fi->fib_prefsrc) hlist_del(&fi->fib_lhash); change_nexthops(fi) { - if (!nh->nh_dev) + if (!nexthop_nh->nh_dev) continue; - hlist_del(&nh->nh_hash); + hlist_del(&nexthop_nh->nh_hash); } endfor_nexthops(fi) fi->fib_dead = 1; fib_info_put(fi); @@ -395,19 +395,20 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, if (!rtnh_ok(rtnh, remaining)) return -EINVAL; - nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags; - nh->nh_oif = rtnh->rtnh_ifindex; - nh->nh_weight = rtnh->rtnh_hops + 1; + nexthop_nh->nh_flags = + (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags; + nexthop_nh->nh_oif = rtnh->rtnh_ifindex; + nexthop_nh->nh_weight = rtnh->rtnh_hops + 1; attrlen = rtnh_attrlen(rtnh); if (attrlen > 0) { struct nlattr *nla, *attrs = rtnh_attrs(rtnh); nla = nla_find(attrs, attrlen, RTA_GATEWAY); - nh->nh_gw = nla ? nla_get_be32(nla) : 0; + nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0; #ifdef CONFIG_NET_CLS_ROUTE nla = nla_find(attrs, attrlen, RTA_FLOW); - nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; + nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; #endif } @@ -527,10 +528,6 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, if (nh->nh_gw) { struct fib_result res; -#ifdef CONFIG_IP_ROUTE_PERVASIVE - if (nh->nh_flags&RTNH_F_PERVASIVE) - return 0; -#endif if (nh->nh_flags&RTNH_F_ONLINK) { struct net_device *dev; @@ -738,7 +735,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) fi->fib_nhs = nhs; change_nexthops(fi) { - nh->nh_parent = fi; + nexthop_nh->nh_parent = fi; } endfor_nexthops(fi) if (cfg->fc_mx) { @@ -808,7 +805,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) goto failure; } else { change_nexthops(fi) { - if ((err = fib_check_nh(cfg, fi, nh)) != 0) + if ((err = fib_check_nh(cfg, fi, nexthop_nh)) != 0) goto failure; } endfor_nexthops(fi) } @@ -843,11 +840,11 @@ link_it: struct hlist_head *head; unsigned int hash; - if (!nh->nh_dev) + if (!nexthop_nh->nh_dev) continue; - hash = fib_devindex_hashfn(nh->nh_dev->ifindex); + hash = fib_devindex_hashfn(nexthop_nh->nh_dev->ifindex); head = &fib_info_devhash[hash]; - hlist_add_head(&nh->nh_hash, head); + hlist_add_head(&nexthop_nh->nh_hash, head); } endfor_nexthops(fi) spin_unlock_bh(&fib_info_lock); return fi; @@ -1080,21 +1077,21 @@ int fib_sync_down_dev(struct net_device *dev, int force) prev_fi = fi; dead = 0; change_nexthops(fi) { - if (nh->nh_flags&RTNH_F_DEAD) + if (nexthop_nh->nh_flags&RTNH_F_DEAD) dead++; - else if (nh->nh_dev == dev && - nh->nh_scope != scope) { - nh->nh_flags |= RTNH_F_DEAD; + else if (nexthop_nh->nh_dev == dev && + nexthop_nh->nh_scope != scope) { + nexthop_nh->nh_flags |= RTNH_F_DEAD; #ifdef CONFIG_IP_ROUTE_MULTIPATH spin_lock_bh(&fib_multipath_lock); - fi->fib_power -= nh->nh_power; - nh->nh_power = 0; + fi->fib_power -= nexthop_nh->nh_power; + nexthop_nh->nh_power = 0; spin_unlock_bh(&fib_multipath_lock); #endif dead++; } #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (force > 1 && nh->nh_dev == dev) { + if (force > 1 && nexthop_nh->nh_dev == dev) { dead = fi->fib_nhs; break; } @@ -1144,18 +1141,20 @@ int fib_sync_up(struct net_device *dev) prev_fi = fi; alive = 0; change_nexthops(fi) { - if (!(nh->nh_flags&RTNH_F_DEAD)) { + if (!(nexthop_nh->nh_flags&RTNH_F_DEAD)) { alive++; continue; } - if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP)) + if (nexthop_nh->nh_dev == NULL || + !(nexthop_nh->nh_dev->flags&IFF_UP)) continue; - if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev)) + if (nexthop_nh->nh_dev != dev || + !__in_dev_get_rtnl(dev)) continue; alive++; spin_lock_bh(&fib_multipath_lock); - nh->nh_power = 0; - nh->nh_flags &= ~RTNH_F_DEAD; + nexthop_nh->nh_power = 0; + nexthop_nh->nh_flags &= ~RTNH_F_DEAD; spin_unlock_bh(&fib_multipath_lock); } endfor_nexthops(fi) @@ -1182,9 +1181,9 @@ void fib_select_multipath(const struct flowi *flp, struct fib_result *res) if (fi->fib_power <= 0) { int power = 0; change_nexthops(fi) { - if (!(nh->nh_flags&RTNH_F_DEAD)) { - power += nh->nh_weight; - nh->nh_power = nh->nh_weight; + if (!(nexthop_nh->nh_flags&RTNH_F_DEAD)) { + power += nexthop_nh->nh_weight; + nexthop_nh->nh_power = nexthop_nh->nh_weight; } } endfor_nexthops(fi); fi->fib_power = power; @@ -1204,9 +1203,10 @@ void fib_select_multipath(const struct flowi *flp, struct fib_result *res) w = jiffies % fi->fib_power; change_nexthops(fi) { - if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) { - if ((w -= nh->nh_power) <= 0) { - nh->nh_power--; + if (!(nexthop_nh->nh_flags&RTNH_F_DEAD) && + nexthop_nh->nh_power) { + if ((w -= nexthop_nh->nh_power) <= 0) { + nexthop_nh->nh_power--; fi->fib_power--; res->nh_sel = nhsel; spin_unlock_bh(&fib_multipath_lock); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index fe11f60ce41..4b4c2bcd15d 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -114,7 +114,7 @@ struct icmp_bxm { /* An array of errno for error messages from dest unreach. */ /* RFC 1122: 3.2.2.1 States that NET_UNREACH, HOST_UNREACH and SR_FAILED MUST be considered 'transient errs'. */ -struct icmp_err icmp_err_convert[] = { +const struct icmp_err icmp_err_convert[] = { { .errno = ENETUNREACH, /* ICMP_NET_UNREACH */ .fatal = 0, diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 76c08402c93..63bf298ca10 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -946,7 +946,6 @@ int igmp_rcv(struct sk_buff *skb) break; case IGMP_HOST_MEMBERSHIP_REPORT: case IGMPV2_HOST_MEMBERSHIP_REPORT: - case IGMPV3_HOST_MEMBERSHIP_REPORT: /* Is it our report looped back? */ if (skb_rtable(skb)->fl.iif == 0) break; @@ -960,6 +959,7 @@ int igmp_rcv(struct sk_buff *skb) in_dev_put(in_dev); return pim_rcv_v1(skb); #endif + case IGMPV3_HOST_MEMBERSHIP_REPORT: case IGMP_DVMRP: case IGMP_TRACE: case IGMP_HOST_LEAVE_MESSAGE: @@ -1799,7 +1799,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) iml->next = inet->mc_list; iml->sflist = NULL; iml->sfmode = MCAST_EXCLUDE; - inet->mc_list = iml; + rcu_assign_pointer(inet->mc_list, iml); ip_mc_inc_group(in_dev, addr); err = 0; done: @@ -1807,24 +1807,46 @@ done: return err; } +static void ip_sf_socklist_reclaim(struct rcu_head *rp) +{ + struct ip_sf_socklist *psf; + + psf = container_of(rp, struct ip_sf_socklist, rcu); + /* sk_omem_alloc should have been decreased by the caller*/ + kfree(psf); +} + static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, struct in_device *in_dev) { + struct ip_sf_socklist *psf = iml->sflist; int err; - if (iml->sflist == NULL) { + if (psf == NULL) { /* any-source empty exclude case */ return ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr, iml->sfmode, 0, NULL, 0); } err = ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr, - iml->sfmode, iml->sflist->sl_count, - iml->sflist->sl_addr, 0); - sock_kfree_s(sk, iml->sflist, IP_SFLSIZE(iml->sflist->sl_max)); - iml->sflist = NULL; + iml->sfmode, psf->sl_count, psf->sl_addr, 0); + rcu_assign_pointer(iml->sflist, NULL); + /* decrease mem now to avoid the memleak warning */ + atomic_sub(IP_SFLSIZE(psf->sl_max), &sk->sk_omem_alloc); + call_rcu(&psf->rcu, ip_sf_socklist_reclaim); return err; } + +static void ip_mc_socklist_reclaim(struct rcu_head *rp) +{ + struct ip_mc_socklist *iml; + + iml = container_of(rp, struct ip_mc_socklist, rcu); + /* sk_omem_alloc should have been decreased by the caller*/ + kfree(iml); +} + + /* * Ask a socket to leave a group. */ @@ -1854,12 +1876,14 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) (void) ip_mc_leave_src(sk, iml, in_dev); - *imlp = iml->next; + rcu_assign_pointer(*imlp, iml->next); if (in_dev) ip_mc_dec_group(in_dev, group); rtnl_unlock(); - sock_kfree_s(sk, iml, sizeof(*iml)); + /* decrease mem now to avoid the memleak warning */ + atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); + call_rcu(&iml->rcu, ip_mc_socklist_reclaim); return 0; } if (!in_dev) @@ -1974,9 +1998,12 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct if (psl) { for (i=0; i<psl->sl_count; i++) newpsl->sl_addr[i] = psl->sl_addr[i]; - sock_kfree_s(sk, psl, IP_SFLSIZE(psl->sl_max)); + /* decrease mem now to avoid the memleak warning */ + atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); + call_rcu(&psl->rcu, ip_sf_socklist_reclaim); } - pmc->sflist = psl = newpsl; + rcu_assign_pointer(pmc->sflist, newpsl); + psl = newpsl; } rv = 1; /* > 0 for insert logic below if sl_count is 0 */ for (i=0; i<psl->sl_count; i++) { @@ -2072,11 +2099,13 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) if (psl) { (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, psl->sl_count, psl->sl_addr, 0); - sock_kfree_s(sk, psl, IP_SFLSIZE(psl->sl_max)); + /* decrease mem now to avoid the memleak warning */ + atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); + call_rcu(&psl->rcu, ip_sf_socklist_reclaim); } else (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, 0, NULL, 0); - pmc->sflist = newpsl; + rcu_assign_pointer(pmc->sflist, newpsl); pmc->sfmode = msf->imsf_fmode; err = 0; done: @@ -2209,30 +2238,40 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif) struct ip_mc_socklist *pmc; struct ip_sf_socklist *psl; int i; + int ret; + ret = 1; if (!ipv4_is_multicast(loc_addr)) - return 1; + goto out; - for (pmc=inet->mc_list; pmc; pmc=pmc->next) { + rcu_read_lock(); + for (pmc=rcu_dereference(inet->mc_list); pmc; pmc=rcu_dereference(pmc->next)) { if (pmc->multi.imr_multiaddr.s_addr == loc_addr && pmc->multi.imr_ifindex == dif) break; } + ret = inet->mc_all; if (!pmc) - return inet->mc_all; + goto unlock; psl = pmc->sflist; + ret = (pmc->sfmode == MCAST_EXCLUDE); if (!psl) - return pmc->sfmode == MCAST_EXCLUDE; + goto unlock; for (i=0; i<psl->sl_count; i++) { if (psl->sl_addr[i] == rmt_addr) break; } + ret = 0; if (pmc->sfmode == MCAST_INCLUDE && i >= psl->sl_count) - return 0; + goto unlock; if (pmc->sfmode == MCAST_EXCLUDE && i < psl->sl_count) - return 0; - return 1; + goto unlock; + ret = 1; +unlock: + rcu_read_unlock(); +out: + return ret; } /* @@ -2251,7 +2290,7 @@ void ip_mc_drop_socket(struct sock *sk) rtnl_lock(); while ((iml = inet->mc_list) != NULL) { struct in_device *in_dev; - inet->mc_list = iml->next; + rcu_assign_pointer(inet->mc_list, iml->next); in_dev = inetdev_by_index(net, iml->multi.imr_ifindex); (void) ip_mc_leave_src(sk, iml, in_dev); @@ -2259,7 +2298,9 @@ void ip_mc_drop_socket(struct sock *sk) ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr); in_dev_put(in_dev); } - sock_kfree_s(sk, iml, sizeof(*iml)); + /* decrease mem now to avoid the memleak warning */ + atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); + call_rcu(&iml->rcu, ip_mc_socklist_reclaim); } rtnl_unlock(); } @@ -2603,7 +2644,7 @@ static const struct file_operations igmp_mcf_seq_fops = { .release = seq_release_net, }; -static int igmp_net_init(struct net *net) +static int __net_init igmp_net_init(struct net *net) { struct proc_dir_entry *pde; @@ -2621,7 +2662,7 @@ out_igmp: return -ENOMEM; } -static void igmp_net_exit(struct net *net) +static void __net_exit igmp_net_exit(struct net *net) { proc_net_remove(net, "mcfilter"); proc_net_remove(net, "igmp"); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index ee16475f8fc..8da6429269d 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -529,6 +529,8 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, syn_ack_recalc(req, thresh, max_retries, queue->rskq_defer_accept, &expire, &resend); + if (req->rsk_ops->syn_ack_timeout) + req->rsk_ops->syn_ack_timeout(parent, req); if (!expire && (!resend || !req->rsk_ops->rtx_syn_ack(parent, req, NULL) || diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index bdb78dd180c..1aaa8110d84 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -368,7 +368,7 @@ static int inet_diag_bc_run(const void *bc, int len, yes = entry->sport >= op[1].no; break; case INET_DIAG_BC_S_LE: - yes = entry->dport <= op[1].no; + yes = entry->sport <= op[1].no; break; case INET_DIAG_BC_D_GE: yes = entry->dport >= op[1].no; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 86964b353c3..b59430bc041 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -32,6 +32,8 @@ #include <linux/netdevice.h> #include <linux/jhash.h> #include <linux/random.h> +#include <net/route.h> +#include <net/dst.h> #include <net/sock.h> #include <net/ip.h> #include <net/icmp.h> @@ -205,11 +207,34 @@ static void ip_expire(unsigned long arg) if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) { struct sk_buff *head = qp->q.fragments; - /* Send an ICMP "Fragment Reassembly Timeout" message. */ rcu_read_lock(); head->dev = dev_get_by_index_rcu(net, qp->iif); - if (head->dev) - icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); + if (!head->dev) + goto out_rcu_unlock; + + /* + * Only search router table for the head fragment, + * when defraging timeout at PRE_ROUTING HOOK. + */ + if (qp->user == IP_DEFRAG_CONNTRACK_IN && !skb_dst(head)) { + const struct iphdr *iph = ip_hdr(head); + int err = ip_route_input(head, iph->daddr, iph->saddr, + iph->tos, head->dev); + if (unlikely(err)) + goto out_rcu_unlock; + + /* + * Only an end host needs to send an ICMP + * "Fragment Reassembly Timeout" message, per RFC792. + */ + if (skb_rtable(head)->rt_type != RTN_LOCAL) + goto out_rcu_unlock; + + } + + /* Send an ICMP "Fragment Reassembly Timeout" message. */ + icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); +out_rcu_unlock: rcu_read_unlock(); } out: @@ -646,7 +671,7 @@ static struct ctl_table ip4_frags_ctl_table[] = { { } }; -static int ip4_frags_ns_ctl_register(struct net *net) +static int __net_init ip4_frags_ns_ctl_register(struct net *net) { struct ctl_table *table; struct ctl_table_header *hdr; @@ -676,7 +701,7 @@ err_alloc: return -ENOMEM; } -static void ip4_frags_ns_ctl_unregister(struct net *net) +static void __net_exit ip4_frags_ns_ctl_unregister(struct net *net) { struct ctl_table *table; @@ -704,7 +729,7 @@ static inline void ip4_frags_ctl_register(void) } #endif -static int ipv4_frags_init_net(struct net *net) +static int __net_init ipv4_frags_init_net(struct net *net) { /* * Fragment cache limits. We will commit 256K at one time. Should we @@ -726,7 +751,7 @@ static int ipv4_frags_init_net(struct net *net) return ip4_frags_ns_ctl_register(net); } -static void ipv4_frags_exit_net(struct net *net) +static void __net_exit ipv4_frags_exit_net(struct net *net) { ip4_frags_ns_ctl_unregister(net); inet_frags_exit_net(&net->ipv4.frags, &ip4_frags); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index f36ce156cac..c0c5274d027 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -793,7 +793,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev } if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) { - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); ip_rt_put(rt); goto tx_error; } @@ -1307,7 +1307,7 @@ static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head) } } -static int ipgre_init_net(struct net *net) +static int __net_init ipgre_init_net(struct net *net) { struct ipgre_net *ign = net_generic(net, ipgre_net_id); int err; @@ -1334,7 +1334,7 @@ err_alloc_dev: return err; } -static void ipgre_exit_net(struct net *net) +static void __net_exit ipgre_exit_net(struct net *net) { struct ipgre_net *ign; LIST_HEAD(list); @@ -1665,14 +1665,15 @@ static int __init ipgre_init(void) printk(KERN_INFO "GRE over IPv4 tunneling driver\n"); - if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) { - printk(KERN_INFO "ipgre init: can't add protocol\n"); - return -EAGAIN; - } - err = register_pernet_device(&ipgre_net_ops); if (err < 0) - goto gen_device_failed; + return err; + + err = inet_add_protocol(&ipgre_protocol, IPPROTO_GRE); + if (err < 0) { + printk(KERN_INFO "ipgre init: can't add protocol\n"); + goto add_proto_failed; + } err = rtnl_link_register(&ipgre_link_ops); if (err < 0) @@ -1688,9 +1689,9 @@ out: tap_ops_failed: rtnl_link_unregister(&ipgre_link_ops); rtnl_link_failed: - unregister_pernet_device(&ipgre_net_ops); -gen_device_failed: inet_del_protocol(&ipgre_protocol, IPPROTO_GRE); +add_proto_failed: + unregister_pernet_device(&ipgre_net_ops); goto out; } @@ -1698,9 +1699,9 @@ static void __exit ipgre_fini(void) { rtnl_link_unregister(&ipgre_tap_ops); rtnl_link_unregister(&ipgre_link_ops); - unregister_pernet_device(&ipgre_net_ops); if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) printk(KERN_INFO "ipgre close: can't remove protocol\n"); + unregister_pernet_device(&ipgre_net_ops); } module_init(ipgre_init); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e34013a78ef..3451799e3db 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -254,7 +254,7 @@ int ip_mc_output(struct sk_buff *skb) */ if (rt->rt_flags&RTCF_MULTICAST) { - if ((!sk || inet_sk(sk)->mc_loop) + if (sk_mc_loop(sk) #ifdef CONFIG_IP_MROUTE /* Small optimization: do not loopback not local frames, which returned after forwarding; they will be dropped diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index cafad9baff0..644dc43a55d 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -451,7 +451,8 @@ static int do_ip_setsockopt(struct sock *sk, int level, (1<<IP_TTL) | (1<<IP_HDRINCL) | (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) | (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) | - (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT))) || + (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT) | + (1<<IP_MINTTL))) || optname == IP_MULTICAST_TTL || optname == IP_MULTICAST_ALL || optname == IP_MULTICAST_LOOP || @@ -936,6 +937,14 @@ mc_msf_out: inet->transparent = !!val; break; + case IP_MINTTL: + if (optlen < 1) + goto e_inval; + if (val < 0 || val > 255) + goto e_inval; + inet->min_ttl = val; + break; + default: err = -ENOPROTOOPT; break; @@ -1198,6 +1207,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_TRANSPARENT: val = inet->transparent; break; + case IP_MINTTL: + val = inet->min_ttl; + break; default: release_sock(sk); return -ENOPROTOOPT; diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 38fbf04150a..629067571f0 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -25,6 +25,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) { + struct net *net = dev_net(skb->dev); __be32 spi; struct iphdr *iph = (struct iphdr *)skb->data; struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2)); @@ -35,7 +36,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) return; spi = htonl(ntohs(ipch->cpi)); - x = xfrm_state_lookup(&init_net, (xfrm_address_t *)&iph->daddr, + x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET); if (!x) return; @@ -47,9 +48,10 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) /* We always hold one tunnel user reference to indicate a tunnel */ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) { + struct net *net = xs_net(x); struct xfrm_state *t; - t = xfrm_state_alloc(&init_net); + t = xfrm_state_alloc(net); if (t == NULL) goto out; @@ -61,6 +63,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) t->props.mode = x->props.mode; t->props.saddr.a4 = x->props.saddr.a4; t->props.flags = x->props.flags; + memcpy(&t->mark, &x->mark, sizeof(t->mark)); if (xfrm_init_state(t)) goto error; @@ -82,10 +85,12 @@ error: */ static int ipcomp_tunnel_attach(struct xfrm_state *x) { + struct net *net = xs_net(x); int err = 0; struct xfrm_state *t; + u32 mark = x->mark.v & x->mark.m; - t = xfrm_state_lookup(&init_net, (xfrm_address_t *)&x->id.daddr.a4, + t = xfrm_state_lookup(net, mark, (xfrm_address_t *)&x->id.daddr.a4, x->props.saddr.a4, IPPROTO_IPIP, AF_INET); if (!t) { t = ipcomp_tunnel_create(x); @@ -124,16 +129,12 @@ static int ipcomp4_init_state(struct xfrm_state *x) if (x->props.mode == XFRM_MODE_TUNNEL) { err = ipcomp_tunnel_attach(x); if (err) - goto error_tunnel; + goto out; } err = 0; out: return err; - -error_tunnel: - ipcomp_destroy(x); - goto out; } static const struct xfrm_type ipcomp_type = { diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index eda04fed337..2f302d3ac9a 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -130,7 +130,6 @@ struct ipip_net { struct net_device *fb_tunnel_dev; }; -static void ipip_fb_tunnel_init(struct net_device *dev); static void ipip_tunnel_init(struct net_device *dev); static void ipip_tunnel_setup(struct net_device *dev); @@ -730,7 +729,7 @@ static void ipip_tunnel_init(struct net_device *dev) ipip_tunnel_bind_dev(dev); } -static void ipip_fb_tunnel_init(struct net_device *dev) +static void __net_init ipip_fb_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct iphdr *iph = &tunnel->parms.iph; @@ -773,7 +772,7 @@ static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head) } } -static int ipip_init_net(struct net *net) +static int __net_init ipip_init_net(struct net *net) { struct ipip_net *ipn = net_generic(net, ipip_net_id); int err; @@ -806,7 +805,7 @@ err_alloc_dev: return err; } -static void ipip_exit_net(struct net *net) +static void __net_exit ipip_exit_net(struct net *net) { struct ipip_net *ipn = net_generic(net, ipip_net_id); LIST_HEAD(list); @@ -831,15 +830,14 @@ static int __init ipip_init(void) printk(banner); - if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) { + err = register_pernet_device(&ipip_net_ops); + if (err < 0) + return err; + err = xfrm4_tunnel_register(&ipip_handler, AF_INET); + if (err < 0) { + unregister_pernet_device(&ipip_net_ops); printk(KERN_INFO "ipip init: can't register tunnel\n"); - return -EAGAIN; } - - err = register_pernet_device(&ipip_net_ops); - if (err) - xfrm4_tunnel_deregister(&ipip_handler, AF_INET); - return err; } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 54596f73eff..8582e12e4a6 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1163,9 +1163,6 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v int ct; LIST_HEAD(list); - if (!net_eq(dev_net(dev), net)) - return NOTIFY_DONE; - if (event != NETDEV_UNREGISTER) return NOTIFY_DONE; v = &net->ipv4.vif_table[0]; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 06632762ba5..f07d77f6575 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -27,6 +27,7 @@ #include <linux/netfilter/x_tables.h> #include <linux/netfilter_arp/arp_tables.h> +#include "../../netfilter/xt_repldata.h" MODULE_LICENSE("GPL"); MODULE_AUTHOR("David S. Miller <davem@redhat.com>"); @@ -58,6 +59,12 @@ do { \ #define ARP_NF_ASSERT(x) #endif +void *arpt_alloc_initial_table(const struct xt_table *info) +{ + return xt_alloc_initial_table(arpt, ARPT); +} +EXPORT_SYMBOL_GPL(arpt_alloc_initial_table); + static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, const char *hdr_addr, int len) { @@ -226,7 +233,14 @@ arpt_error(struct sk_buff *skb, const struct xt_target_param *par) return NF_DROP; } -static inline struct arpt_entry *get_entry(void *base, unsigned int offset) +static inline const struct arpt_entry_target * +arpt_get_target_c(const struct arpt_entry *e) +{ + return arpt_get_target((struct arpt_entry *)e); +} + +static inline struct arpt_entry * +get_entry(const void *base, unsigned int offset) { return (struct arpt_entry *)(base + offset); } @@ -273,7 +287,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, arp = arp_hdr(skb); do { - struct arpt_entry_target *t; + const struct arpt_entry_target *t; int hdr_len; if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { @@ -285,7 +299,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, (2 * skb->dev->addr_len); ADD_COUNTER(e->counters, hdr_len, 1); - t = arpt_get_target(e); + t = arpt_get_target_c(e); /* Standard target? */ if (!t->u.kernel.target->target) { @@ -351,7 +365,7 @@ static inline bool unconditional(const struct arpt_arp *arp) /* Figures out from what hook each rule can be called: returns 0 if * there are loops. Puts hook bitmask in comefrom. */ -static int mark_source_chains(struct xt_table_info *newinfo, +static int mark_source_chains(const struct xt_table_info *newinfo, unsigned int valid_hooks, void *entry0) { unsigned int hook; @@ -372,7 +386,7 @@ static int mark_source_chains(struct xt_table_info *newinfo, for (;;) { const struct arpt_standard_target *t - = (void *)arpt_get_target(e); + = (void *)arpt_get_target_c(e); int visited = e->comefrom & (1 << hook); if (e->comefrom & (1 << NF_ARP_NUMHOOKS)) { @@ -456,7 +470,7 @@ static int mark_source_chains(struct xt_table_info *newinfo, return 1; } -static inline int check_entry(struct arpt_entry *e, const char *name) +static inline int check_entry(const struct arpt_entry *e, const char *name) { const struct arpt_entry_target *t; @@ -468,7 +482,7 @@ static inline int check_entry(struct arpt_entry *e, const char *name) if (e->target_offset + sizeof(struct arpt_entry_target) > e->next_offset) return -EINVAL; - t = arpt_get_target(e); + t = arpt_get_target_c(e); if (e->target_offset + t->u.target_size > e->next_offset) return -EINVAL; @@ -498,8 +512,7 @@ static inline int check_target(struct arpt_entry *e, const char *name) } static inline int -find_check_entry(struct arpt_entry *e, const char *name, unsigned int size, - unsigned int *i) +find_check_entry(struct arpt_entry *e, const char *name, unsigned int size) { struct arpt_entry_target *t; struct xt_target *target; @@ -524,8 +537,6 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size, ret = check_target(e, name); if (ret) goto err; - - (*i)++; return 0; err: module_put(t->u.kernel.target->me); @@ -533,14 +544,14 @@ out: return ret; } -static bool check_underflow(struct arpt_entry *e) +static bool check_underflow(const struct arpt_entry *e) { const struct arpt_entry_target *t; unsigned int verdict; if (!unconditional(&e->arp)) return false; - t = arpt_get_target(e); + t = arpt_get_target_c(e); if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) return false; verdict = ((struct arpt_standard_target *)t)->verdict; @@ -550,12 +561,11 @@ static bool check_underflow(struct arpt_entry *e) static inline int check_entry_size_and_hooks(struct arpt_entry *e, struct xt_table_info *newinfo, - unsigned char *base, - unsigned char *limit, + const unsigned char *base, + const unsigned char *limit, const unsigned int *hook_entries, const unsigned int *underflows, - unsigned int valid_hooks, - unsigned int *i) + unsigned int valid_hooks) { unsigned int h; @@ -592,19 +602,14 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, /* Clear counters and comefrom */ e->counters = ((struct xt_counters) { 0, 0 }); e->comefrom = 0; - - (*i)++; return 0; } -static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i) +static inline void cleanup_entry(struct arpt_entry *e) { struct xt_tgdtor_param par; struct arpt_entry_target *t; - if (i && (*i)-- == 0) - return 1; - t = arpt_get_target(e); par.target = t->u.kernel.target; par.targinfo = t->data; @@ -612,26 +617,20 @@ static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i) if (par.target->destroy != NULL) par.target->destroy(&par); module_put(par.target->me); - return 0; } /* Checks and translates the user-supplied table segment (held in * newinfo). */ -static int translate_table(const char *name, - unsigned int valid_hooks, - struct xt_table_info *newinfo, - void *entry0, - unsigned int size, - unsigned int number, - const unsigned int *hook_entries, - const unsigned int *underflows) +static int translate_table(struct xt_table_info *newinfo, void *entry0, + const struct arpt_replace *repl) { + struct arpt_entry *iter; unsigned int i; - int ret; + int ret = 0; - newinfo->size = size; - newinfo->number = number; + newinfo->size = repl->size; + newinfo->number = repl->num_entries; /* Init all hooks to impossible value. */ for (i = 0; i < NF_ARP_NUMHOOKS; i++) { @@ -643,52 +642,63 @@ static int translate_table(const char *name, i = 0; /* Walk through entries, checking offsets. */ - ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size, - check_entry_size_and_hooks, - newinfo, - entry0, - entry0 + size, - hook_entries, underflows, valid_hooks, &i); + xt_entry_foreach(iter, entry0, newinfo->size) { + ret = check_entry_size_and_hooks(iter, newinfo, entry0, + entry0 + repl->size, + repl->hook_entry, + repl->underflow, + repl->valid_hooks); + if (ret != 0) + break; + ++i; + } duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret); if (ret != 0) return ret; - if (i != number) { + if (i != repl->num_entries) { duprintf("translate_table: %u not %u entries\n", - i, number); + i, repl->num_entries); return -EINVAL; } /* Check hooks all assigned */ for (i = 0; i < NF_ARP_NUMHOOKS; i++) { /* Only hooks which are valid */ - if (!(valid_hooks & (1 << i))) + if (!(repl->valid_hooks & (1 << i))) continue; if (newinfo->hook_entry[i] == 0xFFFFFFFF) { duprintf("Invalid hook entry %u %u\n", - i, hook_entries[i]); + i, repl->hook_entry[i]); return -EINVAL; } if (newinfo->underflow[i] == 0xFFFFFFFF) { duprintf("Invalid underflow %u %u\n", - i, underflows[i]); + i, repl->underflow[i]); return -EINVAL; } } - if (!mark_source_chains(newinfo, valid_hooks, entry0)) { + if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) { duprintf("Looping hook\n"); return -ELOOP; } /* Finally, each sanity check must pass */ i = 0; - ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size, - find_check_entry, name, size, &i); + xt_entry_foreach(iter, entry0, newinfo->size) { + ret = find_check_entry(iter, repl->name, repl->size); + if (ret != 0) + break; + ++i; + } if (ret != 0) { - ARPT_ENTRY_ITERATE(entry0, newinfo->size, - cleanup_entry, &i); + xt_entry_foreach(iter, entry0, newinfo->size) { + if (i-- == 0) + break; + cleanup_entry(iter); + } return ret; } @@ -701,30 +711,10 @@ static int translate_table(const char *name, return ret; } -/* Gets counters. */ -static inline int add_entry_to_counter(const struct arpt_entry *e, - struct xt_counters total[], - unsigned int *i) -{ - ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); - - (*i)++; - return 0; -} - -static inline int set_entry_to_counter(const struct arpt_entry *e, - struct xt_counters total[], - unsigned int *i) -{ - SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); - - (*i)++; - return 0; -} - static void get_counters(const struct xt_table_info *t, struct xt_counters counters[]) { + struct arpt_entry *iter; unsigned int cpu; unsigned int i; unsigned int curcpu; @@ -740,32 +730,32 @@ static void get_counters(const struct xt_table_info *t, curcpu = smp_processor_id(); i = 0; - ARPT_ENTRY_ITERATE(t->entries[curcpu], - t->size, - set_entry_to_counter, - counters, - &i); + xt_entry_foreach(iter, t->entries[curcpu], t->size) { + SET_COUNTER(counters[i], iter->counters.bcnt, + iter->counters.pcnt); + ++i; + } for_each_possible_cpu(cpu) { if (cpu == curcpu) continue; i = 0; xt_info_wrlock(cpu); - ARPT_ENTRY_ITERATE(t->entries[cpu], - t->size, - add_entry_to_counter, - counters, - &i); + xt_entry_foreach(iter, t->entries[cpu], t->size) { + ADD_COUNTER(counters[i], iter->counters.bcnt, + iter->counters.pcnt); + ++i; + } xt_info_wrunlock(cpu); } local_bh_enable(); } -static struct xt_counters *alloc_counters(struct xt_table *table) +static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - struct xt_table_info *private = table->private; + const struct xt_table_info *private = table->private; /* We need atomic snapshot of counters: rest doesn't change * (other than comefrom, which userspace doesn't care @@ -783,11 +773,11 @@ static struct xt_counters *alloc_counters(struct xt_table *table) } static int copy_entries_to_user(unsigned int total_size, - struct xt_table *table, + const struct xt_table *table, void __user *userptr) { unsigned int off, num; - struct arpt_entry *e; + const struct arpt_entry *e; struct xt_counters *counters; struct xt_table_info *private = table->private; int ret = 0; @@ -807,7 +797,7 @@ static int copy_entries_to_user(unsigned int total_size, /* FIXME: use iterator macros --RR */ /* ... then go back and fix counters and names */ for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ - struct arpt_entry_target *t; + const struct arpt_entry_target *t; e = (struct arpt_entry *)(loc_cpu_entry + off); if (copy_to_user(userptr + off @@ -818,7 +808,7 @@ static int copy_entries_to_user(unsigned int total_size, goto free_counters; } - t = arpt_get_target(e); + t = arpt_get_target_c(e); if (copy_to_user(userptr + off + e->target_offset + offsetof(struct arpt_entry_target, u.user.name), @@ -835,7 +825,7 @@ static int copy_entries_to_user(unsigned int total_size, } #ifdef CONFIG_COMPAT -static void compat_standard_from_user(void *dst, void *src) +static void compat_standard_from_user(void *dst, const void *src) { int v = *(compat_int_t *)src; @@ -844,7 +834,7 @@ static void compat_standard_from_user(void *dst, void *src) memcpy(dst, &v, sizeof(v)); } -static int compat_standard_to_user(void __user *dst, void *src) +static int compat_standard_to_user(void __user *dst, const void *src) { compat_int_t cv = *(int *)src; @@ -853,18 +843,18 @@ static int compat_standard_to_user(void __user *dst, void *src) return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; } -static int compat_calc_entry(struct arpt_entry *e, +static int compat_calc_entry(const struct arpt_entry *e, const struct xt_table_info *info, - void *base, struct xt_table_info *newinfo) + const void *base, struct xt_table_info *newinfo) { - struct arpt_entry_target *t; + const struct arpt_entry_target *t; unsigned int entry_offset; int off, i, ret; off = sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry); entry_offset = (void *)e - base; - t = arpt_get_target(e); + t = arpt_get_target_c(e); off += xt_compat_target_offset(t->u.kernel.target); newinfo->size -= off; ret = xt_compat_add_offset(NFPROTO_ARP, entry_offset, off); @@ -885,7 +875,9 @@ static int compat_calc_entry(struct arpt_entry *e, static int compat_table_info(const struct xt_table_info *info, struct xt_table_info *newinfo) { + struct arpt_entry *iter; void *loc_cpu_entry; + int ret; if (!newinfo || !info) return -EINVAL; @@ -894,13 +886,17 @@ static int compat_table_info(const struct xt_table_info *info, memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); newinfo->initial_entries = 0; loc_cpu_entry = info->entries[raw_smp_processor_id()]; - return ARPT_ENTRY_ITERATE(loc_cpu_entry, info->size, - compat_calc_entry, info, loc_cpu_entry, - newinfo); + xt_entry_foreach(iter, loc_cpu_entry, info->size) { + ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); + if (ret != 0) + return ret; + } + return 0; } #endif -static int get_info(struct net *net, void __user *user, int *len, int compat) +static int get_info(struct net *net, void __user *user, + const int *len, int compat) { char name[ARPT_TABLE_MAXNAMELEN]; struct xt_table *t; @@ -925,10 +921,10 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) if (t && !IS_ERR(t)) { struct arpt_getinfo info; const struct xt_table_info *private = t->private; - #ifdef CONFIG_COMPAT + struct xt_table_info tmp; + if (compat) { - struct xt_table_info tmp; ret = compat_table_info(private, &tmp); xt_compat_flush_offsets(NFPROTO_ARP); private = &tmp; @@ -959,7 +955,7 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) } static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, - int *len) + const int *len) { int ret; struct arpt_get_entries get; @@ -1010,6 +1006,7 @@ static int __do_replace(struct net *net, const char *name, struct xt_table_info *oldinfo; struct xt_counters *counters; void *loc_cpu_old_entry; + struct arpt_entry *iter; ret = 0; counters = vmalloc_node(num_counters * sizeof(struct xt_counters), @@ -1053,8 +1050,8 @@ static int __do_replace(struct net *net, const char *name, /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; - ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, - NULL); + xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size) + cleanup_entry(iter); xt_free_table_info(oldinfo); if (copy_to_user(counters_ptr, counters, @@ -1073,12 +1070,14 @@ static int __do_replace(struct net *net, const char *name, return ret; } -static int do_replace(struct net *net, void __user *user, unsigned int len) +static int do_replace(struct net *net, const void __user *user, + unsigned int len) { int ret; struct arpt_replace tmp; struct xt_table_info *newinfo; void *loc_cpu_entry; + struct arpt_entry *iter; if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) return -EFAULT; @@ -1099,9 +1098,7 @@ static int do_replace(struct net *net, void __user *user, unsigned int len) goto free_newinfo; } - ret = translate_table(tmp.name, tmp.valid_hooks, - newinfo, loc_cpu_entry, tmp.size, tmp.num_entries, - tmp.hook_entry, tmp.underflow); + ret = translate_table(newinfo, loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; @@ -1114,27 +1111,15 @@ static int do_replace(struct net *net, void __user *user, unsigned int len) return 0; free_newinfo_untrans: - ARPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); + xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) + cleanup_entry(iter); free_newinfo: xt_free_table_info(newinfo); return ret; } -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. */ -static int -add_counter_to_entry(struct arpt_entry *e, - const struct xt_counters addme[], - unsigned int *i) -{ - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); - - (*i)++; - return 0; -} - -static int do_add_counters(struct net *net, void __user *user, unsigned int len, - int compat) +static int do_add_counters(struct net *net, const void __user *user, + unsigned int len, int compat) { unsigned int i, curcpu; struct xt_counters_info tmp; @@ -1147,6 +1132,7 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, const struct xt_table_info *private; int ret = 0; void *loc_cpu_entry; + struct arpt_entry *iter; #ifdef CONFIG_COMPAT struct compat_xt_counters_info compat_tmp; @@ -1204,11 +1190,10 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, curcpu = smp_processor_id(); loc_cpu_entry = private->entries[curcpu]; xt_info_wrlock(curcpu); - ARPT_ENTRY_ITERATE(loc_cpu_entry, - private->size, - add_counter_to_entry, - paddc, - &i); + xt_entry_foreach(iter, loc_cpu_entry, private->size) { + ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); + ++i; + } xt_info_wrunlock(curcpu); unlock_up_free: local_bh_enable(); @@ -1221,28 +1206,22 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, } #ifdef CONFIG_COMPAT -static inline int -compat_release_entry(struct compat_arpt_entry *e, unsigned int *i) +static inline void compat_release_entry(struct compat_arpt_entry *e) { struct arpt_entry_target *t; - if (i && (*i)-- == 0) - return 1; - t = compat_arpt_get_target(e); module_put(t->u.kernel.target->me); - return 0; } static inline int check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, struct xt_table_info *newinfo, unsigned int *size, - unsigned char *base, - unsigned char *limit, - unsigned int *hook_entries, - unsigned int *underflows, - unsigned int *i, + const unsigned char *base, + const unsigned char *limit, + const unsigned int *hook_entries, + const unsigned int *underflows, const char *name) { struct arpt_entry_target *t; @@ -1302,8 +1281,6 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, /* Clear counters and comefrom */ memset(&e->counters, 0, sizeof(e->counters)); e->comefrom = 0; - - (*i)++; return 0; release_target: @@ -1347,19 +1324,6 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr, return ret; } -static inline int compat_check_entry(struct arpt_entry *e, const char *name, - unsigned int *i) -{ - int ret; - - ret = check_target(e, name); - if (ret) - return ret; - - (*i)++; - return 0; -} - static int translate_compat_table(const char *name, unsigned int valid_hooks, struct xt_table_info **pinfo, @@ -1372,8 +1336,10 @@ static int translate_compat_table(const char *name, unsigned int i, j; struct xt_table_info *newinfo, *info; void *pos, *entry0, *entry1; + struct compat_arpt_entry *iter0; + struct arpt_entry *iter1; unsigned int size; - int ret; + int ret = 0; info = *pinfo; entry0 = *pentry0; @@ -1390,13 +1356,17 @@ static int translate_compat_table(const char *name, j = 0; xt_compat_lock(NFPROTO_ARP); /* Walk through entries, checking offsets. */ - ret = COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, - check_compat_entry_size_and_hooks, - info, &size, entry0, - entry0 + total_size, - hook_entries, underflows, &j, name); - if (ret != 0) - goto out_unlock; + xt_entry_foreach(iter0, entry0, total_size) { + ret = check_compat_entry_size_and_hooks(iter0, info, &size, + entry0, + entry0 + total_size, + hook_entries, + underflows, + name); + if (ret != 0) + goto out_unlock; + ++j; + } ret = -EINVAL; if (j != number) { @@ -1435,9 +1405,12 @@ static int translate_compat_table(const char *name, entry1 = newinfo->entries[raw_smp_processor_id()]; pos = entry1; size = total_size; - ret = COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, - compat_copy_entry_from_user, - &pos, &size, name, newinfo, entry1); + xt_entry_foreach(iter0, entry0, total_size) { + ret = compat_copy_entry_from_user(iter0, &pos, &size, + name, newinfo, entry1); + if (ret != 0) + break; + } xt_compat_flush_offsets(NFPROTO_ARP); xt_compat_unlock(NFPROTO_ARP); if (ret) @@ -1448,13 +1421,32 @@ static int translate_compat_table(const char *name, goto free_newinfo; i = 0; - ret = ARPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry, - name, &i); + xt_entry_foreach(iter1, entry1, newinfo->size) { + ret = check_target(iter1, name); + if (ret != 0) + break; + ++i; + } if (ret) { + /* + * The first i matches need cleanup_entry (calls ->destroy) + * because they had called ->check already. The other j-i + * entries need only release. + */ + int skip = i; j -= i; - COMPAT_ARPT_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i, - compat_release_entry, &j); - ARPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i); + xt_entry_foreach(iter0, entry0, newinfo->size) { + if (skip-- > 0) + continue; + if (j-- == 0) + break; + compat_release_entry(iter0); + } + xt_entry_foreach(iter1, entry1, newinfo->size) { + if (i-- == 0) + break; + cleanup_entry(iter1); + } xt_free_table_info(newinfo); return ret; } @@ -1472,7 +1464,11 @@ static int translate_compat_table(const char *name, free_newinfo: xt_free_table_info(newinfo); out: - COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j); + xt_entry_foreach(iter0, entry0, total_size) { + if (j-- == 0) + break; + compat_release_entry(iter0); + } return ret; out_unlock: xt_compat_flush_offsets(NFPROTO_ARP); @@ -1499,6 +1495,7 @@ static int compat_do_replace(struct net *net, void __user *user, struct compat_arpt_replace tmp; struct xt_table_info *newinfo; void *loc_cpu_entry; + struct arpt_entry *iter; if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) return -EFAULT; @@ -1536,7 +1533,8 @@ static int compat_do_replace(struct net *net, void __user *user, return 0; free_newinfo_untrans: - ARPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); + xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) + cleanup_entry(iter); free_newinfo: xt_free_table_info(newinfo); return ret; @@ -1570,7 +1568,7 @@ static int compat_do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr, compat_uint_t *size, struct xt_counters *counters, - unsigned int *i) + unsigned int i) { struct arpt_entry_target *t; struct compat_arpt_entry __user *ce; @@ -1578,14 +1576,12 @@ static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr, compat_uint_t origsize; int ret; - ret = -EFAULT; origsize = *size; ce = (struct compat_arpt_entry __user *)*dstptr; - if (copy_to_user(ce, e, sizeof(struct arpt_entry))) - goto out; - - if (copy_to_user(&ce->counters, &counters[*i], sizeof(counters[*i]))) - goto out; + if (copy_to_user(ce, e, sizeof(struct arpt_entry)) != 0 || + copy_to_user(&ce->counters, &counters[i], + sizeof(counters[i])) != 0) + return -EFAULT; *dstptr += sizeof(struct compat_arpt_entry); *size -= sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry); @@ -1595,18 +1591,12 @@ static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr, t = arpt_get_target(e); ret = xt_compat_target_to_user(t, dstptr, size); if (ret) - goto out; - ret = -EFAULT; + return ret; next_offset = e->next_offset - (origsize - *size); - if (put_user(target_offset, &ce->target_offset)) - goto out; - if (put_user(next_offset, &ce->next_offset)) - goto out; - - (*i)++; + if (put_user(target_offset, &ce->target_offset) != 0 || + put_user(next_offset, &ce->next_offset) != 0) + return -EFAULT; return 0; -out: - return ret; } static int compat_copy_entries_to_user(unsigned int total_size, @@ -1620,6 +1610,7 @@ static int compat_copy_entries_to_user(unsigned int total_size, int ret = 0; void *loc_cpu_entry; unsigned int i = 0; + struct arpt_entry *iter; counters = alloc_counters(table); if (IS_ERR(counters)) @@ -1629,9 +1620,12 @@ static int compat_copy_entries_to_user(unsigned int total_size, loc_cpu_entry = private->entries[raw_smp_processor_id()]; pos = userptr; size = total_size; - ret = ARPT_ENTRY_ITERATE(loc_cpu_entry, total_size, - compat_copy_entry_to_user, - &pos, &size, counters, &i); + xt_entry_foreach(iter, loc_cpu_entry, total_size) { + ret = compat_copy_entry_to_user(iter, &pos, + &size, counters, i++); + if (ret != 0) + break; + } vfree(counters); return ret; } @@ -1799,12 +1793,7 @@ struct xt_table *arpt_register_table(struct net *net, loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; memcpy(loc_cpu_entry, repl->entries, repl->size); - ret = translate_table(table->name, table->valid_hooks, - newinfo, loc_cpu_entry, repl->size, - repl->num_entries, - repl->hook_entry, - repl->underflow); - + ret = translate_table(newinfo, loc_cpu_entry, repl); duprintf("arpt_register_table: translate table gives %d\n", ret); if (ret != 0) goto out_free; @@ -1827,13 +1816,14 @@ void arpt_unregister_table(struct xt_table *table) struct xt_table_info *private; void *loc_cpu_entry; struct module *table_owner = table->me; + struct arpt_entry *iter; private = xt_unregister_table(table); /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries[raw_smp_processor_id()]; - ARPT_ENTRY_ITERATE(loc_cpu_entry, private->size, - cleanup_entry, NULL); + xt_entry_foreach(iter, loc_cpu_entry, private->size) + cleanup_entry(iter); if (private->number > private->initial_entries) module_put(table_owner); xt_free_table_info(private); diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 97337601827..bfe26f32b93 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -6,6 +6,7 @@ */ #include <linux/module.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_arp/arp_tables.h> MODULE_LICENSE("GPL"); @@ -15,93 +16,37 @@ MODULE_DESCRIPTION("arptables filter table"); #define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \ (1 << NF_ARP_FORWARD)) -static const struct -{ - struct arpt_replace repl; - struct arpt_standard entries[3]; - struct arpt_error term; -} initial_table __net_initdata = { - .repl = { - .name = "filter", - .valid_hooks = FILTER_VALID_HOOKS, - .num_entries = 4, - .size = sizeof(struct arpt_standard) * 3 + sizeof(struct arpt_error), - .hook_entry = { - [NF_ARP_IN] = 0, - [NF_ARP_OUT] = sizeof(struct arpt_standard), - [NF_ARP_FORWARD] = 2 * sizeof(struct arpt_standard), - }, - .underflow = { - [NF_ARP_IN] = 0, - [NF_ARP_OUT] = sizeof(struct arpt_standard), - [NF_ARP_FORWARD] = 2 * sizeof(struct arpt_standard), - }, - }, - .entries = { - ARPT_STANDARD_INIT(NF_ACCEPT), /* ARP_IN */ - ARPT_STANDARD_INIT(NF_ACCEPT), /* ARP_OUT */ - ARPT_STANDARD_INIT(NF_ACCEPT), /* ARP_FORWARD */ - }, - .term = ARPT_ERROR_INIT, -}; - static const struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_ARP, + .priority = NF_IP_PRI_FILTER, }; /* The work comes in here from netfilter.c */ -static unsigned int arpt_in_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +static unsigned int +arptable_filter_hook(unsigned int hook, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - return arpt_do_table(skb, hook, in, out, - dev_net(in)->ipv4.arptable_filter); -} + const struct net *net = dev_net((in != NULL) ? in : out); -static unsigned int arpt_out_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - return arpt_do_table(skb, hook, in, out, - dev_net(out)->ipv4.arptable_filter); + return arpt_do_table(skb, hook, in, out, net->ipv4.arptable_filter); } -static struct nf_hook_ops arpt_ops[] __read_mostly = { - { - .hook = arpt_in_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_ARP, - .hooknum = NF_ARP_IN, - .priority = NF_IP_PRI_FILTER, - }, - { - .hook = arpt_out_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_ARP, - .hooknum = NF_ARP_OUT, - .priority = NF_IP_PRI_FILTER, - }, - { - .hook = arpt_in_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_ARP, - .hooknum = NF_ARP_FORWARD, - .priority = NF_IP_PRI_FILTER, - }, -}; +static struct nf_hook_ops *arpfilter_ops __read_mostly; static int __net_init arptable_filter_net_init(struct net *net) { - /* Register table */ + struct arpt_replace *repl; + + repl = arpt_alloc_initial_table(&packet_filter); + if (repl == NULL) + return -ENOMEM; net->ipv4.arptable_filter = - arpt_register_table(net, &packet_filter, &initial_table.repl); + arpt_register_table(net, &packet_filter, repl); + kfree(repl); if (IS_ERR(net->ipv4.arptable_filter)) return PTR_ERR(net->ipv4.arptable_filter); return 0; @@ -125,9 +70,11 @@ static int __init arptable_filter_init(void) if (ret < 0) return ret; - ret = nf_register_hooks(arpt_ops, ARRAY_SIZE(arpt_ops)); - if (ret < 0) + arpfilter_ops = xt_hook_link(&packet_filter, arptable_filter_hook); + if (IS_ERR(arpfilter_ops)) { + ret = PTR_ERR(arpfilter_ops); goto cleanup_table; + } return ret; cleanup_table: @@ -137,7 +84,7 @@ cleanup_table: static void __exit arptable_filter_fini(void) { - nf_unregister_hooks(arpt_ops, ARRAY_SIZE(arpt_ops)); + xt_hook_unlink(&packet_filter, arpfilter_ops); unregister_pernet_subsys(&arptable_filter_net_ops); } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 572330a552e..b29c66df8d1 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -28,6 +28,7 @@ #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <net/netfilter/nf_log.h> +#include "../../netfilter/xt_repldata.h" MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); @@ -66,6 +67,12 @@ do { \ #define inline #endif +void *ipt_alloc_initial_table(const struct xt_table *info) +{ + return xt_alloc_initial_table(ipt, IPT); +} +EXPORT_SYMBOL_GPL(ipt_alloc_initial_table); + /* We keep a set of rules for each CPU, so we can avoid write-locking them in the softirq when updating the counters and therefore @@ -169,7 +176,7 @@ ipt_error(struct sk_buff *skb, const struct xt_target_param *par) /* Performance critical - called for every packet */ static inline bool -do_match(struct ipt_entry_match *m, const struct sk_buff *skb, +do_match(const struct ipt_entry_match *m, const struct sk_buff *skb, struct xt_match_param *par) { par->match = m->u.kernel.match; @@ -184,7 +191,7 @@ do_match(struct ipt_entry_match *m, const struct sk_buff *skb, /* Performance critical */ static inline struct ipt_entry * -get_entry(void *base, unsigned int offset) +get_entry(const void *base, unsigned int offset) { return (struct ipt_entry *)(base + offset); } @@ -199,6 +206,13 @@ static inline bool unconditional(const struct ipt_ip *ip) #undef FWINV } +/* for const-correctness */ +static inline const struct ipt_entry_target * +ipt_get_target_c(const struct ipt_entry *e) +{ + return ipt_get_target((struct ipt_entry *)e); +} + #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) static const char *const hooknames[] = { @@ -233,11 +247,11 @@ static struct nf_loginfo trace_loginfo = { /* Mildly perf critical (only if packet tracing is on) */ static inline int -get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e, +get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e, const char *hookname, const char **chainname, const char **comment, unsigned int *rulenum) { - struct ipt_standard_target *t = (void *)ipt_get_target(s); + const struct ipt_standard_target *t = (void *)ipt_get_target_c(s); if (strcmp(t->target.u.kernel.target->name, IPT_ERROR_TARGET) == 0) { /* Head of user chain: ERROR target with chainname */ @@ -263,17 +277,18 @@ get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e, return 0; } -static void trace_packet(struct sk_buff *skb, +static void trace_packet(const struct sk_buff *skb, unsigned int hook, const struct net_device *in, const struct net_device *out, const char *tablename, - struct xt_table_info *private, - struct ipt_entry *e) + const struct xt_table_info *private, + const struct ipt_entry *e) { - void *table_base; + const void *table_base; const struct ipt_entry *root; const char *hookname, *chainname, *comment; + const struct ipt_entry *iter; unsigned int rulenum = 0; table_base = private->entries[smp_processor_id()]; @@ -282,10 +297,10 @@ static void trace_packet(struct sk_buff *skb, hookname = chainname = hooknames[hook]; comment = comments[NF_IP_TRACE_COMMENT_RULE]; - IPT_ENTRY_ITERATE(root, - private->size - private->hook_entry[hook], - get_chainname_rulenum, - e, hookname, &chainname, &comment, &rulenum); + xt_entry_foreach(iter, root, private->size - private->hook_entry[hook]) + if (get_chainname_rulenum(iter, e, hookname, + &chainname, &comment, &rulenum) != 0) + break; nf_log_packet(AF_INET, hook, skb, in, out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", @@ -315,9 +330,9 @@ ipt_do_table(struct sk_buff *skb, /* Initializing verdict to NF_DROP keeps gcc happy. */ unsigned int verdict = NF_DROP; const char *indev, *outdev; - void *table_base; + const void *table_base; struct ipt_entry *e, *back; - struct xt_table_info *private; + const struct xt_table_info *private; struct xt_match_param mtpar; struct xt_target_param tgpar; @@ -350,17 +365,22 @@ ipt_do_table(struct sk_buff *skb, back = get_entry(table_base, private->underflow[hook]); do { - struct ipt_entry_target *t; + const struct ipt_entry_target *t; + const struct xt_entry_match *ematch; IP_NF_ASSERT(e); IP_NF_ASSERT(back); if (!ip_packet_match(ip, indev, outdev, - &e->ip, mtpar.fragoff) || - IPT_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0) { + &e->ip, mtpar.fragoff)) { + no_match: e = ipt_next_entry(e); continue; } + xt_ematch_foreach(ematch, e) + if (do_match(ematch, skb, &mtpar) != 0) + goto no_match; + ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1); t = ipt_get_target(e); @@ -443,7 +463,7 @@ ipt_do_table(struct sk_buff *skb, /* Figures out from what hook each rule can be called: returns 0 if there are loops. Puts hook bitmask in comefrom. */ static int -mark_source_chains(struct xt_table_info *newinfo, +mark_source_chains(const struct xt_table_info *newinfo, unsigned int valid_hooks, void *entry0) { unsigned int hook; @@ -461,8 +481,8 @@ mark_source_chains(struct xt_table_info *newinfo, e->counters.pcnt = pos; for (;;) { - struct ipt_standard_target *t - = (void *)ipt_get_target(e); + const struct ipt_standard_target *t + = (void *)ipt_get_target_c(e); int visited = e->comefrom & (1 << hook); if (e->comefrom & (1 << NF_INET_NUMHOOKS)) { @@ -552,27 +572,23 @@ mark_source_chains(struct xt_table_info *newinfo, return 1; } -static int -cleanup_match(struct ipt_entry_match *m, unsigned int *i) +static void cleanup_match(struct ipt_entry_match *m, struct net *net) { struct xt_mtdtor_param par; - if (i && (*i)-- == 0) - return 1; - + par.net = net; par.match = m->u.kernel.match; par.matchinfo = m->data; par.family = NFPROTO_IPV4; if (par.match->destroy != NULL) par.match->destroy(&par); module_put(par.match->me); - return 0; } static int -check_entry(struct ipt_entry *e, const char *name) +check_entry(const struct ipt_entry *e, const char *name) { - struct ipt_entry_target *t; + const struct ipt_entry_target *t; if (!ip_checkentry(&e->ip)) { duprintf("ip_tables: ip check failed %p %s.\n", e, name); @@ -583,7 +599,7 @@ check_entry(struct ipt_entry *e, const char *name) e->next_offset) return -EINVAL; - t = ipt_get_target(e); + t = ipt_get_target_c(e); if (e->target_offset + t->u.target_size > e->next_offset) return -EINVAL; @@ -591,8 +607,7 @@ check_entry(struct ipt_entry *e, const char *name) } static int -check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par, - unsigned int *i) +check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par) { const struct ipt_ip *ip = par->entryinfo; int ret; @@ -607,13 +622,11 @@ check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par, par.match->name); return ret; } - ++*i; return 0; } static int -find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par, - unsigned int *i) +find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par) { struct xt_match *match; int ret; @@ -627,7 +640,7 @@ find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par, } m->u.kernel.match = match; - ret = check_match(m, par, i); + ret = check_match(m, par); if (ret) goto err; @@ -637,10 +650,11 @@ err: return ret; } -static int check_target(struct ipt_entry *e, const char *name) +static int check_target(struct ipt_entry *e, struct net *net, const char *name) { struct ipt_entry_target *t = ipt_get_target(e); struct xt_tgchk_param par = { + .net = net, .table = name, .entryinfo = e, .target = t->u.kernel.target, @@ -661,27 +675,32 @@ static int check_target(struct ipt_entry *e, const char *name) } static int -find_check_entry(struct ipt_entry *e, const char *name, unsigned int size, - unsigned int *i) +find_check_entry(struct ipt_entry *e, struct net *net, const char *name, + unsigned int size) { struct ipt_entry_target *t; struct xt_target *target; int ret; unsigned int j; struct xt_mtchk_param mtpar; + struct xt_entry_match *ematch; ret = check_entry(e, name); if (ret) return ret; j = 0; + mtpar.net = net; mtpar.table = name; mtpar.entryinfo = &e->ip; mtpar.hook_mask = e->comefrom; mtpar.family = NFPROTO_IPV4; - ret = IPT_MATCH_ITERATE(e, find_check_match, &mtpar, &j); - if (ret != 0) - goto cleanup_matches; + xt_ematch_foreach(ematch, e) { + ret = find_check_match(ematch, &mtpar); + if (ret != 0) + goto cleanup_matches; + ++j; + } t = ipt_get_target(e); target = try_then_request_module(xt_find_target(AF_INET, @@ -695,27 +714,29 @@ find_check_entry(struct ipt_entry *e, const char *name, unsigned int size, } t->u.kernel.target = target; - ret = check_target(e, name); + ret = check_target(e, net, name); if (ret) goto err; - - (*i)++; return 0; err: module_put(t->u.kernel.target->me); cleanup_matches: - IPT_MATCH_ITERATE(e, cleanup_match, &j); + xt_ematch_foreach(ematch, e) { + if (j-- == 0) + break; + cleanup_match(ematch, net); + } return ret; } -static bool check_underflow(struct ipt_entry *e) +static bool check_underflow(const struct ipt_entry *e) { const struct ipt_entry_target *t; unsigned int verdict; if (!unconditional(&e->ip)) return false; - t = ipt_get_target(e); + t = ipt_get_target_c(e); if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) return false; verdict = ((struct ipt_standard_target *)t)->verdict; @@ -726,12 +747,11 @@ static bool check_underflow(struct ipt_entry *e) static int check_entry_size_and_hooks(struct ipt_entry *e, struct xt_table_info *newinfo, - unsigned char *base, - unsigned char *limit, + const unsigned char *base, + const unsigned char *limit, const unsigned int *hook_entries, const unsigned int *underflows, - unsigned int valid_hooks, - unsigned int *i) + unsigned int valid_hooks) { unsigned int h; @@ -768,50 +788,42 @@ check_entry_size_and_hooks(struct ipt_entry *e, /* Clear counters and comefrom */ e->counters = ((struct xt_counters) { 0, 0 }); e->comefrom = 0; - - (*i)++; return 0; } -static int -cleanup_entry(struct ipt_entry *e, unsigned int *i) +static void +cleanup_entry(struct ipt_entry *e, struct net *net) { struct xt_tgdtor_param par; struct ipt_entry_target *t; - - if (i && (*i)-- == 0) - return 1; + struct xt_entry_match *ematch; /* Cleanup all matches */ - IPT_MATCH_ITERATE(e, cleanup_match, NULL); + xt_ematch_foreach(ematch, e) + cleanup_match(ematch, net); t = ipt_get_target(e); + par.net = net; par.target = t->u.kernel.target; par.targinfo = t->data; par.family = NFPROTO_IPV4; if (par.target->destroy != NULL) par.target->destroy(&par); module_put(par.target->me); - return 0; } /* Checks and translates the user-supplied table segment (held in newinfo) */ static int -translate_table(const char *name, - unsigned int valid_hooks, - struct xt_table_info *newinfo, - void *entry0, - unsigned int size, - unsigned int number, - const unsigned int *hook_entries, - const unsigned int *underflows) +translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, + const struct ipt_replace *repl) { + struct ipt_entry *iter; unsigned int i; - int ret; + int ret = 0; - newinfo->size = size; - newinfo->number = number; + newinfo->size = repl->size; + newinfo->number = repl->num_entries; /* Init all hooks to impossible value. */ for (i = 0; i < NF_INET_NUMHOOKS; i++) { @@ -822,49 +834,58 @@ translate_table(const char *name, duprintf("translate_table: size %u\n", newinfo->size); i = 0; /* Walk through entries, checking offsets. */ - ret = IPT_ENTRY_ITERATE(entry0, newinfo->size, - check_entry_size_and_hooks, - newinfo, - entry0, - entry0 + size, - hook_entries, underflows, valid_hooks, &i); - if (ret != 0) - return ret; + xt_entry_foreach(iter, entry0, newinfo->size) { + ret = check_entry_size_and_hooks(iter, newinfo, entry0, + entry0 + repl->size, + repl->hook_entry, + repl->underflow, + repl->valid_hooks); + if (ret != 0) + return ret; + ++i; + } - if (i != number) { + if (i != repl->num_entries) { duprintf("translate_table: %u not %u entries\n", - i, number); + i, repl->num_entries); return -EINVAL; } /* Check hooks all assigned */ for (i = 0; i < NF_INET_NUMHOOKS; i++) { /* Only hooks which are valid */ - if (!(valid_hooks & (1 << i))) + if (!(repl->valid_hooks & (1 << i))) continue; if (newinfo->hook_entry[i] == 0xFFFFFFFF) { duprintf("Invalid hook entry %u %u\n", - i, hook_entries[i]); + i, repl->hook_entry[i]); return -EINVAL; } if (newinfo->underflow[i] == 0xFFFFFFFF) { duprintf("Invalid underflow %u %u\n", - i, underflows[i]); + i, repl->underflow[i]); return -EINVAL; } } - if (!mark_source_chains(newinfo, valid_hooks, entry0)) + if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) return -ELOOP; /* Finally, each sanity check must pass */ i = 0; - ret = IPT_ENTRY_ITERATE(entry0, newinfo->size, - find_check_entry, name, size, &i); + xt_entry_foreach(iter, entry0, newinfo->size) { + ret = find_check_entry(iter, net, repl->name, repl->size); + if (ret != 0) + break; + ++i; + } if (ret != 0) { - IPT_ENTRY_ITERATE(entry0, newinfo->size, - cleanup_entry, &i); + xt_entry_foreach(iter, entry0, newinfo->size) { + if (i-- == 0) + break; + cleanup_entry(iter, net); + } return ret; } @@ -877,33 +898,11 @@ translate_table(const char *name, return ret; } -/* Gets counters. */ -static inline int -add_entry_to_counter(const struct ipt_entry *e, - struct xt_counters total[], - unsigned int *i) -{ - ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); - - (*i)++; - return 0; -} - -static inline int -set_entry_to_counter(const struct ipt_entry *e, - struct ipt_counters total[], - unsigned int *i) -{ - SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); - - (*i)++; - return 0; -} - static void get_counters(const struct xt_table_info *t, struct xt_counters counters[]) { + struct ipt_entry *iter; unsigned int cpu; unsigned int i; unsigned int curcpu; @@ -919,32 +918,32 @@ get_counters(const struct xt_table_info *t, curcpu = smp_processor_id(); i = 0; - IPT_ENTRY_ITERATE(t->entries[curcpu], - t->size, - set_entry_to_counter, - counters, - &i); + xt_entry_foreach(iter, t->entries[curcpu], t->size) { + SET_COUNTER(counters[i], iter->counters.bcnt, + iter->counters.pcnt); + ++i; + } for_each_possible_cpu(cpu) { if (cpu == curcpu) continue; i = 0; xt_info_wrlock(cpu); - IPT_ENTRY_ITERATE(t->entries[cpu], - t->size, - add_entry_to_counter, - counters, - &i); + xt_entry_foreach(iter, t->entries[cpu], t->size) { + ADD_COUNTER(counters[i], iter->counters.bcnt, + iter->counters.pcnt); + ++i; /* macro does multi eval of i */ + } xt_info_wrunlock(cpu); } local_bh_enable(); } -static struct xt_counters * alloc_counters(struct xt_table *table) +static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - struct xt_table_info *private = table->private; + const struct xt_table_info *private = table->private; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -962,11 +961,11 @@ static struct xt_counters * alloc_counters(struct xt_table *table) static int copy_entries_to_user(unsigned int total_size, - struct xt_table *table, + const struct xt_table *table, void __user *userptr) { unsigned int off, num; - struct ipt_entry *e; + const struct ipt_entry *e; struct xt_counters *counters; const struct xt_table_info *private = table->private; int ret = 0; @@ -1018,7 +1017,7 @@ copy_entries_to_user(unsigned int total_size, } } - t = ipt_get_target(e); + t = ipt_get_target_c(e); if (copy_to_user(userptr + off + e->target_offset + offsetof(struct ipt_entry_target, u.user.name), @@ -1035,7 +1034,7 @@ copy_entries_to_user(unsigned int total_size, } #ifdef CONFIG_COMPAT -static void compat_standard_from_user(void *dst, void *src) +static void compat_standard_from_user(void *dst, const void *src) { int v = *(compat_int_t *)src; @@ -1044,7 +1043,7 @@ static void compat_standard_from_user(void *dst, void *src) memcpy(dst, &v, sizeof(v)); } -static int compat_standard_to_user(void __user *dst, void *src) +static int compat_standard_to_user(void __user *dst, const void *src) { compat_int_t cv = *(int *)src; @@ -1053,25 +1052,20 @@ static int compat_standard_to_user(void __user *dst, void *src) return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; } -static inline int -compat_calc_match(struct ipt_entry_match *m, int *size) -{ - *size += xt_compat_match_offset(m->u.kernel.match); - return 0; -} - -static int compat_calc_entry(struct ipt_entry *e, +static int compat_calc_entry(const struct ipt_entry *e, const struct xt_table_info *info, - void *base, struct xt_table_info *newinfo) + const void *base, struct xt_table_info *newinfo) { - struct ipt_entry_target *t; + const struct xt_entry_match *ematch; + const struct ipt_entry_target *t; unsigned int entry_offset; int off, i, ret; off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); entry_offset = (void *)e - base; - IPT_MATCH_ITERATE(e, compat_calc_match, &off); - t = ipt_get_target(e); + xt_ematch_foreach(ematch, e) + off += xt_compat_match_offset(ematch->u.kernel.match); + t = ipt_get_target_c(e); off += xt_compat_target_offset(t->u.kernel.target); newinfo->size -= off; ret = xt_compat_add_offset(AF_INET, entry_offset, off); @@ -1092,7 +1086,9 @@ static int compat_calc_entry(struct ipt_entry *e, static int compat_table_info(const struct xt_table_info *info, struct xt_table_info *newinfo) { + struct ipt_entry *iter; void *loc_cpu_entry; + int ret; if (!newinfo || !info) return -EINVAL; @@ -1101,13 +1097,17 @@ static int compat_table_info(const struct xt_table_info *info, memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); newinfo->initial_entries = 0; loc_cpu_entry = info->entries[raw_smp_processor_id()]; - return IPT_ENTRY_ITERATE(loc_cpu_entry, info->size, - compat_calc_entry, info, loc_cpu_entry, - newinfo); + xt_entry_foreach(iter, loc_cpu_entry, info->size) { + ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); + if (ret != 0) + return ret; + } + return 0; } #endif -static int get_info(struct net *net, void __user *user, int *len, int compat) +static int get_info(struct net *net, void __user *user, + const int *len, int compat) { char name[IPT_TABLE_MAXNAMELEN]; struct xt_table *t; @@ -1132,10 +1132,10 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) if (t && !IS_ERR(t)) { struct ipt_getinfo info; const struct xt_table_info *private = t->private; - #ifdef CONFIG_COMPAT + struct xt_table_info tmp; + if (compat) { - struct xt_table_info tmp; ret = compat_table_info(private, &tmp); xt_compat_flush_offsets(AF_INET); private = &tmp; @@ -1167,7 +1167,8 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) } static int -get_entries(struct net *net, struct ipt_get_entries __user *uptr, int *len) +get_entries(struct net *net, struct ipt_get_entries __user *uptr, + const int *len) { int ret; struct ipt_get_entries get; @@ -1215,6 +1216,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, struct xt_table_info *oldinfo; struct xt_counters *counters; void *loc_cpu_old_entry; + struct ipt_entry *iter; ret = 0; counters = vmalloc(num_counters * sizeof(struct xt_counters)); @@ -1257,8 +1259,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; - IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, - NULL); + xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size) + cleanup_entry(iter, net); + xt_free_table_info(oldinfo); if (copy_to_user(counters_ptr, counters, sizeof(struct xt_counters) * num_counters) != 0) @@ -1277,12 +1280,13 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, } static int -do_replace(struct net *net, void __user *user, unsigned int len) +do_replace(struct net *net, const void __user *user, unsigned int len) { int ret; struct ipt_replace tmp; struct xt_table_info *newinfo; void *loc_cpu_entry; + struct ipt_entry *iter; if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) return -EFAULT; @@ -1303,9 +1307,7 @@ do_replace(struct net *net, void __user *user, unsigned int len) goto free_newinfo; } - ret = translate_table(tmp.name, tmp.valid_hooks, - newinfo, loc_cpu_entry, tmp.size, tmp.num_entries, - tmp.hook_entry, tmp.underflow); + ret = translate_table(net, newinfo, loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; @@ -1318,27 +1320,16 @@ do_replace(struct net *net, void __user *user, unsigned int len) return 0; free_newinfo_untrans: - IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); + xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) + cleanup_entry(iter, net); free_newinfo: xt_free_table_info(newinfo); return ret; } -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. */ static int -add_counter_to_entry(struct ipt_entry *e, - const struct xt_counters addme[], - unsigned int *i) -{ - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); - - (*i)++; - return 0; -} - -static int -do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) +do_add_counters(struct net *net, const void __user *user, + unsigned int len, int compat) { unsigned int i, curcpu; struct xt_counters_info tmp; @@ -1351,6 +1342,7 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat const struct xt_table_info *private; int ret = 0; void *loc_cpu_entry; + struct ipt_entry *iter; #ifdef CONFIG_COMPAT struct compat_xt_counters_info compat_tmp; @@ -1408,11 +1400,10 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat curcpu = smp_processor_id(); loc_cpu_entry = private->entries[curcpu]; xt_info_wrlock(curcpu); - IPT_ENTRY_ITERATE(loc_cpu_entry, - private->size, - add_counter_to_entry, - paddc, - &i); + xt_entry_foreach(iter, loc_cpu_entry, private->size) { + ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); + ++i; + } xt_info_wrunlock(curcpu); unlock_up_free: local_bh_enable(); @@ -1440,45 +1431,40 @@ struct compat_ipt_replace { static int compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr, unsigned int *size, struct xt_counters *counters, - unsigned int *i) + unsigned int i) { struct ipt_entry_target *t; struct compat_ipt_entry __user *ce; u_int16_t target_offset, next_offset; compat_uint_t origsize; - int ret; + const struct xt_entry_match *ematch; + int ret = 0; - ret = -EFAULT; origsize = *size; ce = (struct compat_ipt_entry __user *)*dstptr; - if (copy_to_user(ce, e, sizeof(struct ipt_entry))) - goto out; - - if (copy_to_user(&ce->counters, &counters[*i], sizeof(counters[*i]))) - goto out; + if (copy_to_user(ce, e, sizeof(struct ipt_entry)) != 0 || + copy_to_user(&ce->counters, &counters[i], + sizeof(counters[i])) != 0) + return -EFAULT; *dstptr += sizeof(struct compat_ipt_entry); *size -= sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); - ret = IPT_MATCH_ITERATE(e, xt_compat_match_to_user, dstptr, size); + xt_ematch_foreach(ematch, e) { + ret = xt_compat_match_to_user(ematch, dstptr, size); + if (ret != 0) + return ret; + } target_offset = e->target_offset - (origsize - *size); - if (ret) - goto out; t = ipt_get_target(e); ret = xt_compat_target_to_user(t, dstptr, size); if (ret) - goto out; - ret = -EFAULT; + return ret; next_offset = e->next_offset - (origsize - *size); - if (put_user(target_offset, &ce->target_offset)) - goto out; - if (put_user(next_offset, &ce->next_offset)) - goto out; - - (*i)++; + if (put_user(target_offset, &ce->target_offset) != 0 || + put_user(next_offset, &ce->next_offset) != 0) + return -EFAULT; return 0; -out: - return ret; } static int @@ -1486,7 +1472,7 @@ compat_find_calc_match(struct ipt_entry_match *m, const char *name, const struct ipt_ip *ip, unsigned int hookmask, - int *size, unsigned int *i) + int *size) { struct xt_match *match; @@ -1500,47 +1486,32 @@ compat_find_calc_match(struct ipt_entry_match *m, } m->u.kernel.match = match; *size += xt_compat_match_offset(match); - - (*i)++; - return 0; -} - -static int -compat_release_match(struct ipt_entry_match *m, unsigned int *i) -{ - if (i && (*i)-- == 0) - return 1; - - module_put(m->u.kernel.match->me); return 0; } -static int -compat_release_entry(struct compat_ipt_entry *e, unsigned int *i) +static void compat_release_entry(struct compat_ipt_entry *e) { struct ipt_entry_target *t; - - if (i && (*i)-- == 0) - return 1; + struct xt_entry_match *ematch; /* Cleanup all matches */ - COMPAT_IPT_MATCH_ITERATE(e, compat_release_match, NULL); + xt_ematch_foreach(ematch, e) + module_put(ematch->u.kernel.match->me); t = compat_ipt_get_target(e); module_put(t->u.kernel.target->me); - return 0; } static int check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, struct xt_table_info *newinfo, unsigned int *size, - unsigned char *base, - unsigned char *limit, - unsigned int *hook_entries, - unsigned int *underflows, - unsigned int *i, + const unsigned char *base, + const unsigned char *limit, + const unsigned int *hook_entries, + const unsigned int *underflows, const char *name) { + struct xt_entry_match *ematch; struct ipt_entry_target *t; struct xt_target *target; unsigned int entry_offset; @@ -1569,10 +1540,13 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); entry_offset = (void *)e - (void *)base; j = 0; - ret = COMPAT_IPT_MATCH_ITERATE(e, compat_find_calc_match, name, - &e->ip, e->comefrom, &off, &j); - if (ret != 0) - goto release_matches; + xt_ematch_foreach(ematch, e) { + ret = compat_find_calc_match(ematch, name, + &e->ip, e->comefrom, &off); + if (ret != 0) + goto release_matches; + ++j; + } t = compat_ipt_get_target(e); target = try_then_request_module(xt_find_target(AF_INET, @@ -1604,14 +1578,16 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, /* Clear counters and comefrom */ memset(&e->counters, 0, sizeof(e->counters)); e->comefrom = 0; - - (*i)++; return 0; out: module_put(t->u.kernel.target->me); release_matches: - IPT_MATCH_ITERATE(e, compat_release_match, &j); + xt_ematch_foreach(ematch, e) { + if (j-- == 0) + break; + module_put(ematch->u.kernel.match->me); + } return ret; } @@ -1625,6 +1601,7 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, struct ipt_entry *de; unsigned int origsize; int ret, h; + struct xt_entry_match *ematch; ret = 0; origsize = *size; @@ -1635,10 +1612,11 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, *dstptr += sizeof(struct ipt_entry); *size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); - ret = COMPAT_IPT_MATCH_ITERATE(e, xt_compat_match_from_user, - dstptr, size); - if (ret) - return ret; + xt_ematch_foreach(ematch, e) { + ret = xt_compat_match_from_user(ematch, dstptr, size); + if (ret != 0) + return ret; + } de->target_offset = e->target_offset - (origsize - *size); t = compat_ipt_get_target(e); target = t->u.kernel.target; @@ -1655,36 +1633,43 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, } static int -compat_check_entry(struct ipt_entry *e, const char *name, - unsigned int *i) +compat_check_entry(struct ipt_entry *e, struct net *net, const char *name) { + struct xt_entry_match *ematch; struct xt_mtchk_param mtpar; unsigned int j; - int ret; + int ret = 0; j = 0; + mtpar.net = net; mtpar.table = name; mtpar.entryinfo = &e->ip; mtpar.hook_mask = e->comefrom; mtpar.family = NFPROTO_IPV4; - ret = IPT_MATCH_ITERATE(e, check_match, &mtpar, &j); - if (ret) - goto cleanup_matches; + xt_ematch_foreach(ematch, e) { + ret = check_match(ematch, &mtpar); + if (ret != 0) + goto cleanup_matches; + ++j; + } - ret = check_target(e, name); + ret = check_target(e, net, name); if (ret) goto cleanup_matches; - - (*i)++; return 0; cleanup_matches: - IPT_MATCH_ITERATE(e, cleanup_match, &j); + xt_ematch_foreach(ematch, e) { + if (j-- == 0) + break; + cleanup_match(ematch, net); + } return ret; } static int -translate_compat_table(const char *name, +translate_compat_table(struct net *net, + const char *name, unsigned int valid_hooks, struct xt_table_info **pinfo, void **pentry0, @@ -1696,6 +1681,8 @@ translate_compat_table(const char *name, unsigned int i, j; struct xt_table_info *newinfo, *info; void *pos, *entry0, *entry1; + struct compat_ipt_entry *iter0; + struct ipt_entry *iter1; unsigned int size; int ret; @@ -1714,13 +1701,17 @@ translate_compat_table(const char *name, j = 0; xt_compat_lock(AF_INET); /* Walk through entries, checking offsets. */ - ret = COMPAT_IPT_ENTRY_ITERATE(entry0, total_size, - check_compat_entry_size_and_hooks, - info, &size, entry0, - entry0 + total_size, - hook_entries, underflows, &j, name); - if (ret != 0) - goto out_unlock; + xt_entry_foreach(iter0, entry0, total_size) { + ret = check_compat_entry_size_and_hooks(iter0, info, &size, + entry0, + entry0 + total_size, + hook_entries, + underflows, + name); + if (ret != 0) + goto out_unlock; + ++j; + } ret = -EINVAL; if (j != number) { @@ -1759,9 +1750,12 @@ translate_compat_table(const char *name, entry1 = newinfo->entries[raw_smp_processor_id()]; pos = entry1; size = total_size; - ret = COMPAT_IPT_ENTRY_ITERATE(entry0, total_size, - compat_copy_entry_from_user, - &pos, &size, name, newinfo, entry1); + xt_entry_foreach(iter0, entry0, total_size) { + ret = compat_copy_entry_from_user(iter0, &pos, &size, + name, newinfo, entry1); + if (ret != 0) + break; + } xt_compat_flush_offsets(AF_INET); xt_compat_unlock(AF_INET); if (ret) @@ -1772,13 +1766,32 @@ translate_compat_table(const char *name, goto free_newinfo; i = 0; - ret = IPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry, - name, &i); + xt_entry_foreach(iter1, entry1, newinfo->size) { + ret = compat_check_entry(iter1, net, name); + if (ret != 0) + break; + ++i; + } if (ret) { + /* + * The first i matches need cleanup_entry (calls ->destroy) + * because they had called ->check already. The other j-i + * entries need only release. + */ + int skip = i; j -= i; - COMPAT_IPT_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i, - compat_release_entry, &j); - IPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i); + xt_entry_foreach(iter0, entry0, newinfo->size) { + if (skip-- > 0) + continue; + if (j-- == 0) + break; + compat_release_entry(iter0); + } + xt_entry_foreach(iter1, entry1, newinfo->size) { + if (i-- == 0) + break; + cleanup_entry(iter1, net); + } xt_free_table_info(newinfo); return ret; } @@ -1796,7 +1809,11 @@ translate_compat_table(const char *name, free_newinfo: xt_free_table_info(newinfo); out: - COMPAT_IPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j); + xt_entry_foreach(iter0, entry0, total_size) { + if (j-- == 0) + break; + compat_release_entry(iter0); + } return ret; out_unlock: xt_compat_flush_offsets(AF_INET); @@ -1811,6 +1828,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) struct compat_ipt_replace tmp; struct xt_table_info *newinfo; void *loc_cpu_entry; + struct ipt_entry *iter; if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) return -EFAULT; @@ -1833,7 +1851,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) goto free_newinfo; } - ret = translate_compat_table(tmp.name, tmp.valid_hooks, + ret = translate_compat_table(net, tmp.name, tmp.valid_hooks, &newinfo, &loc_cpu_entry, tmp.size, tmp.num_entries, tmp.hook_entry, tmp.underflow); @@ -1849,7 +1867,8 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) return 0; free_newinfo_untrans: - IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); + xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) + cleanup_entry(iter, net); free_newinfo: xt_free_table_info(newinfo); return ret; @@ -1898,6 +1917,7 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, int ret = 0; const void *loc_cpu_entry; unsigned int i = 0; + struct ipt_entry *iter; counters = alloc_counters(table); if (IS_ERR(counters)) @@ -1910,9 +1930,12 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, loc_cpu_entry = private->entries[raw_smp_processor_id()]; pos = userptr; size = total_size; - ret = IPT_ENTRY_ITERATE(loc_cpu_entry, total_size, - compat_copy_entry_to_user, - &pos, &size, counters, &i); + xt_entry_foreach(iter, loc_cpu_entry, total_size) { + ret = compat_copy_entry_to_user(iter, &pos, + &size, counters, i++); + if (ret != 0) + break; + } vfree(counters); return ret; @@ -2086,11 +2109,7 @@ struct xt_table *ipt_register_table(struct net *net, loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; memcpy(loc_cpu_entry, repl->entries, repl->size); - ret = translate_table(table->name, table->valid_hooks, - newinfo, loc_cpu_entry, repl->size, - repl->num_entries, - repl->hook_entry, - repl->underflow); + ret = translate_table(net, newinfo, loc_cpu_entry, repl); if (ret != 0) goto out_free; @@ -2108,17 +2127,19 @@ out: return ERR_PTR(ret); } -void ipt_unregister_table(struct xt_table *table) +void ipt_unregister_table(struct net *net, struct xt_table *table) { struct xt_table_info *private; void *loc_cpu_entry; struct module *table_owner = table->me; + struct ipt_entry *iter; private = xt_unregister_table(table); /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries[raw_smp_processor_id()]; - IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL); + xt_entry_foreach(iter, loc_cpu_entry, private->size) + cleanup_entry(iter, net); if (private->number > private->initial_entries) module_put(table_owner); xt_free_table_info(private); diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 40ca2d240ab..0886f96c736 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -560,8 +560,7 @@ struct clusterip_seq_position { static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) { - const struct proc_dir_entry *pde = s->private; - struct clusterip_config *c = pde->data; + struct clusterip_config *c = s->private; unsigned int weight; u_int32_t local_nodes; struct clusterip_seq_position *idx; @@ -632,10 +631,9 @@ static int clusterip_proc_open(struct inode *inode, struct file *file) if (!ret) { struct seq_file *sf = file->private_data; - struct proc_dir_entry *pde = PDE(inode); - struct clusterip_config *c = pde->data; + struct clusterip_config *c = PDE(inode)->data; - sf->private = pde; + sf->private = c; clusterip_config_get(c); } @@ -645,8 +643,7 @@ static int clusterip_proc_open(struct inode *inode, struct file *file) static int clusterip_proc_release(struct inode *inode, struct file *file) { - struct proc_dir_entry *pde = PDE(inode); - struct clusterip_config *c = pde->data; + struct clusterip_config *c = PDE(inode)->data; int ret; ret = seq_release(inode, file); @@ -660,10 +657,9 @@ static int clusterip_proc_release(struct inode *inode, struct file *file) static ssize_t clusterip_proc_write(struct file *file, const char __user *input, size_t size, loff_t *ofs) { + struct clusterip_config *c = PDE(file->f_path.dentry->d_inode)->data; #define PROC_WRITELEN 10 char buffer[PROC_WRITELEN+1]; - const struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); - struct clusterip_config *c = pde->data; unsigned long nodenum; if (copy_from_user(buffer, input, PROC_WRITELEN)) diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 399061c3fd7..09a5d3f7cc4 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -338,7 +338,7 @@ struct compat_ipt_ulog_info { char prefix[ULOG_PREFIX_LEN]; }; -static void ulog_tg_compat_from_user(void *dst, void *src) +static void ulog_tg_compat_from_user(void *dst, const void *src) { const struct compat_ipt_ulog_info *cl = src; struct ipt_ulog_info l = { @@ -351,7 +351,7 @@ static void ulog_tg_compat_from_user(void *dst, void *src) memcpy(dst, &l, sizeof(l)); } -static int ulog_tg_compat_to_user(void __user *dst, void *src) +static int ulog_tg_compat_to_user(void __user *dst, const void *src) { const struct ipt_ulog_info *l = src; struct compat_ipt_ulog_info cl = { diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index df566cbd68e..c8dc9800d62 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -23,104 +23,32 @@ MODULE_DESCRIPTION("iptables filter table"); (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT)) -static struct -{ - struct ipt_replace repl; - struct ipt_standard entries[3]; - struct ipt_error term; -} initial_table __net_initdata = { - .repl = { - .name = "filter", - .valid_hooks = FILTER_VALID_HOOKS, - .num_entries = 4, - .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error), - .hook_entry = { - [NF_INET_LOCAL_IN] = 0, - [NF_INET_FORWARD] = sizeof(struct ipt_standard), - [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2, - }, - .underflow = { - [NF_INET_LOCAL_IN] = 0, - [NF_INET_FORWARD] = sizeof(struct ipt_standard), - [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2, - }, - }, - .entries = { - IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */ - IPT_STANDARD_INIT(NF_ACCEPT), /* FORWARD */ - IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ - }, - .term = IPT_ERROR_INIT, /* ERROR */ -}; - static const struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV4, + .priority = NF_IP_PRI_FILTER, }; -/* The work comes in here from netfilter.c. */ -static unsigned int -ipt_local_in_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - return ipt_do_table(skb, hook, in, out, - dev_net(in)->ipv4.iptable_filter); -} - static unsigned int -ipt_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +iptable_filter_hook(unsigned int hook, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - return ipt_do_table(skb, hook, in, out, - dev_net(in)->ipv4.iptable_filter); -} + const struct net *net; -static unsigned int -ipt_local_out_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - /* root is playing with raw sockets. */ - if (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr)) + if (hook == NF_INET_LOCAL_OUT && + (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr))) + /* root is playing with raw sockets. */ return NF_ACCEPT; - return ipt_do_table(skb, hook, in, out, - dev_net(out)->ipv4.iptable_filter); + + net = dev_net((in != NULL) ? in : out); + return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_filter); } -static struct nf_hook_ops ipt_ops[] __read_mostly = { - { - .hook = ipt_local_in_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_LOCAL_IN, - .priority = NF_IP_PRI_FILTER, - }, - { - .hook = ipt_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_FORWARD, - .priority = NF_IP_PRI_FILTER, - }, - { - .hook = ipt_local_out_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP_PRI_FILTER, - }, -}; +static struct nf_hook_ops *filter_ops __read_mostly; /* Default to forward because I got too much mail already. */ static int forward = NF_ACCEPT; @@ -128,9 +56,18 @@ module_param(forward, bool, 0000); static int __net_init iptable_filter_net_init(struct net *net) { - /* Register table */ + struct ipt_replace *repl; + + repl = ipt_alloc_initial_table(&packet_filter); + if (repl == NULL) + return -ENOMEM; + /* Entry 1 is the FORWARD hook */ + ((struct ipt_standard *)repl->entries)[1].target.verdict = + -forward - 1; + net->ipv4.iptable_filter = - ipt_register_table(net, &packet_filter, &initial_table.repl); + ipt_register_table(net, &packet_filter, repl); + kfree(repl); if (IS_ERR(net->ipv4.iptable_filter)) return PTR_ERR(net->ipv4.iptable_filter); return 0; @@ -138,7 +75,7 @@ static int __net_init iptable_filter_net_init(struct net *net) static void __net_exit iptable_filter_net_exit(struct net *net) { - ipt_unregister_table(net->ipv4.iptable_filter); + ipt_unregister_table(net, net->ipv4.iptable_filter); } static struct pernet_operations iptable_filter_net_ops = { @@ -155,17 +92,16 @@ static int __init iptable_filter_init(void) return -EINVAL; } - /* Entry 1 is the FORWARD hook */ - initial_table.entries[1].target.verdict = -forward - 1; - ret = register_pernet_subsys(&iptable_filter_net_ops); if (ret < 0) return ret; /* Register hooks */ - ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); - if (ret < 0) + filter_ops = xt_hook_link(&packet_filter, iptable_filter_hook); + if (IS_ERR(filter_ops)) { + ret = PTR_ERR(filter_ops); goto cleanup_table; + } return ret; @@ -176,7 +112,7 @@ static int __init iptable_filter_init(void) static void __exit iptable_filter_fini(void) { - nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); + xt_hook_unlink(&packet_filter, filter_ops); unregister_pernet_subsys(&iptable_filter_net_ops); } diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index fae78c3076c..b9b83464cbf 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -27,101 +27,16 @@ MODULE_DESCRIPTION("iptables mangle table"); (1 << NF_INET_LOCAL_OUT) | \ (1 << NF_INET_POST_ROUTING)) -/* Ouch - five different hooks? Maybe this should be a config option..... -- BC */ -static const struct -{ - struct ipt_replace repl; - struct ipt_standard entries[5]; - struct ipt_error term; -} initial_table __net_initdata = { - .repl = { - .name = "mangle", - .valid_hooks = MANGLE_VALID_HOOKS, - .num_entries = 6, - .size = sizeof(struct ipt_standard) * 5 + sizeof(struct ipt_error), - .hook_entry = { - [NF_INET_PRE_ROUTING] = 0, - [NF_INET_LOCAL_IN] = sizeof(struct ipt_standard), - [NF_INET_FORWARD] = sizeof(struct ipt_standard) * 2, - [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 3, - [NF_INET_POST_ROUTING] = sizeof(struct ipt_standard) * 4, - }, - .underflow = { - [NF_INET_PRE_ROUTING] = 0, - [NF_INET_LOCAL_IN] = sizeof(struct ipt_standard), - [NF_INET_FORWARD] = sizeof(struct ipt_standard) * 2, - [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 3, - [NF_INET_POST_ROUTING] = sizeof(struct ipt_standard) * 4, - }, - }, - .entries = { - IPT_STANDARD_INIT(NF_ACCEPT), /* PRE_ROUTING */ - IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */ - IPT_STANDARD_INIT(NF_ACCEPT), /* FORWARD */ - IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ - IPT_STANDARD_INIT(NF_ACCEPT), /* POST_ROUTING */ - }, - .term = IPT_ERROR_INIT, /* ERROR */ -}; - static const struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV4, + .priority = NF_IP_PRI_MANGLE, }; -/* The work comes in here from netfilter.c. */ -static unsigned int -ipt_pre_routing_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - return ipt_do_table(skb, hook, in, out, - dev_net(in)->ipv4.iptable_mangle); -} - -static unsigned int -ipt_post_routing_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - return ipt_do_table(skb, hook, in, out, - dev_net(out)->ipv4.iptable_mangle); -} - -static unsigned int -ipt_local_in_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - return ipt_do_table(skb, hook, in, out, - dev_net(in)->ipv4.iptable_mangle); -} - -static unsigned int -ipt_forward_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - return ipt_do_table(skb, hook, in, out, - dev_net(in)->ipv4.iptable_mangle); -} - static unsigned int -ipt_local_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +ipt_mangle_out(struct sk_buff *skb, const struct net_device *out) { unsigned int ret; const struct iphdr *iph; @@ -141,7 +56,7 @@ ipt_local_hook(unsigned int hook, daddr = iph->daddr; tos = iph->tos; - ret = ipt_do_table(skb, hook, in, out, + ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, NULL, out, dev_net(out)->ipv4.iptable_mangle); /* Reroute for ANY change. */ if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) { @@ -158,49 +73,36 @@ ipt_local_hook(unsigned int hook, return ret; } -static struct nf_hook_ops ipt_ops[] __read_mostly = { - { - .hook = ipt_pre_routing_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_PRE_ROUTING, - .priority = NF_IP_PRI_MANGLE, - }, - { - .hook = ipt_local_in_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_LOCAL_IN, - .priority = NF_IP_PRI_MANGLE, - }, - { - .hook = ipt_forward_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_FORWARD, - .priority = NF_IP_PRI_MANGLE, - }, - { - .hook = ipt_local_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP_PRI_MANGLE, - }, - { - .hook = ipt_post_routing_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_POST_ROUTING, - .priority = NF_IP_PRI_MANGLE, - }, -}; +/* The work comes in here from netfilter.c. */ +static unsigned int +iptable_mangle_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + if (hook == NF_INET_LOCAL_OUT) + return ipt_mangle_out(skb, out); + if (hook == NF_INET_POST_ROUTING) + return ipt_do_table(skb, hook, in, out, + dev_net(out)->ipv4.iptable_mangle); + /* PREROUTING/INPUT/FORWARD: */ + return ipt_do_table(skb, hook, in, out, + dev_net(in)->ipv4.iptable_mangle); +} + +static struct nf_hook_ops *mangle_ops __read_mostly; static int __net_init iptable_mangle_net_init(struct net *net) { - /* Register table */ + struct ipt_replace *repl; + + repl = ipt_alloc_initial_table(&packet_mangler); + if (repl == NULL) + return -ENOMEM; net->ipv4.iptable_mangle = - ipt_register_table(net, &packet_mangler, &initial_table.repl); + ipt_register_table(net, &packet_mangler, repl); + kfree(repl); if (IS_ERR(net->ipv4.iptable_mangle)) return PTR_ERR(net->ipv4.iptable_mangle); return 0; @@ -208,7 +110,7 @@ static int __net_init iptable_mangle_net_init(struct net *net) static void __net_exit iptable_mangle_net_exit(struct net *net) { - ipt_unregister_table(net->ipv4.iptable_mangle); + ipt_unregister_table(net, net->ipv4.iptable_mangle); } static struct pernet_operations iptable_mangle_net_ops = { @@ -225,9 +127,11 @@ static int __init iptable_mangle_init(void) return ret; /* Register hooks */ - ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); - if (ret < 0) + mangle_ops = xt_hook_link(&packet_mangler, iptable_mangle_hook); + if (IS_ERR(mangle_ops)) { + ret = PTR_ERR(mangle_ops); goto cleanup_table; + } return ret; @@ -238,7 +142,7 @@ static int __init iptable_mangle_init(void) static void __exit iptable_mangle_fini(void) { - nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); + xt_hook_unlink(&packet_mangler, mangle_ops); unregister_pernet_subsys(&iptable_mangle_net_ops); } diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 993edc23be0..06fb9d11953 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -9,90 +9,44 @@ #define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) -static const struct -{ - struct ipt_replace repl; - struct ipt_standard entries[2]; - struct ipt_error term; -} initial_table __net_initdata = { - .repl = { - .name = "raw", - .valid_hooks = RAW_VALID_HOOKS, - .num_entries = 3, - .size = sizeof(struct ipt_standard) * 2 + sizeof(struct ipt_error), - .hook_entry = { - [NF_INET_PRE_ROUTING] = 0, - [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) - }, - .underflow = { - [NF_INET_PRE_ROUTING] = 0, - [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) - }, - }, - .entries = { - IPT_STANDARD_INIT(NF_ACCEPT), /* PRE_ROUTING */ - IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ - }, - .term = IPT_ERROR_INIT, /* ERROR */ -}; - static const struct xt_table packet_raw = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV4, + .priority = NF_IP_PRI_RAW, }; /* The work comes in here from netfilter.c. */ static unsigned int -ipt_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +iptable_raw_hook(unsigned int hook, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - return ipt_do_table(skb, hook, in, out, - dev_net(in)->ipv4.iptable_raw); -} + const struct net *net; -static unsigned int -ipt_local_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - /* root is playing with raw sockets. */ - if (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr)) + if (hook == NF_INET_LOCAL_OUT && + (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr))) + /* root is playing with raw sockets. */ return NF_ACCEPT; - return ipt_do_table(skb, hook, in, out, - dev_net(out)->ipv4.iptable_raw); + + net = dev_net((in != NULL) ? in : out); + return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_raw); } -/* 'raw' is the very first table. */ -static struct nf_hook_ops ipt_ops[] __read_mostly = { - { - .hook = ipt_hook, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_PRE_ROUTING, - .priority = NF_IP_PRI_RAW, - .owner = THIS_MODULE, - }, - { - .hook = ipt_local_hook, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP_PRI_RAW, - .owner = THIS_MODULE, - }, -}; +static struct nf_hook_ops *rawtable_ops __read_mostly; static int __net_init iptable_raw_net_init(struct net *net) { - /* Register table */ + struct ipt_replace *repl; + + repl = ipt_alloc_initial_table(&packet_raw); + if (repl == NULL) + return -ENOMEM; net->ipv4.iptable_raw = - ipt_register_table(net, &packet_raw, &initial_table.repl); + ipt_register_table(net, &packet_raw, repl); + kfree(repl); if (IS_ERR(net->ipv4.iptable_raw)) return PTR_ERR(net->ipv4.iptable_raw); return 0; @@ -100,7 +54,7 @@ static int __net_init iptable_raw_net_init(struct net *net) static void __net_exit iptable_raw_net_exit(struct net *net) { - ipt_unregister_table(net->ipv4.iptable_raw); + ipt_unregister_table(net, net->ipv4.iptable_raw); } static struct pernet_operations iptable_raw_net_ops = { @@ -117,9 +71,11 @@ static int __init iptable_raw_init(void) return ret; /* Register hooks */ - ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); - if (ret < 0) + rawtable_ops = xt_hook_link(&packet_raw, iptable_raw_hook); + if (IS_ERR(rawtable_ops)) { + ret = PTR_ERR(rawtable_ops); goto cleanup_table; + } return ret; @@ -130,7 +86,7 @@ static int __init iptable_raw_init(void) static void __exit iptable_raw_fini(void) { - nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); + xt_hook_unlink(&packet_raw, rawtable_ops); unregister_pernet_subsys(&iptable_raw_net_ops); } diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index 3bd3d6388da..cce2f64e6f2 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -27,109 +27,44 @@ MODULE_DESCRIPTION("iptables security table, for MAC rules"); (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT) -static const struct -{ - struct ipt_replace repl; - struct ipt_standard entries[3]; - struct ipt_error term; -} initial_table __net_initdata = { - .repl = { - .name = "security", - .valid_hooks = SECURITY_VALID_HOOKS, - .num_entries = 4, - .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error), - .hook_entry = { - [NF_INET_LOCAL_IN] = 0, - [NF_INET_FORWARD] = sizeof(struct ipt_standard), - [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2, - }, - .underflow = { - [NF_INET_LOCAL_IN] = 0, - [NF_INET_FORWARD] = sizeof(struct ipt_standard), - [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2, - }, - }, - .entries = { - IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */ - IPT_STANDARD_INIT(NF_ACCEPT), /* FORWARD */ - IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ - }, - .term = IPT_ERROR_INIT, /* ERROR */ -}; - static const struct xt_table security_table = { .name = "security", .valid_hooks = SECURITY_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV4, + .priority = NF_IP_PRI_SECURITY, }; static unsigned int -ipt_local_in_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - return ipt_do_table(skb, hook, in, out, - dev_net(in)->ipv4.iptable_security); -} - -static unsigned int -ipt_forward_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +iptable_security_hook(unsigned int hook, struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - return ipt_do_table(skb, hook, in, out, - dev_net(in)->ipv4.iptable_security); -} + const struct net *net; -static unsigned int -ipt_local_out_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - /* Somebody is playing with raw sockets. */ - if (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr)) + if (hook == NF_INET_LOCAL_OUT && + (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr))) + /* Somebody is playing with raw sockets. */ return NF_ACCEPT; - return ipt_do_table(skb, hook, in, out, - dev_net(out)->ipv4.iptable_security); + + net = dev_net((in != NULL) ? in : out); + return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_security); } -static struct nf_hook_ops ipt_ops[] __read_mostly = { - { - .hook = ipt_local_in_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_LOCAL_IN, - .priority = NF_IP_PRI_SECURITY, - }, - { - .hook = ipt_forward_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_FORWARD, - .priority = NF_IP_PRI_SECURITY, - }, - { - .hook = ipt_local_out_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP_PRI_SECURITY, - }, -}; +static struct nf_hook_ops *sectbl_ops __read_mostly; static int __net_init iptable_security_net_init(struct net *net) { - net->ipv4.iptable_security = - ipt_register_table(net, &security_table, &initial_table.repl); + struct ipt_replace *repl; + repl = ipt_alloc_initial_table(&security_table); + if (repl == NULL) + return -ENOMEM; + net->ipv4.iptable_security = + ipt_register_table(net, &security_table, repl); + kfree(repl); if (IS_ERR(net->ipv4.iptable_security)) return PTR_ERR(net->ipv4.iptable_security); @@ -138,7 +73,7 @@ static int __net_init iptable_security_net_init(struct net *net) static void __net_exit iptable_security_net_exit(struct net *net) { - ipt_unregister_table(net->ipv4.iptable_security); + ipt_unregister_table(net, net->ipv4.iptable_security); } static struct pernet_operations iptable_security_net_ops = { @@ -154,9 +89,11 @@ static int __init iptable_security_init(void) if (ret < 0) return ret; - ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); - if (ret < 0) + sectbl_ops = xt_hook_link(&security_table, iptable_security_hook); + if (IS_ERR(sectbl_ops)) { + ret = PTR_ERR(sectbl_ops); goto cleanup_table; + } return ret; @@ -167,7 +104,7 @@ cleanup_table: static void __exit iptable_security_fini(void) { - nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); + xt_hook_unlink(&security_table, sectbl_ops); unregister_pernet_subsys(&iptable_security_net_ops); } diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index d171b123a65..2bb1f87051c 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -22,6 +22,7 @@ #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_l3proto.h> +#include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> #include <net/netfilter/nf_nat_helper.h> @@ -210,7 +211,7 @@ static ctl_table ip_ct_sysctl_table[] = { }, { .procname = "ip_conntrack_buckets", - .data = &nf_conntrack_htable_size, + .data = &init_net.ct.htable_size, .maxlen = sizeof(unsigned int), .mode = 0444, .proc_handler = proc_dointvec, @@ -266,7 +267,7 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) return -EINVAL; } - h = nf_conntrack_find_get(sock_net(sk), &tuple); + h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple); if (h) { struct sockaddr_in sin; struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 8668a3defda..2fb7b76da94 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -32,7 +32,7 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) struct hlist_nulls_node *n; for (st->bucket = 0; - st->bucket < nf_conntrack_htable_size; + st->bucket < net->ct.htable_size; st->bucket++) { n = rcu_dereference(net->ct.hash[st->bucket].first); if (!is_a_nulls(n)) @@ -50,7 +50,7 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, head = rcu_dereference(head->next); while (is_a_nulls(head)) { if (likely(get_nulls_value(head) == st->bucket)) { - if (++st->bucket >= nf_conntrack_htable_size) + if (++st->bucket >= net->ct.htable_size) return NULL; } head = rcu_dereference(net->ct.hash[st->bucket].first); diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 7afd39b5b78..7404bde9599 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -18,6 +18,7 @@ #include <net/netfilter/nf_conntrack_tuple.h> #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/nf_log.h> static unsigned int nf_ct_icmp_timeout __read_mostly = 30*HZ; @@ -114,13 +115,14 @@ static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb, /* Returns conntrack if it dealt with ICMP, and filled in skb fields */ static int -icmp_error_message(struct net *net, struct sk_buff *skb, +icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, enum ip_conntrack_info *ctinfo, unsigned int hooknum) { struct nf_conntrack_tuple innertuple, origtuple; const struct nf_conntrack_l4proto *innerproto; const struct nf_conntrack_tuple_hash *h; + u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; NF_CT_ASSERT(skb->nfct == NULL); @@ -146,7 +148,7 @@ icmp_error_message(struct net *net, struct sk_buff *skb, *ctinfo = IP_CT_RELATED; - h = nf_conntrack_find_get(net, &innertuple); + h = nf_conntrack_find_get(net, zone, &innertuple); if (!h) { pr_debug("icmp_error_message: no match\n"); return -NF_ACCEPT; @@ -163,7 +165,8 @@ icmp_error_message(struct net *net, struct sk_buff *skb, /* Small and modified version of icmp_rcv */ static int -icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, +icmp_error(struct net *net, struct nf_conn *tmpl, + struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { const struct icmphdr *icmph; @@ -208,7 +211,7 @@ icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, icmph->type != ICMP_REDIRECT) return NF_ACCEPT; - return icmp_error_message(net, skb, ctinfo, hooknum); + return icmp_error_message(net, tmpl, skb, ctinfo, hooknum); } #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 331ead3ebd1..cb763ae9ed9 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -17,6 +17,10 @@ #include <linux/netfilter_bridge.h> #include <linux/netfilter_ipv4.h> #include <net/netfilter/ipv4/nf_defrag_ipv4.h> +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#include <net/netfilter/nf_conntrack.h> +#endif +#include <net/netfilter/nf_conntrack_zones.h> /* Returns new sk_buff, or NULL */ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) @@ -38,15 +42,22 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum, struct sk_buff *skb) { + u16 zone = NF_CT_DEFAULT_ZONE; + +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) + if (skb->nfct) + zone = nf_ct_zone((struct nf_conn *)skb->nfct); +#endif + #ifdef CONFIG_BRIDGE_NETFILTER if (skb->nf_bridge && skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING) - return IP_DEFRAG_CONNTRACK_BRIDGE_IN; + return IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone; #endif if (hooknum == NF_INET_PRE_ROUTING) - return IP_DEFRAG_CONNTRACK_IN; + return IP_DEFRAG_CONNTRACK_IN + zone; else - return IP_DEFRAG_CONNTRACK_OUT; + return IP_DEFRAG_CONNTRACK_OUT + zone; } static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, @@ -59,7 +70,7 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, #if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE) /* Previously seen (loopback)? Ignore. Do this before fragment check. */ - if (skb->nfct) + if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct)) return NF_ACCEPT; #endif #endif diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index fe1a64479dd..4595281c286 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -30,14 +30,12 @@ #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_l3proto.h> #include <net/netfilter/nf_conntrack_l4proto.h> +#include <net/netfilter/nf_conntrack_zones.h> static DEFINE_SPINLOCK(nf_nat_lock); static struct nf_conntrack_l3proto *l3proto __read_mostly; -/* Calculated at init based on memory size */ -static unsigned int nf_nat_htable_size __read_mostly; - #define MAX_IP_NAT_PROTO 256 static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO] __read_mostly; @@ -72,15 +70,16 @@ EXPORT_SYMBOL_GPL(nf_nat_proto_put); /* We keep an extra hash for each conntrack, for fast searching. */ static inline unsigned int -hash_by_src(const struct nf_conntrack_tuple *tuple) +hash_by_src(const struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple) { unsigned int hash; /* Original src, to ensure we map it consistently if poss. */ hash = jhash_3words((__force u32)tuple->src.u3.ip, - (__force u32)tuple->src.u.all, + (__force u32)tuple->src.u.all ^ zone, tuple->dst.protonum, 0); - return ((u64)hash * nf_nat_htable_size) >> 32; + return ((u64)hash * net->ipv4.nat_htable_size) >> 32; } /* Is this tuple already taken? (not by us) */ @@ -142,12 +141,12 @@ same_src(const struct nf_conn *ct, /* Only called for SRC manip */ static int -find_appropriate_src(struct net *net, +find_appropriate_src(struct net *net, u16 zone, const struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *result, const struct nf_nat_range *range) { - unsigned int h = hash_by_src(tuple); + unsigned int h = hash_by_src(net, zone, tuple); const struct nf_conn_nat *nat; const struct nf_conn *ct; const struct hlist_node *n; @@ -155,7 +154,7 @@ find_appropriate_src(struct net *net, rcu_read_lock(); hlist_for_each_entry_rcu(nat, n, &net->ipv4.nat_bysource[h], bysource) { ct = nat->ct; - if (same_src(ct, tuple)) { + if (same_src(ct, tuple) && nf_ct_zone(ct) == zone) { /* Copy source part from reply tuple. */ nf_ct_invert_tuplepr(result, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); @@ -178,7 +177,7 @@ find_appropriate_src(struct net *net, the ip with the lowest src-ip/dst-ip/proto usage. */ static void -find_best_ips_proto(struct nf_conntrack_tuple *tuple, +find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, const struct nf_conn *ct, enum nf_nat_manip_type maniptype) @@ -212,7 +211,7 @@ find_best_ips_proto(struct nf_conntrack_tuple *tuple, maxip = ntohl(range->max_ip); j = jhash_2words((__force u32)tuple->src.u3.ip, range->flags & IP_NAT_RANGE_PERSISTENT ? - 0 : (__force u32)tuple->dst.u3.ip, 0); + 0 : (__force u32)tuple->dst.u3.ip ^ zone, 0); j = ((u64)j * (maxip - minip + 1)) >> 32; *var_ipp = htonl(minip + j); } @@ -232,6 +231,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, { struct net *net = nf_ct_net(ct); const struct nf_nat_protocol *proto; + u16 zone = nf_ct_zone(ct); /* 1) If this srcip/proto/src-proto-part is currently mapped, and that same mapping gives a unique tuple within the given @@ -242,7 +242,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, manips not an issue. */ if (maniptype == IP_NAT_MANIP_SRC && !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) { - if (find_appropriate_src(net, orig_tuple, tuple, range)) { + if (find_appropriate_src(net, zone, orig_tuple, tuple, range)) { pr_debug("get_unique_tuple: Found current src map\n"); if (!nf_nat_used_tuple(tuple, ct)) return; @@ -252,7 +252,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, /* 2) Select the least-used IP/proto combination in the given range. */ *tuple = *orig_tuple; - find_best_ips_proto(tuple, range, ct, maniptype); + find_best_ips_proto(zone, tuple, range, ct, maniptype); /* 3) The per-protocol part of the manip is made to map into the range to make a unique tuple. */ @@ -330,7 +330,8 @@ nf_nat_setup_info(struct nf_conn *ct, if (have_to_hash) { unsigned int srchash; - srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + srchash = hash_by_src(net, nf_ct_zone(ct), + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); spin_lock_bh(&nf_nat_lock); /* nf_conntrack_alter_reply might re-allocate exntension aera */ nat = nfct_nat(ct); @@ -679,8 +680,10 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct, static int __net_init nf_nat_net_init(struct net *net) { - net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, - &net->ipv4.nat_vmalloced, 0); + /* Leave them the same for the moment. */ + net->ipv4.nat_htable_size = net->ct.htable_size; + net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, + &net->ipv4.nat_vmalloced, 0); if (!net->ipv4.nat_bysource) return -ENOMEM; return 0; @@ -703,7 +706,7 @@ static void __net_exit nf_nat_net_exit(struct net *net) nf_ct_iterate_cleanup(net, &clean_nat, NULL); synchronize_rcu(); nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced, - nf_nat_htable_size); + net->ipv4.nat_htable_size); } static struct pernet_operations nf_nat_net_ops = { @@ -724,9 +727,6 @@ static int __init nf_nat_init(void) return ret; } - /* Leave them the same for the moment. */ - nf_nat_htable_size = nf_conntrack_htable_size; - ret = register_pernet_subsys(&nf_nat_net_ops); if (ret < 0) goto cleanup_extend; diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c index a1d5d58a58b..86e0e84ff0a 100644 --- a/net/ipv4/netfilter/nf_nat_ftp.c +++ b/net/ipv4/netfilter/nf_nat_ftp.c @@ -27,76 +27,29 @@ MODULE_ALIAS("ip_nat_ftp"); /* FIXME: Time out? --RR */ -static int -mangle_rfc959_packet(struct sk_buff *skb, - __be32 newip, - u_int16_t port, - unsigned int matchoff, - unsigned int matchlen, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo) +static int nf_nat_ftp_fmt_cmd(enum nf_ct_ftp_type type, + char *buffer, size_t buflen, + __be32 addr, u16 port) { - char buffer[sizeof("nnn,nnn,nnn,nnn,nnn,nnn")]; - - sprintf(buffer, "%u,%u,%u,%u,%u,%u", - NIPQUAD(newip), port>>8, port&0xFF); - - pr_debug("calling nf_nat_mangle_tcp_packet\n"); - - return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff, - matchlen, buffer, strlen(buffer)); -} - -/* |1|132.235.1.2|6275| */ -static int -mangle_eprt_packet(struct sk_buff *skb, - __be32 newip, - u_int16_t port, - unsigned int matchoff, - unsigned int matchlen, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo) -{ - char buffer[sizeof("|1|255.255.255.255|65535|")]; - - sprintf(buffer, "|1|%u.%u.%u.%u|%u|", NIPQUAD(newip), port); - - pr_debug("calling nf_nat_mangle_tcp_packet\n"); - - return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff, - matchlen, buffer, strlen(buffer)); -} - -/* |1|132.235.1.2|6275| */ -static int -mangle_epsv_packet(struct sk_buff *skb, - __be32 newip, - u_int16_t port, - unsigned int matchoff, - unsigned int matchlen, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo) -{ - char buffer[sizeof("|||65535|")]; - - sprintf(buffer, "|||%u|", port); - - pr_debug("calling nf_nat_mangle_tcp_packet\n"); + switch (type) { + case NF_CT_FTP_PORT: + case NF_CT_FTP_PASV: + return snprintf(buffer, buflen, "%u,%u,%u,%u,%u,%u", + ((unsigned char *)&addr)[0], + ((unsigned char *)&addr)[1], + ((unsigned char *)&addr)[2], + ((unsigned char *)&addr)[3], + port >> 8, + port & 0xFF); + case NF_CT_FTP_EPRT: + return snprintf(buffer, buflen, "|1|%pI4|%u|", &addr, port); + case NF_CT_FTP_EPSV: + return snprintf(buffer, buflen, "|||%u|", port); + } - return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff, - matchlen, buffer, strlen(buffer)); + return 0; } -static int (*mangle[])(struct sk_buff *, __be32, u_int16_t, - unsigned int, unsigned int, struct nf_conn *, - enum ip_conntrack_info) -= { - [NF_CT_FTP_PORT] = mangle_rfc959_packet, - [NF_CT_FTP_PASV] = mangle_rfc959_packet, - [NF_CT_FTP_EPRT] = mangle_eprt_packet, - [NF_CT_FTP_EPSV] = mangle_epsv_packet -}; - /* So, this packet has hit the connection tracking matching code. Mangle it, and change the expectation to match the new version. */ static unsigned int nf_nat_ftp(struct sk_buff *skb, @@ -110,6 +63,8 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb, u_int16_t port; int dir = CTINFO2DIR(ctinfo); struct nf_conn *ct = exp->master; + char buffer[sizeof("|1|255.255.255.255|65535|")]; + unsigned int buflen; pr_debug("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen); @@ -132,11 +87,21 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb, if (port == 0) return NF_DROP; - if (!mangle[type](skb, newip, port, matchoff, matchlen, ct, ctinfo)) { - nf_ct_unexpect_related(exp); - return NF_DROP; - } + buflen = nf_nat_ftp_fmt_cmd(type, buffer, sizeof(buffer), newip, port); + if (!buflen) + goto out; + + pr_debug("calling nf_nat_mangle_tcp_packet\n"); + + if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff, + matchlen, buffer, buflen)) + goto out; + return NF_ACCEPT; + +out: + nf_ct_unexpect_related(exp); + return NF_DROP; } static void __exit nf_nat_ftp_fini(void) diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 7f10a6be019..4b6af4bb1f5 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -141,6 +141,17 @@ static int enlarge_skb(struct sk_buff *skb, unsigned int extra) return 1; } +void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo, + __be32 seq, s16 off) +{ + if (!off) + return; + set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); + adjust_tcp_sequence(ntohl(seq), off, ct, ctinfo); + nf_conntrack_event_cache(IPCT_NATSEQADJ, ct); +} +EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust); + /* Generic function for mangling variable-length address changes inside * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX * command in FTP). @@ -149,14 +160,13 @@ static int enlarge_skb(struct sk_buff *skb, unsigned int extra) * skb enlargement, ... * * */ -int -nf_nat_mangle_tcp_packet(struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int match_offset, - unsigned int match_len, - const char *rep_buffer, - unsigned int rep_len) +int __nf_nat_mangle_tcp_packet(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int match_offset, + unsigned int match_len, + const char *rep_buffer, + unsigned int rep_len, bool adjust) { struct rtable *rt = skb_rtable(skb); struct iphdr *iph; @@ -202,16 +212,13 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb, inet_proto_csum_replace2(&tcph->check, skb, htons(oldlen), htons(datalen), 1); - if (rep_len != match_len) { - set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); - adjust_tcp_sequence(ntohl(tcph->seq), - (int)rep_len - (int)match_len, - ct, ctinfo); - nf_conntrack_event_cache(IPCT_NATSEQADJ, ct); - } + if (adjust && rep_len != match_len) + nf_nat_set_seq_adjust(ct, ctinfo, tcph->seq, + (int)rep_len - (int)match_len); + return 1; } -EXPORT_SYMBOL(nf_nat_mangle_tcp_packet); +EXPORT_SYMBOL(__nf_nat_mangle_tcp_packet); /* Generic function for mangling variable-length address changes inside * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index 9eb171056c6..4c060038d29 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -25,6 +25,7 @@ #include <net/netfilter/nf_nat_rule.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_expect.h> +#include <net/netfilter/nf_conntrack_zones.h> #include <linux/netfilter/nf_conntrack_proto_gre.h> #include <linux/netfilter/nf_conntrack_pptp.h> @@ -74,7 +75,7 @@ static void pptp_nat_expected(struct nf_conn *ct, pr_debug("trying to unexpect other dir: "); nf_ct_dump_tuple_ip(&t); - other_exp = nf_ct_expect_find_get(net, &t); + other_exp = nf_ct_expect_find_get(net, nf_ct_zone(ct), &t); if (other_exp) { nf_ct_unexpect_related(other_exp); nf_ct_expect_put(other_exp); diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 9e81e0dfb4e..ab74cc0535e 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -28,36 +28,6 @@ (1 << NF_INET_POST_ROUTING) | \ (1 << NF_INET_LOCAL_OUT)) -static const struct -{ - struct ipt_replace repl; - struct ipt_standard entries[3]; - struct ipt_error term; -} nat_initial_table __net_initdata = { - .repl = { - .name = "nat", - .valid_hooks = NAT_VALID_HOOKS, - .num_entries = 4, - .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error), - .hook_entry = { - [NF_INET_PRE_ROUTING] = 0, - [NF_INET_POST_ROUTING] = sizeof(struct ipt_standard), - [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 - }, - .underflow = { - [NF_INET_PRE_ROUTING] = 0, - [NF_INET_POST_ROUTING] = sizeof(struct ipt_standard), - [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 - }, - }, - .entries = { - IPT_STANDARD_INIT(NF_ACCEPT), /* PRE_ROUTING */ - IPT_STANDARD_INIT(NF_ACCEPT), /* POST_ROUTING */ - IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ - }, - .term = IPT_ERROR_INIT, /* ERROR */ -}; - static const struct xt_table nat_table = { .name = "nat", .valid_hooks = NAT_VALID_HOOKS, @@ -186,8 +156,13 @@ static struct xt_target ipt_dnat_reg __read_mostly = { static int __net_init nf_nat_rule_net_init(struct net *net) { - net->ipv4.nat_table = ipt_register_table(net, &nat_table, - &nat_initial_table.repl); + struct ipt_replace *repl; + + repl = ipt_alloc_initial_table(&nat_table); + if (repl == NULL) + return -ENOMEM; + net->ipv4.nat_table = ipt_register_table(net, &nat_table, repl); + kfree(repl); if (IS_ERR(net->ipv4.nat_table)) return PTR_ERR(net->ipv4.nat_table); return 0; @@ -195,7 +170,7 @@ static int __net_init nf_nat_rule_net_init(struct net *net) static void __net_exit nf_nat_rule_net_exit(struct net *net) { - ipt_unregister_table(net->ipv4.nat_table); + ipt_unregister_table(net, net->ipv4.nat_table); } static struct pernet_operations nf_nat_rule_net_ops = { diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index 07d61a57613..11b538deaae 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -1,4 +1,4 @@ -/* SIP extension for UDP NAT alteration. +/* SIP extension for NAT alteration. * * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar> * based on RR's ip_nat_ftp.c and other modules. @@ -15,6 +15,7 @@ #include <linux/ip.h> #include <net/ip.h> #include <linux/udp.h> +#include <linux/tcp.h> #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_helper.h> @@ -29,25 +30,42 @@ MODULE_DESCRIPTION("SIP NAT helper"); MODULE_ALIAS("ip_nat_sip"); -static unsigned int mangle_packet(struct sk_buff *skb, +static unsigned int mangle_packet(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int matchoff, unsigned int matchlen, const char *buffer, unsigned int buflen) { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - - if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, matchoff, matchlen, - buffer, buflen)) - return 0; + struct tcphdr *th; + unsigned int baseoff; + + if (nf_ct_protonum(ct) == IPPROTO_TCP) { + th = (struct tcphdr *)(skb->data + ip_hdrlen(skb)); + baseoff = ip_hdrlen(skb) + th->doff * 4; + matchoff += dataoff - baseoff; + + if (!__nf_nat_mangle_tcp_packet(skb, ct, ctinfo, + matchoff, matchlen, + buffer, buflen, false)) + return 0; + } else { + baseoff = ip_hdrlen(skb) + sizeof(struct udphdr); + matchoff += dataoff - baseoff; + + if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, + matchoff, matchlen, + buffer, buflen)) + return 0; + } /* Reload data pointer and adjust datalen value */ - *dptr = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr); + *dptr = skb->data + dataoff; *datalen += buflen - matchlen; return 1; } -static int map_addr(struct sk_buff *skb, +static int map_addr(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int matchoff, unsigned int matchlen, union nf_inet_addr *addr, __be16 port) @@ -76,11 +94,11 @@ static int map_addr(struct sk_buff *skb, buflen = sprintf(buffer, "%pI4:%u", &newaddr, ntohs(newport)); - return mangle_packet(skb, dptr, datalen, matchoff, matchlen, + return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen, buffer, buflen); } -static int map_sip_addr(struct sk_buff *skb, +static int map_sip_addr(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, enum sip_header_types type) { @@ -93,16 +111,18 @@ static int map_sip_addr(struct sk_buff *skb, if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, type, NULL, &matchoff, &matchlen, &addr, &port) <= 0) return 1; - return map_addr(skb, dptr, datalen, matchoff, matchlen, &addr, port); + return map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen, + &addr, port); } -static unsigned int ip_nat_sip(struct sk_buff *skb, +static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen) { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - unsigned int dataoff, matchoff, matchlen; + unsigned int coff, matchoff, matchlen; + enum sip_header_types hdr; union nf_inet_addr addr; __be16 port; int request, in_header; @@ -112,16 +132,21 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, if (ct_sip_parse_request(ct, *dptr, *datalen, &matchoff, &matchlen, &addr, &port) > 0 && - !map_addr(skb, dptr, datalen, matchoff, matchlen, + !map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen, &addr, port)) return NF_DROP; request = 1; } else request = 0; + if (nf_ct_protonum(ct) == IPPROTO_TCP) + hdr = SIP_HDR_VIA_TCP; + else + hdr = SIP_HDR_VIA_UDP; + /* Translate topmost Via header and parameters */ if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, - SIP_HDR_VIA, NULL, &matchoff, &matchlen, + hdr, NULL, &matchoff, &matchlen, &addr, &port) > 0) { unsigned int matchend, poff, plen, buflen, n; char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; @@ -138,7 +163,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, goto next; } - if (!map_addr(skb, dptr, datalen, matchoff, matchlen, + if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen, &addr, port)) return NF_DROP; @@ -153,8 +178,8 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) { buflen = sprintf(buffer, "%pI4", &ct->tuplehash[!dir].tuple.dst.u3.ip); - if (!mangle_packet(skb, dptr, datalen, poff, plen, - buffer, buflen)) + if (!mangle_packet(skb, dataoff, dptr, datalen, + poff, plen, buffer, buflen)) return NF_DROP; } @@ -167,8 +192,8 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) { buflen = sprintf(buffer, "%pI4", &ct->tuplehash[!dir].tuple.src.u3.ip); - if (!mangle_packet(skb, dptr, datalen, poff, plen, - buffer, buflen)) + if (!mangle_packet(skb, dataoff, dptr, datalen, + poff, plen, buffer, buflen)) return NF_DROP; } @@ -181,31 +206,45 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, htons(n) != ct->tuplehash[!dir].tuple.src.u.udp.port) { __be16 p = ct->tuplehash[!dir].tuple.src.u.udp.port; buflen = sprintf(buffer, "%u", ntohs(p)); - if (!mangle_packet(skb, dptr, datalen, poff, plen, - buffer, buflen)) + if (!mangle_packet(skb, dataoff, dptr, datalen, + poff, plen, buffer, buflen)) return NF_DROP; } } next: /* Translate Contact headers */ - dataoff = 0; + coff = 0; in_header = 0; - while (ct_sip_parse_header_uri(ct, *dptr, &dataoff, *datalen, + while (ct_sip_parse_header_uri(ct, *dptr, &coff, *datalen, SIP_HDR_CONTACT, &in_header, &matchoff, &matchlen, &addr, &port) > 0) { - if (!map_addr(skb, dptr, datalen, matchoff, matchlen, + if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen, &addr, port)) return NF_DROP; } - if (!map_sip_addr(skb, dptr, datalen, SIP_HDR_FROM) || - !map_sip_addr(skb, dptr, datalen, SIP_HDR_TO)) + if (!map_sip_addr(skb, dataoff, dptr, datalen, SIP_HDR_FROM) || + !map_sip_addr(skb, dataoff, dptr, datalen, SIP_HDR_TO)) return NF_DROP; + return NF_ACCEPT; } +static void ip_nat_sip_seq_adjust(struct sk_buff *skb, s16 off) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + const struct tcphdr *th; + + if (nf_ct_protonum(ct) != IPPROTO_TCP || off == 0) + return; + + th = (struct tcphdr *)(skb->data + ip_hdrlen(skb)); + nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off); +} + /* Handles expected signalling connections and media streams */ static void ip_nat_sip_expected(struct nf_conn *ct, struct nf_conntrack_expect *exp) @@ -232,7 +271,7 @@ static void ip_nat_sip_expected(struct nf_conn *ct, } } -static unsigned int ip_nat_sip_expect(struct sk_buff *skb, +static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, struct nf_conntrack_expect *exp, unsigned int matchoff, @@ -279,8 +318,8 @@ static unsigned int ip_nat_sip_expect(struct sk_buff *skb, if (exp->tuple.dst.u3.ip != exp->saved_ip || exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) { buflen = sprintf(buffer, "%pI4:%u", &newip, port); - if (!mangle_packet(skb, dptr, datalen, matchoff, matchlen, - buffer, buflen)) + if (!mangle_packet(skb, dataoff, dptr, datalen, + matchoff, matchlen, buffer, buflen)) goto err; } return NF_ACCEPT; @@ -290,7 +329,7 @@ err: return NF_DROP; } -static int mangle_content_len(struct sk_buff *skb, +static int mangle_content_len(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen) { enum ip_conntrack_info ctinfo; @@ -312,12 +351,13 @@ static int mangle_content_len(struct sk_buff *skb, return 0; buflen = sprintf(buffer, "%u", c_len); - return mangle_packet(skb, dptr, datalen, matchoff, matchlen, + return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen, buffer, buflen); } -static int mangle_sdp_packet(struct sk_buff *skb, const char **dptr, - unsigned int dataoff, unsigned int *datalen, +static int mangle_sdp_packet(struct sk_buff *skb, unsigned int dataoff, + const char **dptr, unsigned int *datalen, + unsigned int sdpoff, enum sdp_header_types type, enum sdp_header_types term, char *buffer, int buflen) @@ -326,16 +366,16 @@ static int mangle_sdp_packet(struct sk_buff *skb, const char **dptr, struct nf_conn *ct = nf_ct_get(skb, &ctinfo); unsigned int matchlen, matchoff; - if (ct_sip_get_sdp_header(ct, *dptr, dataoff, *datalen, type, term, + if (ct_sip_get_sdp_header(ct, *dptr, sdpoff, *datalen, type, term, &matchoff, &matchlen) <= 0) return -ENOENT; - return mangle_packet(skb, dptr, datalen, matchoff, matchlen, + return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen, buffer, buflen) ? 0 : -EINVAL; } -static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, const char **dptr, - unsigned int dataoff, - unsigned int *datalen, +static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, unsigned int dataoff, + const char **dptr, unsigned int *datalen, + unsigned int sdpoff, enum sdp_header_types type, enum sdp_header_types term, const union nf_inet_addr *addr) @@ -344,16 +384,15 @@ static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, const char **dptr, unsigned int buflen; buflen = sprintf(buffer, "%pI4", &addr->ip); - if (mangle_sdp_packet(skb, dptr, dataoff, datalen, type, term, + if (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff, type, term, buffer, buflen)) return 0; - return mangle_content_len(skb, dptr, datalen); + return mangle_content_len(skb, dataoff, dptr, datalen); } -static unsigned int ip_nat_sdp_port(struct sk_buff *skb, - const char **dptr, - unsigned int *datalen, +static unsigned int ip_nat_sdp_port(struct sk_buff *skb, unsigned int dataoff, + const char **dptr, unsigned int *datalen, unsigned int matchoff, unsigned int matchlen, u_int16_t port) @@ -362,16 +401,16 @@ static unsigned int ip_nat_sdp_port(struct sk_buff *skb, unsigned int buflen; buflen = sprintf(buffer, "%u", port); - if (!mangle_packet(skb, dptr, datalen, matchoff, matchlen, + if (!mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen, buffer, buflen)) return 0; - return mangle_content_len(skb, dptr, datalen); + return mangle_content_len(skb, dataoff, dptr, datalen); } -static unsigned int ip_nat_sdp_session(struct sk_buff *skb, const char **dptr, - unsigned int dataoff, - unsigned int *datalen, +static unsigned int ip_nat_sdp_session(struct sk_buff *skb, unsigned int dataoff, + const char **dptr, unsigned int *datalen, + unsigned int sdpoff, const union nf_inet_addr *addr) { char buffer[sizeof("nnn.nnn.nnn.nnn")]; @@ -379,12 +418,12 @@ static unsigned int ip_nat_sdp_session(struct sk_buff *skb, const char **dptr, /* Mangle session description owner and contact addresses */ buflen = sprintf(buffer, "%pI4", &addr->ip); - if (mangle_sdp_packet(skb, dptr, dataoff, datalen, + if (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff, SDP_HDR_OWNER_IP4, SDP_HDR_MEDIA, buffer, buflen)) return 0; - switch (mangle_sdp_packet(skb, dptr, dataoff, datalen, + switch (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff, SDP_HDR_CONNECTION_IP4, SDP_HDR_MEDIA, buffer, buflen)) { case 0: @@ -401,14 +440,13 @@ static unsigned int ip_nat_sdp_session(struct sk_buff *skb, const char **dptr, return 0; } - return mangle_content_len(skb, dptr, datalen); + return mangle_content_len(skb, dataoff, dptr, datalen); } /* So, this packet has hit the connection tracking matching code. Mangle it, and change the expectation to match the new version. */ -static unsigned int ip_nat_sdp_media(struct sk_buff *skb, - const char **dptr, - unsigned int *datalen, +static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int dataoff, + const char **dptr, unsigned int *datalen, struct nf_conntrack_expect *rtp_exp, struct nf_conntrack_expect *rtcp_exp, unsigned int mediaoff, @@ -456,7 +494,8 @@ static unsigned int ip_nat_sdp_media(struct sk_buff *skb, /* Update media port. */ if (rtp_exp->tuple.dst.u.udp.port != rtp_exp->saved_proto.udp.port && - !ip_nat_sdp_port(skb, dptr, datalen, mediaoff, medialen, port)) + !ip_nat_sdp_port(skb, dataoff, dptr, datalen, + mediaoff, medialen, port)) goto err2; return NF_ACCEPT; @@ -471,6 +510,7 @@ err1: static void __exit nf_nat_sip_fini(void) { rcu_assign_pointer(nf_nat_sip_hook, NULL); + rcu_assign_pointer(nf_nat_sip_seq_adjust_hook, NULL); rcu_assign_pointer(nf_nat_sip_expect_hook, NULL); rcu_assign_pointer(nf_nat_sdp_addr_hook, NULL); rcu_assign_pointer(nf_nat_sdp_port_hook, NULL); @@ -482,12 +522,14 @@ static void __exit nf_nat_sip_fini(void) static int __init nf_nat_sip_init(void) { BUG_ON(nf_nat_sip_hook != NULL); + BUG_ON(nf_nat_sip_seq_adjust_hook != NULL); BUG_ON(nf_nat_sip_expect_hook != NULL); BUG_ON(nf_nat_sdp_addr_hook != NULL); BUG_ON(nf_nat_sdp_port_hook != NULL); BUG_ON(nf_nat_sdp_session_hook != NULL); BUG_ON(nf_nat_sdp_media_hook != NULL); rcu_assign_pointer(nf_nat_sip_hook, ip_nat_sip); + rcu_assign_pointer(nf_nat_sip_seq_adjust_hook, ip_nat_sip_seq_adjust); rcu_assign_pointer(nf_nat_sip_expect_hook, ip_nat_sip_expect); rcu_assign_pointer(nf_nat_sdp_addr_hook, ip_nat_sdp_addr); rcu_assign_pointer(nf_nat_sdp_port_hook, ip_nat_sdp_port); diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index d9521f6f9ed..0b9c7ce3d6c 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -1038,7 +1038,7 @@ static int snmp_parse_mangle(unsigned char *msg, unsigned int cls, con, tag, vers, pdutype; struct asn1_ctx ctx; struct asn1_octstr comm; - struct snmp_object **obj; + struct snmp_object *obj; if (debug > 1) hex_dump(msg, len); @@ -1148,43 +1148,34 @@ static int snmp_parse_mangle(unsigned char *msg, if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ) return 0; - obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC); - if (obj == NULL) { - if (net_ratelimit()) - printk(KERN_WARNING "OOM in bsalg(%d)\n", __LINE__); - return 0; - } - while (!asn1_eoc_decode(&ctx, eoc)) { unsigned int i; - if (!snmp_object_decode(&ctx, obj)) { - if (*obj) { - kfree((*obj)->id); - kfree(*obj); + if (!snmp_object_decode(&ctx, &obj)) { + if (obj) { + kfree(obj->id); + kfree(obj); } - kfree(obj); return 0; } if (debug > 1) { printk(KERN_DEBUG "bsalg: object: "); - for (i = 0; i < (*obj)->id_len; i++) { + for (i = 0; i < obj->id_len; i++) { if (i > 0) printk("."); - printk("%lu", (*obj)->id[i]); + printk("%lu", obj->id[i]); } - printk(": type=%u\n", (*obj)->type); + printk(": type=%u\n", obj->type); } - if ((*obj)->type == SNMP_IPADDR) + if (obj->type == SNMP_IPADDR) mangle_address(ctx.begin, ctx.pointer - 4 , map, check); - kfree((*obj)->id); - kfree(*obj); + kfree(obj->id); + kfree(obj); } - kfree(obj); if (!asn1_eoc_decode(&ctx, eoc)) return 0; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index f25542c48b7..242ed230737 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -127,8 +127,8 @@ static const struct snmp_mib snmp4_ipextstats_list[] = { SNMP_MIB_SENTINEL }; -static struct { - char *name; +static const struct { + const char *name; int index; } icmpmibmap[] = { { "DestUnreachs", ICMP_DEST_UNREACH }, @@ -280,7 +280,7 @@ static void icmpmsg_put(struct seq_file *seq) count = 0; for (i = 0; i < ICMPMSG_MIB_MAX; i++) { - val = snmp_fold_field((void **) net->mib.icmpmsg_statistics, i); + val = snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics, i); if (val) { type[count] = i; vals[count++] = val; @@ -307,18 +307,18 @@ static void icmp_put(struct seq_file *seq) for (i=0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " Out%s", icmpmibmap[i].name); seq_printf(seq, "\nIcmp: %lu %lu", - snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_INMSGS), - snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_INERRORS)); + snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INMSGS), + snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS)); for (i=0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void **) net->mib.icmpmsg_statistics, + snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics, icmpmibmap[i].index)); seq_printf(seq, " %lu %lu", - snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), - snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); + snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), + snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); for (i=0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void **) net->mib.icmpmsg_statistics, + snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics, icmpmibmap[i].index | 0x100)); } @@ -341,7 +341,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void **)net->mib.ip_statistics, + snmp_fold_field((void __percpu **)net->mib.ip_statistics, snmp4_ipstats_list[i].entry)); icmp_put(seq); /* RFC 2011 compatibility */ @@ -356,11 +356,11 @@ static int snmp_seq_show(struct seq_file *seq, void *v) /* MaxConn field is signed, RFC 2012 */ if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) seq_printf(seq, " %ld", - snmp_fold_field((void **)net->mib.tcp_statistics, + snmp_fold_field((void __percpu **)net->mib.tcp_statistics, snmp4_tcp_list[i].entry)); else seq_printf(seq, " %lu", - snmp_fold_field((void **)net->mib.tcp_statistics, + snmp_fold_field((void __percpu **)net->mib.tcp_statistics, snmp4_tcp_list[i].entry)); } @@ -371,7 +371,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "\nUdp:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void **)net->mib.udp_statistics, + snmp_fold_field((void __percpu **)net->mib.udp_statistics, snmp4_udp_list[i].entry)); /* the UDP and UDP-Lite MIBs are the same */ @@ -382,7 +382,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "\nUdpLite:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void **)net->mib.udplite_statistics, + snmp_fold_field((void __percpu **)net->mib.udplite_statistics, snmp4_udp_list[i].entry)); seq_putc(seq, '\n'); @@ -419,7 +419,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "\nTcpExt:"); for (i = 0; snmp4_net_list[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void **)net->mib.net_statistics, + snmp_fold_field((void __percpu **)net->mib.net_statistics, snmp4_net_list[i].entry)); seq_puts(seq, "\nIpExt:"); @@ -429,7 +429,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "\nIpExt:"); for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void **)net->mib.ip_statistics, + snmp_fold_field((void __percpu **)net->mib.ip_statistics, snmp4_ipextstats_list[i].entry)); seq_putc(seq, '\n'); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index e446496f564..b2ba5581d2a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -287,12 +287,12 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq) if (!rt_hash_table[st->bucket].chain) continue; rcu_read_lock_bh(); - r = rcu_dereference(rt_hash_table[st->bucket].chain); + r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); while (r) { if (dev_net(r->u.dst.dev) == seq_file_net(seq) && r->rt_genid == st->genid) return r; - r = rcu_dereference(r->u.dst.rt_next); + r = rcu_dereference_bh(r->u.dst.rt_next); } rcu_read_unlock_bh(); } @@ -314,7 +314,7 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq, rcu_read_lock_bh(); r = rt_hash_table[st->bucket].chain; } - return rcu_dereference(r); + return rcu_dereference_bh(r); } static struct rtable *rt_cache_get_next(struct seq_file *seq, @@ -586,7 +586,9 @@ static void __net_exit ip_rt_do_proc_exit(struct net *net) { remove_proc_entry("rt_cache", net->proc_net_stat); remove_proc_entry("rt_cache", net->proc_net); +#ifdef CONFIG_NET_CLS_ROUTE remove_proc_entry("rt_acct", net->proc_net); +#endif } static struct pernet_operations ip_rt_proc_ops __net_initdata = { @@ -1988,8 +1990,13 @@ static int __mkroute_input(struct sk_buff *skb, if (skb->protocol != htons(ETH_P_IP)) { /* Not IP (i.e. ARP). Do not create route, if it is * invalid for proxy arp. DNAT routes are always valid. + * + * Proxy arp feature have been extended to allow, ARP + * replies back to the same interface, to support + * Private VLAN switch technologies. See arp.c. */ - if (out_dev == in_dev) { + if (out_dev == in_dev && + IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) { err = -EINVAL; goto cleanup; } @@ -2687,8 +2694,8 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net)); rcu_read_lock_bh(); - for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; - rth = rcu_dereference(rth->u.dst.rt_next)) { + for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth; + rth = rcu_dereference_bh(rth->u.dst.rt_next)) { if (rth->fl.fl4_dst == flp->fl4_dst && rth->fl.fl4_src == flp->fl4_src && rth->fl.iif == 0 && @@ -3006,8 +3013,8 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!rt_hash_table[h].chain) continue; rcu_read_lock_bh(); - for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt; - rt = rcu_dereference(rt->u.dst.rt_next), idx++) { + for (rt = rcu_dereference_bh(rt_hash_table[h].chain), idx = 0; rt; + rt = rcu_dereference_bh(rt->u.dst.rt_next), idx++) { if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx) continue; if (rt_is_expired(rt)) @@ -3327,7 +3334,7 @@ static __net_initdata struct pernet_operations rt_secret_timer_ops = { #ifdef CONFIG_NET_CLS_ROUTE -struct ip_rt_acct *ip_rt_acct __read_mostly; +struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; #endif /* CONFIG_NET_CLS_ROUTE */ static __initdata unsigned long rhash_entries; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 66fd80ef247..5c24db4a3c9 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -358,7 +358,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, tcp_select_initial_window(tcp_full_space(sk), req->mss, &req->rcv_wnd, &req->window_clamp, - ireq->wscale_ok, &rcv_wscale); + ireq->wscale_ok, &rcv_wscale, + dst_metric(&rt->u.dst, RTAX_INITRWND)); ireq->rcv_wscale = rcv_wscale; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 7e3712ce399..c1bc074f61b 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -576,6 +576,20 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { + .procname = "tcp_thin_linear_timeouts", + .data = &sysctl_tcp_thin_linear_timeouts, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "tcp_thin_dupack", + .data = &sysctl_tcp_thin_dupack, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { .procname = "udp_mem", .data = &sysctl_udp_mem, .maxlen = sizeof(sysctl_udp_mem), diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b0a26bb25e2..5901010fad5 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -536,8 +536,7 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb) tp->nonagle &= ~TCP_NAGLE_PUSH; } -static inline void tcp_mark_urg(struct tcp_sock *tp, int flags, - struct sk_buff *skb) +static inline void tcp_mark_urg(struct tcp_sock *tp, int flags) { if (flags & MSG_OOB) tp->snd_up = tp->write_seq; @@ -546,13 +545,13 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags, static inline void tcp_push(struct sock *sk, int flags, int mss_now, int nonagle) { - struct tcp_sock *tp = tcp_sk(sk); - if (tcp_send_head(sk)) { - struct sk_buff *skb = tcp_write_queue_tail(sk); + struct tcp_sock *tp = tcp_sk(sk); + if (!(flags & MSG_MORE) || forced_push(tp)) - tcp_mark_push(tp, skb); - tcp_mark_urg(tp, flags, skb); + tcp_mark_push(tp, tcp_write_queue_tail(sk)); + + tcp_mark_urg(tp, flags); __tcp_push_pending_frames(sk, mss_now, (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle); } @@ -877,12 +876,12 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, #define TCP_PAGE(sk) (sk->sk_sndmsg_page) #define TCP_OFF(sk) (sk->sk_sndmsg_off) -static inline int select_size(struct sock *sk) +static inline int select_size(struct sock *sk, int sg) { struct tcp_sock *tp = tcp_sk(sk); int tmp = tp->mss_cache; - if (sk->sk_route_caps & NETIF_F_SG) { + if (sg) { if (sk_can_gso(sk)) tmp = 0; else { @@ -906,7 +905,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, struct sk_buff *skb; int iovlen, flags; int mss_now, size_goal; - int err, copied; + int sg, err, copied; long timeo; lock_sock(sk); @@ -934,6 +933,8 @@ int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) goto out_err; + sg = sk->sk_route_caps & NETIF_F_SG; + while (--iovlen >= 0) { int seglen = iov->iov_len; unsigned char __user *from = iov->iov_base; @@ -959,8 +960,9 @@ new_segment: if (!sk_stream_memory_free(sk)) goto wait_for_sndbuf; - skb = sk_stream_alloc_skb(sk, select_size(sk), - sk->sk_allocation); + skb = sk_stream_alloc_skb(sk, + select_size(sk, sg), + sk->sk_allocation); if (!skb) goto wait_for_memory; @@ -997,9 +999,7 @@ new_segment: /* We can extend the last page * fragment. */ merge = 1; - } else if (i == MAX_SKB_FRAGS || - (!i && - !(sk->sk_route_caps & NETIF_F_SG))) { + } else if (i == MAX_SKB_FRAGS || !sg) { /* Need to add new fragment and cannot * do this because interface is non-SG, * or because all the page slots are @@ -2229,6 +2229,20 @@ static int do_tcp_setsockopt(struct sock *sk, int level, } break; + case TCP_THIN_LINEAR_TIMEOUTS: + if (val < 0 || val > 1) + err = -EINVAL; + else + tp->thin_lto = val; + break; + + case TCP_THIN_DUPACK: + if (val < 0 || val > 1) + err = -EINVAL; + else + tp->thin_dupack = val; + break; + case TCP_CORK: /* When set indicates to always queue non-full frames. * Later the user clears this option and we transmit @@ -2788,10 +2802,10 @@ EXPORT_SYMBOL(tcp_gro_complete); #ifdef CONFIG_TCP_MD5SIG static unsigned long tcp_md5sig_users; -static struct tcp_md5sig_pool **tcp_md5sig_pool; +static struct tcp_md5sig_pool * __percpu *tcp_md5sig_pool; static DEFINE_SPINLOCK(tcp_md5sig_pool_lock); -static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool **pool) +static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool * __percpu *pool) { int cpu; for_each_possible_cpu(cpu) { @@ -2808,7 +2822,7 @@ static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool **pool) void tcp_free_md5sig_pool(void) { - struct tcp_md5sig_pool **pool = NULL; + struct tcp_md5sig_pool * __percpu *pool = NULL; spin_lock_bh(&tcp_md5sig_pool_lock); if (--tcp_md5sig_users == 0) { @@ -2822,10 +2836,11 @@ void tcp_free_md5sig_pool(void) EXPORT_SYMBOL(tcp_free_md5sig_pool); -static struct tcp_md5sig_pool **__tcp_alloc_md5sig_pool(struct sock *sk) +static struct tcp_md5sig_pool * __percpu * +__tcp_alloc_md5sig_pool(struct sock *sk) { int cpu; - struct tcp_md5sig_pool **pool; + struct tcp_md5sig_pool * __percpu *pool; pool = alloc_percpu(struct tcp_md5sig_pool *); if (!pool) @@ -2852,9 +2867,9 @@ out_free: return NULL; } -struct tcp_md5sig_pool **tcp_alloc_md5sig_pool(struct sock *sk) +struct tcp_md5sig_pool * __percpu *tcp_alloc_md5sig_pool(struct sock *sk) { - struct tcp_md5sig_pool **pool; + struct tcp_md5sig_pool * __percpu *pool; int alloc = 0; retry: @@ -2873,7 +2888,9 @@ retry: if (alloc) { /* we cannot hold spinlock here because this may sleep. */ - struct tcp_md5sig_pool **p = __tcp_alloc_md5sig_pool(sk); + struct tcp_md5sig_pool * __percpu *p; + + p = __tcp_alloc_md5sig_pool(sk); spin_lock_bh(&tcp_md5sig_pool_lock); if (!p) { tcp_md5sig_users--; @@ -2897,7 +2914,7 @@ EXPORT_SYMBOL(tcp_alloc_md5sig_pool); struct tcp_md5sig_pool *__tcp_get_md5sig_pool(int cpu) { - struct tcp_md5sig_pool **p; + struct tcp_md5sig_pool * __percpu *p; spin_lock_bh(&tcp_md5sig_pool_lock); p = tcp_md5sig_pool; if (p) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 28e02963249..788851ca8c5 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -89,6 +89,8 @@ int sysctl_tcp_frto __read_mostly = 2; int sysctl_tcp_frto_response __read_mostly; int sysctl_tcp_nometrics_save __read_mostly; +int sysctl_tcp_thin_dupack __read_mostly; + int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; int sysctl_tcp_abc __read_mostly; @@ -2447,6 +2449,16 @@ static int tcp_time_to_recover(struct sock *sk) return 1; } + /* If a thin stream is detected, retransmit after first + * received dupack. Employ only if SACK is supported in order + * to avoid possible corner-case series of spurious retransmissions + * Use only if there are no unsent data. + */ + if ((tp->thin_dupack || sysctl_tcp_thin_dupack) && + tcp_stream_is_thin(tp) && tcp_dupack_heuristics(tp) > 1 && + tcp_is_sack(tp) && !tcp_send_head(sk)) + return 1; + return 0; } @@ -5783,11 +5795,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, /* tcp_ack considers this ACK as duplicate * and does not calculate rtt. - * Fix it at least with timestamps. + * Force it here. */ - if (tp->rx_opt.saw_tstamp && - tp->rx_opt.rcv_tsecr && !tp->srtt) - tcp_ack_saw_tstamp(sk, 0); + tcp_ack_update_rtt(sk, 0, 0); if (tp->rx_opt.tstamp_ok) tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 65b8ebfd078..c3588b4fd97 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -742,9 +742,9 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, * This still operates on a request_sock only, not on a big * socket. */ -static int __tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, - struct request_sock *req, - struct request_values *rvp) +static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, + struct request_sock *req, + struct request_values *rvp) { const struct inet_request_sock *ireq = inet_rsk(req); int err = -1; @@ -775,10 +775,11 @@ static int __tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, return err; } -static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req, +static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req, struct request_values *rvp) { - return __tcp_v4_send_synack(sk, NULL, req, rvp); + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); + return tcp_v4_send_synack(sk, NULL, req, rvp); } /* @@ -1192,10 +1193,11 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) struct request_sock_ops tcp_request_sock_ops __read_mostly = { .family = PF_INET, .obj_size = sizeof(struct tcp_request_sock), - .rtx_syn_ack = tcp_v4_send_synack, + .rtx_syn_ack = tcp_v4_rtx_synack, .send_ack = tcp_v4_reqsk_send_ack, .destructor = tcp_v4_reqsk_destructor, .send_reset = tcp_v4_send_reset, + .syn_ack_timeout = tcp_syn_ack_timeout, }; #ifdef CONFIG_TCP_MD5SIG @@ -1373,8 +1375,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) } tcp_rsk(req)->snt_isn = isn; - if (__tcp_v4_send_synack(sk, dst, req, - (struct request_values *)&tmp_ext) || + if (tcp_v4_send_synack(sk, dst, req, + (struct request_values *)&tmp_ext) || want_cookie) goto drop_and_free; @@ -1649,6 +1651,9 @@ int tcp_v4_rcv(struct sk_buff *skb) if (!sk) goto no_tcp_socket; + if (iph->ttl < inet_sk(sk)->min_ttl) + goto discard_and_relse; + process: if (sk->sk_state == TCP_TIME_WAIT) goto do_time_wait; @@ -2425,12 +2430,12 @@ static struct tcp_seq_afinfo tcp4_seq_afinfo = { }, }; -static int tcp4_proc_init_net(struct net *net) +static int __net_init tcp4_proc_init_net(struct net *net) { return tcp_proc_register(net, &tcp4_seq_afinfo); } -static void tcp4_proc_exit_net(struct net *net) +static void __net_exit tcp4_proc_exit_net(struct net *net) { tcp_proc_unregister(net, &tcp4_seq_afinfo); } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 383ce237640..4a1605d3f90 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -183,7 +183,8 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) */ void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd, __u32 *window_clamp, - int wscale_ok, __u8 *rcv_wscale) + int wscale_ok, __u8 *rcv_wscale, + __u32 init_rcv_wnd) { unsigned int space = (__space < 0 ? 0 : __space); @@ -232,7 +233,13 @@ void tcp_select_initial_window(int __space, __u32 mss, init_cwnd = 2; else if (mss > 1460) init_cwnd = 3; - if (*rcv_wnd > init_cwnd * mss) + /* when initializing use the value from init_rcv_wnd + * rather than the default from above + */ + if (init_rcv_wnd && + (*rcv_wnd > init_rcv_wnd * mss)) + *rcv_wnd = init_rcv_wnd * mss; + else if (*rcv_wnd > init_cwnd * mss) *rcv_wnd = init_cwnd * mss; } @@ -1794,11 +1801,6 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, int nonagle) { - struct sk_buff *skb = tcp_send_head(sk); - - if (!skb) - return; - /* If we are closed, the bytes will have to remain here. * In time closedown will finish, we empty the write queue and * all will be happy. @@ -2422,7 +2424,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, &req->rcv_wnd, &req->window_clamp, ireq->wscale_ok, - &rcv_wscale); + &rcv_wscale, + dst_metric(dst, RTAX_INITRWND)); ireq->rcv_wscale = rcv_wscale; } @@ -2549,7 +2552,8 @@ static void tcp_connect_init(struct sock *sk) &tp->rcv_wnd, &tp->window_clamp, sysctl_tcp_window_scaling, - &rcv_wscale); + &rcv_wscale, + dst_metric(dst, RTAX_INITRWND)); tp->rx_opt.rcv_wscale = rcv_wscale; tp->rcv_ssthresh = tp->rcv_wnd; diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index bb110c5ce1d..9bc805df95d 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -39,9 +39,9 @@ static int port __read_mostly = 0; MODULE_PARM_DESC(port, "Port to match (0=all)"); module_param(port, int, 0); -static int bufsize __read_mostly = 4096; +static unsigned int bufsize __read_mostly = 4096; MODULE_PARM_DESC(bufsize, "Log buffer size in packets (4096)"); -module_param(bufsize, int, 0); +module_param(bufsize, uint, 0); static int full __read_mostly; MODULE_PARM_DESC(full, "Full log (1=every ack packet received, 0=only cwnd changes)"); @@ -75,12 +75,12 @@ static struct { static inline int tcp_probe_used(void) { - return (tcp_probe.head - tcp_probe.tail) % bufsize; + return (tcp_probe.head - tcp_probe.tail) & (bufsize - 1); } static inline int tcp_probe_avail(void) { - return bufsize - tcp_probe_used(); + return bufsize - tcp_probe_used() - 1; } /* @@ -116,7 +116,7 @@ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, p->ssthresh = tcp_current_ssthresh(sk); p->srtt = tp->srtt >> 3; - tcp_probe.head = (tcp_probe.head + 1) % bufsize; + tcp_probe.head = (tcp_probe.head + 1) & (bufsize - 1); } tcp_probe.lastcwnd = tp->snd_cwnd; spin_unlock(&tcp_probe.lock); @@ -149,7 +149,7 @@ static int tcpprobe_open(struct inode * inode, struct file * file) static int tcpprobe_sprint(char *tbuf, int n) { const struct tcp_log *p - = tcp_probe.log + tcp_probe.tail % bufsize; + = tcp_probe.log + tcp_probe.tail; struct timespec tv = ktime_to_timespec(ktime_sub(p->tstamp, tcp_probe.start)); @@ -192,7 +192,7 @@ static ssize_t tcpprobe_read(struct file *file, char __user *buf, width = tcpprobe_sprint(tbuf, sizeof(tbuf)); if (cnt + width < len) - tcp_probe.tail = (tcp_probe.tail + 1) % bufsize; + tcp_probe.tail = (tcp_probe.tail + 1) & (bufsize - 1); spin_unlock_bh(&tcp_probe.lock); @@ -222,9 +222,10 @@ static __init int tcpprobe_init(void) init_waitqueue_head(&tcp_probe.wait); spin_lock_init(&tcp_probe.lock); - if (bufsize < 0) + if (bufsize == 0) return -EINVAL; + bufsize = roundup_pow_of_two(bufsize); tcp_probe.log = kcalloc(bufsize, sizeof(struct tcp_log), GFP_KERNEL); if (!tcp_probe.log) goto err0; @@ -236,7 +237,7 @@ static __init int tcpprobe_init(void) if (ret) goto err1; - pr_info("TCP probe registered (port=%d)\n", port); + pr_info("TCP probe registered (port=%d) bufsize=%u\n", port, bufsize); return 0; err1: proc_net_remove(&init_net, procname); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 8816a20c259..a17629b8912 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -29,6 +29,7 @@ int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL; int sysctl_tcp_retries1 __read_mostly = TCP_RETR1; int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; int sysctl_tcp_orphan_retries __read_mostly; +int sysctl_tcp_thin_linear_timeouts __read_mostly; static void tcp_write_timer(unsigned long); static void tcp_delack_timer(unsigned long); @@ -415,7 +416,25 @@ void tcp_retransmit_timer(struct sock *sk) icsk->icsk_retransmits++; out_reset_timer: - icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); + /* If stream is thin, use linear timeouts. Since 'icsk_backoff' is + * used to reset timer, set to 0. Recalculate 'icsk_rto' as this + * might be increased if the stream oscillates between thin and thick, + * thus the old value might already be too high compared to the value + * set by 'tcp_set_rto' in tcp_input.c which resets the rto without + * backoff. Limit to TCP_THIN_LINEAR_RETRIES before initiating + * exponential backoff behaviour to avoid continue hammering + * linear-timeout retransmissions into a black hole + */ + if (sk->sk_state == TCP_ESTABLISHED && + (tp->thin_lto || sysctl_tcp_thin_linear_timeouts) && + tcp_stream_is_thin(tp) && + icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) { + icsk->icsk_backoff = 0; + icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX); + } else { + /* Use normal (exponential) backoff */ + icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); + } inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1)) __sk_dst_reset(sk); @@ -474,6 +493,12 @@ static void tcp_synack_timer(struct sock *sk) TCP_TIMEOUT_INIT, TCP_RTO_MAX); } +void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req) +{ + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPTIMEOUTS); +} +EXPORT_SYMBOL(tcp_syn_ack_timeout); + void tcp_set_keepalive(struct sock *sk, int val) { if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f0126fdd7e0..608a5446d05 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1117,7 +1117,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct inet_sock *inet = inet_sk(sk); struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; struct sk_buff *skb; - unsigned int ulen, copied; + unsigned int ulen; int peeked; int err; int is_udplite = IS_UDPLITE(sk); @@ -1138,10 +1138,9 @@ try_again: goto out; ulen = skb->len - sizeof(struct udphdr); - copied = len; - if (copied > ulen) - copied = ulen; - else if (copied < ulen) + if (len > ulen) + len = ulen; + else if (len < ulen) msg->msg_flags |= MSG_TRUNC; /* @@ -1150,14 +1149,14 @@ try_again: * coverage checksum (UDP-Lite), do it before the copy. */ - if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { + if (len < ulen || UDP_SKB_CB(skb)->partial_cov) { if (udp_lib_checksum_complete(skb)) goto csum_copy_err; } if (skb_csum_unnecessary(skb)) err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), - msg->msg_iov, copied); + msg->msg_iov, len); else { err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), @@ -1186,7 +1185,7 @@ try_again: if (inet->cmsg_flags) ip_cmsg_recv(msg, skb); - err = copied; + err = len; if (flags & MSG_TRUNC) err = ulen; @@ -2027,12 +2026,12 @@ static struct udp_seq_afinfo udp4_seq_afinfo = { }, }; -static int udp4_proc_init_net(struct net *net) +static int __net_init udp4_proc_init_net(struct net *net) { return udp_proc_register(net, &udp4_seq_afinfo); } -static void udp4_proc_exit_net(struct net *net) +static void __net_exit udp4_proc_exit_net(struct net *net) { udp_proc_unregister(net, &udp4_seq_afinfo); } diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 66f79513f4a..6610bf76369 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -81,12 +81,12 @@ static struct udp_seq_afinfo udplite4_seq_afinfo = { }, }; -static int udplite4_proc_init_net(struct net *net) +static int __net_init udplite4_proc_init_net(struct net *net) { return udp_proc_register(net, &udplite4_seq_afinfo); } -static void udplite4_proc_exit_net(struct net *net) +static void __net_exit udplite4_proc_exit_net(struct net *net) { udp_proc_unregister(net, &udplite4_seq_afinfo); } diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 8c08a28d8f8..67107d63c1c 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -15,7 +15,6 @@ #include <net/xfrm.h> #include <net/ip.h> -static struct dst_ops xfrm4_dst_ops; static struct xfrm_policy_afinfo xfrm4_policy_afinfo; static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, @@ -190,8 +189,10 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) static inline int xfrm4_garbage_collect(struct dst_ops *ops) { - xfrm4_policy_afinfo.garbage_collect(&init_net); - return (atomic_read(&xfrm4_dst_ops.entries) > xfrm4_dst_ops.gc_thresh*2); + struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops); + + xfrm4_policy_afinfo.garbage_collect(net); + return (atomic_read(&ops->entries) > ops->gc_thresh * 2); } static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu) @@ -268,7 +269,7 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { static struct ctl_table xfrm4_policy_table[] = { { .procname = "xfrm4_gc_thresh", - .data = &xfrm4_dst_ops.gc_thresh, + .data = &init_net.xfrm.xfrm4_dst_ops.gc_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, @@ -295,8 +296,6 @@ static void __exit xfrm4_policy_fini(void) void __init xfrm4_init(int rt_max_size) { - xfrm4_state_init(); - xfrm4_policy_init(); /* * Select a default value for the gc_thresh based on the main route * table hash size. It seems to me the worst case scenario is when @@ -308,6 +307,9 @@ void __init xfrm4_init(int rt_max_size) * and start cleaning when were 1/2 full */ xfrm4_dst_ops.gc_thresh = rt_max_size/2; + + xfrm4_state_init(); + xfrm4_policy_init(); #ifdef CONFIG_SYSCTL sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv4_ctl_path, xfrm4_policy_table); |