diff options
Diffstat (limited to 'net/netfilter')
65 files changed, 2015 insertions, 991 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 0c6f67e8f2e..209c1ed4336 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -509,6 +509,21 @@ config NETFILTER_XT_TARGET_HL since you can easily create immortal packets that loop forever on the network. +config NETFILTER_XT_TARGET_HMARK + tristate '"HMARK" target support' + depends on (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n) + depends on NETFILTER_ADVANCED + ---help--- + This option adds the "HMARK" target. + + The target allows you to create rules in the "raw" and "mangle" tables + which set the skbuff mark by means of hash calculation within a given + range. The nfmark can influence the routing method (see "Use netfilter + MARK value as routing key") and can also be used by other subsystems to + change their behaviour. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_TARGET_IDLETIMER tristate "IDLETIMER target support" depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index ca3676586f5..4e7960cc7b9 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -59,6 +59,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o +obj-$(CONFIG_NETFILTER_XT_TARGET_HMARK) += xt_HMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o obj-$(CONFIG_NETFILTER_XT_TARGET_LOG) += xt_LOG.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index e1b7e051332..e19f3653db2 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -290,12 +290,3 @@ void __init netfilter_init(void) if (netfilter_log_init() < 0) panic("cannot initialize nf_log"); } - -#ifdef CONFIG_SYSCTL -struct ctl_path nf_net_netfilter_sysctl_path[] = { - { .procname = "net", }, - { .procname = "netfilter", }, - { } -}; -EXPORT_SYMBOL_GPL(nf_net_netfilter_sysctl_path); -#endif /* CONFIG_SYSCTL */ diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index a72a4dff003..7e1b061aeeb 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -109,8 +109,9 @@ bitmap_ip_list(const struct ip_set *set, } else goto nla_put_failure; } - NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, - htonl(map->first_ip + id * map->hosts)); + if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, + htonl(map->first_ip + id * map->hosts))) + goto nla_put_failure; ipset_nest_end(skb, nested); } ipset_nest_end(skb, atd); @@ -194,10 +195,11 @@ bitmap_ip_tlist(const struct ip_set *set, } else goto nla_put_failure; } - NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, - htonl(map->first_ip + id * map->hosts)); - NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get(members[id]))); + if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, + htonl(map->first_ip + id * map->hosts)) || + nla_put_net32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(members[id])))) + goto nla_put_failure; ipset_nest_end(skb, nested); } ipset_nest_end(skb, adt); @@ -334,15 +336,16 @@ bitmap_ip_head(struct ip_set *set, struct sk_buff *skb) nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) goto nla_put_failure; - NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, htonl(map->first_ip)); - NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip)); - if (map->netmask != 32) - NLA_PUT_U8(skb, IPSET_ATTR_NETMASK, map->netmask); - NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)); - NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, - htonl(sizeof(*map) + map->memsize)); - if (with_timeout(map->timeout)) - NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout)); + if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip)) || + nla_put_ipaddr4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip)) || + (map->netmask != 32 && + nla_put_u8(skb, IPSET_ATTR_NETMASK, map->netmask)) || + nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || + nla_put_net32(skb, IPSET_ATTR_MEMSIZE, + htonl(sizeof(*map) + map->memsize)) || + (with_timeout(map->timeout) && + nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout)))) + goto nla_put_failure; ipset_nest_end(skb, nested); return 0; diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 81324c12c5b..d7eaf10edb6 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -111,7 +111,7 @@ bitmap_ipmac_test(struct ip_set *set, void *value, u32 timeout, u32 flags) return -EAGAIN; case MAC_FILLED: return data->ether == NULL || - compare_ether_addr(data->ether, elem->ether) == 0; + ether_addr_equal(data->ether, elem->ether); } return 0; } @@ -186,11 +186,12 @@ bitmap_ipmac_list(const struct ip_set *set, } else goto nla_put_failure; } - NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, - htonl(map->first_ip + id)); - if (elem->match == MAC_FILLED) - NLA_PUT(skb, IPSET_ATTR_ETHER, ETH_ALEN, - elem->ether); + if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, + htonl(map->first_ip + id)) || + (elem->match == MAC_FILLED && + nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, + elem->ether))) + goto nla_put_failure; ipset_nest_end(skb, nested); } ipset_nest_end(skb, atd); @@ -224,7 +225,7 @@ bitmap_ipmac_ttest(struct ip_set *set, void *value, u32 timeout, u32 flags) return -EAGAIN; case MAC_FILLED: return (data->ether == NULL || - compare_ether_addr(data->ether, elem->ether) == 0) && + ether_addr_equal(data->ether, elem->ether)) && !bitmap_expired(map, data->id); } return 0; @@ -314,14 +315,16 @@ bitmap_ipmac_tlist(const struct ip_set *set, } else goto nla_put_failure; } - NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, - htonl(map->first_ip + id)); - if (elem->match == MAC_FILLED) - NLA_PUT(skb, IPSET_ATTR_ETHER, ETH_ALEN, - elem->ether); + if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, + htonl(map->first_ip + id)) || + (elem->match == MAC_FILLED && + nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, + elem->ether))) + goto nla_put_failure; timeout = elem->match == MAC_UNSET ? elem->timeout : ip_set_timeout_get(elem->timeout); - NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(timeout)); + if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(timeout))) + goto nla_put_failure; ipset_nest_end(skb, nested); } ipset_nest_end(skb, atd); @@ -438,14 +441,16 @@ bitmap_ipmac_head(struct ip_set *set, struct sk_buff *skb) nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) goto nla_put_failure; - NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, htonl(map->first_ip)); - NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip)); - NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)); - NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, - htonl(sizeof(*map) - + (map->last_ip - map->first_ip + 1) * map->dsize)); - if (with_timeout(map->timeout)) - NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout)); + if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip)) || + nla_put_ipaddr4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip)) || + nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || + nla_put_net32(skb, IPSET_ATTR_MEMSIZE, + htonl(sizeof(*map) + + ((map->last_ip - map->first_ip + 1) * + map->dsize))) || + (with_timeout(map->timeout) && + nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout)))) + goto nla_put_failure; ipset_nest_end(skb, nested); return 0; diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index 382ec28ba72..b9f1fce7053 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -96,8 +96,9 @@ bitmap_port_list(const struct ip_set *set, } else goto nla_put_failure; } - NLA_PUT_NET16(skb, IPSET_ATTR_PORT, - htons(map->first_port + id)); + if (nla_put_net16(skb, IPSET_ATTR_PORT, + htons(map->first_port + id))) + goto nla_put_failure; ipset_nest_end(skb, nested); } ipset_nest_end(skb, atd); @@ -183,10 +184,11 @@ bitmap_port_tlist(const struct ip_set *set, } else goto nla_put_failure; } - NLA_PUT_NET16(skb, IPSET_ATTR_PORT, - htons(map->first_port + id)); - NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get(members[id]))); + if (nla_put_net16(skb, IPSET_ATTR_PORT, + htons(map->first_port + id)) || + nla_put_net32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(members[id])))) + goto nla_put_failure; ipset_nest_end(skb, nested); } ipset_nest_end(skb, adt); @@ -320,13 +322,14 @@ bitmap_port_head(struct ip_set *set, struct sk_buff *skb) nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) goto nla_put_failure; - NLA_PUT_NET16(skb, IPSET_ATTR_PORT, htons(map->first_port)); - NLA_PUT_NET16(skb, IPSET_ATTR_PORT_TO, htons(map->last_port)); - NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)); - NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, - htonl(sizeof(*map) + map->memsize)); - if (with_timeout(map->timeout)) - NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout)); + if (nla_put_net16(skb, IPSET_ATTR_PORT, htons(map->first_port)) || + nla_put_net16(skb, IPSET_ATTR_PORT_TO, htons(map->last_port)) || + nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || + nla_put_net32(skb, IPSET_ATTR_MEMSIZE, + htonl(sizeof(*map) + map->memsize)) || + (with_timeout(map->timeout) && + nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout)))) + goto nla_put_failure; ipset_nest_end(skb, nested); return 0; diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index e6c1c9605a5..819c342f5b3 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1092,19 +1092,21 @@ dump_last: ret = -EMSGSIZE; goto release_refcount; } - NLA_PUT_U8(skb, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL); - NLA_PUT_STRING(skb, IPSET_ATTR_SETNAME, set->name); + if (nla_put_u8(skb, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) || + nla_put_string(skb, IPSET_ATTR_SETNAME, set->name)) + goto nla_put_failure; if (dump_flags & IPSET_FLAG_LIST_SETNAME) goto next_set; switch (cb->args[2]) { case 0: /* Core header data */ - NLA_PUT_STRING(skb, IPSET_ATTR_TYPENAME, - set->type->name); - NLA_PUT_U8(skb, IPSET_ATTR_FAMILY, - set->family); - NLA_PUT_U8(skb, IPSET_ATTR_REVISION, - set->revision); + if (nla_put_string(skb, IPSET_ATTR_TYPENAME, + set->type->name) || + nla_put_u8(skb, IPSET_ATTR_FAMILY, + set->family) || + nla_put_u8(skb, IPSET_ATTR_REVISION, + set->revision)) + goto nla_put_failure; ret = set->variant->head(set, skb); if (ret < 0) goto release_refcount; @@ -1410,11 +1412,12 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb, IPSET_CMD_HEADER); if (!nlh2) goto nlmsg_failure; - NLA_PUT_U8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL); - NLA_PUT_STRING(skb2, IPSET_ATTR_SETNAME, set->name); - NLA_PUT_STRING(skb2, IPSET_ATTR_TYPENAME, set->type->name); - NLA_PUT_U8(skb2, IPSET_ATTR_FAMILY, set->family); - NLA_PUT_U8(skb2, IPSET_ATTR_REVISION, set->revision); + if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) || + nla_put_string(skb2, IPSET_ATTR_SETNAME, set->name) || + nla_put_string(skb2, IPSET_ATTR_TYPENAME, set->type->name) || + nla_put_u8(skb2, IPSET_ATTR_FAMILY, set->family) || + nla_put_u8(skb2, IPSET_ATTR_REVISION, set->revision)) + goto nla_put_failure; nlmsg_end(skb2, nlh2); ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); @@ -1469,11 +1472,12 @@ ip_set_type(struct sock *ctnl, struct sk_buff *skb, IPSET_CMD_TYPE); if (!nlh2) goto nlmsg_failure; - NLA_PUT_U8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL); - NLA_PUT_STRING(skb2, IPSET_ATTR_TYPENAME, typename); - NLA_PUT_U8(skb2, IPSET_ATTR_FAMILY, family); - NLA_PUT_U8(skb2, IPSET_ATTR_REVISION, max); - NLA_PUT_U8(skb2, IPSET_ATTR_REVISION_MIN, min); + if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) || + nla_put_string(skb2, IPSET_ATTR_TYPENAME, typename) || + nla_put_u8(skb2, IPSET_ATTR_FAMILY, family) || + nla_put_u8(skb2, IPSET_ATTR_REVISION, max) || + nla_put_u8(skb2, IPSET_ATTR_REVISION_MIN, min)) + goto nla_put_failure; nlmsg_end(skb2, nlh2); pr_debug("Send TYPE, nlmsg_len: %u\n", nlh2->nlmsg_len); @@ -1517,7 +1521,8 @@ ip_set_protocol(struct sock *ctnl, struct sk_buff *skb, IPSET_CMD_PROTOCOL); if (!nlh2) goto nlmsg_failure; - NLA_PUT_U8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL); + if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL)) + goto nla_put_failure; nlmsg_end(skb2, nlh2); ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); @@ -1613,7 +1618,7 @@ static struct nfnetlink_subsystem ip_set_netlink_subsys __read_mostly = { static int ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) { - unsigned *op; + unsigned int *op; void *data; int copylen = *len, ret = 0; @@ -1621,7 +1626,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) return -EPERM; if (optval != SO_IP_SET) return -EBADF; - if (*len < sizeof(unsigned)) + if (*len < sizeof(unsigned int)) return -EINVAL; data = vmalloc(*len); @@ -1631,7 +1636,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) ret = -EFAULT; goto done; } - op = (unsigned *) data; + op = (unsigned int *) data; if (*op < IP_SET_OP_VERSION) { /* Check the version at the beginning of operations */ diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index 5139dea6019..a68dbd4f1e4 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -81,7 +81,8 @@ hash_ip4_data_zero_out(struct hash_ip4_elem *elem) static inline bool hash_ip4_data_list(struct sk_buff *skb, const struct hash_ip4_elem *data) { - NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip); + if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip)) + goto nla_put_failure; return 0; nla_put_failure: @@ -94,9 +95,10 @@ hash_ip4_data_tlist(struct sk_buff *skb, const struct hash_ip4_elem *data) const struct hash_ip4_telem *tdata = (const struct hash_ip4_telem *)data; - NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, tdata->ip); - NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get(tdata->timeout))); + if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, tdata->ip) || + nla_put_net32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(tdata->timeout)))) + goto nla_put_failure; return 0; @@ -262,7 +264,8 @@ ip6_netmask(union nf_inet_addr *ip, u8 prefix) static bool hash_ip6_data_list(struct sk_buff *skb, const struct hash_ip6_elem *data) { - NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &data->ip); + if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6)) + goto nla_put_failure; return 0; nla_put_failure: @@ -275,9 +278,10 @@ hash_ip6_data_tlist(struct sk_buff *skb, const struct hash_ip6_elem *data) const struct hash_ip6_telem *e = (const struct hash_ip6_telem *)data; - NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &e->ip); - NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get(e->timeout))); + if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) || + nla_put_net32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(e->timeout)))) + goto nla_put_failure; return 0; nla_put_failure: @@ -364,6 +368,7 @@ hash_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) { u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; u8 netmask, hbits; + size_t hsize; struct ip_set_hash *h; if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6)) @@ -405,9 +410,12 @@ hash_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) h->timeout = IPSET_NO_TIMEOUT; hbits = htable_bits(hashsize); - h->table = ip_set_alloc( - sizeof(struct htable) - + jhash_size(hbits) * sizeof(struct hbucket)); + hsize = htable_size(hbits); + if (hsize == 0) { + kfree(h); + return -ENOMEM; + } + h->table = ip_set_alloc(hsize); if (!h->table) { kfree(h); return -ENOMEM; diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 9c27e249c17..92722bb82ee 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -93,9 +93,10 @@ static bool hash_ipport4_data_list(struct sk_buff *skb, const struct hash_ipport4_elem *data) { - NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip); - NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); - NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); + if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) || + nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || + nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto)) + goto nla_put_failure; return 0; nla_put_failure: @@ -109,12 +110,12 @@ hash_ipport4_data_tlist(struct sk_buff *skb, const struct hash_ipport4_telem *tdata = (const struct hash_ipport4_telem *)data; - NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, tdata->ip); - NLA_PUT_NET16(skb, IPSET_ATTR_PORT, tdata->port); - NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); - NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get(tdata->timeout))); - + if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, tdata->ip) || + nla_put_net16(skb, IPSET_ATTR_PORT, tdata->port) || + nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) || + nla_put_net32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(tdata->timeout)))) + goto nla_put_failure; return 0; nla_put_failure: @@ -308,9 +309,10 @@ static bool hash_ipport6_data_list(struct sk_buff *skb, const struct hash_ipport6_elem *data) { - NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &data->ip); - NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); - NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); + if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6) || + nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || + nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto)) + goto nla_put_failure; return 0; nla_put_failure: @@ -324,11 +326,12 @@ hash_ipport6_data_tlist(struct sk_buff *skb, const struct hash_ipport6_telem *e = (const struct hash_ipport6_telem *)data; - NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &e->ip); - NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); - NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); - NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get(e->timeout))); + if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) || + nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || + nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) || + nla_put_net32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(e->timeout)))) + goto nla_put_failure; return 0; nla_put_failure: @@ -449,6 +452,7 @@ hash_ipport_create(struct ip_set *set, struct nlattr *tb[], u32 flags) struct ip_set_hash *h; u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; u8 hbits; + size_t hsize; if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6)) return -IPSET_ERR_INVALID_FAMILY; @@ -476,9 +480,12 @@ hash_ipport_create(struct ip_set *set, struct nlattr *tb[], u32 flags) h->timeout = IPSET_NO_TIMEOUT; hbits = htable_bits(hashsize); - h->table = ip_set_alloc( - sizeof(struct htable) - + jhash_size(hbits) * sizeof(struct hbucket)); + hsize = htable_size(hbits); + if (hsize == 0) { + kfree(h); + return -ENOMEM; + } + h->table = ip_set_alloc(hsize); if (!h->table) { kfree(h); return -ENOMEM; diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 9134057c072..0637ce096de 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -94,10 +94,11 @@ static bool hash_ipportip4_data_list(struct sk_buff *skb, const struct hash_ipportip4_elem *data) { - NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip); - NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP2, data->ip2); - NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); - NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); + if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) || + nla_put_ipaddr4(skb, IPSET_ATTR_IP2, data->ip2) || + nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || + nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto)) + goto nla_put_failure; return 0; nla_put_failure: @@ -111,13 +112,13 @@ hash_ipportip4_data_tlist(struct sk_buff *skb, const struct hash_ipportip4_telem *tdata = (const struct hash_ipportip4_telem *)data; - NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, tdata->ip); - NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP2, tdata->ip2); - NLA_PUT_NET16(skb, IPSET_ATTR_PORT, tdata->port); - NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); - NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get(tdata->timeout))); - + if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, tdata->ip) || + nla_put_ipaddr4(skb, IPSET_ATTR_IP2, tdata->ip2) || + nla_put_net16(skb, IPSET_ATTR_PORT, tdata->port) || + nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) || + nla_put_net32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(tdata->timeout)))) + goto nla_put_failure; return 0; nla_put_failure: @@ -319,10 +320,11 @@ static bool hash_ipportip6_data_list(struct sk_buff *skb, const struct hash_ipportip6_elem *data) { - NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &data->ip); - NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP2, &data->ip2); - NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); - NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); + if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6) || + nla_put_ipaddr6(skb, IPSET_ATTR_IP2, &data->ip2.in6) || + nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || + nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto)) + goto nla_put_failure; return 0; nla_put_failure: @@ -336,12 +338,13 @@ hash_ipportip6_data_tlist(struct sk_buff *skb, const struct hash_ipportip6_telem *e = (const struct hash_ipportip6_telem *)data; - NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &e->ip); - NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP2, &data->ip2); - NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); - NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); - NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get(e->timeout))); + if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) || + nla_put_ipaddr6(skb, IPSET_ATTR_IP2, &data->ip2.in6) || + nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || + nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) || + nla_put_net32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(e->timeout)))) + goto nla_put_failure; return 0; nla_put_failure: @@ -467,6 +470,7 @@ hash_ipportip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) struct ip_set_hash *h; u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; u8 hbits; + size_t hsize; if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6)) return -IPSET_ERR_INVALID_FAMILY; @@ -494,9 +498,12 @@ hash_ipportip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) h->timeout = IPSET_NO_TIMEOUT; hbits = htable_bits(hashsize); - h->table = ip_set_alloc( - sizeof(struct htable) - + jhash_size(hbits) * sizeof(struct hbucket)); + hsize = htable_size(hbits); + if (hsize == 0) { + kfree(h); + return -ENOMEM; + } + h->table = ip_set_alloc(hsize); if (!h->table) { kfree(h); return -ENOMEM; diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 5d05e696986..1ce21ca976e 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -124,13 +124,14 @@ hash_ipportnet4_data_list(struct sk_buff *skb, { u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; - NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip); - NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP2, data->ip2); - NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); - NLA_PUT_U8(skb, IPSET_ATTR_CIDR2, data->cidr + 1); - NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); - if (flags) - NLA_PUT_NET32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)); + if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) || + nla_put_ipaddr4(skb, IPSET_ATTR_IP2, data->ip2) || + nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || + nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr + 1) || + nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) || + (flags && + nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) + goto nla_put_failure; return 0; nla_put_failure: @@ -145,16 +146,16 @@ hash_ipportnet4_data_tlist(struct sk_buff *skb, (const struct hash_ipportnet4_telem *)data; u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; - NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, tdata->ip); - NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP2, tdata->ip2); - NLA_PUT_NET16(skb, IPSET_ATTR_PORT, tdata->port); - NLA_PUT_U8(skb, IPSET_ATTR_CIDR2, data->cidr + 1); - NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); - NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get(tdata->timeout))); - if (flags) - NLA_PUT_NET32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)); - + if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, tdata->ip) || + nla_put_ipaddr4(skb, IPSET_ATTR_IP2, tdata->ip2) || + nla_put_net16(skb, IPSET_ATTR_PORT, tdata->port) || + nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr + 1) || + nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) || + nla_put_net32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(tdata->timeout))) || + (flags && + nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) + goto nla_put_failure; return 0; nla_put_failure: @@ -436,13 +437,14 @@ hash_ipportnet6_data_list(struct sk_buff *skb, { u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; - NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &data->ip); - NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP2, &data->ip2); - NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); - NLA_PUT_U8(skb, IPSET_ATTR_CIDR2, data->cidr + 1); - NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); - if (flags) - NLA_PUT_NET32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)); + if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6) || + nla_put_ipaddr6(skb, IPSET_ATTR_IP2, &data->ip2.in6) || + nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || + nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr + 1) || + nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) || + (flags && + nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) + goto nla_put_failure; return 0; nla_put_failure: @@ -457,15 +459,16 @@ hash_ipportnet6_data_tlist(struct sk_buff *skb, (const struct hash_ipportnet6_telem *)data; u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; - NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &e->ip); - NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP2, &data->ip2); - NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); - NLA_PUT_U8(skb, IPSET_ATTR_CIDR2, data->cidr + 1); - NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); - NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get(e->timeout))); - if (flags) - NLA_PUT_NET32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)); + if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) || + nla_put_ipaddr6(skb, IPSET_ATTR_IP2, &data->ip2.in6) || + nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || + nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr + 1) || + nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) || + nla_put_net32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(e->timeout))) || + (flags && + nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) + goto nla_put_failure; return 0; nla_put_failure: @@ -616,6 +619,7 @@ hash_ipportnet_create(struct ip_set *set, struct nlattr *tb[], u32 flags) struct ip_set_hash *h; u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; u8 hbits; + size_t hsize; if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6)) return -IPSET_ERR_INVALID_FAMILY; @@ -645,9 +649,12 @@ hash_ipportnet_create(struct ip_set *set, struct nlattr *tb[], u32 flags) h->timeout = IPSET_NO_TIMEOUT; hbits = htable_bits(hashsize); - h->table = ip_set_alloc( - sizeof(struct htable) - + jhash_size(hbits) * sizeof(struct hbucket)); + hsize = htable_size(hbits); + if (hsize == 0) { + kfree(h); + return -ENOMEM; + } + h->table = ip_set_alloc(hsize); if (!h->table) { kfree(h); return -ENOMEM; diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 7c3d945517c..c57a6a09906 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -111,10 +111,11 @@ hash_net4_data_list(struct sk_buff *skb, const struct hash_net4_elem *data) { u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; - NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip); - NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr); - if (flags) - NLA_PUT_NET32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)); + if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) || + nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr) || + (flags && + nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) + goto nla_put_failure; return 0; nla_put_failure: @@ -128,13 +129,13 @@ hash_net4_data_tlist(struct sk_buff *skb, const struct hash_net4_elem *data) (const struct hash_net4_telem *)data; u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; - NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, tdata->ip); - NLA_PUT_U8(skb, IPSET_ATTR_CIDR, tdata->cidr); - NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get(tdata->timeout))); - if (flags) - NLA_PUT_NET32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)); - + if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, tdata->ip) || + nla_put_u8(skb, IPSET_ATTR_CIDR, tdata->cidr) || + nla_put_net32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(tdata->timeout))) || + (flags && + nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) + goto nla_put_failure; return 0; nla_put_failure: @@ -339,10 +340,11 @@ hash_net6_data_list(struct sk_buff *skb, const struct hash_net6_elem *data) { u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; - NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &data->ip); - NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr); - if (flags) - NLA_PUT_NET32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)); + if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6) || + nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr) || + (flags && + nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) + goto nla_put_failure; return 0; nla_put_failure: @@ -356,12 +358,13 @@ hash_net6_data_tlist(struct sk_buff *skb, const struct hash_net6_elem *data) (const struct hash_net6_telem *)data; u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; - NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &e->ip); - NLA_PUT_U8(skb, IPSET_ATTR_CIDR, e->cidr); - NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get(e->timeout))); - if (flags) - NLA_PUT_NET32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)); + if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) || + nla_put_u8(skb, IPSET_ATTR_CIDR, e->cidr) || + nla_put_net32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(e->timeout))) || + (flags && + nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) + goto nla_put_failure; return 0; nla_put_failure: @@ -460,6 +463,7 @@ hash_net_create(struct ip_set *set, struct nlattr *tb[], u32 flags) u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; struct ip_set_hash *h; u8 hbits; + size_t hsize; if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6)) return -IPSET_ERR_INVALID_FAMILY; @@ -489,9 +493,12 @@ hash_net_create(struct ip_set *set, struct nlattr *tb[], u32 flags) h->timeout = IPSET_NO_TIMEOUT; hbits = htable_bits(hashsize); - h->table = ip_set_alloc( - sizeof(struct htable) - + jhash_size(hbits) * sizeof(struct hbucket)); + hsize = htable_size(hbits); + if (hsize == 0) { + kfree(h); + return -ENOMEM; + } + h->table = ip_set_alloc(hsize); if (!h->table) { kfree(h); return -ENOMEM; diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index f24037ff432..ee863943c82 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -252,11 +252,12 @@ hash_netiface4_data_list(struct sk_buff *skb, if (data->nomatch) flags |= IPSET_FLAG_NOMATCH; - NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip); - NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr); - NLA_PUT_STRING(skb, IPSET_ATTR_IFACE, data->iface); - if (flags) - NLA_PUT_NET32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)); + if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) || + nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr) || + nla_put_string(skb, IPSET_ATTR_IFACE, data->iface) || + (flags && + nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) + goto nla_put_failure; return 0; nla_put_failure: @@ -273,13 +274,14 @@ hash_netiface4_data_tlist(struct sk_buff *skb, if (data->nomatch) flags |= IPSET_FLAG_NOMATCH; - NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip); - NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr); - NLA_PUT_STRING(skb, IPSET_ATTR_IFACE, data->iface); - if (flags) - NLA_PUT_NET32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)); - NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get(tdata->timeout))); + if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) || + nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr) || + nla_put_string(skb, IPSET_ATTR_IFACE, data->iface) || + (flags && + nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))) || + nla_put_net32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(tdata->timeout)))) + goto nla_put_failure; return 0; @@ -555,11 +557,12 @@ hash_netiface6_data_list(struct sk_buff *skb, if (data->nomatch) flags |= IPSET_FLAG_NOMATCH; - NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &data->ip); - NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr); - NLA_PUT_STRING(skb, IPSET_ATTR_IFACE, data->iface); - if (flags) - NLA_PUT_NET32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)); + if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6) || + nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr) || + nla_put_string(skb, IPSET_ATTR_IFACE, data->iface) || + (flags && + nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) + goto nla_put_failure; return 0; nla_put_failure: @@ -576,13 +579,14 @@ hash_netiface6_data_tlist(struct sk_buff *skb, if (data->nomatch) flags |= IPSET_FLAG_NOMATCH; - NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &e->ip); - NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr); - NLA_PUT_STRING(skb, IPSET_ATTR_IFACE, data->iface); - if (flags) - NLA_PUT_NET32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)); - NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get(e->timeout))); + if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) || + nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr) || + nla_put_string(skb, IPSET_ATTR_IFACE, data->iface) || + (flags && + nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))) || + nla_put_net32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(e->timeout)))) + goto nla_put_failure; return 0; nla_put_failure: @@ -722,6 +726,7 @@ hash_netiface_create(struct ip_set *set, struct nlattr *tb[], u32 flags) struct ip_set_hash *h; u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; u8 hbits; + size_t hsize; if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6)) return -IPSET_ERR_INVALID_FAMILY; @@ -752,9 +757,12 @@ hash_netiface_create(struct ip_set *set, struct nlattr *tb[], u32 flags) h->ahash_max = AHASH_MAX_SIZE; hbits = htable_bits(hashsize); - h->table = ip_set_alloc( - sizeof(struct htable) - + jhash_size(hbits) * sizeof(struct hbucket)); + hsize = htable_size(hbits); + if (hsize == 0) { + kfree(h); + return -ENOMEM; + } + h->table = ip_set_alloc(hsize); if (!h->table) { kfree(h); return -ENOMEM; diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index ce2e77100b6..fc3143a2d41 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -124,12 +124,13 @@ hash_netport4_data_list(struct sk_buff *skb, { u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; - NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip); - NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); - NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr + 1); - NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); - if (flags) - NLA_PUT_NET32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)); + if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) || + nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || + nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr + 1) || + nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) || + (flags && + nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) + goto nla_put_failure; return 0; nla_put_failure: @@ -144,15 +145,15 @@ hash_netport4_data_tlist(struct sk_buff *skb, (const struct hash_netport4_telem *)data; u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; - NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, tdata->ip); - NLA_PUT_NET16(skb, IPSET_ATTR_PORT, tdata->port); - NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr + 1); - NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); - NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get(tdata->timeout))); - if (flags) - NLA_PUT_NET32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)); - + if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, tdata->ip) || + nla_put_net16(skb, IPSET_ATTR_PORT, tdata->port) || + nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr + 1) || + nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) || + nla_put_net32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(tdata->timeout))) || + (flags && + nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) + goto nla_put_failure; return 0; nla_put_failure: @@ -402,12 +403,13 @@ hash_netport6_data_list(struct sk_buff *skb, { u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; - NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &data->ip); - NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); - NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr + 1); - NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); - if (flags) - NLA_PUT_NET32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)); + if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6) || + nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || + nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr + 1) || + nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) || + (flags && + nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) + goto nla_put_failure; return 0; nla_put_failure: @@ -422,14 +424,15 @@ hash_netport6_data_tlist(struct sk_buff *skb, (const struct hash_netport6_telem *)data; u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; - NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &e->ip); - NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); - NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr + 1); - NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); - NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get(e->timeout))); - if (flags) - NLA_PUT_NET32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)); + if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) || + nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || + nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr + 1) || + nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) || + nla_put_net32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(e->timeout))) || + (flags && + nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) + goto nla_put_failure; return 0; nla_put_failure: @@ -572,6 +575,7 @@ hash_netport_create(struct ip_set *set, struct nlattr *tb[], u32 flags) struct ip_set_hash *h; u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; u8 hbits; + size_t hsize; if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6)) return -IPSET_ERR_INVALID_FAMILY; @@ -601,9 +605,12 @@ hash_netport_create(struct ip_set *set, struct nlattr *tb[], u32 flags) h->timeout = IPSET_NO_TIMEOUT; hbits = htable_bits(hashsize); - h->table = ip_set_alloc( - sizeof(struct htable) - + jhash_size(hbits) * sizeof(struct hbucket)); + hsize = htable_size(hbits); + if (hsize == 0) { + kfree(h); + return -ENOMEM; + } + h->table = ip_set_alloc(hsize); if (!h->table) { kfree(h); return -ENOMEM; diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 7e095f9005f..6cb1225765f 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -402,12 +402,13 @@ list_set_head(struct ip_set *set, struct sk_buff *skb) nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) goto nla_put_failure; - NLA_PUT_NET32(skb, IPSET_ATTR_SIZE, htonl(map->size)); - if (with_timeout(map->timeout)) - NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout)); - NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)); - NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, - htonl(sizeof(*map) + map->size * map->dsize)); + if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) || + (with_timeout(map->timeout) && + nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout))) || + nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || + nla_put_net32(skb, IPSET_ATTR_MEMSIZE, + htonl(sizeof(*map) + map->size * map->dsize))) + goto nla_put_failure; ipset_nest_end(skb, nested); return 0; @@ -442,13 +443,15 @@ list_set_list(const struct ip_set *set, } else goto nla_put_failure; } - NLA_PUT_STRING(skb, IPSET_ATTR_NAME, - ip_set_name_byindex(e->id)); + if (nla_put_string(skb, IPSET_ATTR_NAME, + ip_set_name_byindex(e->id))) + goto nla_put_failure; if (with_timeout(map->timeout)) { const struct set_telem *te = (const struct set_telem *) e; - NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get(te->timeout))); + __be32 to = htonl(ip_set_timeout_get(te->timeout)); + if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, to)) + goto nla_put_failure; } ipset_nest_end(skb, nested); } diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 52856178c9d..64f9e8f1320 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -313,7 +313,7 @@ vs_fix_ack_seq(const struct ip_vs_seq *vseq, struct tcphdr *th) * Assumes already checked proto==IPPROTO_TCP and diff!=0. */ static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq, - unsigned flag, __u32 seq, int diff) + unsigned int flag, __u32 seq, int diff) { /* spinlock is to keep updating cp->flags atomic */ spin_lock(&cp->lock); diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 29fa5badde7..1548df9a752 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -86,42 +86,42 @@ struct ip_vs_aligned_lock static struct ip_vs_aligned_lock __ip_vs_conntbl_lock_array[CT_LOCKARRAY_SIZE] __cacheline_aligned; -static inline void ct_read_lock(unsigned key) +static inline void ct_read_lock(unsigned int key) { read_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); } -static inline void ct_read_unlock(unsigned key) +static inline void ct_read_unlock(unsigned int key) { read_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); } -static inline void ct_write_lock(unsigned key) +static inline void ct_write_lock(unsigned int key) { write_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); } -static inline void ct_write_unlock(unsigned key) +static inline void ct_write_unlock(unsigned int key) { write_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); } -static inline void ct_read_lock_bh(unsigned key) +static inline void ct_read_lock_bh(unsigned int key) { read_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); } -static inline void ct_read_unlock_bh(unsigned key) +static inline void ct_read_unlock_bh(unsigned int key) { read_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); } -static inline void ct_write_lock_bh(unsigned key) +static inline void ct_write_lock_bh(unsigned int key) { write_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); } -static inline void ct_write_unlock_bh(unsigned key) +static inline void ct_write_unlock_bh(unsigned int key) { write_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); } @@ -130,7 +130,7 @@ static inline void ct_write_unlock_bh(unsigned key) /* * Returns hash value for IPVS connection entry */ -static unsigned int ip_vs_conn_hashkey(struct net *net, int af, unsigned proto, +static unsigned int ip_vs_conn_hashkey(struct net *net, int af, unsigned int proto, const union nf_inet_addr *addr, __be16 port) { @@ -188,7 +188,7 @@ static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp) */ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) { - unsigned hash; + unsigned int hash; int ret; if (cp->flags & IP_VS_CONN_F_ONE_PACKET) @@ -224,7 +224,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) */ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) { - unsigned hash; + unsigned int hash; int ret; /* unhash it and decrease its reference counter */ @@ -257,7 +257,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) static inline struct ip_vs_conn * __ip_vs_conn_in_get(const struct ip_vs_conn_param *p) { - unsigned hash; + unsigned int hash; struct ip_vs_conn *cp; struct hlist_node *n; @@ -344,7 +344,7 @@ EXPORT_SYMBOL_GPL(ip_vs_conn_in_get_proto); /* Get reference to connection template */ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) { - unsigned hash; + unsigned int hash; struct ip_vs_conn *cp; struct hlist_node *n; @@ -394,7 +394,7 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) * p->vaddr, p->vport: pkt dest address (foreign host) */ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) { - unsigned hash; + unsigned int hash; struct ip_vs_conn *cp, *ret=NULL; struct hlist_node *n; @@ -548,6 +548,7 @@ static inline void ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) { unsigned int conn_flags; + __u32 flags; /* if dest is NULL, then return directly */ if (!dest) @@ -559,17 +560,19 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) conn_flags = atomic_read(&dest->conn_flags); if (cp->protocol != IPPROTO_UDP) conn_flags &= ~IP_VS_CONN_F_ONE_PACKET; + flags = cp->flags; /* Bind with the destination and its corresponding transmitter */ - if (cp->flags & IP_VS_CONN_F_SYNC) { + if (flags & IP_VS_CONN_F_SYNC) { /* if the connection is not template and is created * by sync, preserve the activity flag. */ - if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) + if (!(flags & IP_VS_CONN_F_TEMPLATE)) conn_flags &= ~IP_VS_CONN_F_INACTIVE; /* connections inherit forwarding method from dest */ - cp->flags &= ~IP_VS_CONN_F_FWD_MASK; + flags &= ~(IP_VS_CONN_F_FWD_MASK | IP_VS_CONN_F_NOOUTPUT); } - cp->flags |= conn_flags; + flags |= conn_flags; + cp->flags = flags; cp->dest = dest; IP_VS_DBG_BUF(7, "Bind-dest %s c:%s:%d v:%s:%d " @@ -584,12 +587,12 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) atomic_read(&dest->refcnt)); /* Update the connection counters */ - if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) { - /* It is a normal connection, so increase the inactive - connection counter because it is in TCP SYNRECV - state (inactive) or other protocol inacive state */ - if ((cp->flags & IP_VS_CONN_F_SYNC) && - (!(cp->flags & IP_VS_CONN_F_INACTIVE))) + if (!(flags & IP_VS_CONN_F_TEMPLATE)) { + /* It is a normal connection, so modify the counters + * according to the flags, later the protocol can + * update them on state change + */ + if (!(flags & IP_VS_CONN_F_INACTIVE)) atomic_inc(&dest->activeconns); else atomic_inc(&dest->inactconns); @@ -613,14 +616,40 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) { struct ip_vs_dest *dest; - if ((cp) && (!cp->dest)) { - dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr, - cp->dport, &cp->vaddr, cp->vport, - cp->protocol, cp->fwmark, cp->flags); + dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr, + cp->dport, &cp->vaddr, cp->vport, + cp->protocol, cp->fwmark, cp->flags); + if (dest) { + struct ip_vs_proto_data *pd; + + spin_lock(&cp->lock); + if (cp->dest) { + spin_unlock(&cp->lock); + return dest; + } + + /* Applications work depending on the forwarding method + * but better to reassign them always when binding dest */ + if (cp->app) + ip_vs_unbind_app(cp); + ip_vs_bind_dest(cp, dest); - return dest; - } else - return NULL; + spin_unlock(&cp->lock); + + /* Update its packet transmitter */ + cp->packet_xmit = NULL; +#ifdef CONFIG_IP_VS_IPV6 + if (cp->af == AF_INET6) + ip_vs_bind_xmit_v6(cp); + else +#endif + ip_vs_bind_xmit(cp); + + pd = ip_vs_proto_data_get(ip_vs_conn_net(cp), cp->protocol); + if (pd && atomic_read(&pd->appcnt)) + ip_vs_bind_app(cp, pd->pp); + } + return dest; } @@ -743,7 +772,8 @@ int ip_vs_check_template(struct ip_vs_conn *ct) static void ip_vs_conn_expire(unsigned long data) { struct ip_vs_conn *cp = (struct ip_vs_conn *)data; - struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); + struct net *net = ip_vs_conn_net(cp); + struct netns_ipvs *ipvs = net_ipvs(net); cp->timeout = 60*HZ; @@ -808,6 +838,9 @@ static void ip_vs_conn_expire(unsigned long data) atomic_read(&cp->refcnt)-1, atomic_read(&cp->n_control)); + if (ipvs->sync_state & IP_VS_STATE_MASTER) + ip_vs_sync_conn(net, cp, sysctl_sync_threshold(ipvs)); + ip_vs_conn_put(cp); } @@ -824,7 +857,7 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp) */ struct ip_vs_conn * ip_vs_conn_new(const struct ip_vs_conn_param *p, - const union nf_inet_addr *daddr, __be16 dport, unsigned flags, + const union nf_inet_addr *daddr, __be16 dport, unsigned int flags, struct ip_vs_dest *dest, __u32 fwmark) { struct ip_vs_conn *cp; @@ -881,6 +914,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, /* Set its state and timeout */ cp->state = 0; cp->timeout = 3*HZ; + cp->sync_endtime = jiffies & ~3UL; /* Bind its packet transmitter */ #ifdef CONFIG_IP_VS_IPV6 @@ -1057,7 +1091,7 @@ static const struct file_operations ip_vs_conn_fops = { .release = seq_release_net, }; -static const char *ip_vs_origin_name(unsigned flags) +static const char *ip_vs_origin_name(unsigned int flags) { if (flags & IP_VS_CONN_F_SYNC) return "SYNC"; @@ -1169,7 +1203,7 @@ void ip_vs_random_dropentry(struct net *net) * Randomly scan 1/32 of the whole table every second */ for (idx = 0; idx < (ip_vs_conn_tab_size>>5); idx++) { - unsigned hash = net_random() & ip_vs_conn_tab_mask; + unsigned int hash = net_random() & ip_vs_conn_tab_mask; struct hlist_node *n; /* diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 2555816e778..a54b018c6ee 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -80,7 +80,7 @@ static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0); #define icmp_id(icmph) (((icmph)->un).echo.id) #define icmpv6_id(icmph) (icmph->icmp6_dataun.u_echo.identifier) -const char *ip_vs_proto_name(unsigned proto) +const char *ip_vs_proto_name(unsigned int proto) { static char buf[20]; @@ -1613,34 +1613,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) else pkts = atomic_add_return(1, &cp->in_pkts); - if ((ipvs->sync_state & IP_VS_STATE_MASTER) && - cp->protocol == IPPROTO_SCTP) { - if ((cp->state == IP_VS_SCTP_S_ESTABLISHED && - (pkts % sysctl_sync_period(ipvs) - == sysctl_sync_threshold(ipvs))) || - (cp->old_state != cp->state && - ((cp->state == IP_VS_SCTP_S_CLOSED) || - (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) || - (cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) { - ip_vs_sync_conn(net, cp); - goto out; - } - } - - /* Keep this block last: TCP and others with pp->num_states <= 1 */ - else if ((ipvs->sync_state & IP_VS_STATE_MASTER) && - (((cp->protocol != IPPROTO_TCP || - cp->state == IP_VS_TCP_S_ESTABLISHED) && - (pkts % sysctl_sync_period(ipvs) - == sysctl_sync_threshold(ipvs))) || - ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) && - ((cp->state == IP_VS_TCP_S_FIN_WAIT) || - (cp->state == IP_VS_TCP_S_CLOSE) || - (cp->state == IP_VS_TCP_S_CLOSE_WAIT) || - (cp->state == IP_VS_TCP_S_TIME_WAIT))))) - ip_vs_sync_conn(net, cp); -out: - cp->old_state = cp->state; + if (ipvs->sync_state & IP_VS_STATE_MASTER) + ip_vs_sync_conn(net, cp, pkts); ip_vs_conn_put(cp); return ret; @@ -1924,6 +1898,7 @@ protocol_fail: control_fail: ip_vs_estimator_net_cleanup(net); estimator_fail: + net->ipvs = NULL; return -ENOMEM; } @@ -1936,6 +1911,7 @@ static void __net_exit __ip_vs_cleanup(struct net *net) ip_vs_control_net_cleanup(net); ip_vs_estimator_net_cleanup(net); IP_VS_DBG(2, "ipvs netns %d released\n", net_ipvs(net)->gen); + net->ipvs = NULL; } static void __net_exit __ip_vs_dev_cleanup(struct net *net) @@ -1993,10 +1969,18 @@ static int __init ip_vs_init(void) goto cleanup_dev; } + ret = ip_vs_register_nl_ioctl(); + if (ret < 0) { + pr_err("can't register netlink/ioctl.\n"); + goto cleanup_hooks; + } + pr_info("ipvs loaded.\n"); return ret; +cleanup_hooks: + nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); cleanup_dev: unregister_pernet_device(&ipvs_core_dev_ops); cleanup_sub: @@ -2012,6 +1996,7 @@ exit: static void __exit ip_vs_cleanup(void) { + ip_vs_unregister_nl_ioctl(); nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); unregister_pernet_device(&ipvs_core_dev_ops); unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */ diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index b3afe189af6..dd811b8dd97 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -265,11 +265,11 @@ static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; /* * Returns hash value for virtual service */ -static inline unsigned -ip_vs_svc_hashkey(struct net *net, int af, unsigned proto, +static inline unsigned int +ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto, const union nf_inet_addr *addr, __be16 port) { - register unsigned porth = ntohs(port); + register unsigned int porth = ntohs(port); __be32 addr_fold = addr->ip; #ifdef CONFIG_IP_VS_IPV6 @@ -286,7 +286,7 @@ ip_vs_svc_hashkey(struct net *net, int af, unsigned proto, /* * Returns hash value of fwmark for virtual service lookup */ -static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark) +static inline unsigned int ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark) { return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK; } @@ -298,7 +298,7 @@ static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark) */ static int ip_vs_svc_hash(struct ip_vs_service *svc) { - unsigned hash; + unsigned int hash; if (svc->flags & IP_VS_SVC_F_HASHED) { pr_err("%s(): request for already hashed, called from %pF\n", @@ -361,7 +361,7 @@ static inline struct ip_vs_service * __ip_vs_service_find(struct net *net, int af, __u16 protocol, const union nf_inet_addr *vaddr, __be16 vport) { - unsigned hash; + unsigned int hash; struct ip_vs_service *svc; /* Check for "full" addressed entries */ @@ -388,7 +388,7 @@ __ip_vs_service_find(struct net *net, int af, __u16 protocol, static inline struct ip_vs_service * __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark) { - unsigned hash; + unsigned int hash; struct ip_vs_service *svc; /* Check for fwmark addressed entries */ @@ -489,11 +489,11 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest) /* * Returns hash value for real service */ -static inline unsigned ip_vs_rs_hashkey(int af, +static inline unsigned int ip_vs_rs_hashkey(int af, const union nf_inet_addr *addr, __be16 port) { - register unsigned porth = ntohs(port); + register unsigned int porth = ntohs(port); __be32 addr_fold = addr->ip; #ifdef CONFIG_IP_VS_IPV6 @@ -512,7 +512,7 @@ static inline unsigned ip_vs_rs_hashkey(int af, */ static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest) { - unsigned hash; + unsigned int hash; if (!list_empty(&dest->d_list)) { return 0; @@ -555,7 +555,7 @@ ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol, __be16 dport) { struct netns_ipvs *ipvs = net_ipvs(net); - unsigned hash; + unsigned int hash; struct ip_vs_dest *dest; /* @@ -842,7 +842,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, struct ip_vs_dest **dest_p) { struct ip_vs_dest *dest; - unsigned atype; + unsigned int atype; EnterFunction(2); @@ -1599,6 +1599,10 @@ static int ip_vs_zero_all(struct net *net) } #ifdef CONFIG_SYSCTL + +static int zero; +static int three = 3; + static int proc_do_defense_mode(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -1632,7 +1636,8 @@ proc_do_sync_threshold(ctl_table *table, int write, memcpy(val, valp, sizeof(val)); rc = proc_dointvec(table, write, buffer, lenp, ppos); - if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) { + if (write && (valp[0] < 0 || valp[1] < 0 || + (valp[0] >= valp[1] && valp[1]))) { /* Restore the correct value */ memcpy(valp, val, sizeof(val)); } @@ -1652,9 +1657,24 @@ proc_do_sync_mode(ctl_table *table, int write, if ((*valp < 0) || (*valp > 1)) { /* Restore the correct value */ *valp = val; - } else { - struct net *net = current->nsproxy->net_ns; - ip_vs_sync_switch_mode(net, val); + } + } + return rc; +} + +static int +proc_do_sync_ports(ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int *valp = table->data; + int val = *valp; + int rc; + + rc = proc_dointvec(table, write, buffer, lenp, ppos); + if (write && (*valp != val)) { + if (*valp < 1 || !is_power_of_2(*valp)) { + /* Restore the correct value */ + *valp = val; } } return rc; @@ -1718,6 +1738,24 @@ static struct ctl_table vs_vars[] = { .proc_handler = &proc_do_sync_mode, }, { + .procname = "sync_ports", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_do_sync_ports, + }, + { + .procname = "sync_qlen_max", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "sync_sock_size", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { .procname = "cache_bypass", .maxlen = sizeof(int), .mode = 0644, @@ -1743,6 +1781,20 @@ static struct ctl_table vs_vars[] = { .proc_handler = proc_do_sync_threshold, }, { + .procname = "sync_refresh_period", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { + .procname = "sync_retries", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &three, + }, + { .procname = "nat_icmp_send", .maxlen = sizeof(int), .mode = 0644, @@ -1846,13 +1898,6 @@ static struct ctl_table vs_vars[] = { { } }; -const struct ctl_path net_vs_ctl_path[] = { - { .procname = "net", }, - { .procname = "ipv4", }, - { .procname = "vs", }, - { } -}; -EXPORT_SYMBOL_GPL(net_vs_ctl_path); #endif #ifdef CONFIG_PROC_FS @@ -1867,7 +1912,7 @@ struct ip_vs_iter { * Write the contents of the VS rule table to a PROCfs file. * (It is kept just for backward compatibility) */ -static inline const char *ip_vs_fwd_name(unsigned flags) +static inline const char *ip_vs_fwd_name(unsigned int flags) { switch (flags & IP_VS_CONN_F_FWD_MASK) { case IP_VS_CONN_F_LOCALNODE: @@ -2816,17 +2861,17 @@ static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type, ip_vs_copy_stats(&ustats, stats); - NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns); - NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts); - NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts); - NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes); - NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes); - NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, ustats.cps); - NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps); - NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps); - NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps); - NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps); - + if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns) || + nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts) || + nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts) || + nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes) || + nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes) || + nla_put_u32(skb, IPVS_STATS_ATTR_CPS, ustats.cps) || + nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps) || + nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps) || + nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps) || + nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps)) + goto nla_put_failure; nla_nest_end(skb, nl_stats); return 0; @@ -2847,23 +2892,25 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb, if (!nl_service) return -EMSGSIZE; - NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af); - + if (nla_put_u16(skb, IPVS_SVC_ATTR_AF, svc->af)) + goto nla_put_failure; if (svc->fwmark) { - NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark); + if (nla_put_u32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark)) + goto nla_put_failure; } else { - NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol); - NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr); - NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port); + if (nla_put_u16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol) || + nla_put(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr) || + nla_put_u16(skb, IPVS_SVC_ATTR_PORT, svc->port)) + goto nla_put_failure; } - NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name); - if (svc->pe) - NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name); - NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags); - NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ); - NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask); - + if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name) || + (svc->pe && + nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name)) || + nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) || + nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) || + nla_put_u32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask)) + goto nla_put_failure; if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats)) goto nla_put_failure; @@ -3038,21 +3085,22 @@ static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest) if (!nl_dest) return -EMSGSIZE; - NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr); - NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port); - - NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD, - atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK); - NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight)); - NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold); - NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold); - NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS, - atomic_read(&dest->activeconns)); - NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS, - atomic_read(&dest->inactconns)); - NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS, - atomic_read(&dest->persistconns)); - + if (nla_put(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr) || + nla_put_u16(skb, IPVS_DEST_ATTR_PORT, dest->port) || + nla_put_u32(skb, IPVS_DEST_ATTR_FWD_METHOD, + (atomic_read(&dest->conn_flags) & + IP_VS_CONN_F_FWD_MASK)) || + nla_put_u32(skb, IPVS_DEST_ATTR_WEIGHT, + atomic_read(&dest->weight)) || + nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) || + nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) || + nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS, + atomic_read(&dest->activeconns)) || + nla_put_u32(skb, IPVS_DEST_ATTR_INACT_CONNS, + atomic_read(&dest->inactconns)) || + nla_put_u32(skb, IPVS_DEST_ATTR_PERSIST_CONNS, + atomic_read(&dest->persistconns))) + goto nla_put_failure; if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats)) goto nla_put_failure; @@ -3181,10 +3229,10 @@ static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state, if (!nl_daemon) return -EMSGSIZE; - NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state); - NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn); - NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid); - + if (nla_put_u32(skb, IPVS_DAEMON_ATTR_STATE, state) || + nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn) || + nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid)) + goto nla_put_failure; nla_nest_end(skb, nl_daemon); return 0; @@ -3473,21 +3521,26 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) __ip_vs_get_timeouts(net, &t); #ifdef CONFIG_IP_VS_PROTO_TCP - NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout); - NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN, - t.tcp_fin_timeout); + if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, + t.tcp_timeout) || + nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN, + t.tcp_fin_timeout)) + goto nla_put_failure; #endif #ifdef CONFIG_IP_VS_PROTO_UDP - NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout); + if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout)) + goto nla_put_failure; #endif break; } case IPVS_CMD_GET_INFO: - NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE); - NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE, - ip_vs_conn_tab_size); + if (nla_put_u32(msg, IPVS_INFO_ATTR_VERSION, + IP_VS_VERSION_CODE) || + nla_put_u32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE, + ip_vs_conn_tab_size)) + goto nla_put_failure; break; } @@ -3654,6 +3707,12 @@ int __net_init ip_vs_control_net_init_sysctl(struct net *net) tbl[idx++].data = &ipvs->sysctl_snat_reroute; ipvs->sysctl_sync_ver = 1; tbl[idx++].data = &ipvs->sysctl_sync_ver; + ipvs->sysctl_sync_ports = 1; + tbl[idx++].data = &ipvs->sysctl_sync_ports; + ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32; + tbl[idx++].data = &ipvs->sysctl_sync_qlen_max; + ipvs->sysctl_sync_sock_size = 0; + tbl[idx++].data = &ipvs->sysctl_sync_sock_size; tbl[idx++].data = &ipvs->sysctl_cache_bypass; tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn; tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template; @@ -3661,11 +3720,14 @@ int __net_init ip_vs_control_net_init_sysctl(struct net *net) ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD; tbl[idx].data = &ipvs->sysctl_sync_threshold; tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold); + ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD; + tbl[idx++].data = &ipvs->sysctl_sync_refresh_period; + ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3); + tbl[idx++].data = &ipvs->sysctl_sync_retries; tbl[idx++].data = &ipvs->sysctl_nat_icmp_send; - ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path, - tbl); + ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl); if (ipvs->sysctl_hdr == NULL) { if (!net_eq(net, &init_net)) kfree(tbl); @@ -3680,7 +3742,7 @@ int __net_init ip_vs_control_net_init_sysctl(struct net *net) return 0; } -void __net_init ip_vs_control_net_cleanup_sysctl(struct net *net) +void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -3692,7 +3754,7 @@ void __net_init ip_vs_control_net_cleanup_sysctl(struct net *net) #else int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; } -void __net_init ip_vs_control_net_cleanup_sysctl(struct net *net) { } +void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { } #endif @@ -3750,21 +3812,10 @@ void __net_exit ip_vs_control_net_cleanup(struct net *net) free_percpu(ipvs->tot_stats.cpustats); } -int __init ip_vs_control_init(void) +int __init ip_vs_register_nl_ioctl(void) { - int idx; int ret; - EnterFunction(2); - - /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */ - for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - INIT_LIST_HEAD(&ip_vs_svc_table[idx]); - INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]); - } - - smp_wmb(); /* Do we really need it now ? */ - ret = nf_register_sockopt(&ip_vs_sockopts); if (ret) { pr_err("cannot register sockopt.\n"); @@ -3776,28 +3827,47 @@ int __init ip_vs_control_init(void) pr_err("cannot register Generic Netlink interface.\n"); goto err_genl; } - - ret = register_netdevice_notifier(&ip_vs_dst_notifier); - if (ret < 0) - goto err_notf; - - LeaveFunction(2); return 0; -err_notf: - ip_vs_genl_unregister(); err_genl: nf_unregister_sockopt(&ip_vs_sockopts); err_sock: return ret; } +void ip_vs_unregister_nl_ioctl(void) +{ + ip_vs_genl_unregister(); + nf_unregister_sockopt(&ip_vs_sockopts); +} + +int __init ip_vs_control_init(void) +{ + int idx; + int ret; + + EnterFunction(2); + + /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */ + for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { + INIT_LIST_HEAD(&ip_vs_svc_table[idx]); + INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]); + } + + smp_wmb(); /* Do we really need it now ? */ + + ret = register_netdevice_notifier(&ip_vs_dst_notifier); + if (ret < 0) + return ret; + + LeaveFunction(2); + return 0; +} + void ip_vs_control_cleanup(void) { EnterFunction(2); unregister_netdevice_notifier(&ip_vs_dst_notifier); - ip_vs_genl_unregister(); - nf_unregister_sockopt(&ip_vs_sockopts); LeaveFunction(2); } diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c index 1c269e56200..8b7dca9ea42 100644 --- a/net/netfilter/ipvs/ip_vs_dh.c +++ b/net/netfilter/ipvs/ip_vs_dh.c @@ -68,7 +68,7 @@ struct ip_vs_dh_bucket { /* * Returns hash value for IPVS DH entry */ -static inline unsigned ip_vs_dh_hashkey(int af, const union nf_inet_addr *addr) +static inline unsigned int ip_vs_dh_hashkey(int af, const union nf_inet_addr *addr) { __be32 addr_fold = addr->ip; @@ -149,7 +149,7 @@ static int ip_vs_dh_init_svc(struct ip_vs_service *svc) /* allocate the DH table for this service */ tbl = kmalloc(sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE, - GFP_ATOMIC); + GFP_KERNEL); if (tbl == NULL) return -ENOMEM; diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 538d74ee4f6..b20b29c903e 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -177,7 +177,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, __be16 port; struct ip_vs_conn *n_cp; char buf[24]; /* xxx.xxx.xxx.xxx,ppp,ppp\000 */ - unsigned buf_len; + unsigned int buf_len; int ret = 0; enum ip_conntrack_info ctinfo; struct nf_conn *ct; @@ -439,6 +439,8 @@ static int __net_init __ip_vs_ftp_init(struct net *net) struct ip_vs_app *app; struct netns_ipvs *ipvs = net_ipvs(net); + if (!ipvs) + return -ENOENT; app = kmemdup(&ip_vs_ftp, sizeof(struct ip_vs_app), GFP_KERNEL); if (!app) return -ENOMEM; @@ -483,7 +485,7 @@ static struct pernet_operations ip_vs_ftp_ops = { .exit = __ip_vs_ftp_exit, }; -int __init ip_vs_ftp_init(void) +static int __init ip_vs_ftp_init(void) { int rv; diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 0f16283fd05..df646ccf08a 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -142,7 +142,7 @@ static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) /* * Returns hash value for IPVS LBLC entry */ -static inline unsigned +static inline unsigned int ip_vs_lblc_hashkey(int af, const union nf_inet_addr *addr) { __be32 addr_fold = addr->ip; @@ -163,7 +163,7 @@ ip_vs_lblc_hashkey(int af, const union nf_inet_addr *addr) static void ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en) { - unsigned hash = ip_vs_lblc_hashkey(en->af, &en->addr); + unsigned int hash = ip_vs_lblc_hashkey(en->af, &en->addr); list_add(&en->list, &tbl->bucket[hash]); atomic_inc(&tbl->entries); @@ -178,7 +178,7 @@ static inline struct ip_vs_lblc_entry * ip_vs_lblc_get(int af, struct ip_vs_lblc_table *tbl, const union nf_inet_addr *addr) { - unsigned hash = ip_vs_lblc_hashkey(af, addr); + unsigned int hash = ip_vs_lblc_hashkey(af, addr); struct ip_vs_lblc_entry *en; list_for_each_entry(en, &tbl->bucket[hash], list) @@ -342,7 +342,7 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc) /* * Allocate the ip_vs_lblc_table for this service */ - tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC); + tbl = kmalloc(sizeof(*tbl), GFP_KERNEL); if (tbl == NULL) return -ENOMEM; @@ -551,6 +551,9 @@ static int __net_init __ip_vs_lblc_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); + if (!ipvs) + return -ENOENT; + if (!net_eq(net, &init_net)) { ipvs->lblc_ctl_table = kmemdup(vs_vars_table, sizeof(vs_vars_table), @@ -563,8 +566,7 @@ static int __net_init __ip_vs_lblc_init(struct net *net) ipvs->lblc_ctl_table[0].data = &ipvs->sysctl_lblc_expiration; ipvs->lblc_ctl_header = - register_net_sysctl_table(net, net_vs_ctl_path, - ipvs->lblc_ctl_table); + register_net_sysctl(net, "net/ipv4/vs", ipvs->lblc_ctl_table); if (!ipvs->lblc_ctl_header) { if (!net_eq(net, &init_net)) kfree(ipvs->lblc_ctl_table); diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index eec797f8cce..570e31ea427 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -311,7 +311,7 @@ static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en) /* * Returns hash value for IPVS LBLCR entry */ -static inline unsigned +static inline unsigned int ip_vs_lblcr_hashkey(int af, const union nf_inet_addr *addr) { __be32 addr_fold = addr->ip; @@ -332,7 +332,7 @@ ip_vs_lblcr_hashkey(int af, const union nf_inet_addr *addr) static void ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en) { - unsigned hash = ip_vs_lblcr_hashkey(en->af, &en->addr); + unsigned int hash = ip_vs_lblcr_hashkey(en->af, &en->addr); list_add(&en->list, &tbl->bucket[hash]); atomic_inc(&tbl->entries); @@ -347,7 +347,7 @@ static inline struct ip_vs_lblcr_entry * ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *addr) { - unsigned hash = ip_vs_lblcr_hashkey(af, addr); + unsigned int hash = ip_vs_lblcr_hashkey(af, addr); struct ip_vs_lblcr_entry *en; list_for_each_entry(en, &tbl->bucket[hash], list) @@ -511,7 +511,7 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc) /* * Allocate the ip_vs_lblcr_table for this service */ - tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC); + tbl = kmalloc(sizeof(*tbl), GFP_KERNEL); if (tbl == NULL) return -ENOMEM; @@ -745,6 +745,9 @@ static int __net_init __ip_vs_lblcr_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); + if (!ipvs) + return -ENOENT; + if (!net_eq(net, &init_net)) { ipvs->lblcr_ctl_table = kmemdup(vs_vars_table, sizeof(vs_vars_table), @@ -757,8 +760,7 @@ static int __net_init __ip_vs_lblcr_init(struct net *net) ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration; ipvs->lblcr_ctl_header = - register_net_sysctl_table(net, net_vs_ctl_path, - ipvs->lblcr_ctl_table); + register_net_sysctl(net, "net/ipv4/vs", ipvs->lblcr_ctl_table); if (!ipvs->lblcr_ctl_header) { if (!net_eq(net, &init_net)) kfree(ipvs->lblcr_ctl_table); diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index f843a883325..50d82186da8 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -48,7 +48,7 @@ static struct ip_vs_protocol *ip_vs_proto_table[IP_VS_PROTO_TAB_SIZE]; */ static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp) { - unsigned hash = IP_VS_PROTO_HASH(pp->protocol); + unsigned int hash = IP_VS_PROTO_HASH(pp->protocol); pp->next = ip_vs_proto_table[hash]; ip_vs_proto_table[hash] = pp; @@ -59,9 +59,6 @@ static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp) return 0; } -#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP) || \ - defined(CONFIG_IP_VS_PROTO_SCTP) || defined(CONFIG_IP_VS_PROTO_AH) || \ - defined(CONFIG_IP_VS_PROTO_ESP) /* * register an ipvs protocols netns related data */ @@ -69,9 +66,9 @@ static int register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp) { struct netns_ipvs *ipvs = net_ipvs(net); - unsigned hash = IP_VS_PROTO_HASH(pp->protocol); + unsigned int hash = IP_VS_PROTO_HASH(pp->protocol); struct ip_vs_proto_data *pd = - kzalloc(sizeof(struct ip_vs_proto_data), GFP_ATOMIC); + kzalloc(sizeof(struct ip_vs_proto_data), GFP_KERNEL); if (!pd) return -ENOMEM; @@ -81,12 +78,18 @@ register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp) ipvs->proto_data_table[hash] = pd; atomic_set(&pd->appcnt, 0); /* Init app counter */ - if (pp->init_netns != NULL) - pp->init_netns(net, pd); + if (pp->init_netns != NULL) { + int ret = pp->init_netns(net, pd); + if (ret) { + /* unlink an free proto data */ + ipvs->proto_data_table[hash] = pd->next; + kfree(pd); + return ret; + } + } return 0; } -#endif /* * unregister an ipvs protocol @@ -94,7 +97,7 @@ register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp) static int unregister_ip_vs_protocol(struct ip_vs_protocol *pp) { struct ip_vs_protocol **pp_p; - unsigned hash = IP_VS_PROTO_HASH(pp->protocol); + unsigned int hash = IP_VS_PROTO_HASH(pp->protocol); pp_p = &ip_vs_proto_table[hash]; for (; *pp_p; pp_p = &(*pp_p)->next) { @@ -117,7 +120,7 @@ unregister_ip_vs_proto_netns(struct net *net, struct ip_vs_proto_data *pd) { struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_proto_data **pd_p; - unsigned hash = IP_VS_PROTO_HASH(pd->pp->protocol); + unsigned int hash = IP_VS_PROTO_HASH(pd->pp->protocol); pd_p = &ipvs->proto_data_table[hash]; for (; *pd_p; pd_p = &(*pd_p)->next) { @@ -139,7 +142,7 @@ unregister_ip_vs_proto_netns(struct net *net, struct ip_vs_proto_data *pd) struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto) { struct ip_vs_protocol *pp; - unsigned hash = IP_VS_PROTO_HASH(proto); + unsigned int hash = IP_VS_PROTO_HASH(proto); for (pp = ip_vs_proto_table[hash]; pp; pp = pp->next) { if (pp->protocol == proto) @@ -153,11 +156,11 @@ EXPORT_SYMBOL(ip_vs_proto_get); /* * get ip_vs_protocol object data by netns and proto */ -struct ip_vs_proto_data * +static struct ip_vs_proto_data * __ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto) { struct ip_vs_proto_data *pd; - unsigned hash = IP_VS_PROTO_HASH(proto); + unsigned int hash = IP_VS_PROTO_HASH(proto); for (pd = ipvs->proto_data_table[hash]; pd; pd = pd->next) { if (pd->pp->protocol == proto) @@ -196,7 +199,7 @@ void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags) int * ip_vs_create_timeout_table(int *table, int size) { - return kmemdup(table, size, GFP_ATOMIC); + return kmemdup(table, size, GFP_KERNEL); } @@ -316,22 +319,35 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp, */ int __net_init ip_vs_protocol_net_init(struct net *net) { + int i, ret; + static struct ip_vs_protocol *protos[] = { #ifdef CONFIG_IP_VS_PROTO_TCP - register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp); + &ip_vs_protocol_tcp, #endif #ifdef CONFIG_IP_VS_PROTO_UDP - register_ip_vs_proto_netns(net, &ip_vs_protocol_udp); + &ip_vs_protocol_udp, #endif #ifdef CONFIG_IP_VS_PROTO_SCTP - register_ip_vs_proto_netns(net, &ip_vs_protocol_sctp); + &ip_vs_protocol_sctp, #endif #ifdef CONFIG_IP_VS_PROTO_AH - register_ip_vs_proto_netns(net, &ip_vs_protocol_ah); + &ip_vs_protocol_ah, #endif #ifdef CONFIG_IP_VS_PROTO_ESP - register_ip_vs_proto_netns(net, &ip_vs_protocol_esp); + &ip_vs_protocol_esp, #endif + }; + + for (i = 0; i < ARRAY_SIZE(protos); i++) { + ret = register_ip_vs_proto_netns(net, protos[i]); + if (ret < 0) + goto cleanup; + } return 0; + +cleanup: + ip_vs_protocol_net_cleanup(net); + return ret; } void __net_exit ip_vs_protocol_net_cleanup(struct net *net) diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 1fbf7a2816f..9f3fb751c49 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -1090,7 +1090,7 @@ out: * timeouts is netns related now. * --------------------------------------------- */ -static void __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd) +static int __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -1098,6 +1098,9 @@ static void __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd) spin_lock_init(&ipvs->sctp_app_lock); pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts, sizeof(sctp_timeouts)); + if (!pd->timeout_table) + return -ENOMEM; + return 0; } static void __ip_vs_sctp_exit(struct net *net, struct ip_vs_proto_data *pd) diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index ef8641f7af8..cd609cc6272 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -677,7 +677,7 @@ void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp) * timeouts is netns related now. * --------------------------------------------- */ -static void __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd) +static int __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -685,7 +685,10 @@ static void __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd) spin_lock_init(&ipvs->tcp_app_lock); pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts, sizeof(tcp_timeouts)); + if (!pd->timeout_table) + return -ENOMEM; pd->tcp_state_table = tcp_states; + return 0; } static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd) diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index f4b7262896b..2fedb2dcb3d 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -467,7 +467,7 @@ udp_state_transition(struct ip_vs_conn *cp, int direction, cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL]; } -static void __udp_init(struct net *net, struct ip_vs_proto_data *pd) +static int __udp_init(struct net *net, struct ip_vs_proto_data *pd) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -475,6 +475,9 @@ static void __udp_init(struct net *net, struct ip_vs_proto_data *pd) spin_lock_init(&ipvs->udp_app_lock); pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts, sizeof(udp_timeouts)); + if (!pd->timeout_table) + return -ENOMEM; + return 0; } static void __udp_exit(struct net *net, struct ip_vs_proto_data *pd) diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index 069e8d4d5c0..05126521743 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -70,7 +70,7 @@ struct ip_vs_sh_bucket { /* * Returns hash value for IPVS SH entry */ -static inline unsigned ip_vs_sh_hashkey(int af, const union nf_inet_addr *addr) +static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *addr) { __be32 addr_fold = addr->ip; @@ -162,7 +162,7 @@ static int ip_vs_sh_init_svc(struct ip_vs_service *svc) /* allocate the SH table for this service */ tbl = kmalloc(sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE, - GFP_ATOMIC); + GFP_KERNEL); if (tbl == NULL) return -ENOMEM; diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 8a0d6d6889f..effa10c9e4e 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -196,6 +196,7 @@ struct ip_vs_sync_thread_data { struct net *net; struct socket *sock; char *buf; + int id; }; /* Version 0 definition of packet sizes */ @@ -271,13 +272,6 @@ struct ip_vs_sync_buff { unsigned char *end; }; -/* multicast addr */ -static struct sockaddr_in mcast_addr = { - .sin_family = AF_INET, - .sin_port = cpu_to_be16(IP_VS_SYNC_PORT), - .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP), -}; - /* * Copy of struct ip_vs_seq * From unaligned network order to aligned host order @@ -300,18 +294,22 @@ static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no) put_unaligned_be32(ho->previous_delta, &no->previous_delta); } -static inline struct ip_vs_sync_buff *sb_dequeue(struct netns_ipvs *ipvs) +static inline struct ip_vs_sync_buff * +sb_dequeue(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms) { struct ip_vs_sync_buff *sb; spin_lock_bh(&ipvs->sync_lock); - if (list_empty(&ipvs->sync_queue)) { + if (list_empty(&ms->sync_queue)) { sb = NULL; + __set_current_state(TASK_INTERRUPTIBLE); } else { - sb = list_entry(ipvs->sync_queue.next, - struct ip_vs_sync_buff, + sb = list_entry(ms->sync_queue.next, struct ip_vs_sync_buff, list); list_del(&sb->list); + ms->sync_queue_len--; + if (!ms->sync_queue_len) + ms->sync_queue_delay = 0; } spin_unlock_bh(&ipvs->sync_lock); @@ -334,7 +332,7 @@ ip_vs_sync_buff_create(struct netns_ipvs *ipvs) kfree(sb); return NULL; } - sb->mesg->reserved = 0; /* old nr_conns i.e. must be zeo now */ + sb->mesg->reserved = 0; /* old nr_conns i.e. must be zero now */ sb->mesg->version = SYNC_PROTO_VER; sb->mesg->syncid = ipvs->master_syncid; sb->mesg->size = sizeof(struct ip_vs_sync_mesg); @@ -353,14 +351,22 @@ static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb) kfree(sb); } -static inline void sb_queue_tail(struct netns_ipvs *ipvs) +static inline void sb_queue_tail(struct netns_ipvs *ipvs, + struct ipvs_master_sync_state *ms) { - struct ip_vs_sync_buff *sb = ipvs->sync_buff; + struct ip_vs_sync_buff *sb = ms->sync_buff; spin_lock(&ipvs->sync_lock); - if (ipvs->sync_state & IP_VS_STATE_MASTER) - list_add_tail(&sb->list, &ipvs->sync_queue); - else + if (ipvs->sync_state & IP_VS_STATE_MASTER && + ms->sync_queue_len < sysctl_sync_qlen_max(ipvs)) { + if (!ms->sync_queue_len) + schedule_delayed_work(&ms->master_wakeup_work, + max(IPVS_SYNC_SEND_DELAY, 1)); + ms->sync_queue_len++; + list_add_tail(&sb->list, &ms->sync_queue); + if ((++ms->sync_queue_delay) == IPVS_SYNC_WAKEUP_RATE) + wake_up_process(ms->master_thread); + } else ip_vs_sync_buff_release(sb); spin_unlock(&ipvs->sync_lock); } @@ -370,49 +376,26 @@ static inline void sb_queue_tail(struct netns_ipvs *ipvs) * than the specified time or the specified time is zero. */ static inline struct ip_vs_sync_buff * -get_curr_sync_buff(struct netns_ipvs *ipvs, unsigned long time) +get_curr_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms, + unsigned long time) { struct ip_vs_sync_buff *sb; spin_lock_bh(&ipvs->sync_buff_lock); - if (ipvs->sync_buff && - time_after_eq(jiffies - ipvs->sync_buff->firstuse, time)) { - sb = ipvs->sync_buff; - ipvs->sync_buff = NULL; + sb = ms->sync_buff; + if (sb && time_after_eq(jiffies - sb->firstuse, time)) { + ms->sync_buff = NULL; + __set_current_state(TASK_RUNNING); } else sb = NULL; spin_unlock_bh(&ipvs->sync_buff_lock); return sb; } -/* - * Switch mode from sending version 0 or 1 - * - must handle sync_buf - */ -void ip_vs_sync_switch_mode(struct net *net, int mode) +static inline int +select_master_thread_id(struct netns_ipvs *ipvs, struct ip_vs_conn *cp) { - struct netns_ipvs *ipvs = net_ipvs(net); - - if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) - return; - if (mode == sysctl_sync_ver(ipvs) || !ipvs->sync_buff) - return; - - spin_lock_bh(&ipvs->sync_buff_lock); - /* Buffer empty ? then let buf_create do the job */ - if (ipvs->sync_buff->mesg->size <= sizeof(struct ip_vs_sync_mesg)) { - kfree(ipvs->sync_buff); - ipvs->sync_buff = NULL; - } else { - spin_lock_bh(&ipvs->sync_lock); - if (ipvs->sync_state & IP_VS_STATE_MASTER) - list_add_tail(&ipvs->sync_buff->list, - &ipvs->sync_queue); - else - ip_vs_sync_buff_release(ipvs->sync_buff); - spin_unlock_bh(&ipvs->sync_lock); - } - spin_unlock_bh(&ipvs->sync_buff_lock); + return ((long) cp >> (1 + ilog2(sizeof(*cp)))) & ipvs->threads_mask; } /* @@ -442,15 +425,101 @@ ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs) return sb; } +/* Check if conn should be synced. + * pkts: conn packets, use sysctl_sync_threshold to avoid packet check + * - (1) sync_refresh_period: reduce sync rate. Additionally, retry + * sync_retries times with period of sync_refresh_period/8 + * - (2) if both sync_refresh_period and sync_period are 0 send sync only + * for state changes or only once when pkts matches sync_threshold + * - (3) templates: rate can be reduced only with sync_refresh_period or + * with (2) + */ +static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs, + struct ip_vs_conn *cp, int pkts) +{ + unsigned long orig = ACCESS_ONCE(cp->sync_endtime); + unsigned long now = jiffies; + unsigned long n = (now + cp->timeout) & ~3UL; + unsigned int sync_refresh_period; + int sync_period; + int force; + + /* Check if we sync in current state */ + if (unlikely(cp->flags & IP_VS_CONN_F_TEMPLATE)) + force = 0; + else if (likely(cp->protocol == IPPROTO_TCP)) { + if (!((1 << cp->state) & + ((1 << IP_VS_TCP_S_ESTABLISHED) | + (1 << IP_VS_TCP_S_FIN_WAIT) | + (1 << IP_VS_TCP_S_CLOSE) | + (1 << IP_VS_TCP_S_CLOSE_WAIT) | + (1 << IP_VS_TCP_S_TIME_WAIT)))) + return 0; + force = cp->state != cp->old_state; + if (force && cp->state != IP_VS_TCP_S_ESTABLISHED) + goto set; + } else if (unlikely(cp->protocol == IPPROTO_SCTP)) { + if (!((1 << cp->state) & + ((1 << IP_VS_SCTP_S_ESTABLISHED) | + (1 << IP_VS_SCTP_S_CLOSED) | + (1 << IP_VS_SCTP_S_SHUT_ACK_CLI) | + (1 << IP_VS_SCTP_S_SHUT_ACK_SER)))) + return 0; + force = cp->state != cp->old_state; + if (force && cp->state != IP_VS_SCTP_S_ESTABLISHED) + goto set; + } else { + /* UDP or another protocol with single state */ + force = 0; + } + + sync_refresh_period = sysctl_sync_refresh_period(ipvs); + if (sync_refresh_period > 0) { + long diff = n - orig; + long min_diff = max(cp->timeout >> 1, 10UL * HZ); + + /* Avoid sync if difference is below sync_refresh_period + * and below the half timeout. + */ + if (abs(diff) < min_t(long, sync_refresh_period, min_diff)) { + int retries = orig & 3; + + if (retries >= sysctl_sync_retries(ipvs)) + return 0; + if (time_before(now, orig - cp->timeout + + (sync_refresh_period >> 3))) + return 0; + n |= retries + 1; + } + } + sync_period = sysctl_sync_period(ipvs); + if (sync_period > 0) { + if (!(cp->flags & IP_VS_CONN_F_TEMPLATE) && + pkts % sync_period != sysctl_sync_threshold(ipvs)) + return 0; + } else if (sync_refresh_period <= 0 && + pkts != sysctl_sync_threshold(ipvs)) + return 0; + +set: + cp->old_state = cp->state; + n = cmpxchg(&cp->sync_endtime, orig, n); + return n == orig || force; +} + /* * Version 0 , could be switched in by sys_ctl. * Add an ip_vs_conn information into the current sync_buff. */ -void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp) +static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, + int pkts) { struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_sync_mesg_v0 *m; struct ip_vs_sync_conn_v0 *s; + struct ip_vs_sync_buff *buff; + struct ipvs_master_sync_state *ms; + int id; int len; if (unlikely(cp->af != AF_INET)) @@ -459,21 +528,41 @@ void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp) if (cp->flags & IP_VS_CONN_F_ONE_PACKET) return; + if (!ip_vs_sync_conn_needed(ipvs, cp, pkts)) + return; + spin_lock(&ipvs->sync_buff_lock); - if (!ipvs->sync_buff) { - ipvs->sync_buff = - ip_vs_sync_buff_create_v0(ipvs); - if (!ipvs->sync_buff) { + if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) { + spin_unlock(&ipvs->sync_buff_lock); + return; + } + + id = select_master_thread_id(ipvs, cp); + ms = &ipvs->ms[id]; + buff = ms->sync_buff; + if (buff) { + m = (struct ip_vs_sync_mesg_v0 *) buff->mesg; + /* Send buffer if it is for v1 */ + if (!m->nr_conns) { + sb_queue_tail(ipvs, ms); + ms->sync_buff = NULL; + buff = NULL; + } + } + if (!buff) { + buff = ip_vs_sync_buff_create_v0(ipvs); + if (!buff) { spin_unlock(&ipvs->sync_buff_lock); pr_err("ip_vs_sync_buff_create failed.\n"); return; } + ms->sync_buff = buff; } len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE : SIMPLE_CONN_SIZE; - m = (struct ip_vs_sync_mesg_v0 *)ipvs->sync_buff->mesg; - s = (struct ip_vs_sync_conn_v0 *)ipvs->sync_buff->head; + m = (struct ip_vs_sync_mesg_v0 *) buff->mesg; + s = (struct ip_vs_sync_conn_v0 *) buff->head; /* copy members */ s->reserved = 0; @@ -494,18 +583,24 @@ void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp) m->nr_conns++; m->size += len; - ipvs->sync_buff->head += len; + buff->head += len; /* check if there is a space for next one */ - if (ipvs->sync_buff->head + FULL_CONN_SIZE > ipvs->sync_buff->end) { - sb_queue_tail(ipvs); - ipvs->sync_buff = NULL; + if (buff->head + FULL_CONN_SIZE > buff->end) { + sb_queue_tail(ipvs, ms); + ms->sync_buff = NULL; } spin_unlock(&ipvs->sync_buff_lock); /* synchronize its controller if it has */ - if (cp->control) - ip_vs_sync_conn(net, cp->control); + cp = cp->control; + if (cp) { + if (cp->flags & IP_VS_CONN_F_TEMPLATE) + pkts = atomic_add_return(1, &cp->in_pkts); + else + pkts = sysctl_sync_threshold(ipvs); + ip_vs_sync_conn(net, cp->control, pkts); + } } /* @@ -513,23 +608,29 @@ void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp) * Called by ip_vs_in. * Sending Version 1 messages */ -void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp) +void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts) { struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_sync_mesg *m; union ip_vs_sync_conn *s; + struct ip_vs_sync_buff *buff; + struct ipvs_master_sync_state *ms; + int id; __u8 *p; unsigned int len, pe_name_len, pad; /* Handle old version of the protocol */ if (sysctl_sync_ver(ipvs) == 0) { - ip_vs_sync_conn_v0(net, cp); + ip_vs_sync_conn_v0(net, cp, pkts); return; } /* Do not sync ONE PACKET */ if (cp->flags & IP_VS_CONN_F_ONE_PACKET) goto control; sloop: + if (!ip_vs_sync_conn_needed(ipvs, cp, pkts)) + goto control; + /* Sanity checks */ pe_name_len = 0; if (cp->pe_data_len) { @@ -541,6 +642,13 @@ sloop: } spin_lock(&ipvs->sync_buff_lock); + if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) { + spin_unlock(&ipvs->sync_buff_lock); + return; + } + + id = select_master_thread_id(ipvs, cp); + ms = &ipvs->ms[id]; #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6) @@ -559,27 +667,32 @@ sloop: /* check if there is a space for this one */ pad = 0; - if (ipvs->sync_buff) { - pad = (4 - (size_t)ipvs->sync_buff->head) & 3; - if (ipvs->sync_buff->head + len + pad > ipvs->sync_buff->end) { - sb_queue_tail(ipvs); - ipvs->sync_buff = NULL; + buff = ms->sync_buff; + if (buff) { + m = buff->mesg; + pad = (4 - (size_t) buff->head) & 3; + /* Send buffer if it is for v0 */ + if (buff->head + len + pad > buff->end || m->reserved) { + sb_queue_tail(ipvs, ms); + ms->sync_buff = NULL; + buff = NULL; pad = 0; } } - if (!ipvs->sync_buff) { - ipvs->sync_buff = ip_vs_sync_buff_create(ipvs); - if (!ipvs->sync_buff) { + if (!buff) { + buff = ip_vs_sync_buff_create(ipvs); + if (!buff) { spin_unlock(&ipvs->sync_buff_lock); pr_err("ip_vs_sync_buff_create failed.\n"); return; } + ms->sync_buff = buff; + m = buff->mesg; } - m = ipvs->sync_buff->mesg; - p = ipvs->sync_buff->head; - ipvs->sync_buff->head += pad + len; + p = buff->head; + buff->head += pad + len; m->size += pad + len; /* Add ev. padding from prev. sync_conn */ while (pad--) @@ -644,16 +757,10 @@ control: cp = cp->control; if (!cp) return; - /* - * Reduce sync rate for templates - * i.e only increment in_pkts for Templates. - */ - if (cp->flags & IP_VS_CONN_F_TEMPLATE) { - int pkts = atomic_add_return(1, &cp->in_pkts); - - if (pkts % sysctl_sync_period(ipvs) != 1) - return; - } + if (cp->flags & IP_VS_CONN_F_TEMPLATE) + pkts = atomic_add_return(1, &cp->in_pkts); + else + pkts = sysctl_sync_threshold(ipvs); goto sloop; } @@ -731,9 +838,32 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, else cp = ip_vs_ct_in_get(param); - if (cp && param->pe_data) /* Free pe_data */ + if (cp) { + /* Free pe_data */ kfree(param->pe_data); - if (!cp) { + + dest = cp->dest; + spin_lock(&cp->lock); + if ((cp->flags ^ flags) & IP_VS_CONN_F_INACTIVE && + !(flags & IP_VS_CONN_F_TEMPLATE) && dest) { + if (flags & IP_VS_CONN_F_INACTIVE) { + atomic_dec(&dest->activeconns); + atomic_inc(&dest->inactconns); + } else { + atomic_inc(&dest->activeconns); + atomic_dec(&dest->inactconns); + } + } + flags &= IP_VS_CONN_F_BACKUP_UPD_MASK; + flags |= cp->flags & ~IP_VS_CONN_F_BACKUP_UPD_MASK; + cp->flags = flags; + spin_unlock(&cp->lock); + if (!dest) { + dest = ip_vs_try_bind_dest(cp); + if (dest) + atomic_dec(&dest->refcnt); + } + } else { /* * Find the appropriate destination for the connection. * If it is not found the connection will remain unbound @@ -742,18 +872,6 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr, param->vport, protocol, fwmark, flags); - /* Set the approprite ativity flag */ - if (protocol == IPPROTO_TCP) { - if (state != IP_VS_TCP_S_ESTABLISHED) - flags |= IP_VS_CONN_F_INACTIVE; - else - flags &= ~IP_VS_CONN_F_INACTIVE; - } else if (protocol == IPPROTO_SCTP) { - if (state != IP_VS_SCTP_S_ESTABLISHED) - flags |= IP_VS_CONN_F_INACTIVE; - else - flags &= ~IP_VS_CONN_F_INACTIVE; - } cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark); if (dest) atomic_dec(&dest->refcnt); @@ -763,34 +881,6 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, IP_VS_DBG(2, "BACKUP, add new conn. failed\n"); return; } - } else if (!cp->dest) { - dest = ip_vs_try_bind_dest(cp); - if (dest) - atomic_dec(&dest->refcnt); - } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) && - (cp->state != state)) { - /* update active/inactive flag for the connection */ - dest = cp->dest; - if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && - (state != IP_VS_TCP_S_ESTABLISHED)) { - atomic_dec(&dest->activeconns); - atomic_inc(&dest->inactconns); - cp->flags |= IP_VS_CONN_F_INACTIVE; - } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) && - (state == IP_VS_TCP_S_ESTABLISHED)) { - atomic_inc(&dest->activeconns); - atomic_dec(&dest->inactconns); - cp->flags &= ~IP_VS_CONN_F_INACTIVE; - } - } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) && - (cp->state != state)) { - dest = cp->dest; - if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && - (state != IP_VS_SCTP_S_ESTABLISHED)) { - atomic_dec(&dest->activeconns); - atomic_inc(&dest->inactconns); - cp->flags &= ~IP_VS_CONN_F_INACTIVE; - } } if (opt) @@ -839,7 +929,7 @@ static void ip_vs_process_message_v0(struct net *net, const char *buffer, p = (char *)buffer + sizeof(struct ip_vs_sync_mesg_v0); for (i=0; i<m->nr_conns; i++) { - unsigned flags, state; + unsigned int flags, state; if (p + SIMPLE_CONN_SIZE > buffer+buflen) { IP_VS_ERR_RL("BACKUP v0, bogus conn\n"); @@ -1109,7 +1199,7 @@ static void ip_vs_process_message(struct net *net, __u8 *buffer, for (i=0; i<nr_conns; i++) { union ip_vs_sync_conn *s; - unsigned size; + unsigned int size; int retc; p = msg_end; @@ -1149,6 +1239,28 @@ static void ip_vs_process_message(struct net *net, __u8 *buffer, /* + * Setup sndbuf (mode=1) or rcvbuf (mode=0) + */ +static void set_sock_size(struct sock *sk, int mode, int val) +{ + /* setsockopt(sock, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val)); */ + /* setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val)); */ + lock_sock(sk); + if (mode) { + val = clamp_t(int, val, (SOCK_MIN_SNDBUF + 1) / 2, + sysctl_wmem_max); + sk->sk_sndbuf = val * 2; + sk->sk_userlocks |= SOCK_SNDBUF_LOCK; + } else { + val = clamp_t(int, val, (SOCK_MIN_RCVBUF + 1) / 2, + sysctl_rmem_max); + sk->sk_rcvbuf = val * 2; + sk->sk_userlocks |= SOCK_RCVBUF_LOCK; + } + release_sock(sk); +} + +/* * Setup loopback of outgoing multicasts on a sending socket */ static void set_mcast_loop(struct sock *sk, u_char loop) @@ -1298,9 +1410,15 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname) /* * Set up sending multicast socket over UDP */ -static struct socket *make_send_sock(struct net *net) +static struct socket *make_send_sock(struct net *net, int id) { struct netns_ipvs *ipvs = net_ipvs(net); + /* multicast addr */ + struct sockaddr_in mcast_addr = { + .sin_family = AF_INET, + .sin_port = cpu_to_be16(IP_VS_SYNC_PORT + id), + .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP), + }; struct socket *sock; int result; @@ -1324,6 +1442,9 @@ static struct socket *make_send_sock(struct net *net) set_mcast_loop(sock->sk, 0); set_mcast_ttl(sock->sk, 1); + result = sysctl_sync_sock_size(ipvs); + if (result > 0) + set_sock_size(sock->sk, 1, result); result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn); if (result < 0) { @@ -1349,9 +1470,15 @@ error: /* * Set up receiving multicast socket over UDP */ -static struct socket *make_receive_sock(struct net *net) +static struct socket *make_receive_sock(struct net *net, int id) { struct netns_ipvs *ipvs = net_ipvs(net); + /* multicast addr */ + struct sockaddr_in mcast_addr = { + .sin_family = AF_INET, + .sin_port = cpu_to_be16(IP_VS_SYNC_PORT + id), + .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP), + }; struct socket *sock; int result; @@ -1368,7 +1495,10 @@ static struct socket *make_receive_sock(struct net *net) */ sk_change_net(sock->sk, net); /* it is equivalent to the REUSEADDR option in user-space */ - sock->sk->sk_reuse = 1; + sock->sk->sk_reuse = SK_CAN_REUSE; + result = sysctl_sync_sock_size(ipvs); + if (result > 0) + set_sock_size(sock->sk, 0, result); result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr, sizeof(struct sockaddr)); @@ -1411,18 +1541,22 @@ ip_vs_send_async(struct socket *sock, const char *buffer, const size_t length) return len; } -static void +static int ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg) { int msize; + int ret; msize = msg->size; /* Put size in network byte order */ msg->size = htons(msg->size); - if (ip_vs_send_async(sock, (char *)msg, msize) != msize) - pr_err("ip_vs_send_async error\n"); + ret = ip_vs_send_async(sock, (char *)msg, msize); + if (ret >= 0 || ret == -EAGAIN) + return ret; + pr_err("ip_vs_send_async error %d\n", ret); + return 0; } static int @@ -1438,48 +1572,90 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen) iov.iov_base = buffer; iov.iov_len = (size_t)buflen; - len = kernel_recvmsg(sock, &msg, &iov, 1, buflen, 0); + len = kernel_recvmsg(sock, &msg, &iov, 1, buflen, MSG_DONTWAIT); if (len < 0) - return -1; + return len; LeaveFunction(7); return len; } +/* Wakeup the master thread for sending */ +static void master_wakeup_work_handler(struct work_struct *work) +{ + struct ipvs_master_sync_state *ms = + container_of(work, struct ipvs_master_sync_state, + master_wakeup_work.work); + struct netns_ipvs *ipvs = ms->ipvs; + + spin_lock_bh(&ipvs->sync_lock); + if (ms->sync_queue_len && + ms->sync_queue_delay < IPVS_SYNC_WAKEUP_RATE) { + ms->sync_queue_delay = IPVS_SYNC_WAKEUP_RATE; + wake_up_process(ms->master_thread); + } + spin_unlock_bh(&ipvs->sync_lock); +} + +/* Get next buffer to send */ +static inline struct ip_vs_sync_buff * +next_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms) +{ + struct ip_vs_sync_buff *sb; + + sb = sb_dequeue(ipvs, ms); + if (sb) + return sb; + /* Do not delay entries in buffer for more than 2 seconds */ + return get_curr_sync_buff(ipvs, ms, IPVS_SYNC_FLUSH_TIME); +} static int sync_thread_master(void *data) { struct ip_vs_sync_thread_data *tinfo = data; struct netns_ipvs *ipvs = net_ipvs(tinfo->net); + struct ipvs_master_sync_state *ms = &ipvs->ms[tinfo->id]; + struct sock *sk = tinfo->sock->sk; struct ip_vs_sync_buff *sb; pr_info("sync thread started: state = MASTER, mcast_ifn = %s, " - "syncid = %d\n", - ipvs->master_mcast_ifn, ipvs->master_syncid); + "syncid = %d, id = %d\n", + ipvs->master_mcast_ifn, ipvs->master_syncid, tinfo->id); - while (!kthread_should_stop()) { - while ((sb = sb_dequeue(ipvs))) { - ip_vs_send_sync_msg(tinfo->sock, sb->mesg); - ip_vs_sync_buff_release(sb); + for (;;) { + sb = next_sync_buff(ipvs, ms); + if (unlikely(kthread_should_stop())) + break; + if (!sb) { + schedule_timeout(IPVS_SYNC_CHECK_PERIOD); + continue; } - - /* check if entries stay in ipvs->sync_buff for 2 seconds */ - sb = get_curr_sync_buff(ipvs, 2 * HZ); - if (sb) { - ip_vs_send_sync_msg(tinfo->sock, sb->mesg); - ip_vs_sync_buff_release(sb); + while (ip_vs_send_sync_msg(tinfo->sock, sb->mesg) < 0) { + int ret = 0; + + __wait_event_interruptible(*sk_sleep(sk), + sock_writeable(sk) || + kthread_should_stop(), + ret); + if (unlikely(kthread_should_stop())) + goto done; } - - schedule_timeout_interruptible(HZ); + ip_vs_sync_buff_release(sb); } +done: + __set_current_state(TASK_RUNNING); + if (sb) + ip_vs_sync_buff_release(sb); + /* clean up the sync_buff queue */ - while ((sb = sb_dequeue(ipvs))) + while ((sb = sb_dequeue(ipvs, ms))) ip_vs_sync_buff_release(sb); + __set_current_state(TASK_RUNNING); /* clean up the current sync_buff */ - sb = get_curr_sync_buff(ipvs, 0); + sb = get_curr_sync_buff(ipvs, ms, 0); if (sb) ip_vs_sync_buff_release(sb); @@ -1498,8 +1674,8 @@ static int sync_thread_backup(void *data) int len; pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, " - "syncid = %d\n", - ipvs->backup_mcast_ifn, ipvs->backup_syncid); + "syncid = %d, id = %d\n", + ipvs->backup_mcast_ifn, ipvs->backup_syncid, tinfo->id); while (!kthread_should_stop()) { wait_event_interruptible(*sk_sleep(tinfo->sock->sk), @@ -1511,7 +1687,8 @@ static int sync_thread_backup(void *data) len = ip_vs_receive(tinfo->sock, tinfo->buf, ipvs->recv_mesg_maxlen); if (len <= 0) { - pr_err("receiving message error\n"); + if (len != -EAGAIN) + pr_err("receiving message error\n"); break; } @@ -1535,86 +1712,140 @@ static int sync_thread_backup(void *data) int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid) { struct ip_vs_sync_thread_data *tinfo; - struct task_struct **realtask, *task; + struct task_struct **array = NULL, *task; struct socket *sock; struct netns_ipvs *ipvs = net_ipvs(net); - char *name, *buf = NULL; + char *name; int (*threadfn)(void *data); + int id, count; int result = -ENOMEM; IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n", sizeof(struct ip_vs_sync_conn_v0)); + if (!ipvs->sync_state) { + count = clamp(sysctl_sync_ports(ipvs), 1, IPVS_SYNC_PORTS_MAX); + ipvs->threads_mask = count - 1; + } else + count = ipvs->threads_mask + 1; if (state == IP_VS_STATE_MASTER) { - if (ipvs->master_thread) + if (ipvs->ms) return -EEXIST; strlcpy(ipvs->master_mcast_ifn, mcast_ifn, sizeof(ipvs->master_mcast_ifn)); ipvs->master_syncid = syncid; - realtask = &ipvs->master_thread; - name = "ipvs_master:%d"; + name = "ipvs-m:%d:%d"; threadfn = sync_thread_master; - sock = make_send_sock(net); } else if (state == IP_VS_STATE_BACKUP) { - if (ipvs->backup_thread) + if (ipvs->backup_threads) return -EEXIST; strlcpy(ipvs->backup_mcast_ifn, mcast_ifn, sizeof(ipvs->backup_mcast_ifn)); ipvs->backup_syncid = syncid; - realtask = &ipvs->backup_thread; - name = "ipvs_backup:%d"; + name = "ipvs-b:%d:%d"; threadfn = sync_thread_backup; - sock = make_receive_sock(net); } else { return -EINVAL; } - if (IS_ERR(sock)) { - result = PTR_ERR(sock); - goto out; - } + if (state == IP_VS_STATE_MASTER) { + struct ipvs_master_sync_state *ms; - set_sync_mesg_maxlen(net, state); - if (state == IP_VS_STATE_BACKUP) { - buf = kmalloc(ipvs->recv_mesg_maxlen, GFP_KERNEL); - if (!buf) - goto outsocket; + ipvs->ms = kzalloc(count * sizeof(ipvs->ms[0]), GFP_KERNEL); + if (!ipvs->ms) + goto out; + ms = ipvs->ms; + for (id = 0; id < count; id++, ms++) { + INIT_LIST_HEAD(&ms->sync_queue); + ms->sync_queue_len = 0; + ms->sync_queue_delay = 0; + INIT_DELAYED_WORK(&ms->master_wakeup_work, + master_wakeup_work_handler); + ms->ipvs = ipvs; + } + } else { + array = kzalloc(count * sizeof(struct task_struct *), + GFP_KERNEL); + if (!array) + goto out; } + set_sync_mesg_maxlen(net, state); - tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL); - if (!tinfo) - goto outbuf; - - tinfo->net = net; - tinfo->sock = sock; - tinfo->buf = buf; + tinfo = NULL; + for (id = 0; id < count; id++) { + if (state == IP_VS_STATE_MASTER) + sock = make_send_sock(net, id); + else + sock = make_receive_sock(net, id); + if (IS_ERR(sock)) { + result = PTR_ERR(sock); + goto outtinfo; + } + tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL); + if (!tinfo) + goto outsocket; + tinfo->net = net; + tinfo->sock = sock; + if (state == IP_VS_STATE_BACKUP) { + tinfo->buf = kmalloc(ipvs->recv_mesg_maxlen, + GFP_KERNEL); + if (!tinfo->buf) + goto outtinfo; + } + tinfo->id = id; - task = kthread_run(threadfn, tinfo, name, ipvs->gen); - if (IS_ERR(task)) { - result = PTR_ERR(task); - goto outtinfo; + task = kthread_run(threadfn, tinfo, name, ipvs->gen, id); + if (IS_ERR(task)) { + result = PTR_ERR(task); + goto outtinfo; + } + tinfo = NULL; + if (state == IP_VS_STATE_MASTER) + ipvs->ms[id].master_thread = task; + else + array[id] = task; } /* mark as active */ - *realtask = task; + + if (state == IP_VS_STATE_BACKUP) + ipvs->backup_threads = array; + spin_lock_bh(&ipvs->sync_buff_lock); ipvs->sync_state |= state; + spin_unlock_bh(&ipvs->sync_buff_lock); /* increase the module use count */ ip_vs_use_count_inc(); return 0; -outtinfo: - kfree(tinfo); -outbuf: - kfree(buf); outsocket: sk_release_kernel(sock->sk); + +outtinfo: + if (tinfo) { + sk_release_kernel(tinfo->sock->sk); + kfree(tinfo->buf); + kfree(tinfo); + } + count = id; + while (count-- > 0) { + if (state == IP_VS_STATE_MASTER) + kthread_stop(ipvs->ms[count].master_thread); + else + kthread_stop(array[count]); + } + kfree(array); + out: + if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) { + kfree(ipvs->ms); + ipvs->ms = NULL; + } return result; } @@ -1622,38 +1853,60 @@ out: int stop_sync_thread(struct net *net, int state) { struct netns_ipvs *ipvs = net_ipvs(net); + struct task_struct **array; + int id; int retc = -EINVAL; IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); if (state == IP_VS_STATE_MASTER) { - if (!ipvs->master_thread) + if (!ipvs->ms) return -ESRCH; - pr_info("stopping master sync thread %d ...\n", - task_pid_nr(ipvs->master_thread)); - /* * The lock synchronizes with sb_queue_tail(), so that we don't * add sync buffers to the queue, when we are already in * progress of stopping the master sync daemon. */ - spin_lock_bh(&ipvs->sync_lock); + spin_lock_bh(&ipvs->sync_buff_lock); + spin_lock(&ipvs->sync_lock); ipvs->sync_state &= ~IP_VS_STATE_MASTER; - spin_unlock_bh(&ipvs->sync_lock); - retc = kthread_stop(ipvs->master_thread); - ipvs->master_thread = NULL; + spin_unlock(&ipvs->sync_lock); + spin_unlock_bh(&ipvs->sync_buff_lock); + + retc = 0; + for (id = ipvs->threads_mask; id >= 0; id--) { + struct ipvs_master_sync_state *ms = &ipvs->ms[id]; + int ret; + + pr_info("stopping master sync thread %d ...\n", + task_pid_nr(ms->master_thread)); + cancel_delayed_work_sync(&ms->master_wakeup_work); + ret = kthread_stop(ms->master_thread); + if (retc >= 0) + retc = ret; + } + kfree(ipvs->ms); + ipvs->ms = NULL; } else if (state == IP_VS_STATE_BACKUP) { - if (!ipvs->backup_thread) + if (!ipvs->backup_threads) return -ESRCH; - pr_info("stopping backup sync thread %d ...\n", - task_pid_nr(ipvs->backup_thread)); - ipvs->sync_state &= ~IP_VS_STATE_BACKUP; - retc = kthread_stop(ipvs->backup_thread); - ipvs->backup_thread = NULL; + array = ipvs->backup_threads; + retc = 0; + for (id = ipvs->threads_mask; id >= 0; id--) { + int ret; + + pr_info("stopping backup sync thread %d ...\n", + task_pid_nr(array[id])); + ret = kthread_stop(array[id]); + if (retc >= 0) + retc = ret; + } + kfree(array); + ipvs->backup_threads = NULL; } /* decrease the module use count */ @@ -1670,13 +1923,8 @@ int __net_init ip_vs_sync_net_init(struct net *net) struct netns_ipvs *ipvs = net_ipvs(net); __mutex_init(&ipvs->sync_mutex, "ipvs->sync_mutex", &__ipvs_sync_key); - INIT_LIST_HEAD(&ipvs->sync_queue); spin_lock_init(&ipvs->sync_lock); spin_lock_init(&ipvs->sync_buff_lock); - - ipvs->sync_mcast_addr.sin_family = AF_INET; - ipvs->sync_mcast_addr.sin_port = cpu_to_be16(IP_VS_SYNC_PORT); - ipvs->sync_mcast_addr.sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP); return 0; } diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c index fd0d4e09876..231be7dd547 100644 --- a/net/netfilter/ipvs/ip_vs_wrr.c +++ b/net/netfilter/ipvs/ip_vs_wrr.c @@ -84,7 +84,7 @@ static int ip_vs_wrr_init_svc(struct ip_vs_service *svc) /* * Allocate the mark variable for WRR scheduling */ - mark = kmalloc(sizeof(struct ip_vs_wrr_mark), GFP_ATOMIC); + mark = kmalloc(sizeof(struct ip_vs_wrr_mark), GFP_KERNEL); if (mark == NULL) return -ENOMEM; diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c index f4f8cda0598..d61e0782a79 100644 --- a/net/netfilter/nf_conntrack_acct.c +++ b/net/netfilter/nf_conntrack_acct.c @@ -69,8 +69,8 @@ static int nf_conntrack_acct_init_sysctl(struct net *net) table[0].data = &net->ct.sysctl_acct; - net->ct.acct_sysctl_header = register_net_sysctl_table(net, - nf_net_netfilter_sysctl_path, table); + net->ct.acct_sysctl_header = register_net_sysctl(net, "net/netfilter", + table); if (!net->ct.acct_sysctl_header) { printk(KERN_ERR "nf_conntrack_acct: can't register to sysctl.\n"); goto out_register; diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c index 13fd2c55e32..f2de8c55ac5 100644 --- a/net/netfilter/nf_conntrack_amanda.c +++ b/net/netfilter/nf_conntrack_amanda.c @@ -107,8 +107,7 @@ static int amanda_help(struct sk_buff *skb, /* No data? */ dataoff = protoff + sizeof(struct udphdr); if (dataoff >= skb->len) { - if (net_ratelimit()) - printk(KERN_ERR "amanda_help: skblen = %u\n", skb->len); + net_err_ratelimited("amanda_help: skblen = %u\n", skb->len); return NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 729f157a0ef..ac3af97cc46 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -683,10 +683,7 @@ __nf_conntrack_alloc(struct net *net, u16 zone, unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) { if (!early_drop(net, hash_bucket(hash, net))) { atomic_dec(&net->ct.count); - if (net_ratelimit()) - printk(KERN_WARNING - "nf_conntrack: table full, dropping" - " packet.\n"); + net_warn_ratelimited("nf_conntrack: table full, dropping packet\n"); return ERR_PTR(-ENOMEM); } } @@ -1152,8 +1149,9 @@ static struct nf_ct_ext_type nf_ct_zone_extend __read_mostly = { int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple) { - NLA_PUT_BE16(skb, CTA_PROTO_SRC_PORT, tuple->src.u.tcp.port); - NLA_PUT_BE16(skb, CTA_PROTO_DST_PORT, tuple->dst.u.tcp.port); + if (nla_put_be16(skb, CTA_PROTO_SRC_PORT, tuple->src.u.tcp.port) || + nla_put_be16(skb, CTA_PROTO_DST_PORT, tuple->dst.u.tcp.port)) + goto nla_put_failure; return 0; nla_put_failure: @@ -1335,7 +1333,6 @@ static void nf_conntrack_cleanup_init_net(void) while (untrack_refs() > 0) schedule(); - nf_conntrack_helper_fini(); nf_conntrack_proto_fini(); #ifdef CONFIG_NF_CONNTRACK_ZONES nf_ct_extend_unregister(&nf_ct_zone_extend); @@ -1353,6 +1350,7 @@ static void nf_conntrack_cleanup_net(struct net *net) } nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); + nf_conntrack_helper_fini(net); nf_conntrack_timeout_fini(net); nf_conntrack_ecache_fini(net); nf_conntrack_tstamp_fini(net); @@ -1503,10 +1501,6 @@ static int nf_conntrack_init_init_net(void) if (ret < 0) goto err_proto; - ret = nf_conntrack_helper_init(); - if (ret < 0) - goto err_helper; - #ifdef CONFIG_NF_CONNTRACK_ZONES ret = nf_ct_extend_register(&nf_ct_zone_extend); if (ret < 0) @@ -1524,10 +1518,8 @@ static int nf_conntrack_init_init_net(void) #ifdef CONFIG_NF_CONNTRACK_ZONES err_extend: - nf_conntrack_helper_fini(); -#endif -err_helper: nf_conntrack_proto_fini(); +#endif err_proto: return ret; } @@ -1588,9 +1580,14 @@ static int nf_conntrack_init_net(struct net *net) ret = nf_conntrack_timeout_init(net); if (ret < 0) goto err_timeout; + ret = nf_conntrack_helper_init(net); + if (ret < 0) + goto err_helper; return 0; +err_helper: + nf_conntrack_timeout_fini(net); err_timeout: nf_conntrack_ecache_fini(net); err_ecache: diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 5bd3047ddee..e7be79e640d 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -84,7 +84,7 @@ EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events); int nf_conntrack_register_notifier(struct net *net, struct nf_ct_event_notifier *new) { - int ret = 0; + int ret; struct nf_ct_event_notifier *notify; mutex_lock(&nf_ct_ecache_mutex); @@ -95,8 +95,7 @@ int nf_conntrack_register_notifier(struct net *net, goto out_unlock; } rcu_assign_pointer(net->ct.nf_conntrack_event_cb, new); - mutex_unlock(&nf_ct_ecache_mutex); - return ret; + ret = 0; out_unlock: mutex_unlock(&nf_ct_ecache_mutex); @@ -121,7 +120,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); int nf_ct_expect_register_notifier(struct net *net, struct nf_exp_event_notifier *new) { - int ret = 0; + int ret; struct nf_exp_event_notifier *notify; mutex_lock(&nf_ct_ecache_mutex); @@ -132,8 +131,7 @@ int nf_ct_expect_register_notifier(struct net *net, goto out_unlock; } rcu_assign_pointer(net->ct.nf_expect_event_cb, new); - mutex_unlock(&nf_ct_ecache_mutex); - return ret; + ret = 0; out_unlock: mutex_unlock(&nf_ct_ecache_mutex); @@ -199,8 +197,7 @@ static int nf_conntrack_event_init_sysctl(struct net *net) table[1].data = &net->ct.sysctl_events_retry_timeout; net->ct.event_sysctl_header = - register_net_sysctl_table(net, - nf_net_netfilter_sysctl_path, table); + register_net_sysctl(net, "net/netfilter", table); if (!net->ct.event_sysctl_header) { printk(KERN_ERR "nf_ct_event: can't register to sysctl.\n"); goto out_register; diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 4147ba3f653..45cf602a76b 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -424,9 +424,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) } if (net->ct.expect_count >= nf_ct_expect_max) { - if (net_ratelimit()) - printk(KERN_WARNING - "nf_conntrack: expectation table full\n"); + net_warn_ratelimited("nf_conntrack: expectation table full\n"); ret = -EMFILE; } out: diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 722291f8af7..46d69d7f1bb 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -605,8 +605,7 @@ static int h245_help(struct sk_buff *skb, unsigned int protoff, drop: spin_unlock_bh(&nf_h323_lock); - if (net_ratelimit()) - pr_info("nf_ct_h245: packet dropped\n"); + net_info_ratelimited("nf_ct_h245: packet dropped\n"); return NF_DROP; } @@ -1156,8 +1155,7 @@ static int q931_help(struct sk_buff *skb, unsigned int protoff, drop: spin_unlock_bh(&nf_h323_lock); - if (net_ratelimit()) - pr_info("nf_ct_q931: packet dropped\n"); + net_info_ratelimited("nf_ct_q931: packet dropped\n"); return NF_DROP; } @@ -1230,7 +1228,7 @@ static struct nf_conntrack_expect *find_expect(struct nf_conn *ct, /****************************************************************************/ static int set_expect_timeout(struct nf_conntrack_expect *exp, - unsigned timeout) + unsigned int timeout) { if (!exp || !del_timer(&exp->timeout)) return 0; @@ -1731,8 +1729,7 @@ static int ras_help(struct sk_buff *skb, unsigned int protoff, drop: spin_unlock_bh(&nf_h323_lock); - if (net_ratelimit()) - pr_info("nf_ct_ras: packet dropped\n"); + net_info_ratelimited("nf_ct_ras: packet dropped\n"); return NF_DROP; } @@ -1833,4 +1830,6 @@ MODULE_AUTHOR("Jing Min Zhao <zhaojingmin@users.sourceforge.net>"); MODULE_DESCRIPTION("H.323 connection tracking helper"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ip_conntrack_h323"); -MODULE_ALIAS_NFCT_HELPER("h323"); +MODULE_ALIAS_NFCT_HELPER("RAS"); +MODULE_ALIAS_NFCT_HELPER("Q.931"); +MODULE_ALIAS_NFCT_HELPER("H.245"); diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 436b7cb79ba..4fa2ff961f5 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -34,6 +34,67 @@ static struct hlist_head *nf_ct_helper_hash __read_mostly; static unsigned int nf_ct_helper_hsize __read_mostly; static unsigned int nf_ct_helper_count __read_mostly; +static bool nf_ct_auto_assign_helper __read_mostly = true; +module_param_named(nf_conntrack_helper, nf_ct_auto_assign_helper, bool, 0644); +MODULE_PARM_DESC(nf_conntrack_helper, + "Enable automatic conntrack helper assignment (default 1)"); + +#ifdef CONFIG_SYSCTL +static struct ctl_table helper_sysctl_table[] = { + { + .procname = "nf_conntrack_helper", + .data = &init_net.ct.sysctl_auto_assign_helper, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + {} +}; + +static int nf_conntrack_helper_init_sysctl(struct net *net) +{ + struct ctl_table *table; + + table = kmemdup(helper_sysctl_table, sizeof(helper_sysctl_table), + GFP_KERNEL); + if (!table) + goto out; + + table[0].data = &net->ct.sysctl_auto_assign_helper; + + net->ct.helper_sysctl_header = + register_net_sysctl(net, "net/netfilter", table); + + if (!net->ct.helper_sysctl_header) { + pr_err("nf_conntrack_helper: can't register to sysctl.\n"); + goto out_register; + } + return 0; + +out_register: + kfree(table); +out: + return -ENOMEM; +} + +static void nf_conntrack_helper_fini_sysctl(struct net *net) +{ + struct ctl_table *table; + + table = net->ct.helper_sysctl_header->ctl_table_arg; + unregister_net_sysctl_table(net->ct.helper_sysctl_header); + kfree(table); +} +#else +static int nf_conntrack_helper_init_sysctl(struct net *net) +{ + return 0; +} + +static void nf_conntrack_helper_fini_sysctl(struct net *net) +{ +} +#endif /* CONFIG_SYSCTL */ /* Stupid hash, but collision free for the default registrations of the * helpers currently in the kernel. */ @@ -118,17 +179,38 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, { struct nf_conntrack_helper *helper = NULL; struct nf_conn_help *help; + struct net *net = nf_ct_net(ct); int ret = 0; + /* We already got a helper explicitly attached. The function + * nf_conntrack_alter_reply - in case NAT is in use - asks for looking + * the helper up again. Since now the user is in full control of + * making consistent helper configurations, skip this automatic + * re-lookup, otherwise we'll lose the helper. + */ + if (test_bit(IPS_HELPER_BIT, &ct->status)) + return 0; + if (tmpl != NULL) { help = nfct_help(tmpl); - if (help != NULL) + if (help != NULL) { helper = help->helper; + set_bit(IPS_HELPER_BIT, &ct->status); + } } help = nfct_help(ct); - if (helper == NULL) + if (net->ct.sysctl_auto_assign_helper && helper == NULL) { helper = __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); + if (unlikely(!net->ct.auto_assign_helper_warned && helper)) { + pr_info("nf_conntrack: automatic helper " + "assignment is deprecated and it will " + "be removed soon. Use the iptables CT target " + "to attach helpers instead.\n"); + net->ct.auto_assign_helper_warned = true; + } + } + if (helper == NULL) { if (help) RCU_INIT_POINTER(help->helper, NULL); @@ -315,28 +397,44 @@ static struct nf_ct_ext_type helper_extend __read_mostly = { .id = NF_CT_EXT_HELPER, }; -int nf_conntrack_helper_init(void) +int nf_conntrack_helper_init(struct net *net) { int err; - nf_ct_helper_hsize = 1; /* gets rounded up to use one page */ - nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0); - if (!nf_ct_helper_hash) - return -ENOMEM; + net->ct.auto_assign_helper_warned = false; + net->ct.sysctl_auto_assign_helper = nf_ct_auto_assign_helper; + + if (net_eq(net, &init_net)) { + nf_ct_helper_hsize = 1; /* gets rounded up to use one page */ + nf_ct_helper_hash = + nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0); + if (!nf_ct_helper_hash) + return -ENOMEM; - err = nf_ct_extend_register(&helper_extend); + err = nf_ct_extend_register(&helper_extend); + if (err < 0) + goto err1; + } + + err = nf_conntrack_helper_init_sysctl(net); if (err < 0) - goto err1; + goto out_sysctl; return 0; +out_sysctl: + if (net_eq(net, &init_net)) + nf_ct_extend_unregister(&helper_extend); err1: nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize); return err; } -void nf_conntrack_helper_fini(void) +void nf_conntrack_helper_fini(struct net *net) { - nf_ct_extend_unregister(&helper_extend); - nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize); + nf_conntrack_helper_fini_sysctl(net); + if (net_eq(net, &init_net)) { + nf_ct_extend_unregister(&helper_extend); + nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize); + } } diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 4f9390b9869..81366c11827 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -185,11 +185,9 @@ static int help(struct sk_buff *skb, unsigned int protoff, tuple = &ct->tuplehash[dir].tuple; if (tuple->src.u3.ip != dcc_ip && tuple->dst.u3.ip != dcc_ip) { - if (net_ratelimit()) - printk(KERN_WARNING - "Forged DCC command from %pI4: %pI4:%u\n", - &tuple->src.u3.ip, - &dcc_ip, dcc_port); + net_warn_ratelimited("Forged DCC command from %pI4: %pI4:%u\n", + &tuple->src.u3.ip, + &dcc_ip, dcc_port); continue; } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index ca7e8354e4f..6f4b00a8fc7 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -66,7 +66,8 @@ ctnetlink_dump_tuples_proto(struct sk_buff *skb, nest_parms = nla_nest_start(skb, CTA_TUPLE_PROTO | NLA_F_NESTED); if (!nest_parms) goto nla_put_failure; - NLA_PUT_U8(skb, CTA_PROTO_NUM, tuple->dst.protonum); + if (nla_put_u8(skb, CTA_PROTO_NUM, tuple->dst.protonum)) + goto nla_put_failure; if (likely(l4proto->tuple_to_nlattr)) ret = l4proto->tuple_to_nlattr(skb, tuple); @@ -126,7 +127,8 @@ ctnetlink_dump_tuples(struct sk_buff *skb, static inline int ctnetlink_dump_status(struct sk_buff *skb, const struct nf_conn *ct) { - NLA_PUT_BE32(skb, CTA_STATUS, htonl(ct->status)); + if (nla_put_be32(skb, CTA_STATUS, htonl(ct->status))) + goto nla_put_failure; return 0; nla_put_failure: @@ -141,7 +143,8 @@ ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct) if (timeout < 0) timeout = 0; - NLA_PUT_BE32(skb, CTA_TIMEOUT, htonl(timeout)); + if (nla_put_be32(skb, CTA_TIMEOUT, htonl(timeout))) + goto nla_put_failure; return 0; nla_put_failure: @@ -190,7 +193,8 @@ ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct nf_conn *ct) nest_helper = nla_nest_start(skb, CTA_HELP | NLA_F_NESTED); if (!nest_helper) goto nla_put_failure; - NLA_PUT_STRING(skb, CTA_HELP_NAME, helper->name); + if (nla_put_string(skb, CTA_HELP_NAME, helper->name)) + goto nla_put_failure; if (helper->to_nlattr) helper->to_nlattr(skb, ct); @@ -214,8 +218,9 @@ dump_counters(struct sk_buff *skb, u64 pkts, u64 bytes, if (!nest_count) goto nla_put_failure; - NLA_PUT_BE64(skb, CTA_COUNTERS_PACKETS, cpu_to_be64(pkts)); - NLA_PUT_BE64(skb, CTA_COUNTERS_BYTES, cpu_to_be64(bytes)); + if (nla_put_be64(skb, CTA_COUNTERS_PACKETS, cpu_to_be64(pkts)) || + nla_put_be64(skb, CTA_COUNTERS_BYTES, cpu_to_be64(bytes))) + goto nla_put_failure; nla_nest_end(skb, nest_count); @@ -260,11 +265,10 @@ ctnetlink_dump_timestamp(struct sk_buff *skb, const struct nf_conn *ct) if (!nest_count) goto nla_put_failure; - NLA_PUT_BE64(skb, CTA_TIMESTAMP_START, cpu_to_be64(tstamp->start)); - if (tstamp->stop != 0) { - NLA_PUT_BE64(skb, CTA_TIMESTAMP_STOP, - cpu_to_be64(tstamp->stop)); - } + if (nla_put_be64(skb, CTA_TIMESTAMP_START, cpu_to_be64(tstamp->start)) || + (tstamp->stop != 0 && nla_put_be64(skb, CTA_TIMESTAMP_STOP, + cpu_to_be64(tstamp->stop)))) + goto nla_put_failure; nla_nest_end(skb, nest_count); return 0; @@ -277,7 +281,8 @@ nla_put_failure: static inline int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct) { - NLA_PUT_BE32(skb, CTA_MARK, htonl(ct->mark)); + if (nla_put_be32(skb, CTA_MARK, htonl(ct->mark))) + goto nla_put_failure; return 0; nla_put_failure: @@ -304,7 +309,8 @@ ctnetlink_dump_secctx(struct sk_buff *skb, const struct nf_conn *ct) if (!nest_secctx) goto nla_put_failure; - NLA_PUT_STRING(skb, CTA_SECCTX_NAME, secctx); + if (nla_put_string(skb, CTA_SECCTX_NAME, secctx)) + goto nla_put_failure; nla_nest_end(skb, nest_secctx); ret = 0; @@ -349,12 +355,13 @@ dump_nat_seq_adj(struct sk_buff *skb, const struct nf_nat_seq *natseq, int type) if (!nest_parms) goto nla_put_failure; - NLA_PUT_BE32(skb, CTA_NAT_SEQ_CORRECTION_POS, - htonl(natseq->correction_pos)); - NLA_PUT_BE32(skb, CTA_NAT_SEQ_OFFSET_BEFORE, - htonl(natseq->offset_before)); - NLA_PUT_BE32(skb, CTA_NAT_SEQ_OFFSET_AFTER, - htonl(natseq->offset_after)); + if (nla_put_be32(skb, CTA_NAT_SEQ_CORRECTION_POS, + htonl(natseq->correction_pos)) || + nla_put_be32(skb, CTA_NAT_SEQ_OFFSET_BEFORE, + htonl(natseq->offset_before)) || + nla_put_be32(skb, CTA_NAT_SEQ_OFFSET_AFTER, + htonl(natseq->offset_after))) + goto nla_put_failure; nla_nest_end(skb, nest_parms); @@ -390,7 +397,8 @@ ctnetlink_dump_nat_seq_adj(struct sk_buff *skb, const struct nf_conn *ct) static inline int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct) { - NLA_PUT_BE32(skb, CTA_ID, htonl((unsigned long)ct)); + if (nla_put_be32(skb, CTA_ID, htonl((unsigned long)ct))) + goto nla_put_failure; return 0; nla_put_failure: @@ -400,7 +408,8 @@ nla_put_failure: static inline int ctnetlink_dump_use(struct sk_buff *skb, const struct nf_conn *ct) { - NLA_PUT_BE32(skb, CTA_USE, htonl(atomic_read(&ct->ct_general.use))); + if (nla_put_be32(skb, CTA_USE, htonl(atomic_read(&ct->ct_general.use)))) + goto nla_put_failure; return 0; nla_put_failure: @@ -440,8 +449,9 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, goto nla_put_failure; nla_nest_end(skb, nest_parms); - if (nf_ct_zone(ct)) - NLA_PUT_BE16(skb, CTA_ZONE, htons(nf_ct_zone(ct))); + if (nf_ct_zone(ct) && + nla_put_be16(skb, CTA_ZONE, htons(nf_ct_zone(ct)))) + goto nla_put_failure; if (ctnetlink_dump_status(skb, ct) < 0 || ctnetlink_dump_timeout(skb, ct) < 0 || @@ -617,8 +627,9 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) goto nla_put_failure; nla_nest_end(skb, nest_parms); - if (nf_ct_zone(ct)) - NLA_PUT_BE16(skb, CTA_ZONE, htons(nf_ct_zone(ct))); + if (nf_ct_zone(ct) && + nla_put_be16(skb, CTA_ZONE, htons(nf_ct_zone(ct)))) + goto nla_put_failure; if (ctnetlink_dump_id(skb, ct) < 0) goto nla_put_failure; @@ -1705,7 +1716,8 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, if (!nest_parms) goto nla_put_failure; - NLA_PUT_BE32(skb, CTA_EXPECT_NAT_DIR, htonl(exp->dir)); + if (nla_put_be32(skb, CTA_EXPECT_NAT_DIR, htonl(exp->dir))) + goto nla_put_failure; nat_tuple.src.l3num = nf_ct_l3num(master); nat_tuple.src.u3.ip = exp->saved_ip; @@ -1718,21 +1730,24 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, nla_nest_end(skb, nest_parms); } #endif - NLA_PUT_BE32(skb, CTA_EXPECT_TIMEOUT, htonl(timeout)); - NLA_PUT_BE32(skb, CTA_EXPECT_ID, htonl((unsigned long)exp)); - NLA_PUT_BE32(skb, CTA_EXPECT_FLAGS, htonl(exp->flags)); - NLA_PUT_BE32(skb, CTA_EXPECT_CLASS, htonl(exp->class)); + if (nla_put_be32(skb, CTA_EXPECT_TIMEOUT, htonl(timeout)) || + nla_put_be32(skb, CTA_EXPECT_ID, htonl((unsigned long)exp)) || + nla_put_be32(skb, CTA_EXPECT_FLAGS, htonl(exp->flags)) || + nla_put_be32(skb, CTA_EXPECT_CLASS, htonl(exp->class))) + goto nla_put_failure; help = nfct_help(master); if (help) { struct nf_conntrack_helper *helper; helper = rcu_dereference(help->helper); - if (helper) - NLA_PUT_STRING(skb, CTA_EXPECT_HELP_NAME, helper->name); + if (helper && + nla_put_string(skb, CTA_EXPECT_HELP_NAME, helper->name)) + goto nla_put_failure; } expfn = nf_ct_helper_expectfn_find_by_symbol(exp->expectfn); - if (expfn != NULL) - NLA_PUT_STRING(skb, CTA_EXPECT_FN, expfn->name); + if (expfn != NULL && + nla_put_string(skb, CTA_EXPECT_FN, expfn->name)) + goto nla_put_failure; return 0; @@ -2065,7 +2080,15 @@ static int ctnetlink_change_expect(struct nf_conntrack_expect *x, const struct nlattr * const cda[]) { - return -EOPNOTSUPP; + if (cda[CTA_EXPECT_TIMEOUT]) { + if (!del_timer(&x->timeout)) + return -ETIME; + + x->timeout.expires = jiffies + + ntohl(nla_get_be32(cda[CTA_EXPECT_TIMEOUT])) * HZ; + add_timer(&x->timeout); + } + return 0; } static const struct nla_policy exp_nat_nla_policy[CTA_EXPECT_NAT_MAX+1] = { diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index be3da2c8cdc..8b631b07a64 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -36,11 +36,11 @@ static DEFINE_MUTEX(nf_ct_proto_mutex); #ifdef CONFIG_SYSCTL static int -nf_ct_register_sysctl(struct ctl_table_header **header, struct ctl_path *path, +nf_ct_register_sysctl(struct ctl_table_header **header, const char *path, struct ctl_table *table, unsigned int *users) { if (*header == NULL) { - *header = register_sysctl_paths(path, table); + *header = register_net_sysctl(&init_net, path, table); if (*header == NULL) return -ENOMEM; } @@ -56,7 +56,7 @@ nf_ct_unregister_sysctl(struct ctl_table_header **header, if (users != NULL && --*users > 0) return; - unregister_sysctl_table(*header); + unregister_net_sysctl_table(*header); *header = NULL; } #endif @@ -250,7 +250,7 @@ static int nf_ct_l4proto_register_sysctl(struct nf_conntrack_l4proto *l4proto) #ifdef CONFIG_SYSCTL if (l4proto->ctl_table != NULL) { err = nf_ct_register_sysctl(l4proto->ctl_table_header, - nf_net_netfilter_sysctl_path, + "net/netfilter", l4proto->ctl_table, l4proto->ctl_table_users); if (err < 0) @@ -259,7 +259,7 @@ static int nf_ct_l4proto_register_sysctl(struct nf_conntrack_l4proto *l4proto) #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT if (l4proto->ctl_compat_table != NULL) { err = nf_ct_register_sysctl(&l4proto->ctl_compat_table_header, - nf_net_ipv4_netfilter_sysctl_path, + "net/ipv4/netfilter", l4proto->ctl_compat_table, NULL); if (err == 0) goto out; diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 24fdce256cb..ef706a485be 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -643,11 +643,12 @@ static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, nest_parms = nla_nest_start(skb, CTA_PROTOINFO_DCCP | NLA_F_NESTED); if (!nest_parms) goto nla_put_failure; - NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_STATE, ct->proto.dccp.state); - NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_ROLE, - ct->proto.dccp.role[IP_CT_DIR_ORIGINAL]); - NLA_PUT_BE64(skb, CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ, - cpu_to_be64(ct->proto.dccp.handshake_seq)); + if (nla_put_u8(skb, CTA_PROTOINFO_DCCP_STATE, ct->proto.dccp.state) || + nla_put_u8(skb, CTA_PROTOINFO_DCCP_ROLE, + ct->proto.dccp.role[IP_CT_DIR_ORIGINAL]) || + nla_put_be64(skb, CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ, + cpu_to_be64(ct->proto.dccp.handshake_seq))) + goto nla_put_failure; nla_nest_end(skb, nest_parms); spin_unlock_bh(&ct->lock); return 0; @@ -739,9 +740,10 @@ dccp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) const unsigned int *timeouts = data; int i; - for (i=CTA_TIMEOUT_DCCP_UNSPEC+1; i<CTA_TIMEOUT_DCCP_MAX+1; i++) - NLA_PUT_BE32(skb, i, htonl(timeouts[i] / HZ)); - + for (i=CTA_TIMEOUT_DCCP_UNSPEC+1; i<CTA_TIMEOUT_DCCP_MAX+1; i++) { + if (nla_put_be32(skb, i, htonl(timeouts[i] / HZ))) + goto nla_put_failure; + } return 0; nla_put_failure: @@ -908,8 +910,8 @@ static __net_init int dccp_net_init(struct net *net) dn->sysctl_table[6].data = &dn->dccp_timeout[CT_DCCP_TIMEWAIT]; dn->sysctl_table[7].data = &dn->dccp_loose; - dn->sysctl_header = register_net_sysctl_table(net, - nf_net_netfilter_sysctl_path, dn->sysctl_table); + dn->sysctl_header = register_net_sysctl(net, "net/netfilter", + dn->sysctl_table); if (!dn->sysctl_header) { kfree(dn->sysctl_table); return -ENOMEM; diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index 835e24c58f0..d8923d54b35 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -90,7 +90,8 @@ generic_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) { const unsigned int *timeout = data; - NLA_PUT_BE32(skb, CTA_TIMEOUT_GENERIC_TIMEOUT, htonl(*timeout / HZ)); + if (nla_put_be32(skb, CTA_TIMEOUT_GENERIC_TIMEOUT, htonl(*timeout / HZ))) + goto nla_put_failure; return 0; diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 659648c4b14..4bf6b4e4b77 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -321,10 +321,11 @@ gre_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) { const unsigned int *timeouts = data; - NLA_PUT_BE32(skb, CTA_TIMEOUT_GRE_UNREPLIED, - htonl(timeouts[GRE_CT_UNREPLIED] / HZ)); - NLA_PUT_BE32(skb, CTA_TIMEOUT_GRE_REPLIED, - htonl(timeouts[GRE_CT_REPLIED] / HZ)); + if (nla_put_be32(skb, CTA_TIMEOUT_GRE_UNREPLIED, + htonl(timeouts[GRE_CT_UNREPLIED] / HZ)) || + nla_put_be32(skb, CTA_TIMEOUT_GRE_REPLIED, + htonl(timeouts[GRE_CT_REPLIED] / HZ))) + goto nla_put_failure; return 0; nla_put_failure: diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 72b5088592d..996db2fa21f 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -482,15 +482,12 @@ static int sctp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, if (!nest_parms) goto nla_put_failure; - NLA_PUT_U8(skb, CTA_PROTOINFO_SCTP_STATE, ct->proto.sctp.state); - - NLA_PUT_BE32(skb, - CTA_PROTOINFO_SCTP_VTAG_ORIGINAL, - ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL]); - - NLA_PUT_BE32(skb, - CTA_PROTOINFO_SCTP_VTAG_REPLY, - ct->proto.sctp.vtag[IP_CT_DIR_REPLY]); + if (nla_put_u8(skb, CTA_PROTOINFO_SCTP_STATE, ct->proto.sctp.state) || + nla_put_be32(skb, CTA_PROTOINFO_SCTP_VTAG_ORIGINAL, + ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL]) || + nla_put_be32(skb, CTA_PROTOINFO_SCTP_VTAG_REPLY, + ct->proto.sctp.vtag[IP_CT_DIR_REPLY])) + goto nla_put_failure; spin_unlock_bh(&ct->lock); @@ -578,9 +575,10 @@ sctp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) const unsigned int *timeouts = data; int i; - for (i=CTA_TIMEOUT_SCTP_UNSPEC+1; i<CTA_TIMEOUT_SCTP_MAX+1; i++) - NLA_PUT_BE32(skb, i, htonl(timeouts[i] / HZ)); - + for (i=CTA_TIMEOUT_SCTP_UNSPEC+1; i<CTA_TIMEOUT_SCTP_MAX+1; i++) { + if (nla_put_be32(skb, i, htonl(timeouts[i] / HZ))) + goto nla_put_failure; + } return 0; nla_put_failure: diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 0d07a1dcf60..21ff1a99f53 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -952,7 +952,8 @@ static int tcp_packet(struct nf_conn *ct, spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, - "nf_ct_tcp: invalid packet ignored "); + "nf_ct_tcp: invalid packet ignored in " + "state %s ", tcp_conntrack_names[old_state]); return NF_ACCEPT; case TCP_CONNTRACK_MAX: /* Invalid packet */ @@ -1147,21 +1148,22 @@ static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, if (!nest_parms) goto nla_put_failure; - NLA_PUT_U8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state); - - NLA_PUT_U8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL, - ct->proto.tcp.seen[0].td_scale); - - NLA_PUT_U8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY, - ct->proto.tcp.seen[1].td_scale); + if (nla_put_u8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state) || + nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL, + ct->proto.tcp.seen[0].td_scale) || + nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY, + ct->proto.tcp.seen[1].td_scale)) + goto nla_put_failure; tmp.flags = ct->proto.tcp.seen[0].flags; - NLA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL, - sizeof(struct nf_ct_tcp_flags), &tmp); + if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL, + sizeof(struct nf_ct_tcp_flags), &tmp)) + goto nla_put_failure; tmp.flags = ct->proto.tcp.seen[1].flags; - NLA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY, - sizeof(struct nf_ct_tcp_flags), &tmp); + if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY, + sizeof(struct nf_ct_tcp_flags), &tmp)) + goto nla_put_failure; spin_unlock_bh(&ct->lock); nla_nest_end(skb, nest_parms); @@ -1310,28 +1312,29 @@ tcp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) { const unsigned int *timeouts = data; - NLA_PUT_BE32(skb, CTA_TIMEOUT_TCP_SYN_SENT, - htonl(timeouts[TCP_CONNTRACK_SYN_SENT] / HZ)); - NLA_PUT_BE32(skb, CTA_TIMEOUT_TCP_SYN_RECV, - htonl(timeouts[TCP_CONNTRACK_SYN_RECV] / HZ)); - NLA_PUT_BE32(skb, CTA_TIMEOUT_TCP_ESTABLISHED, - htonl(timeouts[TCP_CONNTRACK_ESTABLISHED] / HZ)); - NLA_PUT_BE32(skb, CTA_TIMEOUT_TCP_FIN_WAIT, - htonl(timeouts[TCP_CONNTRACK_FIN_WAIT] / HZ)); - NLA_PUT_BE32(skb, CTA_TIMEOUT_TCP_CLOSE_WAIT, - htonl(timeouts[TCP_CONNTRACK_CLOSE_WAIT] / HZ)); - NLA_PUT_BE32(skb, CTA_TIMEOUT_TCP_LAST_ACK, - htonl(timeouts[TCP_CONNTRACK_LAST_ACK] / HZ)); - NLA_PUT_BE32(skb, CTA_TIMEOUT_TCP_TIME_WAIT, - htonl(timeouts[TCP_CONNTRACK_TIME_WAIT] / HZ)); - NLA_PUT_BE32(skb, CTA_TIMEOUT_TCP_CLOSE, - htonl(timeouts[TCP_CONNTRACK_CLOSE] / HZ)); - NLA_PUT_BE32(skb, CTA_TIMEOUT_TCP_SYN_SENT2, - htonl(timeouts[TCP_CONNTRACK_SYN_SENT2] / HZ)); - NLA_PUT_BE32(skb, CTA_TIMEOUT_TCP_RETRANS, - htonl(timeouts[TCP_CONNTRACK_RETRANS] / HZ)); - NLA_PUT_BE32(skb, CTA_TIMEOUT_TCP_UNACK, - htonl(timeouts[TCP_CONNTRACK_UNACK] / HZ)); + if (nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT, + htonl(timeouts[TCP_CONNTRACK_SYN_SENT] / HZ)) || + nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_RECV, + htonl(timeouts[TCP_CONNTRACK_SYN_RECV] / HZ)) || + nla_put_be32(skb, CTA_TIMEOUT_TCP_ESTABLISHED, + htonl(timeouts[TCP_CONNTRACK_ESTABLISHED] / HZ)) || + nla_put_be32(skb, CTA_TIMEOUT_TCP_FIN_WAIT, + htonl(timeouts[TCP_CONNTRACK_FIN_WAIT] / HZ)) || + nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE_WAIT, + htonl(timeouts[TCP_CONNTRACK_CLOSE_WAIT] / HZ)) || + nla_put_be32(skb, CTA_TIMEOUT_TCP_LAST_ACK, + htonl(timeouts[TCP_CONNTRACK_LAST_ACK] / HZ)) || + nla_put_be32(skb, CTA_TIMEOUT_TCP_TIME_WAIT, + htonl(timeouts[TCP_CONNTRACK_TIME_WAIT] / HZ)) || + nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE, + htonl(timeouts[TCP_CONNTRACK_CLOSE] / HZ)) || + nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT2, + htonl(timeouts[TCP_CONNTRACK_SYN_SENT2] / HZ)) || + nla_put_be32(skb, CTA_TIMEOUT_TCP_RETRANS, + htonl(timeouts[TCP_CONNTRACK_RETRANS] / HZ)) || + nla_put_be32(skb, CTA_TIMEOUT_TCP_UNACK, + htonl(timeouts[TCP_CONNTRACK_UNACK] / HZ))) + goto nla_put_failure; return 0; nla_put_failure: diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index a9073dc1548..7259a6bdeb4 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -181,10 +181,11 @@ udp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) { const unsigned int *timeouts = data; - NLA_PUT_BE32(skb, CTA_TIMEOUT_UDP_UNREPLIED, - htonl(timeouts[UDP_CT_UNREPLIED] / HZ)); - NLA_PUT_BE32(skb, CTA_TIMEOUT_UDP_REPLIED, - htonl(timeouts[UDP_CT_REPLIED] / HZ)); + if (nla_put_be32(skb, CTA_TIMEOUT_UDP_UNREPLIED, + htonl(timeouts[UDP_CT_UNREPLIED] / HZ)) || + nla_put_be32(skb, CTA_TIMEOUT_UDP_REPLIED, + htonl(timeouts[UDP_CT_REPLIED] / HZ))) + goto nla_put_failure; return 0; nla_put_failure: diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index e0606392cda..4d60a5376aa 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -185,10 +185,11 @@ udplite_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) { const unsigned int *timeouts = data; - NLA_PUT_BE32(skb, CTA_TIMEOUT_UDPLITE_UNREPLIED, - htonl(timeouts[UDPLITE_CT_UNREPLIED] / HZ)); - NLA_PUT_BE32(skb, CTA_TIMEOUT_UDPLITE_REPLIED, - htonl(timeouts[UDPLITE_CT_REPLIED] / HZ)); + if (nla_put_be32(skb, CTA_TIMEOUT_UDPLITE_UNREPLIED, + htonl(timeouts[UDPLITE_CT_UNREPLIED] / HZ)) || + nla_put_be32(skb, CTA_TIMEOUT_UDPLITE_REPLIED, + htonl(timeouts[UDPLITE_CT_REPLIED] / HZ))) + goto nla_put_failure; return 0; nla_put_failure: diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 885f5ab9bc2..9b3943252a5 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -468,18 +468,13 @@ static ctl_table nf_ct_netfilter_table[] = { { } }; -static struct ctl_path nf_ct_path[] = { - { .procname = "net", }, - { } -}; - static int nf_conntrack_standalone_init_sysctl(struct net *net) { struct ctl_table *table; if (net_eq(net, &init_net)) { nf_ct_netfilter_header = - register_sysctl_paths(nf_ct_path, nf_ct_netfilter_table); + register_net_sysctl(&init_net, "net", nf_ct_netfilter_table); if (!nf_ct_netfilter_header) goto out; } @@ -494,8 +489,7 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) table[3].data = &net->ct.sysctl_checksum; table[4].data = &net->ct.sysctl_log_invalid; - net->ct.sysctl_header = register_net_sysctl_table(net, - nf_net_netfilter_sysctl_path, table); + net->ct.sysctl_header = register_net_sysctl(net, "net/netfilter", table); if (!net->ct.sysctl_header) goto out_unregister_netfilter; @@ -505,7 +499,7 @@ out_unregister_netfilter: kfree(table); out_kmemdup: if (net_eq(net, &init_net)) - unregister_sysctl_table(nf_ct_netfilter_header); + unregister_net_sysctl_table(nf_ct_netfilter_header); out: printk(KERN_ERR "nf_conntrack: can't register to sysctl.\n"); return -ENOMEM; @@ -516,7 +510,7 @@ static void nf_conntrack_standalone_fini_sysctl(struct net *net) struct ctl_table *table; if (net_eq(net, &init_net)) - unregister_sysctl_table(nf_ct_netfilter_header); + unregister_net_sysctl_table(nf_ct_netfilter_header); table = net->ct.sysctl_header->ctl_table_arg; unregister_net_sysctl_table(net->ct.sysctl_header); kfree(table); diff --git a/net/netfilter/nf_conntrack_timestamp.c b/net/netfilter/nf_conntrack_timestamp.c index e8d27afbbdb..dbb364f62d6 100644 --- a/net/netfilter/nf_conntrack_timestamp.c +++ b/net/netfilter/nf_conntrack_timestamp.c @@ -51,8 +51,8 @@ static int nf_conntrack_tstamp_init_sysctl(struct net *net) table[0].data = &net->ct.sysctl_tstamp; - net->ct.tstamp_sysctl_header = register_net_sysctl_table(net, - nf_net_netfilter_sysctl_path, table); + net->ct.tstamp_sysctl_header = register_net_sysctl(net, "net/netfilter", + table); if (!net->ct.tstamp_sysctl_header) { printk(KERN_ERR "nf_ct_tstamp: can't register to sysctl.\n"); goto out_register; diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 957374a234d..703fb26aa48 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -214,13 +214,6 @@ static const struct file_operations nflog_file_ops = { #endif /* PROC_FS */ #ifdef CONFIG_SYSCTL -static struct ctl_path nf_log_sysctl_path[] = { - { .procname = "net", }, - { .procname = "netfilter", }, - { .procname = "nf_log", }, - { } -}; - static char nf_log_sysctl_fnames[NFPROTO_NUMPROTO-NFPROTO_UNSPEC][3]; static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1]; static struct ctl_table_header *nf_log_dir_header; @@ -283,7 +276,7 @@ static __init int netfilter_log_sysctl_init(void) nf_log_sysctl_table[i].extra1 = (void *)(unsigned long) i; } - nf_log_dir_header = register_sysctl_paths(nf_log_sysctl_path, + nf_log_dir_header = register_net_sysctl(&init_net, "net/netfilter/nf_log", nf_log_sysctl_table); if (!nf_log_dir_header) return -ENOMEM; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index e6ddde16561..3e797d1fcb9 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -103,7 +103,7 @@ int nfnetlink_has_listeners(struct net *net, unsigned int group) EXPORT_SYMBOL_GPL(nfnetlink_has_listeners); int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, - unsigned group, int echo, gfp_t flags) + unsigned int group, int echo, gfp_t flags) { return nlmsg_notify(net->nfnl, skb, pid, group, echo, flags); } diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index d98c868c148..b2e7310ca0b 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -109,7 +109,8 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, nfmsg->version = NFNETLINK_V0; nfmsg->res_id = 0; - NLA_PUT_STRING(skb, NFACCT_NAME, acct->name); + if (nla_put_string(skb, NFACCT_NAME, acct->name)) + goto nla_put_failure; if (type == NFNL_MSG_ACCT_GET_CTRZERO) { pkts = atomic64_xchg(&acct->pkts, 0); @@ -118,9 +119,10 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, pkts = atomic64_read(&acct->pkts); bytes = atomic64_read(&acct->bytes); } - NLA_PUT_BE64(skb, NFACCT_PKTS, cpu_to_be64(pkts)); - NLA_PUT_BE64(skb, NFACCT_BYTES, cpu_to_be64(bytes)); - NLA_PUT_BE32(skb, NFACCT_USE, htonl(atomic_read(&acct->refcnt))); + if (nla_put_be64(skb, NFACCT_PKTS, cpu_to_be64(pkts)) || + nla_put_be64(skb, NFACCT_BYTES, cpu_to_be64(bytes)) || + nla_put_be32(skb, NFACCT_USE, htonl(atomic_read(&acct->refcnt)))) + goto nla_put_failure; nlmsg_end(skb, nlh); return skb->len; diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 2b9e79f5ef0..3e655288d1d 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -170,11 +170,12 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, nfmsg->version = NFNETLINK_V0; nfmsg->res_id = 0; - NLA_PUT_STRING(skb, CTA_TIMEOUT_NAME, timeout->name); - NLA_PUT_BE16(skb, CTA_TIMEOUT_L3PROTO, htons(timeout->l3num)); - NLA_PUT_U8(skb, CTA_TIMEOUT_L4PROTO, timeout->l4proto->l4proto); - NLA_PUT_BE32(skb, CTA_TIMEOUT_USE, - htonl(atomic_read(&timeout->refcnt))); + if (nla_put_string(skb, CTA_TIMEOUT_NAME, timeout->name) || + nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(timeout->l3num)) || + nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, timeout->l4proto->l4proto) || + nla_put_be32(skb, CTA_TIMEOUT_USE, + htonl(atomic_read(&timeout->refcnt)))) + goto nla_put_failure; if (likely(l4proto->ctnl_timeout.obj_to_nlattr)) { struct nlattr *nest_parms; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 66b2c54c544..3c3cfc0cc9b 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -391,67 +391,78 @@ __build_packet_message(struct nfulnl_instance *inst, pmsg.hw_protocol = skb->protocol; pmsg.hook = hooknum; - NLA_PUT(inst->skb, NFULA_PACKET_HDR, sizeof(pmsg), &pmsg); + if (nla_put(inst->skb, NFULA_PACKET_HDR, sizeof(pmsg), &pmsg)) + goto nla_put_failure; - if (prefix) - NLA_PUT(inst->skb, NFULA_PREFIX, plen, prefix); + if (prefix && + nla_put(inst->skb, NFULA_PREFIX, plen, prefix)) + goto nla_put_failure; if (indev) { #ifndef CONFIG_BRIDGE_NETFILTER - NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_INDEV, - htonl(indev->ifindex)); + if (nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV, + htonl(indev->ifindex))) + goto nla_put_failure; #else if (pf == PF_BRIDGE) { /* Case 1: outdev is physical input device, we need to * look for bridge group (when called from * netfilter_bridge) */ - NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_PHYSINDEV, - htonl(indev->ifindex)); + if (nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSINDEV, + htonl(indev->ifindex)) || /* this is the bridge group "brX" */ /* rcu_read_lock()ed by nf_hook_slow or nf_log_packet */ - NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_INDEV, - htonl(br_port_get_rcu(indev)->br->dev->ifindex)); + nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV, + htonl(br_port_get_rcu(indev)->br->dev->ifindex))) + goto nla_put_failure; } else { /* Case 2: indev is bridge group, we need to look for * physical device (when called from ipv4) */ - NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_INDEV, - htonl(indev->ifindex)); - if (skb->nf_bridge && skb->nf_bridge->physindev) - NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_PHYSINDEV, - htonl(skb->nf_bridge->physindev->ifindex)); + if (nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV, + htonl(indev->ifindex))) + goto nla_put_failure; + if (skb->nf_bridge && skb->nf_bridge->physindev && + nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSINDEV, + htonl(skb->nf_bridge->physindev->ifindex))) + goto nla_put_failure; } #endif } if (outdev) { #ifndef CONFIG_BRIDGE_NETFILTER - NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_OUTDEV, - htonl(outdev->ifindex)); + if (nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV, + htonl(outdev->ifindex))) + goto nla_put_failure; #else if (pf == PF_BRIDGE) { /* Case 1: outdev is physical output device, we need to * look for bridge group (when called from * netfilter_bridge) */ - NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV, - htonl(outdev->ifindex)); + if (nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV, + htonl(outdev->ifindex)) || /* this is the bridge group "brX" */ /* rcu_read_lock()ed by nf_hook_slow or nf_log_packet */ - NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_OUTDEV, - htonl(br_port_get_rcu(outdev)->br->dev->ifindex)); + nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV, + htonl(br_port_get_rcu(outdev)->br->dev->ifindex))) + goto nla_put_failure; } else { /* Case 2: indev is a bridge group, we need to look * for physical device (when called from ipv4) */ - NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_OUTDEV, - htonl(outdev->ifindex)); - if (skb->nf_bridge && skb->nf_bridge->physoutdev) - NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV, - htonl(skb->nf_bridge->physoutdev->ifindex)); + if (nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV, + htonl(outdev->ifindex))) + goto nla_put_failure; + if (skb->nf_bridge && skb->nf_bridge->physoutdev && + nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV, + htonl(skb->nf_bridge->physoutdev->ifindex))) + goto nla_put_failure; } #endif } - if (skb->mark) - NLA_PUT_BE32(inst->skb, NFULA_MARK, htonl(skb->mark)); + if (skb->mark && + nla_put_be32(inst->skb, NFULA_MARK, htonl(skb->mark))) + goto nla_put_failure; if (indev && skb->dev && skb->mac_header != skb->network_header) { @@ -459,16 +470,18 @@ __build_packet_message(struct nfulnl_instance *inst, int len = dev_parse_header(skb, phw.hw_addr); if (len > 0) { phw.hw_addrlen = htons(len); - NLA_PUT(inst->skb, NFULA_HWADDR, sizeof(phw), &phw); + if (nla_put(inst->skb, NFULA_HWADDR, sizeof(phw), &phw)) + goto nla_put_failure; } } if (indev && skb_mac_header_was_set(skb)) { - NLA_PUT_BE16(inst->skb, NFULA_HWTYPE, htons(skb->dev->type)); - NLA_PUT_BE16(inst->skb, NFULA_HWLEN, - htons(skb->dev->hard_header_len)); - NLA_PUT(inst->skb, NFULA_HWHEADER, skb->dev->hard_header_len, - skb_mac_header(skb)); + if (nla_put_be32(inst->skb, NFULA_HWTYPE, htons(skb->dev->type)) || + nla_put_be16(inst->skb, NFULA_HWLEN, + htons(skb->dev->hard_header_len)) || + nla_put(inst->skb, NFULA_HWHEADER, skb->dev->hard_header_len, + skb_mac_header(skb))) + goto nla_put_failure; } if (skb->tstamp.tv64) { @@ -477,7 +490,8 @@ __build_packet_message(struct nfulnl_instance *inst, ts.sec = cpu_to_be64(tv.tv_sec); ts.usec = cpu_to_be64(tv.tv_usec); - NLA_PUT(inst->skb, NFULA_TIMESTAMP, sizeof(ts), &ts); + if (nla_put(inst->skb, NFULA_TIMESTAMP, sizeof(ts), &ts)) + goto nla_put_failure; } /* UID */ @@ -487,22 +501,24 @@ __build_packet_message(struct nfulnl_instance *inst, struct file *file = skb->sk->sk_socket->file; __be32 uid = htonl(file->f_cred->fsuid); __be32 gid = htonl(file->f_cred->fsgid); - /* need to unlock here since NLA_PUT may goto */ read_unlock_bh(&skb->sk->sk_callback_lock); - NLA_PUT_BE32(inst->skb, NFULA_UID, uid); - NLA_PUT_BE32(inst->skb, NFULA_GID, gid); + if (nla_put_be32(inst->skb, NFULA_UID, uid) || + nla_put_be32(inst->skb, NFULA_GID, gid)) + goto nla_put_failure; } else read_unlock_bh(&skb->sk->sk_callback_lock); } /* local sequence number */ - if (inst->flags & NFULNL_CFG_F_SEQ) - NLA_PUT_BE32(inst->skb, NFULA_SEQ, htonl(inst->seq++)); + if ((inst->flags & NFULNL_CFG_F_SEQ) && + nla_put_be32(inst->skb, NFULA_SEQ, htonl(inst->seq++))) + goto nla_put_failure; /* global sequence number */ - if (inst->flags & NFULNL_CFG_F_SEQ_GLOBAL) - NLA_PUT_BE32(inst->skb, NFULA_SEQ_GLOBAL, - htonl(atomic_inc_return(&global_seq))); + if ((inst->flags & NFULNL_CFG_F_SEQ_GLOBAL) && + nla_put_be32(inst->skb, NFULA_SEQ_GLOBAL, + htonl(atomic_inc_return(&global_seq)))) + goto nla_put_failure; if (data_len) { struct nlattr *nla; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index a80b0cb03f1..4162437b836 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -288,58 +288,67 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, indev = entry->indev; if (indev) { #ifndef CONFIG_BRIDGE_NETFILTER - NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV, htonl(indev->ifindex)); + if (nla_put_be32(skb, NFQA_IFINDEX_INDEV, htonl(indev->ifindex))) + goto nla_put_failure; #else if (entry->pf == PF_BRIDGE) { /* Case 1: indev is physical input device, we need to * look for bridge group (when called from * netfilter_bridge) */ - NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSINDEV, - htonl(indev->ifindex)); + if (nla_put_be32(skb, NFQA_IFINDEX_PHYSINDEV, + htonl(indev->ifindex)) || /* this is the bridge group "brX" */ /* rcu_read_lock()ed by __nf_queue */ - NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV, - htonl(br_port_get_rcu(indev)->br->dev->ifindex)); + nla_put_be32(skb, NFQA_IFINDEX_INDEV, + htonl(br_port_get_rcu(indev)->br->dev->ifindex))) + goto nla_put_failure; } else { /* Case 2: indev is bridge group, we need to look for * physical device (when called from ipv4) */ - NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV, - htonl(indev->ifindex)); - if (entskb->nf_bridge && entskb->nf_bridge->physindev) - NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSINDEV, - htonl(entskb->nf_bridge->physindev->ifindex)); + if (nla_put_be32(skb, NFQA_IFINDEX_INDEV, + htonl(indev->ifindex))) + goto nla_put_failure; + if (entskb->nf_bridge && entskb->nf_bridge->physindev && + nla_put_be32(skb, NFQA_IFINDEX_PHYSINDEV, + htonl(entskb->nf_bridge->physindev->ifindex))) + goto nla_put_failure; } #endif } if (outdev) { #ifndef CONFIG_BRIDGE_NETFILTER - NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV, htonl(outdev->ifindex)); + if (nla_put_be32(skb, NFQA_IFINDEX_OUTDEV, htonl(outdev->ifindex))) + goto nla_put_failure; #else if (entry->pf == PF_BRIDGE) { /* Case 1: outdev is physical output device, we need to * look for bridge group (when called from * netfilter_bridge) */ - NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSOUTDEV, - htonl(outdev->ifindex)); + if (nla_put_be32(skb, NFQA_IFINDEX_PHYSOUTDEV, + htonl(outdev->ifindex)) || /* this is the bridge group "brX" */ /* rcu_read_lock()ed by __nf_queue */ - NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV, - htonl(br_port_get_rcu(outdev)->br->dev->ifindex)); + nla_put_be32(skb, NFQA_IFINDEX_OUTDEV, + htonl(br_port_get_rcu(outdev)->br->dev->ifindex))) + goto nla_put_failure; } else { /* Case 2: outdev is bridge group, we need to look for * physical output device (when called from ipv4) */ - NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV, - htonl(outdev->ifindex)); - if (entskb->nf_bridge && entskb->nf_bridge->physoutdev) - NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSOUTDEV, - htonl(entskb->nf_bridge->physoutdev->ifindex)); + if (nla_put_be32(skb, NFQA_IFINDEX_OUTDEV, + htonl(outdev->ifindex))) + goto nla_put_failure; + if (entskb->nf_bridge && entskb->nf_bridge->physoutdev && + nla_put_be32(skb, NFQA_IFINDEX_PHYSOUTDEV, + htonl(entskb->nf_bridge->physoutdev->ifindex))) + goto nla_put_failure; } #endif } - if (entskb->mark) - NLA_PUT_BE32(skb, NFQA_MARK, htonl(entskb->mark)); + if (entskb->mark && + nla_put_be32(skb, NFQA_MARK, htonl(entskb->mark))) + goto nla_put_failure; if (indev && entskb->dev && entskb->mac_header != entskb->network_header) { @@ -347,7 +356,8 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, int len = dev_parse_header(entskb, phw.hw_addr); if (len) { phw.hw_addrlen = htons(len); - NLA_PUT(skb, NFQA_HWADDR, sizeof(phw), &phw); + if (nla_put(skb, NFQA_HWADDR, sizeof(phw), &phw)) + goto nla_put_failure; } } @@ -357,7 +367,8 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, ts.sec = cpu_to_be64(tv.tv_sec); ts.usec = cpu_to_be64(tv.tv_usec); - NLA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts); + if (nla_put(skb, NFQA_TIMESTAMP, sizeof(ts), &ts)) + goto nla_put_failure; } if (data_len) { @@ -384,8 +395,7 @@ nlmsg_failure: nla_put_failure: if (skb) kfree_skb(skb); - if (net_ratelimit()) - printk(KERN_ERR "nf_queue: error creating packet message\n"); + net_err_ratelimited("nf_queue: error creating packet message\n"); return NULL; } @@ -422,10 +432,8 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) } if (queue->queue_total >= queue->queue_maxlen) { queue->queue_dropped++; - if (net_ratelimit()) - printk(KERN_WARNING "nf_queue: full at %d entries, " - "dropping packets(s).\n", - queue->queue_total); + net_warn_ratelimited("nf_queue: full at %d entries, dropping packets(s)\n", + queue->queue_total); goto err_out_free_nskb; } entry->id = ++queue->id_sequence; diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 59530e93fa5..a51de9b052b 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -17,7 +17,6 @@ #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_ecache.h> -#include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_timeout.h> #include <net/netfilter/nf_conntrack_zones.h> @@ -227,7 +226,7 @@ static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par) } #ifdef CONFIG_NF_CONNTRACK_TIMEOUT - if (info->timeout) { + if (info->timeout[0]) { typeof(nf_ct_timeout_find_get_hook) timeout_find_get; struct nf_conn_timeout *timeout_ext; diff --git a/net/netfilter/xt_HMARK.c b/net/netfilter/xt_HMARK.c new file mode 100644 index 00000000000..0a96a43108e --- /dev/null +++ b/net/netfilter/xt_HMARK.c @@ -0,0 +1,362 @@ +/* + * xt_HMARK - Netfilter module to set mark by means of hashing + * + * (C) 2012 by Hans Schillstrom <hans.schillstrom@ericsson.com> + * (C) 2012 by Pablo Neira Ayuso <pablo@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/icmp.h> + +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/xt_HMARK.h> + +#include <net/ip.h> +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +#include <net/netfilter/nf_conntrack.h> +#endif +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) +#include <net/ipv6.h> +#include <linux/netfilter_ipv6/ip6_tables.h> +#endif + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Hans Schillstrom <hans.schillstrom@ericsson.com>"); +MODULE_DESCRIPTION("Xtables: packet marking using hash calculation"); +MODULE_ALIAS("ipt_HMARK"); +MODULE_ALIAS("ip6t_HMARK"); + +struct hmark_tuple { + u32 src; + u32 dst; + union hmark_ports uports; + uint8_t proto; +}; + +static inline u32 hmark_addr6_mask(const __u32 *addr32, const __u32 *mask) +{ + return (addr32[0] & mask[0]) ^ + (addr32[1] & mask[1]) ^ + (addr32[2] & mask[2]) ^ + (addr32[3] & mask[3]); +} + +static inline u32 +hmark_addr_mask(int l3num, const __u32 *addr32, const __u32 *mask) +{ + switch (l3num) { + case AF_INET: + return *addr32 & *mask; + case AF_INET6: + return hmark_addr6_mask(addr32, mask); + } + return 0; +} + +static int +hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t, + const struct xt_hmark_info *info) +{ +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct nf_conntrack_tuple *otuple; + struct nf_conntrack_tuple *rtuple; + + if (ct == NULL || nf_ct_is_untracked(ct)) + return -1; + + otuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + rtuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; + + t->src = hmark_addr_mask(otuple->src.l3num, otuple->src.u3.all, + info->src_mask.all); + t->dst = hmark_addr_mask(otuple->src.l3num, rtuple->src.u3.all, + info->dst_mask.all); + + if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3)) + return 0; + + t->proto = nf_ct_protonum(ct); + if (t->proto != IPPROTO_ICMP) { + t->uports.p16.src = otuple->src.u.all; + t->uports.p16.dst = rtuple->src.u.all; + t->uports.v32 = (t->uports.v32 & info->port_mask.v32) | + info->port_set.v32; + if (t->uports.p16.dst < t->uports.p16.src) + swap(t->uports.p16.dst, t->uports.p16.src); + } + + return 0; +#else + return -1; +#endif +} + +static inline u32 +hmark_hash(struct hmark_tuple *t, const struct xt_hmark_info *info) +{ + u32 hash; + + if (t->dst < t->src) + swap(t->src, t->dst); + + hash = jhash_3words(t->src, t->dst, t->uports.v32, info->hashrnd); + hash = hash ^ (t->proto & info->proto_mask); + + return (((u64)hash * info->hmodulus) >> 32) + info->hoffset; +} + +static void +hmark_set_tuple_ports(const struct sk_buff *skb, unsigned int nhoff, + struct hmark_tuple *t, const struct xt_hmark_info *info) +{ + int protoff; + + protoff = proto_ports_offset(t->proto); + if (protoff < 0) + return; + + nhoff += protoff; + if (skb_copy_bits(skb, nhoff, &t->uports, sizeof(t->uports)) < 0) + return; + + t->uports.v32 = (t->uports.v32 & info->port_mask.v32) | + info->port_set.v32; + + if (t->uports.p16.dst < t->uports.p16.src) + swap(t->uports.p16.dst, t->uports.p16.src); +} + +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) +static int get_inner6_hdr(const struct sk_buff *skb, int *offset) +{ + struct icmp6hdr *icmp6h, _ih6; + + icmp6h = skb_header_pointer(skb, *offset, sizeof(_ih6), &_ih6); + if (icmp6h == NULL) + return 0; + + if (icmp6h->icmp6_type && icmp6h->icmp6_type < 128) { + *offset += sizeof(struct icmp6hdr); + return 1; + } + return 0; +} + +static int +hmark_pkt_set_htuple_ipv6(const struct sk_buff *skb, struct hmark_tuple *t, + const struct xt_hmark_info *info) +{ + struct ipv6hdr *ip6, _ip6; + int flag = IP6T_FH_F_AUTH; + unsigned int nhoff = 0; + u16 fragoff = 0; + int nexthdr; + + ip6 = (struct ipv6hdr *) (skb->data + skb_network_offset(skb)); + nexthdr = ipv6_find_hdr(skb, &nhoff, -1, &fragoff, &flag); + if (nexthdr < 0) + return 0; + /* No need to check for icmp errors on fragments */ + if ((flag & IP6T_FH_F_FRAG) || (nexthdr != IPPROTO_ICMPV6)) + goto noicmp; + /* Use inner header in case of ICMP errors */ + if (get_inner6_hdr(skb, &nhoff)) { + ip6 = skb_header_pointer(skb, nhoff, sizeof(_ip6), &_ip6); + if (ip6 == NULL) + return -1; + /* If AH present, use SPI like in ESP. */ + flag = IP6T_FH_F_AUTH; + nexthdr = ipv6_find_hdr(skb, &nhoff, -1, &fragoff, &flag); + if (nexthdr < 0) + return -1; + } +noicmp: + t->src = hmark_addr6_mask(ip6->saddr.s6_addr32, info->src_mask.all); + t->dst = hmark_addr6_mask(ip6->daddr.s6_addr32, info->dst_mask.all); + + if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3)) + return 0; + + t->proto = nexthdr; + if (t->proto == IPPROTO_ICMPV6) + return 0; + + if (flag & IP6T_FH_F_FRAG) + return 0; + + hmark_set_tuple_ports(skb, nhoff, t, info); + return 0; +} + +static unsigned int +hmark_tg_v6(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct xt_hmark_info *info = par->targinfo; + struct hmark_tuple t; + + memset(&t, 0, sizeof(struct hmark_tuple)); + + if (info->flags & XT_HMARK_FLAG(XT_HMARK_CT)) { + if (hmark_ct_set_htuple(skb, &t, info) < 0) + return XT_CONTINUE; + } else { + if (hmark_pkt_set_htuple_ipv6(skb, &t, info) < 0) + return XT_CONTINUE; + } + + skb->mark = hmark_hash(&t, info); + return XT_CONTINUE; +} +#endif + +static int get_inner_hdr(const struct sk_buff *skb, int iphsz, int *nhoff) +{ + const struct icmphdr *icmph; + struct icmphdr _ih; + + /* Not enough header? */ + icmph = skb_header_pointer(skb, *nhoff + iphsz, sizeof(_ih), &_ih); + if (icmph == NULL || icmph->type > NR_ICMP_TYPES) + return 0; + + /* Error message? */ + if (icmph->type != ICMP_DEST_UNREACH && + icmph->type != ICMP_SOURCE_QUENCH && + icmph->type != ICMP_TIME_EXCEEDED && + icmph->type != ICMP_PARAMETERPROB && + icmph->type != ICMP_REDIRECT) + return 0; + + *nhoff += iphsz + sizeof(_ih); + return 1; +} + +static int +hmark_pkt_set_htuple_ipv4(const struct sk_buff *skb, struct hmark_tuple *t, + const struct xt_hmark_info *info) +{ + struct iphdr *ip, _ip; + int nhoff = skb_network_offset(skb); + + ip = (struct iphdr *) (skb->data + nhoff); + if (ip->protocol == IPPROTO_ICMP) { + /* Use inner header in case of ICMP errors */ + if (get_inner_hdr(skb, ip->ihl * 4, &nhoff)) { + ip = skb_header_pointer(skb, nhoff, sizeof(_ip), &_ip); + if (ip == NULL) + return -1; + } + } + + t->src = (__force u32) ip->saddr; + t->dst = (__force u32) ip->daddr; + + t->src &= info->src_mask.ip; + t->dst &= info->dst_mask.ip; + + if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3)) + return 0; + + t->proto = ip->protocol; + + /* ICMP has no ports, skip */ + if (t->proto == IPPROTO_ICMP) + return 0; + + /* follow-up fragments don't contain ports, skip all fragments */ + if (ip->frag_off & htons(IP_MF | IP_OFFSET)) + return 0; + + hmark_set_tuple_ports(skb, (ip->ihl * 4) + nhoff, t, info); + + return 0; +} + +static unsigned int +hmark_tg_v4(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct xt_hmark_info *info = par->targinfo; + struct hmark_tuple t; + + memset(&t, 0, sizeof(struct hmark_tuple)); + + if (info->flags & XT_HMARK_FLAG(XT_HMARK_CT)) { + if (hmark_ct_set_htuple(skb, &t, info) < 0) + return XT_CONTINUE; + } else { + if (hmark_pkt_set_htuple_ipv4(skb, &t, info) < 0) + return XT_CONTINUE; + } + + skb->mark = hmark_hash(&t, info); + return XT_CONTINUE; +} + +static int hmark_tg_check(const struct xt_tgchk_param *par) +{ + const struct xt_hmark_info *info = par->targinfo; + + if (!info->hmodulus) { + pr_info("xt_HMARK: hash modulus can't be zero\n"); + return -EINVAL; + } + if (info->proto_mask && + (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))) { + pr_info("xt_HMARK: proto mask must be zero with L3 mode\n"); + return -EINVAL; + } + if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI_MASK) && + (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT_MASK) | + XT_HMARK_FLAG(XT_HMARK_DPORT_MASK)))) { + pr_info("xt_HMARK: spi-mask and port-mask can't be combined\n"); + return -EINVAL; + } + if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI) && + (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT) | + XT_HMARK_FLAG(XT_HMARK_DPORT)))) { + pr_info("xt_HMARK: spi-set and port-set can't be combined\n"); + return -EINVAL; + } + return 0; +} + +static struct xt_target hmark_tg_reg[] __read_mostly = { + { + .name = "HMARK", + .family = NFPROTO_IPV4, + .target = hmark_tg_v4, + .targetsize = sizeof(struct xt_hmark_info), + .checkentry = hmark_tg_check, + .me = THIS_MODULE, + }, +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) + { + .name = "HMARK", + .family = NFPROTO_IPV6, + .target = hmark_tg_v6, + .targetsize = sizeof(struct xt_hmark_info), + .checkentry = hmark_tg_check, + .me = THIS_MODULE, + }, +#endif +}; + +static int __init hmark_tg_init(void) +{ + return xt_register_targets(hmark_tg_reg, ARRAY_SIZE(hmark_tg_reg)); +} + +static void __exit hmark_tg_exit(void) +{ + xt_unregister_targets(hmark_tg_reg, ARRAY_SIZE(hmark_tg_reg)); +} + +module_init(hmark_tg_init); +module_exit(hmark_tg_exit); diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 190ad37c5cf..71a266de5fb 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -67,15 +67,13 @@ tcpmss_mangle_packet(struct sk_buff *skb, if (info->mss == XT_TCPMSS_CLAMP_PMTU) { if (dst_mtu(skb_dst(skb)) <= minlen) { - if (net_ratelimit()) - pr_err("unknown or invalid path-MTU (%u)\n", - dst_mtu(skb_dst(skb))); + net_err_ratelimited("unknown or invalid path-MTU (%u)\n", + dst_mtu(skb_dst(skb))); return -1; } if (in_mtu <= minlen) { - if (net_ratelimit()) - pr_err("unknown or invalid path-MTU (%u)\n", - in_mtu); + net_err_ratelimited("unknown or invalid path-MTU (%u)\n", + in_mtu); return -1; } newmss = min(dst_mtu(skb_dst(skb)), in_mtu) - minlen; diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index 35a959a096e..146033a86de 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -282,10 +282,10 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) struct sock *sk; const struct in6_addr *laddr; __be16 lport; - int thoff; + int thoff = 0; int tproto; - tproto = ipv6_find_hdr(skb, &thoff, -1, NULL); + tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); if (tproto < 0) { pr_debug("unable to find transport header in IPv6 packet, dropping\n"); return NF_DROP; diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index d95f9c963cd..26a668a84aa 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -171,8 +171,7 @@ dsthash_alloc_init(struct xt_hashlimit_htable *ht, if (ht->cfg.max && ht->count >= ht->cfg.max) { /* FIXME: do something. question is what.. */ - if (net_ratelimit()) - pr_err("max count of %u reached\n", ht->cfg.max); + net_err_ratelimited("max count of %u reached\n", ht->cfg.max); ent = NULL; } else ent = kmem_cache_alloc(hashlimit_cachep, GFP_ATOMIC); @@ -388,9 +387,20 @@ static void htable_put(struct xt_hashlimit_htable *hinfo) #define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ) +/* in byte mode, the lowest possible rate is one packet/second. + * credit_cap is used as a counter that tells us how many times we can + * refill the "credits available" counter when it becomes empty. + */ +#define MAX_CPJ_BYTES (0xFFFFFFFF / HZ) +#define CREDITS_PER_JIFFY_BYTES POW2_BELOW32(MAX_CPJ_BYTES) + +static u32 xt_hashlimit_len_to_chunks(u32 len) +{ + return (len >> XT_HASHLIMIT_BYTE_SHIFT) + 1; +} + /* Precision saver. */ -static inline u_int32_t -user2credits(u_int32_t user) +static u32 user2credits(u32 user) { /* If multiplying would overflow... */ if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY)) @@ -400,12 +410,53 @@ user2credits(u_int32_t user) return (user * HZ * CREDITS_PER_JIFFY) / XT_HASHLIMIT_SCALE; } -static inline void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now) +static u32 user2credits_byte(u32 user) { - dh->rateinfo.credit += (now - dh->rateinfo.prev) * CREDITS_PER_JIFFY; - if (dh->rateinfo.credit > dh->rateinfo.credit_cap) - dh->rateinfo.credit = dh->rateinfo.credit_cap; + u64 us = user; + us *= HZ * CREDITS_PER_JIFFY_BYTES; + return (u32) (us >> 32); +} + +static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now, u32 mode) +{ + unsigned long delta = now - dh->rateinfo.prev; + u32 cap; + + if (delta == 0) + return; + dh->rateinfo.prev = now; + + if (mode & XT_HASHLIMIT_BYTES) { + u32 tmp = dh->rateinfo.credit; + dh->rateinfo.credit += CREDITS_PER_JIFFY_BYTES * delta; + cap = CREDITS_PER_JIFFY_BYTES * HZ; + if (tmp >= dh->rateinfo.credit) {/* overflow */ + dh->rateinfo.credit = cap; + return; + } + } else { + dh->rateinfo.credit += delta * CREDITS_PER_JIFFY; + cap = dh->rateinfo.credit_cap; + } + if (dh->rateinfo.credit > cap) + dh->rateinfo.credit = cap; +} + +static void rateinfo_init(struct dsthash_ent *dh, + struct xt_hashlimit_htable *hinfo) +{ + dh->rateinfo.prev = jiffies; + if (hinfo->cfg.mode & XT_HASHLIMIT_BYTES) { + dh->rateinfo.credit = CREDITS_PER_JIFFY_BYTES * HZ; + dh->rateinfo.cost = user2credits_byte(hinfo->cfg.avg); + dh->rateinfo.credit_cap = hinfo->cfg.burst; + } else { + dh->rateinfo.credit = user2credits(hinfo->cfg.avg * + hinfo->cfg.burst); + dh->rateinfo.cost = user2credits(hinfo->cfg.avg); + dh->rateinfo.credit_cap = dh->rateinfo.credit; + } } static inline __be32 maskl(__be32 a, unsigned int l) @@ -511,6 +562,21 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo, return 0; } +static u32 hashlimit_byte_cost(unsigned int len, struct dsthash_ent *dh) +{ + u64 tmp = xt_hashlimit_len_to_chunks(len); + tmp = tmp * dh->rateinfo.cost; + + if (unlikely(tmp > CREDITS_PER_JIFFY_BYTES * HZ)) + tmp = CREDITS_PER_JIFFY_BYTES * HZ; + + if (dh->rateinfo.credit < tmp && dh->rateinfo.credit_cap) { + dh->rateinfo.credit_cap--; + dh->rateinfo.credit = CREDITS_PER_JIFFY_BYTES * HZ; + } + return (u32) tmp; +} + static bool hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) { @@ -519,6 +585,7 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) unsigned long now = jiffies; struct dsthash_ent *dh; struct dsthash_dst dst; + u32 cost; if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0) goto hotdrop; @@ -532,21 +599,21 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) goto hotdrop; } dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire); - dh->rateinfo.prev = jiffies; - dh->rateinfo.credit = user2credits(hinfo->cfg.avg * - hinfo->cfg.burst); - dh->rateinfo.credit_cap = user2credits(hinfo->cfg.avg * - hinfo->cfg.burst); - dh->rateinfo.cost = user2credits(hinfo->cfg.avg); + rateinfo_init(dh, hinfo); } else { /* update expiration timeout */ dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire); - rateinfo_recalc(dh, now); + rateinfo_recalc(dh, now, hinfo->cfg.mode); } - if (dh->rateinfo.credit >= dh->rateinfo.cost) { + if (info->cfg.mode & XT_HASHLIMIT_BYTES) + cost = hashlimit_byte_cost(skb->len, dh); + else + cost = dh->rateinfo.cost; + + if (dh->rateinfo.credit >= cost) { /* below the limit */ - dh->rateinfo.credit -= dh->rateinfo.cost; + dh->rateinfo.credit -= cost; spin_unlock(&dh->lock); rcu_read_unlock_bh(); return !(info->cfg.mode & XT_HASHLIMIT_INVERT); @@ -568,14 +635,6 @@ static int hashlimit_mt_check(const struct xt_mtchk_param *par) struct xt_hashlimit_mtinfo1 *info = par->matchinfo; int ret; - /* Check for overflow. */ - if (info->cfg.burst == 0 || - user2credits(info->cfg.avg * info->cfg.burst) < - user2credits(info->cfg.avg)) { - pr_info("overflow, try lower: %u/%u\n", - info->cfg.avg, info->cfg.burst); - return -ERANGE; - } if (info->cfg.gc_interval == 0 || info->cfg.expire == 0) return -EINVAL; if (info->name[sizeof(info->name)-1] != '\0') @@ -588,6 +647,26 @@ static int hashlimit_mt_check(const struct xt_mtchk_param *par) return -EINVAL; } + if (info->cfg.mode & ~XT_HASHLIMIT_ALL) { + pr_info("Unknown mode mask %X, kernel too old?\n", + info->cfg.mode); + return -EINVAL; + } + + /* Check for overflow. */ + if (info->cfg.mode & XT_HASHLIMIT_BYTES) { + if (user2credits_byte(info->cfg.avg) == 0) { + pr_info("overflow, rate too high: %u\n", info->cfg.avg); + return -EINVAL; + } + } else if (info->cfg.burst == 0 || + user2credits(info->cfg.avg * info->cfg.burst) < + user2credits(info->cfg.avg)) { + pr_info("overflow, try lower: %u/%u\n", + info->cfg.avg, info->cfg.burst); + return -ERANGE; + } + mutex_lock(&hashlimit_mutex); info->hinfo = htable_find_get(net, info->name, par->family); if (info->hinfo == NULL) { @@ -680,10 +759,11 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, struct seq_file *s) { int res; + const struct xt_hashlimit_htable *ht = s->private; spin_lock(&ent->lock); /* recalculate to show accurate numbers */ - rateinfo_recalc(ent, jiffies); + rateinfo_recalc(ent, jiffies, ht->cfg.mode); switch (family) { case NFPROTO_IPV4: diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index 32b7a579a03..5c22ce8ab30 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -88,8 +88,7 @@ limit_mt(const struct sk_buff *skb, struct xt_action_param *par) } /* Precision saver. */ -static u_int32_t -user2credits(u_int32_t user) +static u32 user2credits(u32 user) { /* If multiplying would overflow... */ if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY)) @@ -123,7 +122,7 @@ static int limit_mt_check(const struct xt_mtchk_param *par) 128. */ priv->prev = jiffies; priv->credit = user2credits(r->avg * r->burst); /* Credits full. */ - r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */ + r->credit_cap = priv->credit; /* Credits full. */ r->cost = user2credits(r->avg); } return 0; diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c index 8160f6b1435..d5b4fd4f91e 100644 --- a/net/netfilter/xt_mac.c +++ b/net/netfilter/xt_mac.c @@ -36,7 +36,7 @@ static bool mac_mt(const struct sk_buff *skb, struct xt_action_param *par) return false; if (skb_mac_header(skb) + ETH_HLEN > skb->data) return false; - ret = compare_ether_addr(eth_hdr(skb)->h_source, info->srcaddr) == 0; + ret = ether_addr_equal(eth_hdr(skb)->h_source, info->srcaddr); ret ^= info->invert; return ret; } diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index d2ff15a2412..fc0d6dbe5d1 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -314,7 +314,7 @@ static int recent_mt_check(const struct xt_mtchk_param *par) #ifdef CONFIG_PROC_FS struct proc_dir_entry *pde; #endif - unsigned i; + unsigned int i; int ret = -EINVAL; if (unlikely(!hash_rnd_inited)) { diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index 0ec8138aa47..035960ec5cb 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -44,6 +44,14 @@ const struct ip_set_adt_opt n = { \ .cmdflags = cfs, \ .timeout = t, \ } +#define ADT_MOPT(n, f, d, fs, cfs, t) \ +struct ip_set_adt_opt n = { \ + .family = f, \ + .dim = d, \ + .flags = fs, \ + .cmdflags = cfs, \ + .timeout = t, \ +} /* Revision 0 interface: backward compatible with netfilter/iptables */ @@ -296,11 +304,14 @@ static unsigned int set_target_v2(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_set_info_target_v2 *info = par->targinfo; - ADT_OPT(add_opt, par->family, info->add_set.dim, - info->add_set.flags, info->flags, info->timeout); + ADT_MOPT(add_opt, par->family, info->add_set.dim, + info->add_set.flags, info->flags, info->timeout); ADT_OPT(del_opt, par->family, info->del_set.dim, info->del_set.flags, 0, UINT_MAX); + /* Normalize to fit into jiffies */ + if (add_opt.timeout > UINT_MAX/MSEC_PER_SEC) + add_opt.timeout = UINT_MAX/MSEC_PER_SEC; if (info->add_set.index != IPSET_INVALID_ID) ip_set_add(info->add_set.index, skb, par, &add_opt); if (info->del_set.index != IPSET_INVALID_ID) diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 72bb07f57f9..9ea482d08cf 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -263,10 +263,10 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) struct sock *sk; struct in6_addr *daddr, *saddr; __be16 dport, sport; - int thoff, tproto; + int thoff = 0, tproto; const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo; - tproto = ipv6_find_hdr(skb, &thoff, -1, NULL); + tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); if (tproto < 0) { pr_debug("unable to find transport header in IPv6 packet, dropping\n"); return NF_DROP; |