diff options
Diffstat (limited to 'net/netfilter')
81 files changed, 2326 insertions, 909 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index ad751fe2e82..ae5096ab65e 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -496,10 +496,19 @@ config NFT_LIMIT This option adds the "limit" expression that you can use to ratelimit rule matchings. -config NFT_NAT +config NFT_MASQ depends on NF_TABLES depends on NF_CONNTRACK depends on NF_NAT + tristate "Netfilter nf_tables masquerade support" + help + This option adds the "masquerade" expression that you can use + to perform NAT in the masquerade flavour. + +config NFT_NAT + depends on NF_TABLES + depends on NF_CONNTRACK + select NF_NAT tristate "Netfilter nf_tables nat module" help This option adds the "nat" expression that you can use to perform @@ -747,7 +756,9 @@ config NETFILTER_XT_TARGET_LED config NETFILTER_XT_TARGET_LOG tristate "LOG target support" - depends on NF_LOG_IPV4 && NF_LOG_IPV6 + select NF_LOG_COMMON + select NF_LOG_IPV4 + select NF_LOG_IPV6 if IPV6 default m if NETFILTER_ADVANCED=n help This option adds a `LOG' target, which allows you to create rules in @@ -764,6 +775,14 @@ config NETFILTER_XT_TARGET_MARK (e.g. when running oldconfig). It selects CONFIG_NETFILTER_XT_MARK (combined mark/MARK module). +config NETFILTER_XT_NAT + tristate '"SNAT and DNAT" targets support' + depends on NF_NAT + ---help--- + This option enables the SNAT and DNAT targets. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_TARGET_NETMAP tristate '"NETMAP" target support' depends on NF_NAT @@ -837,6 +856,7 @@ config NETFILTER_XT_TARGET_TPROXY tristate '"TPROXY" target transparent proxying support' depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED + depends on (IPV6 || IPV6=n) depends on IP_NF_MANGLE select NF_DEFRAG_IPV4 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 8308624a406..a9571be3f79 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -87,6 +87,7 @@ obj-$(CONFIG_NFT_RBTREE) += nft_rbtree.o obj-$(CONFIG_NFT_HASH) += nft_hash.o obj-$(CONFIG_NFT_COUNTER) += nft_counter.o obj-$(CONFIG_NFT_LOG) += nft_log.o +obj-$(CONFIG_NFT_MASQ) += nft_masq.o # generic X tables obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o @@ -95,7 +96,7 @@ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o obj-$(CONFIG_NETFILTER_XT_SET) += xt_set.o -obj-$(CONFIG_NF_NAT) += xt_nat.o +obj-$(CONFIG_NETFILTER_XT_NAT) += xt_nat.o # targets obj-$(CONFIG_NETFILTER_XT_TARGET_AUDIT) += xt_AUDIT.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 1fbab0cdd30..024a2e25c8a 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -35,11 +35,7 @@ EXPORT_SYMBOL_GPL(nf_ipv6_ops); int nf_register_afinfo(const struct nf_afinfo *afinfo) { - int err; - - err = mutex_lock_interruptible(&afinfo_mutex); - if (err < 0) - return err; + mutex_lock(&afinfo_mutex); RCU_INIT_POINTER(nf_afinfo[afinfo->family], afinfo); mutex_unlock(&afinfo_mutex); return 0; @@ -58,7 +54,7 @@ EXPORT_SYMBOL_GPL(nf_unregister_afinfo); struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly; EXPORT_SYMBOL(nf_hooks); -#if defined(CONFIG_JUMP_LABEL) +#ifdef HAVE_JUMP_LABEL struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; EXPORT_SYMBOL(nf_hooks_needed); #endif @@ -68,18 +64,15 @@ static DEFINE_MUTEX(nf_hook_mutex); int nf_register_hook(struct nf_hook_ops *reg) { struct nf_hook_ops *elem; - int err; - err = mutex_lock_interruptible(&nf_hook_mutex); - if (err < 0) - return err; + mutex_lock(&nf_hook_mutex); list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) { if (reg->priority < elem->priority) break; } list_add_rcu(®->list, elem->list.prev); mutex_unlock(&nf_hook_mutex); -#if defined(CONFIG_JUMP_LABEL) +#ifdef HAVE_JUMP_LABEL static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]); #endif return 0; @@ -91,7 +84,7 @@ void nf_unregister_hook(struct nf_hook_ops *reg) mutex_lock(&nf_hook_mutex); list_del_rcu(®->list); mutex_unlock(&nf_hook_mutex); -#if defined(CONFIG_JUMP_LABEL) +#ifdef HAVE_JUMP_LABEL static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); #endif synchronize_net(); diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig index 2f7f5c32c6f..234a8ec8207 100644 --- a/net/netfilter/ipset/Kconfig +++ b/net/netfilter/ipset/Kconfig @@ -99,6 +99,15 @@ config IP_SET_HASH_IPPORTNET To compile it as a module, choose M here. If unsure, say N. +config IP_SET_HASH_MAC + tristate "hash:mac set support" + depends on IP_SET + help + This option adds the hash:mac set type support, by which + one can store MAC (ethernet address) elements in a set. + + To compile it as a module, choose M here. If unsure, say N. + config IP_SET_HASH_NETPORTNET tristate "hash:net,port,net set support" depends on IP_SET diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile index 231f10196cb..3dbd5e95848 100644 --- a/net/netfilter/ipset/Makefile +++ b/net/netfilter/ipset/Makefile @@ -18,6 +18,7 @@ obj-$(CONFIG_IP_SET_HASH_IPMARK) += ip_set_hash_ipmark.o obj-$(CONFIG_IP_SET_HASH_IPPORT) += ip_set_hash_ipport.o obj-$(CONFIG_IP_SET_HASH_IPPORTIP) += ip_set_hash_ipportip.o obj-$(CONFIG_IP_SET_HASH_IPPORTNET) += ip_set_hash_ipportnet.o +obj-$(CONFIG_IP_SET_HASH_MAC) += ip_set_hash_mac.o obj-$(CONFIG_IP_SET_HASH_NET) += ip_set_hash_net.o obj-$(CONFIG_IP_SET_HASH_NETPORT) += ip_set_hash_netport.o obj-$(CONFIG_IP_SET_HASH_NETIFACE) += ip_set_hash_netiface.o diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index f2c7d83dc23..6f024a8a153 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -128,6 +128,8 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, return 0; if (SET_WITH_COUNTER(set)) ip_set_update_counter(ext_counter(x, set), ext, mext, flags); + if (SET_WITH_SKBINFO(set)) + ip_set_get_skbinfo(ext_skbinfo(x, set), ext, mext, flags); return 1; } @@ -161,6 +163,8 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, ip_set_init_counter(ext_counter(x, set), ext); if (SET_WITH_COMMENT(set)) ip_set_init_comment(ext_comment(x, set), ext); + if (SET_WITH_SKBINFO(set)) + ip_set_init_skbinfo(ext_skbinfo(x, set), ext); return 0; } diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index 6f1f9f49480..55b083ec587 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -27,7 +27,8 @@ #define IPSET_TYPE_REV_MIN 0 /* 1 Counter support added */ -#define IPSET_TYPE_REV_MAX 2 /* Comment support added */ +/* 2 Comment support added */ +#define IPSET_TYPE_REV_MAX 3 /* skbinfo support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); @@ -112,7 +113,7 @@ bitmap_ip_kadt(struct ip_set *set, const struct sk_buff *skb, { struct bitmap_ip *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct bitmap_ip_adt_elem e = { }; + struct bitmap_ip_adt_elem e = { .id = 0 }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); u32 ip; @@ -132,14 +133,17 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[], struct bitmap_ip *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; u32 ip = 0, ip_to = 0; - struct bitmap_ip_adt_elem e = { }; + struct bitmap_ip_adt_elem e = { .id = 0 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int ret = 0; if (unlikely(!tb[IPSET_ATTR_IP] || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) @@ -357,6 +361,9 @@ static struct ip_set_type bitmap_ip_type __read_mostly = { [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, + [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, + [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 740eabededd..86104744b00 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -27,7 +27,8 @@ #define IPSET_TYPE_REV_MIN 0 /* 1 Counter support added */ -#define IPSET_TYPE_REV_MAX 2 /* Comment support added */ +/* 2 Comment support added */ +#define IPSET_TYPE_REV_MAX 3 /* skbinfo support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); @@ -203,7 +204,7 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb, { struct bitmap_ipmac *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct bitmap_ipmac_adt_elem e = {}; + struct bitmap_ipmac_adt_elem e = { .id = 0 }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); u32 ip; @@ -232,7 +233,7 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[], { const struct bitmap_ipmac *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct bitmap_ipmac_adt_elem e = {}; + struct bitmap_ipmac_adt_elem e = { .id = 0 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0; int ret = 0; @@ -240,7 +241,10 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[], if (unlikely(!tb[IPSET_ATTR_IP] || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) @@ -394,6 +398,9 @@ static struct ip_set_type bitmap_ipmac_type = { [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, + [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, + [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index cf99676e69f..005dd36444c 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -22,7 +22,8 @@ #define IPSET_TYPE_REV_MIN 0 /* 1 Counter support added */ -#define IPSET_TYPE_REV_MAX 2 /* Comment support added */ +/* 2 Comment support added */ +#define IPSET_TYPE_REV_MAX 3 /* skbinfo support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); @@ -104,7 +105,7 @@ bitmap_port_kadt(struct ip_set *set, const struct sk_buff *skb, { struct bitmap_port *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct bitmap_port_adt_elem e = {}; + struct bitmap_port_adt_elem e = { .id = 0 }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); __be16 __port; u16 port = 0; @@ -129,7 +130,7 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[], { struct bitmap_port *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct bitmap_port_adt_elem e = {}; + struct bitmap_port_adt_elem e = { .id = 0 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port; /* wraparound */ u16 port_to; @@ -139,7 +140,10 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[], !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) @@ -291,6 +295,9 @@ static struct ip_set_type bitmap_port_type = { [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, + [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, + [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 5edbbe82949..86f9d76b146 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -101,7 +101,7 @@ load_settype(const char *name) nfnl_unlock(NFNL_SUBSYS_IPSET); pr_debug("try to load ip_set_%s\n", name); if (request_module("ip_set_%s", name) < 0) { - pr_warning("Can't find ip_set type %s\n", name); + pr_warn("Can't find ip_set type %s\n", name); nfnl_lock(NFNL_SUBSYS_IPSET); return false; } @@ -195,20 +195,19 @@ ip_set_type_register(struct ip_set_type *type) int ret = 0; if (type->protocol != IPSET_PROTOCOL) { - pr_warning("ip_set type %s, family %s, revision %u:%u uses " - "wrong protocol version %u (want %u)\n", - type->name, family_name(type->family), - type->revision_min, type->revision_max, - type->protocol, IPSET_PROTOCOL); + pr_warn("ip_set type %s, family %s, revision %u:%u uses wrong protocol version %u (want %u)\n", + type->name, family_name(type->family), + type->revision_min, type->revision_max, + type->protocol, IPSET_PROTOCOL); return -EINVAL; } ip_set_type_lock(); if (find_set_type(type->name, type->family, type->revision_min)) { /* Duplicate! */ - pr_warning("ip_set type %s, family %s with revision min %u " - "already registered!\n", type->name, - family_name(type->family), type->revision_min); + pr_warn("ip_set type %s, family %s with revision min %u already registered!\n", + type->name, family_name(type->family), + type->revision_min); ret = -EINVAL; goto unlock; } @@ -228,9 +227,9 @@ ip_set_type_unregister(struct ip_set_type *type) { ip_set_type_lock(); if (!find_set_type(type->name, type->family, type->revision_min)) { - pr_warning("ip_set type %s, family %s with revision min %u " - "not registered\n", type->name, - family_name(type->family), type->revision_min); + pr_warn("ip_set type %s, family %s with revision min %u not registered\n", + type->name, family_name(type->family), + type->revision_min); goto unlock; } list_del_rcu(&type->list); @@ -338,6 +337,12 @@ const struct ip_set_ext_type ip_set_extensions[] = { .len = sizeof(unsigned long), .align = __alignof__(unsigned long), }, + [IPSET_EXT_ID_SKBINFO] = { + .type = IPSET_EXT_SKBINFO, + .flag = IPSET_FLAG_WITH_SKBINFO, + .len = sizeof(struct ip_set_skbinfo), + .align = __alignof__(struct ip_set_skbinfo), + }, [IPSET_EXT_ID_COMMENT] = { .type = IPSET_EXT_COMMENT | IPSET_EXT_DESTROY, .flag = IPSET_FLAG_WITH_COMMENT, @@ -383,6 +388,7 @@ int ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[], struct ip_set_ext *ext) { + u64 fullmark; if (tb[IPSET_ATTR_TIMEOUT]) { if (!(set->extensions & IPSET_EXT_TIMEOUT)) return -IPSET_ERR_TIMEOUT; @@ -403,7 +409,25 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[], return -IPSET_ERR_COMMENT; ext->comment = ip_set_comment_uget(tb[IPSET_ATTR_COMMENT]); } - + if (tb[IPSET_ATTR_SKBMARK]) { + if (!(set->extensions & IPSET_EXT_SKBINFO)) + return -IPSET_ERR_SKBINFO; + fullmark = be64_to_cpu(nla_get_be64(tb[IPSET_ATTR_SKBMARK])); + ext->skbmark = fullmark >> 32; + ext->skbmarkmask = fullmark & 0xffffffff; + } + if (tb[IPSET_ATTR_SKBPRIO]) { + if (!(set->extensions & IPSET_EXT_SKBINFO)) + return -IPSET_ERR_SKBINFO; + ext->skbprio = be32_to_cpu(nla_get_be32( + tb[IPSET_ATTR_SKBPRIO])); + } + if (tb[IPSET_ATTR_SKBQUEUE]) { + if (!(set->extensions & IPSET_EXT_SKBINFO)) + return -IPSET_ERR_SKBINFO; + ext->skbqueue = be16_to_cpu(nla_get_be16( + tb[IPSET_ATTR_SKBQUEUE])); + } return 0; } EXPORT_SYMBOL_GPL(ip_set_get_extensions); @@ -635,7 +659,7 @@ ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index) struct ip_set *set; struct ip_set_net *inst = ip_set_pernet(net); - if (index > inst->ip_set_max) + if (index >= inst->ip_set_max) return IPSET_INVALID_ID; nfnl_lock(NFNL_SUBSYS_IPSET); @@ -1398,7 +1422,8 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, struct nlmsghdr *rep, *nlh = nlmsg_hdr(skb); struct sk_buff *skb2; struct nlmsgerr *errmsg; - size_t payload = sizeof(*errmsg) + nlmsg_len(nlh); + size_t payload = min(SIZE_MAX, + sizeof(*errmsg) + nlmsg_len(nlh)); int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); struct nlattr *cda[IPSET_ATTR_CMD_MAX+1]; struct nlattr *cmdattr; diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 61c7fb05280..fee7c64e4dd 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -565,8 +565,8 @@ retry: set->name, orig->htable_bits, htable_bits, orig); if (!htable_bits) { /* In case we have plenty of memory :-) */ - pr_warning("Cannot increase the hashsize of set %s further\n", - set->name); + pr_warn("Cannot increase the hashsize of set %s further\n", + set->name); return -IPSET_ERR_HASH_FULL; } t = ip_set_alloc(sizeof(*t) @@ -651,8 +651,8 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (h->elements >= h->maxelem) { if (net_ratelimit()) - pr_warning("Set %s is full, maxelem %u reached\n", - set->name, h->maxelem); + pr_warn("Set %s is full, maxelem %u reached\n", + set->name, h->maxelem); return -IPSET_ERR_HASH_FULL; } @@ -720,6 +720,8 @@ reuse_slot: ip_set_init_counter(ext_counter(data, set), ext); if (SET_WITH_COMMENT(set)) ip_set_init_comment(ext_comment(data, set), ext); + if (SET_WITH_SKBINFO(set)) + ip_set_init_skbinfo(ext_skbinfo(data, set), ext); out: rcu_read_unlock_bh(); @@ -797,6 +799,9 @@ mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext, if (SET_WITH_COUNTER(set)) ip_set_update_counter(ext_counter(data, set), ext, mext, flags); + if (SET_WITH_SKBINFO(set)) + ip_set_get_skbinfo(ext_skbinfo(data, set), + ext, mext, flags); return mtype_do_data_match(data); } @@ -998,8 +1003,8 @@ mtype_list(const struct ip_set *set, nla_put_failure: nlmsg_trim(skb, incomplete); if (unlikely(first == cb->args[IPSET_CB_ARG0])) { - pr_warning("Can't list set %s: one bucket does not fit into " - "a message. Please report it!\n", set->name); + pr_warn("Can't list set %s: one bucket does not fit into a message. Please report it!\n", + set->name); cb->args[IPSET_CB_ARG0] = 0; return -EMSGSIZE; } @@ -1049,8 +1054,10 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, struct HTYPE *h; struct htable *t; +#ifndef IP_SET_PROTO_UNDEF if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6)) return -IPSET_ERR_INVALID_FAMILY; +#endif #ifdef IP_SET_HASH_WITH_MARKMASK markmask = 0xffffffff; @@ -1093,7 +1100,7 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, if (tb[IPSET_ATTR_MARKMASK]) { markmask = ntohl(nla_get_u32(tb[IPSET_ATTR_MARKMASK])); - if ((markmask > 4294967295u) || markmask == 0) + if (markmask == 0) return -IPSET_ERR_INVALID_MARKMASK; } #endif @@ -1132,25 +1139,32 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, rcu_assign_pointer(h->table, t); set->data = h; +#ifndef IP_SET_PROTO_UNDEF if (set->family == NFPROTO_IPV4) { +#endif set->variant = &IPSET_TOKEN(HTYPE, 4_variant); set->dsize = ip_set_elem_len(set, tb, sizeof(struct IPSET_TOKEN(HTYPE, 4_elem))); +#ifndef IP_SET_PROTO_UNDEF } else { set->variant = &IPSET_TOKEN(HTYPE, 6_variant); set->dsize = ip_set_elem_len(set, tb, sizeof(struct IPSET_TOKEN(HTYPE, 6_elem))); } +#endif if (tb[IPSET_ATTR_TIMEOUT]) { set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); +#ifndef IP_SET_PROTO_UNDEF if (set->family == NFPROTO_IPV4) +#endif IPSET_TOKEN(HTYPE, 4_gc_init)(set, IPSET_TOKEN(HTYPE, 4_gc)); +#ifndef IP_SET_PROTO_UNDEF else IPSET_TOKEN(HTYPE, 6_gc_init)(set, IPSET_TOKEN(HTYPE, 6_gc)); +#endif } - pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", set->name, jhash_size(t->htable_bits), t->htable_bits, h->maxelem, set->data, t); diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index dd40607f878..76959d79e9d 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -26,7 +26,8 @@ #define IPSET_TYPE_REV_MIN 0 /* 1 Counters support */ /* 2 Comments support */ -#define IPSET_TYPE_REV_MAX 3 /* Forceadd support */ +/* 3 Forceadd support */ +#define IPSET_TYPE_REV_MAX 4 /* skbinfo support */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); @@ -84,7 +85,7 @@ hash_ip4_kadt(struct ip_set *set, const struct sk_buff *skb, { const struct hash_ip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_ip4_elem e = {}; + struct hash_ip4_elem e = { 0 }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); __be32 ip; @@ -103,7 +104,7 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], { const struct hash_ip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_ip4_elem e = {}; + struct hash_ip4_elem e = { 0 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, hosts; int ret = 0; @@ -111,7 +112,10 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], if (unlikely(!tb[IPSET_ATTR_IP] || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) @@ -222,7 +226,7 @@ hash_ip6_kadt(struct ip_set *set, const struct sk_buff *skb, { const struct hash_ip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_ip6_elem e = {}; + struct hash_ip6_elem e = { { .all = { 0 } } }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6); @@ -239,7 +243,7 @@ hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[], { const struct hash_ip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_ip6_elem e = {}; + struct hash_ip6_elem e = { { .all = { 0 } } }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int ret; @@ -247,6 +251,9 @@ hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[], !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) || tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR])) return -IPSET_ERR_PROTOCOL; @@ -295,6 +302,9 @@ static struct ip_set_type hash_ip_type __read_mostly = { [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, + [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, + [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c index 4eff0a29725..7abf9788cfa 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmark.c +++ b/net/netfilter/ipset/ip_set_hash_ipmark.c @@ -25,7 +25,8 @@ #include <linux/netfilter/ipset/ip_set_hash.h> #define IPSET_TYPE_REV_MIN 0 -#define IPSET_TYPE_REV_MAX 1 /* Forceadd support */ +/* 1 Forceadd support */ +#define IPSET_TYPE_REV_MAX 2 /* skbinfo support */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Vytas Dauksa <vytas.dauksa@smoothwall.net>"); @@ -113,7 +114,10 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], !ip_set_attr_netorder(tb, IPSET_ATTR_MARK) || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) @@ -244,6 +248,9 @@ hash_ipmark6_uadt(struct ip_set *set, struct nlattr *tb[], !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) || tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR])) return -IPSET_ERR_PROTOCOL; @@ -301,6 +308,9 @@ static struct ip_set_type hash_ipmark_type __read_mostly = { [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, + [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, + [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 7597b82a8b0..dcbcceb9a52 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -28,7 +28,8 @@ /* 1 SCTP and UDPLITE support added */ /* 2 Counters support added */ /* 3 Comments support added */ -#define IPSET_TYPE_REV_MAX 4 /* Forceadd support added */ +/* 4 Forceadd support added */ +#define IPSET_TYPE_REV_MAX 5 /* skbinfo support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); @@ -94,7 +95,7 @@ hash_ipport4_kadt(struct ip_set *set, const struct sk_buff *skb, enum ipset_adt adt, struct ip_set_adt_opt *opt) { ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_ipport4_elem e = { }; + struct hash_ipport4_elem e = { .ip = 0 }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC, @@ -111,7 +112,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], { const struct hash_ipport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_ipport4_elem e = { }; + struct hash_ipport4_elem e = { .ip = 0 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip, ip_to = 0, p = 0, port, port_to; bool with_ports = false; @@ -122,7 +123,10 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) @@ -258,7 +262,7 @@ hash_ipport6_kadt(struct ip_set *set, const struct sk_buff *skb, enum ipset_adt adt, struct ip_set_adt_opt *opt) { ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_ipport6_elem e = { }; + struct hash_ipport6_elem e = { .ip = { .all = { 0 } } }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC, @@ -275,7 +279,7 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], { const struct hash_ipport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_ipport6_elem e = { }; + struct hash_ipport6_elem e = { .ip = { .all = { 0 } } }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port, port_to; bool with_ports = false; @@ -287,6 +291,9 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) || tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR])) return -IPSET_ERR_PROTOCOL; @@ -370,6 +377,9 @@ static struct ip_set_type hash_ipport_type __read_mostly = { [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, + [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, + [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 672655ffd57..7ef93fc887a 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -28,7 +28,8 @@ /* 1 SCTP and UDPLITE support added */ /* 2 Counters support added */ /* 3 Comments support added */ -#define IPSET_TYPE_REV_MAX 4 /* Forceadd support added */ +/* 4 Forceadd support added */ +#define IPSET_TYPE_REV_MAX 5 /* skbinfo support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); @@ -95,7 +96,7 @@ hash_ipportip4_kadt(struct ip_set *set, const struct sk_buff *skb, enum ipset_adt adt, struct ip_set_adt_opt *opt) { ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_ipportip4_elem e = { }; + struct hash_ipportip4_elem e = { .ip = 0 }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC, @@ -113,7 +114,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], { const struct hash_ipportip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_ipportip4_elem e = { }; + struct hash_ipportip4_elem e = { .ip = 0 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip, ip_to = 0, p = 0, port, port_to; bool with_ports = false; @@ -124,7 +125,10 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) @@ -265,7 +269,7 @@ hash_ipportip6_kadt(struct ip_set *set, const struct sk_buff *skb, enum ipset_adt adt, struct ip_set_adt_opt *opt) { ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_ipportip6_elem e = { }; + struct hash_ipportip6_elem e = { .ip = { .all = { 0 } } }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC, @@ -283,7 +287,7 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], { const struct hash_ipportip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_ipportip6_elem e = { }; + struct hash_ipportip6_elem e = { .ip = { .all = { 0 } } }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port, port_to; bool with_ports = false; @@ -295,6 +299,9 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) || tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR])) return -IPSET_ERR_PROTOCOL; @@ -382,6 +389,9 @@ static struct ip_set_type hash_ipportip_type __read_mostly = { [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, + [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, + [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 7308d84f927..b6012ad9278 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -30,7 +30,8 @@ /* 3 nomatch flag support added */ /* 4 Counters support added */ /* 5 Comments support added */ -#define IPSET_TYPE_REV_MAX 6 /* Forceadd support added */ +/* 6 Forceadd support added */ +#define IPSET_TYPE_REV_MAX 7 /* skbinfo support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); @@ -179,7 +180,10 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) @@ -432,6 +436,9 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) || tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR])) return -IPSET_ERR_PROTOCOL; @@ -541,6 +548,9 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = { [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, + [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, + [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c new file mode 100644 index 00000000000..65690b52a4d --- /dev/null +++ b/net/netfilter/ipset/ip_set_hash_mac.c @@ -0,0 +1,173 @@ +/* Copyright (C) 2014 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Kernel module implementing an IP set type: the hash:mac type */ + +#include <linux/jhash.h> +#include <linux/module.h> +#include <linux/etherdevice.h> +#include <linux/skbuff.h> +#include <linux/errno.h> +#include <linux/if_ether.h> +#include <net/netlink.h> + +#include <linux/netfilter.h> +#include <linux/netfilter/ipset/ip_set.h> +#include <linux/netfilter/ipset/ip_set_hash.h> + +#define IPSET_TYPE_REV_MIN 0 +#define IPSET_TYPE_REV_MAX 0 + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); +IP_SET_MODULE_DESC("hash:mac", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); +MODULE_ALIAS("ip_set_hash:mac"); + +/* Type specific function prefix */ +#define HTYPE hash_mac + +/* Member elements */ +struct hash_mac4_elem { + /* Zero valued IP addresses cannot be stored */ + union { + unsigned char ether[ETH_ALEN]; + __be32 foo[2]; + }; +}; + +/* Common functions */ + +static inline bool +hash_mac4_data_equal(const struct hash_mac4_elem *e1, + const struct hash_mac4_elem *e2, + u32 *multi) +{ + return ether_addr_equal(e1->ether, e2->ether); +} + +static inline bool +hash_mac4_data_list(struct sk_buff *skb, const struct hash_mac4_elem *e) +{ + return nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, e->ether); +} + +static inline void +hash_mac4_data_next(struct hash_mac4_elem *next, + const struct hash_mac4_elem *e) +{ +} + +#define MTYPE hash_mac4 +#define PF 4 +#define HOST_MASK 32 +#define IP_SET_EMIT_CREATE +#define IP_SET_PROTO_UNDEF +#include "ip_set_hash_gen.h" + +/* Zero valued element is not supported */ +static const unsigned char invalid_ether[ETH_ALEN] = { 0 }; + +static int +hash_mac4_kadt(struct ip_set *set, const struct sk_buff *skb, + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) +{ + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_mac4_elem e = { { .foo[0] = 0, .foo[1] = 0 } }; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + + /* MAC can be src only */ + if (!(opt->flags & IPSET_DIM_ONE_SRC)) + return 0; + + if (skb_mac_header(skb) < skb->head || + (skb_mac_header(skb) + ETH_HLEN) > skb->data) + return -EINVAL; + + memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN); + if (memcmp(e.ether, invalid_ether, ETH_ALEN) == 0) + return -EINVAL; + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); +} + +static int +hash_mac4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) +{ + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_mac4_elem e = { { .foo[0] = 0, .foo[1] = 0 } }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + int ret; + + if (unlikely(!tb[IPSET_ATTR_ETHER] || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_extensions(set, tb, &ext); + if (ret) + return ret; + memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN); + if (memcmp(e.ether, invalid_ether, ETH_ALEN) == 0) + return -IPSET_ERR_HASH_ELEM; + + return adtfn(set, &e, &ext, &ext, flags); +} + +static struct ip_set_type hash_mac_type __read_mostly = { + .name = "hash:mac", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_MAC, + .dimension = IPSET_DIM_ONE, + .family = NFPROTO_UNSPEC, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, + .create = hash_mac_create, + .create_policy = { + [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, + [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, + [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, + }, + .adt_policy = { + [IPSET_ATTR_ETHER] = { .type = NLA_BINARY, + .len = ETH_ALEN }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, + [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, + [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, + [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, + [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, + }, + .me = THIS_MODULE, +}; + +static int __init +hash_mac_init(void) +{ + return ip_set_type_register(&hash_mac_type); +} + +static void __exit +hash_mac_fini(void) +{ + ip_set_type_unregister(&hash_mac_type); +} + +module_init(hash_mac_init); +module_exit(hash_mac_fini); diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 4c7d495783a..6b3ac10ac2f 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -27,7 +27,8 @@ /* 2 nomatch flag support added */ /* 3 Counters support added */ /* 4 Comments support added */ -#define IPSET_TYPE_REV_MAX 5 /* Forceadd support added */ +/* 5 Forceadd support added */ +#define IPSET_TYPE_REV_MAX 6 /* skbinfo mapping support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); @@ -150,7 +151,10 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) @@ -318,7 +322,10 @@ hash_net6_uadt(struct ip_set *set, struct nlattr *tb[], !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) return -IPSET_ERR_PROTOCOL; if (unlikely(tb[IPSET_ATTR_IP_TO])) return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; @@ -377,6 +384,9 @@ static struct ip_set_type hash_net_type __read_mostly = { [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, + [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, + [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index db2606805b3..35dd3587344 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -28,7 +28,8 @@ /* 2 /0 support added */ /* 3 Counters support added */ /* 4 Comments support added */ -#define IPSET_TYPE_REV_MAX 5 /* Forceadd support added */ +/* 5 Forceadd support added */ +#define IPSET_TYPE_REV_MAX 6 /* skbinfo support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); @@ -236,7 +237,7 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, #define SRCDIR (opt->flags & IPSET_DIM_TWO_SRC) if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) const struct nf_bridge_info *nf_bridge = skb->nf_bridge; if (!nf_bridge) @@ -281,7 +282,10 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) @@ -470,7 +474,7 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb, ip6_netmask(&e.ip, e.cidr); if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) const struct nf_bridge_info *nf_bridge = skb->nf_bridge; if (!nf_bridge) @@ -514,7 +518,10 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[], !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) return -IPSET_ERR_PROTOCOL; if (unlikely(tb[IPSET_ATTR_IP_TO])) return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; @@ -590,6 +597,9 @@ static struct ip_set_type hash_netiface_type __read_mostly = { [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, + [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, + [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index 3e99987e4bf..da00284b357 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -24,7 +24,8 @@ #include <linux/netfilter/ipset/ip_set_hash.h> #define IPSET_TYPE_REV_MIN 0 -#define IPSET_TYPE_REV_MAX 1 /* Forceadd support added */ +/* 1 Forceadd support added */ +#define IPSET_TYPE_REV_MAX 2 /* skbinfo support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>"); @@ -171,7 +172,10 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) @@ -203,7 +207,7 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], flags |= (IPSET_FLAG_NOMATCH << 16); } - if (adt == IPSET_TEST || !(tb[IPSET_ATTR_IP_TO] && + if (adt == IPSET_TEST || !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_IP2_TO])) { e.ip[0] = htonl(ip & ip_set_hostmask(e.cidr[0])); e.ip[1] = htonl(ip2_from & ip_set_hostmask(e.cidr[1])); @@ -219,9 +223,10 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; if (ip_to < ip) swap(ip, ip_to); - if (ip + UINT_MAX == ip_to) + if (unlikely(ip + UINT_MAX == ip_to)) return -IPSET_ERR_HASH_RANGE; - } + } else + ip_set_mask_from_to(ip, ip_to, e.cidr[0]); ip2_to = ip2_from; if (tb[IPSET_ATTR_IP2_TO]) { @@ -230,10 +235,10 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; if (ip2_to < ip2_from) swap(ip2_from, ip2_to); - if (ip2_from + UINT_MAX == ip2_to) + if (unlikely(ip2_from + UINT_MAX == ip2_to)) return -IPSET_ERR_HASH_RANGE; - - } + } else + ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]); if (retried) ip = ntohl(h->next.ip[0]); @@ -393,7 +398,10 @@ hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[], !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) return -IPSET_ERR_PROTOCOL; if (unlikely(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_IP2_TO])) return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; @@ -461,6 +469,9 @@ static struct ip_set_type hash_netnet_type __read_mostly = { [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, + [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, + [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 1c645fbd09c..c0ddb58d19d 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -29,7 +29,8 @@ /* 3 nomatch flag support added */ /* 4 Counters support added */ /* 5 Comments support added */ -#define IPSET_TYPE_REV_MAX 6 /* Forceadd support added */ +/* 6 Forceadd support added */ +#define IPSET_TYPE_REV_MAX 7 /* skbinfo support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); @@ -172,7 +173,10 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) @@ -389,7 +393,10 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) return -IPSET_ERR_PROTOCOL; if (unlikely(tb[IPSET_ATTR_IP_TO])) return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; @@ -489,6 +496,9 @@ static struct ip_set_type hash_netport_type __read_mostly = { [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, + [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, + [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index c0d2ba73f8b..b8053d675fc 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -26,7 +26,8 @@ #define IPSET_TYPE_REV_MIN 0 /* 0 Comments support added */ -#define IPSET_TYPE_REV_MAX 1 /* Forceadd support added */ +/* 1 Forceadd support added */ +#define IPSET_TYPE_REV_MAX 2 /* skbinfo support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>"); @@ -189,7 +190,10 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) @@ -257,7 +261,8 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], swap(ip, ip_to); if (unlikely(ip + UINT_MAX == ip_to)) return -IPSET_ERR_HASH_RANGE; - } + } else + ip_set_mask_from_to(ip, ip_to, e.cidr[0]); port_to = port = ntohs(e.port); if (tb[IPSET_ATTR_PORT_TO]) { @@ -275,7 +280,8 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], swap(ip2_from, ip2_to); if (unlikely(ip2_from + UINT_MAX == ip2_to)) return -IPSET_ERR_HASH_RANGE; - } + } else + ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]); if (retried) ip = ntohl(h->next.ip[0]); @@ -458,7 +464,10 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[], !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) return -IPSET_ERR_PROTOCOL; if (unlikely(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_IP2_TO])) return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; @@ -567,6 +576,9 @@ static struct ip_set_type hash_netportnet_type __read_mostly = { [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, + [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, + [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 3e2317f3cf6..f8f682806e3 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -17,7 +17,8 @@ #define IPSET_TYPE_REV_MIN 0 /* 1 Counters support added */ -#define IPSET_TYPE_REV_MAX 2 /* Comments support added */ +/* 2 Comments support added */ +#define IPSET_TYPE_REV_MAX 3 /* skbinfo support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); @@ -73,6 +74,10 @@ list_set_ktest(struct ip_set *set, const struct sk_buff *skb, ip_set_update_counter(ext_counter(e, set), ext, &opt->ext, cmdflags); + if (SET_WITH_SKBINFO(set)) + ip_set_get_skbinfo(ext_skbinfo(e, set), + ext, &opt->ext, + cmdflags); return ret; } } @@ -197,6 +202,8 @@ list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d, ip_set_init_counter(ext_counter(e, set), ext); if (SET_WITH_COMMENT(set)) ip_set_init_comment(ext_comment(e, set), ext); + if (SET_WITH_SKBINFO(set)) + ip_set_init_skbinfo(ext_skbinfo(e, set), ext); return 0; } @@ -307,6 +314,8 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, ip_set_init_counter(ext_counter(e, set), ext); if (SET_WITH_COMMENT(set)) ip_set_init_comment(ext_comment(e, set), ext); + if (SET_WITH_SKBINFO(set)) + ip_set_init_skbinfo(ext_skbinfo(e, set), ext); /* Set is already added to the list */ ip_set_put_byindex(map->net, d->id); return 0; @@ -378,7 +387,10 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_LINENO]) @@ -597,7 +609,9 @@ init_list_set(struct net *net, struct ip_set *set, u32 size) struct set_elem *e; u32 i; - map = kzalloc(sizeof(*map) + size * set->dsize, GFP_KERNEL); + map = kzalloc(sizeof(*map) + + min_t(u32, size, IP_SET_LIST_MAX_SIZE) * set->dsize, + GFP_KERNEL); if (!map) return false; @@ -665,6 +679,9 @@ static struct ip_set_type list_set_type __read_mostly = { [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, + [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, + [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index 0c3b1670b0d..3b6929dec74 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -152,6 +152,16 @@ config IP_VS_WLC If you want to compile it in kernel, say Y. To compile it as a module, choose M here. If unsure, say N. +config IP_VS_FO + tristate "weighted failover scheduling" + ---help--- + The weighted failover scheduling algorithm directs network + connections to the server with the highest weight that is + currently available. + + If you want to compile it in kernel, say Y. To compile it as a + module, choose M here. If unsure, say N. + config IP_VS_LBLC tristate "locality-based least-connection scheduling" ---help--- diff --git a/net/netfilter/ipvs/Makefile b/net/netfilter/ipvs/Makefile index 34ee602ddb6..38b2723b2e3 100644 --- a/net/netfilter/ipvs/Makefile +++ b/net/netfilter/ipvs/Makefile @@ -26,6 +26,7 @@ obj-$(CONFIG_IP_VS_RR) += ip_vs_rr.o obj-$(CONFIG_IP_VS_WRR) += ip_vs_wrr.o obj-$(CONFIG_IP_VS_LC) += ip_vs_lc.o obj-$(CONFIG_IP_VS_WLC) += ip_vs_wlc.o +obj-$(CONFIG_IP_VS_FO) += ip_vs_fo.o obj-$(CONFIG_IP_VS_LBLC) += ip_vs_lblc.o obj-$(CONFIG_IP_VS_LBLCR) += ip_vs_lblcr.o obj-$(CONFIG_IP_VS_DH) += ip_vs_dh.o diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 610e19c0e13..b0f7b626b56 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -27,6 +27,7 @@ #include <linux/interrupt.h> #include <linux/in.h> +#include <linux/inet.h> #include <linux/net.h> #include <linux/kernel.h> #include <linux/module.h> @@ -77,6 +78,13 @@ static unsigned int ip_vs_conn_rnd __read_mostly; #define CT_LOCKARRAY_SIZE (1<<CT_LOCKARRAY_BITS) #define CT_LOCKARRAY_MASK (CT_LOCKARRAY_SIZE-1) +/* We need an addrstrlen that works with or without v6 */ +#ifdef CONFIG_IP_VS_IPV6 +#define IP_VS_ADDRSTRLEN INET6_ADDRSTRLEN +#else +#define IP_VS_ADDRSTRLEN (8+1) +#endif + struct ip_vs_aligned_lock { spinlock_t l; @@ -488,7 +496,12 @@ static inline void ip_vs_bind_xmit(struct ip_vs_conn *cp) break; case IP_VS_CONN_F_TUNNEL: - cp->packet_xmit = ip_vs_tunnel_xmit; +#ifdef CONFIG_IP_VS_IPV6 + if (cp->daf == AF_INET6) + cp->packet_xmit = ip_vs_tunnel_xmit_v6; + else +#endif + cp->packet_xmit = ip_vs_tunnel_xmit; break; case IP_VS_CONN_F_DROUTE: @@ -514,7 +527,10 @@ static inline void ip_vs_bind_xmit_v6(struct ip_vs_conn *cp) break; case IP_VS_CONN_F_TUNNEL: - cp->packet_xmit = ip_vs_tunnel_xmit_v6; + if (cp->daf == AF_INET6) + cp->packet_xmit = ip_vs_tunnel_xmit_v6; + else + cp->packet_xmit = ip_vs_tunnel_xmit; break; case IP_VS_CONN_F_DROUTE: @@ -580,7 +596,7 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) ip_vs_proto_name(cp->protocol), IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport), IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport), - IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport), + IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport), ip_vs_fwd_tag(cp), cp->state, cp->flags, atomic_read(&cp->refcnt), atomic_read(&dest->refcnt)); @@ -616,7 +632,13 @@ void ip_vs_try_bind_dest(struct ip_vs_conn *cp) struct ip_vs_dest *dest; rcu_read_lock(); - dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr, + + /* This function is only invoked by the synchronization code. We do + * not currently support heterogeneous pools with synchronization, + * so we can make the assumption that the svc_af is the same as the + * dest_af + */ + dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, cp->af, &cp->daddr, cp->dport, &cp->vaddr, cp->vport, cp->protocol, cp->fwmark, cp->flags); if (dest) { @@ -671,7 +693,7 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp) ip_vs_proto_name(cp->protocol), IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport), IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport), - IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport), + IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport), ip_vs_fwd_tag(cp), cp->state, cp->flags, atomic_read(&cp->refcnt), atomic_read(&dest->refcnt)); @@ -740,7 +762,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct) ntohs(ct->cport), IP_VS_DBG_ADDR(ct->af, &ct->vaddr), ntohs(ct->vport), - IP_VS_DBG_ADDR(ct->af, &ct->daddr), + IP_VS_DBG_ADDR(ct->daf, &ct->daddr), ntohs(ct->dport)); /* @@ -848,7 +870,7 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp) * Create a new connection entry and hash it into the ip_vs_conn_tab */ struct ip_vs_conn * -ip_vs_conn_new(const struct ip_vs_conn_param *p, +ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af, const union nf_inet_addr *daddr, __be16 dport, unsigned int flags, struct ip_vs_dest *dest, __u32 fwmark) { @@ -867,6 +889,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp); ip_vs_conn_net_set(cp, p->net); cp->af = p->af; + cp->daf = dest_af; cp->protocol = p->protocol; ip_vs_addr_set(p->af, &cp->caddr, p->caddr); cp->cport = p->cport; @@ -874,7 +897,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, ip_vs_addr_set(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af, &cp->vaddr, p->vaddr); cp->vport = p->vport; - ip_vs_addr_set(p->af, &cp->daddr, daddr); + ip_vs_addr_set(cp->daf, &cp->daddr, daddr); cp->dport = dport; cp->flags = flags; cp->fwmark = fwmark; @@ -1036,6 +1059,7 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v) struct net *net = seq_file_net(seq); char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3]; size_t len = 0; + char dbuf[IP_VS_ADDRSTRLEN]; if (!ip_vs_conn_net_eq(cp, net)) return 0; @@ -1050,24 +1074,32 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v) pe_data[len] = '\0'; #ifdef CONFIG_IP_VS_IPV6 + if (cp->daf == AF_INET6) + snprintf(dbuf, sizeof(dbuf), "%pI6", &cp->daddr.in6); + else +#endif + snprintf(dbuf, sizeof(dbuf), "%08X", + ntohl(cp->daddr.ip)); + +#ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6) seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X " - "%pI6 %04X %-11s %7lu%s\n", + "%s %04X %-11s %7lu%s\n", ip_vs_proto_name(cp->protocol), &cp->caddr.in6, ntohs(cp->cport), &cp->vaddr.in6, ntohs(cp->vport), - &cp->daddr.in6, ntohs(cp->dport), + dbuf, ntohs(cp->dport), ip_vs_state_name(cp->protocol, cp->state), (cp->timer.expires-jiffies)/HZ, pe_data); else #endif seq_printf(seq, "%-3s %08X %04X %08X %04X" - " %08X %04X %-11s %7lu%s\n", + " %s %04X %-11s %7lu%s\n", ip_vs_proto_name(cp->protocol), ntohl(cp->caddr.ip), ntohs(cp->cport), ntohl(cp->vaddr.ip), ntohs(cp->vport), - ntohl(cp->daddr.ip), ntohs(cp->dport), + dbuf, ntohs(cp->dport), ip_vs_state_name(cp->protocol, cp->state), (cp->timer.expires-jiffies)/HZ, pe_data); } @@ -1105,6 +1137,7 @@ static const char *ip_vs_origin_name(unsigned int flags) static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v) { + char dbuf[IP_VS_ADDRSTRLEN]; if (v == SEQ_START_TOKEN) seq_puts(seq, @@ -1117,12 +1150,21 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v) return 0; #ifdef CONFIG_IP_VS_IPV6 + if (cp->daf == AF_INET6) + snprintf(dbuf, sizeof(dbuf), "%pI6", &cp->daddr.in6); + else +#endif + snprintf(dbuf, sizeof(dbuf), "%08X", + ntohl(cp->daddr.ip)); + +#ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6) - seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X %pI6 %04X %-11s %-6s %7lu\n", + seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X " + "%s %04X %-11s %-6s %7lu\n", ip_vs_proto_name(cp->protocol), &cp->caddr.in6, ntohs(cp->cport), &cp->vaddr.in6, ntohs(cp->vport), - &cp->daddr.in6, ntohs(cp->dport), + dbuf, ntohs(cp->dport), ip_vs_state_name(cp->protocol, cp->state), ip_vs_origin_name(cp->flags), (cp->timer.expires-jiffies)/HZ); @@ -1130,11 +1172,11 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v) #endif seq_printf(seq, "%-3s %08X %04X %08X %04X " - "%08X %04X %-11s %-6s %7lu\n", + "%s %04X %-11s %-6s %7lu\n", ip_vs_proto_name(cp->protocol), ntohl(cp->caddr.ip), ntohs(cp->cport), ntohl(cp->vaddr.ip), ntohs(cp->vport), - ntohl(cp->daddr.ip), ntohs(cp->dport), + dbuf, ntohs(cp->dport), ip_vs_state_name(cp->protocol, cp->state), ip_vs_origin_name(cp->flags), (cp->timer.expires-jiffies)/HZ); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index e6836755c45..990decba1fe 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -328,7 +328,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, * This adds param.pe_data to the template, * and thus param.pe_data will be destroyed * when the template expires */ - ct = ip_vs_conn_new(¶m, &dest->addr, dport, + ct = ip_vs_conn_new(¶m, dest->af, &dest->addr, dport, IP_VS_CONN_F_TEMPLATE, dest, skb->mark); if (ct == NULL) { kfree(param.pe_data); @@ -357,7 +357,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc, ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, &iph->saddr, src_port, &iph->daddr, dst_port, ¶m); - cp = ip_vs_conn_new(¶m, &dest->addr, dport, flags, dest, skb->mark); + cp = ip_vs_conn_new(¶m, dest->af, &dest->addr, dport, flags, dest, + skb->mark); if (cp == NULL) { ip_vs_conn_put(ct); *ignored = -1; @@ -479,7 +480,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, &iph->saddr, pptr[0], &iph->daddr, pptr[1], &p); - cp = ip_vs_conn_new(&p, &dest->addr, + cp = ip_vs_conn_new(&p, dest->af, &dest->addr, dest->port ? dest->port : pptr[1], flags, dest, skb->mark); if (!cp) { @@ -491,9 +492,9 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u " "d:%s:%u conn->flags:%X conn->refcnt:%d\n", ip_vs_fwd_tag(cp), - IP_VS_DBG_ADDR(svc->af, &cp->caddr), ntohs(cp->cport), - IP_VS_DBG_ADDR(svc->af, &cp->vaddr), ntohs(cp->vport), - IP_VS_DBG_ADDR(svc->af, &cp->daddr), ntohs(cp->dport), + IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport), + IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport), + IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport), cp->flags, atomic_read(&cp->refcnt)); ip_vs_conn_stats(cp, svc); @@ -550,7 +551,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, &iph->saddr, pptr[0], &iph->daddr, pptr[1], &p); - cp = ip_vs_conn_new(&p, &daddr, 0, + cp = ip_vs_conn_new(&p, svc->af, &daddr, 0, IP_VS_CONN_F_BYPASS | flags, NULL, skb->mark); if (!cp) @@ -1906,7 +1907,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { { .hook = ip_vs_local_reply6, .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, + .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_NAT_DST + 1, }, diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 8416307fdd1..ac7ba689efe 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -574,8 +574,8 @@ bool ip_vs_has_real_service(struct net *net, int af, __u16 protocol, * Called under RCU lock. */ static struct ip_vs_dest * -ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, - __be16 dport) +ip_vs_lookup_dest(struct ip_vs_service *svc, int dest_af, + const union nf_inet_addr *daddr, __be16 dport) { struct ip_vs_dest *dest; @@ -583,9 +583,9 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, * Find the destination for the given service */ list_for_each_entry_rcu(dest, &svc->destinations, n_list) { - if ((dest->af == svc->af) - && ip_vs_addr_equal(svc->af, &dest->addr, daddr) - && (dest->port == dport)) { + if ((dest->af == dest_af) && + ip_vs_addr_equal(dest_af, &dest->addr, daddr) && + (dest->port == dport)) { /* HIT */ return dest; } @@ -602,7 +602,7 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, * on the backup. * Called under RCU lock, no refcnt is returned. */ -struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af, +struct ip_vs_dest *ip_vs_find_dest(struct net *net, int svc_af, int dest_af, const union nf_inet_addr *daddr, __be16 dport, const union nf_inet_addr *vaddr, @@ -613,14 +613,14 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af, struct ip_vs_service *svc; __be16 port = dport; - svc = ip_vs_service_find(net, af, fwmark, protocol, vaddr, vport); + svc = ip_vs_service_find(net, svc_af, fwmark, protocol, vaddr, vport); if (!svc) return NULL; if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) port = 0; - dest = ip_vs_lookup_dest(svc, daddr, port); + dest = ip_vs_lookup_dest(svc, dest_af, daddr, port); if (!dest) - dest = ip_vs_lookup_dest(svc, daddr, port ^ dport); + dest = ip_vs_lookup_dest(svc, dest_af, daddr, port ^ dport); return dest; } @@ -657,8 +657,8 @@ static void __ip_vs_dst_cache_reset(struct ip_vs_dest *dest) * scheduling. */ static struct ip_vs_dest * -ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, - __be16 dport) +ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af, + const union nf_inet_addr *daddr, __be16 dport) { struct ip_vs_dest *dest; struct netns_ipvs *ipvs = net_ipvs(svc->net); @@ -671,11 +671,11 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, " "dest->refcnt=%d\n", dest->vfwmark, - IP_VS_DBG_ADDR(svc->af, &dest->addr), + IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), atomic_read(&dest->refcnt)); - if (dest->af == svc->af && - ip_vs_addr_equal(svc->af, &dest->addr, daddr) && + if (dest->af == dest_af && + ip_vs_addr_equal(dest_af, &dest->addr, daddr) && dest->port == dport && dest->vfwmark == svc->fwmark && dest->protocol == svc->protocol && @@ -779,6 +779,12 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, struct ip_vs_scheduler *sched; int conn_flags; + /* We cannot modify an address and change the address family */ + BUG_ON(!add && udest->af != dest->af); + + if (add && udest->af != svc->af) + ipvs->mixed_address_family_dests++; + /* set the weight and the flags */ atomic_set(&dest->weight, udest->weight); conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK; @@ -816,6 +822,8 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, dest->u_threshold = udest->u_threshold; dest->l_threshold = udest->l_threshold; + dest->af = udest->af; + spin_lock_bh(&dest->dst_lock); __ip_vs_dst_cache_reset(dest); spin_unlock_bh(&dest->dst_lock); @@ -847,7 +855,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, EnterFunction(2); #ifdef CONFIG_IP_VS_IPV6 - if (svc->af == AF_INET6) { + if (udest->af == AF_INET6) { atype = ipv6_addr_type(&udest->addr.in6); if ((!(atype & IPV6_ADDR_UNICAST) || atype & IPV6_ADDR_LINKLOCAL) && @@ -875,12 +883,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, u64_stats_init(&ip_vs_dest_stats->syncp); } - dest->af = svc->af; + dest->af = udest->af; dest->protocol = svc->protocol; dest->vaddr = svc->addr; dest->vport = svc->port; dest->vfwmark = svc->fwmark; - ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr); + ip_vs_addr_copy(udest->af, &dest->addr, &udest->addr); dest->port = udest->port; atomic_set(&dest->activeconns, 0); @@ -928,11 +936,11 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) return -ERANGE; } - ip_vs_addr_copy(svc->af, &daddr, &udest->addr); + ip_vs_addr_copy(udest->af, &daddr, &udest->addr); /* We use function that requires RCU lock */ rcu_read_lock(); - dest = ip_vs_lookup_dest(svc, &daddr, dport); + dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport); rcu_read_unlock(); if (dest != NULL) { @@ -944,12 +952,12 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) * Check if the dest already exists in the trash and * is from the same service */ - dest = ip_vs_trash_get_dest(svc, &daddr, dport); + dest = ip_vs_trash_get_dest(svc, udest->af, &daddr, dport); if (dest != NULL) { IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, " "dest->refcnt=%d, service %u/%s:%u\n", - IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport), + IP_VS_DBG_ADDR(udest->af, &daddr), ntohs(dport), atomic_read(&dest->refcnt), dest->vfwmark, IP_VS_DBG_ADDR(svc->af, &dest->vaddr), @@ -992,11 +1000,11 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) return -ERANGE; } - ip_vs_addr_copy(svc->af, &daddr, &udest->addr); + ip_vs_addr_copy(udest->af, &daddr, &udest->addr); /* We use function that requires RCU lock */ rcu_read_lock(); - dest = ip_vs_lookup_dest(svc, &daddr, dport); + dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport); rcu_read_unlock(); if (dest == NULL) { @@ -1055,6 +1063,9 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc, list_del_rcu(&dest->n_list); svc->num_dests--; + if (dest->af != svc->af) + net_ipvs(svc->net)->mixed_address_family_dests--; + if (svcupd) { struct ip_vs_scheduler *sched; @@ -1078,7 +1089,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) /* We use function that requires RCU lock */ rcu_read_lock(); - dest = ip_vs_lookup_dest(svc, &udest->addr, dport); + dest = ip_vs_lookup_dest(svc, udest->af, &udest->addr, dport); rcu_read_unlock(); if (dest == NULL) { @@ -2179,29 +2190,41 @@ static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u) return 0; } +#define CMDID(cmd) (cmd - IP_VS_BASE_CTL) -#define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL) -#define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user)) -#define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \ - sizeof(struct ip_vs_dest_user)) -#define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user)) -#define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user)) -#define MAX_ARG_LEN SVCDEST_ARG_LEN - -static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = { - [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN, - [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN, - [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN, - [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0, - [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN, - [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN, - [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN, - [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN, - [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN, - [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN, - [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN, +struct ip_vs_svcdest_user { + struct ip_vs_service_user s; + struct ip_vs_dest_user d; }; +static const unsigned char set_arglen[CMDID(IP_VS_SO_SET_MAX) + 1] = { + [CMDID(IP_VS_SO_SET_ADD)] = sizeof(struct ip_vs_service_user), + [CMDID(IP_VS_SO_SET_EDIT)] = sizeof(struct ip_vs_service_user), + [CMDID(IP_VS_SO_SET_DEL)] = sizeof(struct ip_vs_service_user), + [CMDID(IP_VS_SO_SET_ADDDEST)] = sizeof(struct ip_vs_svcdest_user), + [CMDID(IP_VS_SO_SET_DELDEST)] = sizeof(struct ip_vs_svcdest_user), + [CMDID(IP_VS_SO_SET_EDITDEST)] = sizeof(struct ip_vs_svcdest_user), + [CMDID(IP_VS_SO_SET_TIMEOUT)] = sizeof(struct ip_vs_timeout_user), + [CMDID(IP_VS_SO_SET_STARTDAEMON)] = sizeof(struct ip_vs_daemon_user), + [CMDID(IP_VS_SO_SET_STOPDAEMON)] = sizeof(struct ip_vs_daemon_user), + [CMDID(IP_VS_SO_SET_ZERO)] = sizeof(struct ip_vs_service_user), +}; + +union ip_vs_set_arglen { + struct ip_vs_service_user field_IP_VS_SO_SET_ADD; + struct ip_vs_service_user field_IP_VS_SO_SET_EDIT; + struct ip_vs_service_user field_IP_VS_SO_SET_DEL; + struct ip_vs_svcdest_user field_IP_VS_SO_SET_ADDDEST; + struct ip_vs_svcdest_user field_IP_VS_SO_SET_DELDEST; + struct ip_vs_svcdest_user field_IP_VS_SO_SET_EDITDEST; + struct ip_vs_timeout_user field_IP_VS_SO_SET_TIMEOUT; + struct ip_vs_daemon_user field_IP_VS_SO_SET_STARTDAEMON; + struct ip_vs_daemon_user field_IP_VS_SO_SET_STOPDAEMON; + struct ip_vs_service_user field_IP_VS_SO_SET_ZERO; +}; + +#define MAX_SET_ARGLEN sizeof(union ip_vs_set_arglen) + static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc, struct ip_vs_service_user *usvc_compat) { @@ -2232,6 +2255,7 @@ static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest, udest->weight = udest_compat->weight; udest->u_threshold = udest_compat->u_threshold; udest->l_threshold = udest_compat->l_threshold; + udest->af = AF_INET; } static int @@ -2239,7 +2263,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) { struct net *net = sock_net(sk); int ret; - unsigned char arg[MAX_ARG_LEN]; + unsigned char arg[MAX_SET_ARGLEN]; struct ip_vs_service_user *usvc_compat; struct ip_vs_service_user_kern usvc; struct ip_vs_service *svc; @@ -2247,16 +2271,15 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) struct ip_vs_dest_user_kern udest; struct netns_ipvs *ipvs = net_ipvs(net); + BUILD_BUG_ON(sizeof(arg) > 255); if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) return -EPERM; if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX) return -EINVAL; - if (len < 0 || len > MAX_ARG_LEN) - return -EINVAL; - if (len != set_arglen[SET_CMDID(cmd)]) { - pr_err("set_ctl: len %u != %u\n", - len, set_arglen[SET_CMDID(cmd)]); + if (len != set_arglen[CMDID(cmd)]) { + IP_VS_DBG(1, "set_ctl: len %u != %u\n", + len, set_arglen[CMDID(cmd)]); return -EINVAL; } @@ -2271,10 +2294,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) cmd == IP_VS_SO_SET_STOPDAEMON) { struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; - if (mutex_lock_interruptible(&ipvs->sync_mutex)) { - ret = -ERESTARTSYS; - goto out_dec; - } + mutex_lock(&ipvs->sync_mutex); if (cmd == IP_VS_SO_SET_STARTDAEMON) ret = start_sync_thread(net, dm->state, dm->mcast_ifn, dm->syncid); @@ -2284,11 +2304,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) goto out_dec; } - if (mutex_lock_interruptible(&__ip_vs_mutex)) { - ret = -ERESTARTSYS; - goto out_dec; - } - + mutex_lock(&__ip_vs_mutex); if (cmd == IP_VS_SO_SET_FLUSH) { /* Flush the virtual service */ ret = ip_vs_flush(net, false); @@ -2476,6 +2492,12 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get, if (count >= get->num_dests) break; + /* Cannot expose heterogeneous members via sockopt + * interface + */ + if (dest->af != svc->af) + continue; + entry.addr = dest->addr.ip; entry.port = dest->port; entry.conn_flags = atomic_read(&dest->conn_flags); @@ -2519,51 +2541,51 @@ __ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u) #endif } +static const unsigned char get_arglen[CMDID(IP_VS_SO_GET_MAX) + 1] = { + [CMDID(IP_VS_SO_GET_VERSION)] = 64, + [CMDID(IP_VS_SO_GET_INFO)] = sizeof(struct ip_vs_getinfo), + [CMDID(IP_VS_SO_GET_SERVICES)] = sizeof(struct ip_vs_get_services), + [CMDID(IP_VS_SO_GET_SERVICE)] = sizeof(struct ip_vs_service_entry), + [CMDID(IP_VS_SO_GET_DESTS)] = sizeof(struct ip_vs_get_dests), + [CMDID(IP_VS_SO_GET_TIMEOUT)] = sizeof(struct ip_vs_timeout_user), + [CMDID(IP_VS_SO_GET_DAEMON)] = 2 * sizeof(struct ip_vs_daemon_user), +}; -#define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL) -#define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo)) -#define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services)) -#define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry)) -#define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests)) -#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user)) -#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2) - -static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = { - [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64, - [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN, - [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN, - [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN, - [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN, - [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN, - [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN, +union ip_vs_get_arglen { + char field_IP_VS_SO_GET_VERSION[64]; + struct ip_vs_getinfo field_IP_VS_SO_GET_INFO; + struct ip_vs_get_services field_IP_VS_SO_GET_SERVICES; + struct ip_vs_service_entry field_IP_VS_SO_GET_SERVICE; + struct ip_vs_get_dests field_IP_VS_SO_GET_DESTS; + struct ip_vs_timeout_user field_IP_VS_SO_GET_TIMEOUT; + struct ip_vs_daemon_user field_IP_VS_SO_GET_DAEMON[2]; }; +#define MAX_GET_ARGLEN sizeof(union ip_vs_get_arglen) + static int do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) { - unsigned char arg[128]; + unsigned char arg[MAX_GET_ARGLEN]; int ret = 0; unsigned int copylen; struct net *net = sock_net(sk); struct netns_ipvs *ipvs = net_ipvs(net); BUG_ON(!net); + BUILD_BUG_ON(sizeof(arg) > 255); if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) return -EPERM; if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX) return -EINVAL; - if (*len < get_arglen[GET_CMDID(cmd)]) { - pr_err("get_ctl: len %u < %u\n", - *len, get_arglen[GET_CMDID(cmd)]); + copylen = get_arglen[CMDID(cmd)]; + if (*len < (int) copylen) { + IP_VS_DBG(1, "get_ctl: len %d < %u\n", *len, copylen); return -EINVAL; } - copylen = get_arglen[GET_CMDID(cmd)]; - if (copylen > 128) - return -EINVAL; - if (copy_from_user(arg, user, copylen) != 0) return -EFAULT; /* @@ -2573,9 +2595,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) struct ip_vs_daemon_user d[2]; memset(&d, 0, sizeof(d)); - if (mutex_lock_interruptible(&ipvs->sync_mutex)) - return -ERESTARTSYS; - + mutex_lock(&ipvs->sync_mutex); if (ipvs->sync_state & IP_VS_STATE_MASTER) { d[0].state = IP_VS_STATE_MASTER; strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn, @@ -2594,9 +2614,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) return ret; } - if (mutex_lock_interruptible(&__ip_vs_mutex)) - return -ERESTARTSYS; - + mutex_lock(&__ip_vs_mutex); switch (cmd) { case IP_VS_SO_GET_VERSION: { @@ -2777,6 +2795,7 @@ static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = { [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 }, [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 }, [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED }, + [IPVS_DEST_ATTR_ADDR_FAMILY] = { .type = NLA_U16 }, }; static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type, @@ -3032,7 +3051,8 @@ static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest) nla_put_u32(skb, IPVS_DEST_ATTR_INACT_CONNS, atomic_read(&dest->inactconns)) || nla_put_u32(skb, IPVS_DEST_ATTR_PERSIST_CONNS, - atomic_read(&dest->persistconns))) + atomic_read(&dest->persistconns)) || + nla_put_u16(skb, IPVS_DEST_ATTR_ADDR_FAMILY, dest->af)) goto nla_put_failure; if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats)) goto nla_put_failure; @@ -3113,6 +3133,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, { struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1]; struct nlattr *nla_addr, *nla_port; + struct nlattr *nla_addr_family; /* Parse mandatory identifying destination fields first */ if (nla == NULL || @@ -3121,6 +3142,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, nla_addr = attrs[IPVS_DEST_ATTR_ADDR]; nla_port = attrs[IPVS_DEST_ATTR_PORT]; + nla_addr_family = attrs[IPVS_DEST_ATTR_ADDR_FAMILY]; if (!(nla_addr && nla_port)) return -EINVAL; @@ -3130,6 +3152,11 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr)); udest->port = nla_get_be16(nla_port); + if (nla_addr_family) + udest->af = nla_get_u16(nla_addr_family); + else + udest->af = 0; + /* If a full entry was requested, check for the additional fields */ if (full_entry) { struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh, @@ -3234,6 +3261,12 @@ static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs) attrs[IPVS_DAEMON_ATTR_SYNC_ID])) return -EINVAL; + /* The synchronization protocol is incompatible with mixed family + * services + */ + if (net_ipvs(net)->mixed_address_family_dests > 0) + return -EINVAL; + return start_sync_thread(net, nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]), nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]), @@ -3357,6 +3390,35 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) need_full_dest); if (ret) goto out; + + /* Old protocols did not allow the user to specify address + * family, so we set it to zero instead. We also didn't + * allow heterogeneous pools in the old code, so it's safe + * to assume that this will have the same address family as + * the service. + */ + if (udest.af == 0) + udest.af = svc->af; + + if (udest.af != svc->af) { + /* The synchronization protocol is incompatible + * with mixed family services + */ + if (net_ipvs(net)->sync_state) { + ret = -EINVAL; + goto out; + } + + /* Which connection types do we support? */ + switch (udest.conn_flags) { + case IP_VS_CONN_F_TUNNEL: + /* We are able to forward this */ + break; + default: + ret = -EINVAL; + goto out; + } + } } switch (cmd) { diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c index c3b84546ea9..6be5c538b71 100644 --- a/net/netfilter/ipvs/ip_vs_dh.c +++ b/net/netfilter/ipvs/ip_vs_dh.c @@ -234,7 +234,7 @@ ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, IP_VS_DBG_BUF(6, "DH: destination IP address %s --> server %s:%d\n", IP_VS_DBG_ADDR(svc->af, &iph->daddr), - IP_VS_DBG_ADDR(svc->af, &dest->addr), + IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port)); return dest; diff --git a/net/netfilter/ipvs/ip_vs_fo.c b/net/netfilter/ipvs/ip_vs_fo.c new file mode 100644 index 00000000000..e09874d0293 --- /dev/null +++ b/net/netfilter/ipvs/ip_vs_fo.c @@ -0,0 +1,79 @@ +/* + * IPVS: Weighted Fail Over module + * + * Authors: Kenny Mathis <kmathis@chokepoint.net> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Changes: + * Kenny Mathis : added initial functionality based on weight + * + */ + +#define KMSG_COMPONENT "IPVS" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/module.h> +#include <linux/kernel.h> + +#include <net/ip_vs.h> + +/* Weighted Fail Over Module */ +static struct ip_vs_dest * +ip_vs_fo_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) +{ + struct ip_vs_dest *dest, *hweight = NULL; + int hw = 0; /* Track highest weight */ + + IP_VS_DBG(6, "ip_vs_fo_schedule(): Scheduling...\n"); + + /* Basic failover functionality + * Find virtual server with highest weight and send it traffic + */ + list_for_each_entry_rcu(dest, &svc->destinations, n_list) { + if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && + atomic_read(&dest->weight) > hw) { + hweight = dest; + hw = atomic_read(&dest->weight); + } + } + + if (hweight) { + IP_VS_DBG_BUF(6, "FO: server %s:%u activeconns %d weight %d\n", + IP_VS_DBG_ADDR(hweight->af, &hweight->addr), + ntohs(hweight->port), + atomic_read(&hweight->activeconns), + atomic_read(&hweight->weight)); + return hweight; + } + + ip_vs_scheduler_err(svc, "no destination available"); + return NULL; +} + +static struct ip_vs_scheduler ip_vs_fo_scheduler = { + .name = "fo", + .refcnt = ATOMIC_INIT(0), + .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_fo_scheduler.n_list), + .schedule = ip_vs_fo_schedule, +}; + +static int __init ip_vs_fo_init(void) +{ + return register_ip_vs_scheduler(&ip_vs_fo_scheduler); +} + +static void __exit ip_vs_fo_cleanup(void) +{ + unregister_ip_vs_scheduler(&ip_vs_fo_scheduler); + synchronize_rcu(); +} + +module_init(ip_vs_fo_init); +module_exit(ip_vs_fo_cleanup); +MODULE_LICENSE("GPL"); diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 77c173282f3..1d5341f3761 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -96,13 +96,13 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit, if (data_limit - data < plen) { /* check if there is partial match */ - if (strnicmp(data, pattern, data_limit - data) == 0) + if (strncasecmp(data, pattern, data_limit - data) == 0) return -1; else return 0; } - if (strnicmp(data, pattern, plen) != 0) { + if (strncasecmp(data, pattern, plen) != 0) { return 0; } s = data + plen; @@ -233,7 +233,8 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET, IPPROTO_TCP, &cp->caddr, 0, &cp->vaddr, port, &p); - n_cp = ip_vs_conn_new(&p, &from, port, + /* As above, this is ipv4 only */ + n_cp = ip_vs_conn_new(&p, AF_INET, &from, port, IP_VS_CONN_F_NO_CPORT | IP_VS_CONN_F_NFCT, cp->dest, skb->mark); @@ -353,7 +354,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, data_limit = skb_tail_pointer(skb); while (data <= data_limit - 6) { - if (strnicmp(data, "PASV\r\n", 6) == 0) { + if (strncasecmp(data, "PASV\r\n", 6) == 0) { /* Passive mode on */ IP_VS_DBG(7, "got PASV at %td of %td\n", data - data_start, @@ -396,7 +397,8 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, htons(ntohs(cp->vport)-1), &p); n_cp = ip_vs_conn_in_get(&p); if (!n_cp) { - n_cp = ip_vs_conn_new(&p, &cp->daddr, + /* This is ipv4 only */ + n_cp = ip_vs_conn_new(&p, AF_INET, &cp->daddr, htons(ntohs(cp->dport)-1), IP_VS_CONN_F_NFCT, cp->dest, skb->mark); diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 547ff33c1ef..127f14046c5 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -199,11 +199,11 @@ ip_vs_lblc_get(int af, struct ip_vs_lblc_table *tbl, */ static inline struct ip_vs_lblc_entry * ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr, - struct ip_vs_dest *dest) + u16 af, struct ip_vs_dest *dest) { struct ip_vs_lblc_entry *en; - en = ip_vs_lblc_get(dest->af, tbl, daddr); + en = ip_vs_lblc_get(af, tbl, daddr); if (en) { if (en->dest == dest) return en; @@ -213,8 +213,8 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr, if (!en) return NULL; - en->af = dest->af; - ip_vs_addr_copy(dest->af, &en->addr, daddr); + en->af = af; + ip_vs_addr_copy(af, &en->addr, daddr); en->lastuse = jiffies; ip_vs_dest_hold(dest); @@ -521,13 +521,13 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, /* If we fail to create a cache entry, we'll just use the valid dest */ spin_lock_bh(&svc->sched_lock); if (!tbl->dead) - ip_vs_lblc_new(tbl, &iph->daddr, dest); + ip_vs_lblc_new(tbl, &iph->daddr, svc->af, dest); spin_unlock_bh(&svc->sched_lock); out: IP_VS_DBG_BUF(6, "LBLC: destination IP address %s --> server %s:%d\n", IP_VS_DBG_ADDR(svc->af, &iph->daddr), - IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port)); + IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port)); return dest; } diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 3f21a2f47de..2229d2d8bbe 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -362,18 +362,18 @@ ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl, */ static inline struct ip_vs_lblcr_entry * ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr, - struct ip_vs_dest *dest) + u16 af, struct ip_vs_dest *dest) { struct ip_vs_lblcr_entry *en; - en = ip_vs_lblcr_get(dest->af, tbl, daddr); + en = ip_vs_lblcr_get(af, tbl, daddr); if (!en) { en = kmalloc(sizeof(*en), GFP_ATOMIC); if (!en) return NULL; - en->af = dest->af; - ip_vs_addr_copy(dest->af, &en->addr, daddr); + en->af = af; + ip_vs_addr_copy(af, &en->addr, daddr); en->lastuse = jiffies; /* initialize its dest set */ @@ -706,13 +706,13 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, /* If we fail to create a cache entry, we'll just use the valid dest */ spin_lock_bh(&svc->sched_lock); if (!tbl->dead) - ip_vs_lblcr_new(tbl, &iph->daddr, dest); + ip_vs_lblcr_new(tbl, &iph->daddr, svc->af, dest); spin_unlock_bh(&svc->sched_lock); out: IP_VS_DBG_BUF(6, "LBLCR: destination IP address %s --> server %s:%d\n", IP_VS_DBG_ADDR(svc->af, &iph->daddr), - IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port)); + IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port)); return dest; } diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c index 2bdcb1cf212..19a0769a989 100644 --- a/net/netfilter/ipvs/ip_vs_lc.c +++ b/net/netfilter/ipvs/ip_vs_lc.c @@ -59,7 +59,7 @@ ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, else IP_VS_DBG_BUF(6, "LC: server %s:%u activeconns %d " "inactconns %d\n", - IP_VS_DBG_ADDR(svc->af, &least->addr), + IP_VS_DBG_ADDR(least->af, &least->addr), ntohs(least->port), atomic_read(&least->activeconns), atomic_read(&least->inactconns)); diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c index 961a6de9bb2..a8b63401e77 100644 --- a/net/netfilter/ipvs/ip_vs_nq.c +++ b/net/netfilter/ipvs/ip_vs_nq.c @@ -107,7 +107,8 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, out: IP_VS_DBG_BUF(6, "NQ: server %s:%u " "activeconns %d refcnt %d weight %d overhead %d\n", - IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port), + IP_VS_DBG_ADDR(least->af, &least->addr), + ntohs(least->port), atomic_read(&least->activeconns), atomic_read(&least->refcnt), atomic_read(&least->weight), loh); diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 2f7ea756404..5b84c0b5664 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -432,7 +432,7 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, pd->pp->name, ((direction == IP_VS_DIR_OUTPUT) ? "output " : "input "), - IP_VS_DBG_ADDR(cp->af, &cp->daddr), + IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport), IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport), diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index e3a697234a9..8e92beb0cca 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -510,7 +510,7 @@ set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, th->fin ? 'F' : '.', th->ack ? 'A' : '.', th->rst ? 'R' : '.', - IP_VS_DBG_ADDR(cp->af, &cp->daddr), + IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport), IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport), diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c index 176b87c35e3..58bacfc461e 100644 --- a/net/netfilter/ipvs/ip_vs_rr.c +++ b/net/netfilter/ipvs/ip_vs_rr.c @@ -95,7 +95,7 @@ stop: spin_unlock_bh(&svc->sched_lock); IP_VS_DBG_BUF(6, "RR: server %s:%u " "activeconns %d refcnt %d weight %d\n", - IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port), + IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), atomic_read(&dest->activeconns), atomic_read(&dest->refcnt), atomic_read(&dest->weight)); diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c index e446b9fa742..f8e2d00f528 100644 --- a/net/netfilter/ipvs/ip_vs_sed.c +++ b/net/netfilter/ipvs/ip_vs_sed.c @@ -108,7 +108,8 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, IP_VS_DBG_BUF(6, "SED: server %s:%u " "activeconns %d refcnt %d weight %d overhead %d\n", - IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port), + IP_VS_DBG_ADDR(least->af, &least->addr), + ntohs(least->port), atomic_read(&least->activeconns), atomic_read(&least->refcnt), atomic_read(&least->weight), loh); diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index cc65b2f42cd..98a13433b68 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -138,7 +138,7 @@ ip_vs_sh_get_fallback(struct ip_vs_service *svc, struct ip_vs_sh_state *s, return dest; IP_VS_DBG_BUF(6, "SH: selected unavailable server %s:%d, reselecting", - IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port)); + IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port)); /* if the original dest is unavailable, loop around the table * starting from ihash to find a new dest @@ -153,7 +153,7 @@ ip_vs_sh_get_fallback(struct ip_vs_service *svc, struct ip_vs_sh_state *s, return dest; IP_VS_DBG_BUF(6, "SH: selected unavailable " "server %s:%d (offset %d), reselecting", - IP_VS_DBG_ADDR(svc->af, &dest->addr), + IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), roffset); } @@ -192,7 +192,7 @@ ip_vs_sh_reassign(struct ip_vs_sh_state *s, struct ip_vs_service *svc) RCU_INIT_POINTER(b->dest, dest); IP_VS_DBG_BUF(6, "assigned i: %d dest: %s weight: %d\n", - i, IP_VS_DBG_ADDR(svc->af, &dest->addr), + i, IP_VS_DBG_ADDR(dest->af, &dest->addr), atomic_read(&dest->weight)); /* Don't move to next dest until filling weight */ @@ -342,7 +342,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, IP_VS_DBG_BUF(6, "SH: source IP address %s --> server %s:%d\n", IP_VS_DBG_ADDR(svc->af, &iph->saddr), - IP_VS_DBG_ADDR(svc->af, &dest->addr), + IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port)); return dest; diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index eadffb29dec..7162c86fd50 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -880,10 +880,17 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, * but still handled. */ rcu_read_lock(); - dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr, - param->vport, protocol, fwmark, flags); + /* This function is only invoked by the synchronization + * code. We do not currently support heterogeneous pools + * with synchronization, so we can make the assumption that + * the svc_af is the same as the dest_af + */ + dest = ip_vs_find_dest(net, type, type, daddr, dport, + param->vaddr, param->vport, protocol, + fwmark, flags); - cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark); + cp = ip_vs_conn_new(param, type, daddr, dport, flags, dest, + fwmark); rcu_read_unlock(); if (!cp) { kfree(param->pe_data); diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c index b5b4650d50a..6b366fd9055 100644 --- a/net/netfilter/ipvs/ip_vs_wlc.c +++ b/net/netfilter/ipvs/ip_vs_wlc.c @@ -80,7 +80,8 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, IP_VS_DBG_BUF(6, "WLC: server %s:%u " "activeconns %d refcnt %d weight %d overhead %d\n", - IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port), + IP_VS_DBG_ADDR(least->af, &least->addr), + ntohs(least->port), atomic_read(&least->activeconns), atomic_read(&least->refcnt), atomic_read(&least->weight), loh); diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c index 0546cd572d6..17e6d4406ca 100644 --- a/net/netfilter/ipvs/ip_vs_wrr.c +++ b/net/netfilter/ipvs/ip_vs_wrr.c @@ -216,7 +216,7 @@ ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, found: IP_VS_DBG_BUF(6, "WRR: server %s:%u " "activeconns %d refcnt %d weight %d\n", - IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port), + IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), atomic_read(&dest->activeconns), atomic_read(&dest->refcnt), atomic_read(&dest->weight)); diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 6f70bdd3a90..437a3663ad0 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -38,6 +38,7 @@ #include <net/route.h> /* for ip_route_output */ #include <net/ipv6.h> #include <net/ip6_route.h> +#include <net/ip_tunnels.h> #include <net/addrconf.h> #include <linux/icmpv6.h> #include <linux/netfilter.h> @@ -156,18 +157,113 @@ retry: return rt; } +#ifdef CONFIG_IP_VS_IPV6 +static inline int __ip_vs_is_local_route6(struct rt6_info *rt) +{ + return rt->dst.dev && rt->dst.dev->flags & IFF_LOOPBACK; +} +#endif + +static inline bool crosses_local_route_boundary(int skb_af, struct sk_buff *skb, + int rt_mode, + bool new_rt_is_local) +{ + bool rt_mode_allow_local = !!(rt_mode & IP_VS_RT_MODE_LOCAL); + bool rt_mode_allow_non_local = !!(rt_mode & IP_VS_RT_MODE_LOCAL); + bool rt_mode_allow_redirect = !!(rt_mode & IP_VS_RT_MODE_RDR); + bool source_is_loopback; + bool old_rt_is_local; + +#ifdef CONFIG_IP_VS_IPV6 + if (skb_af == AF_INET6) { + int addr_type = ipv6_addr_type(&ipv6_hdr(skb)->saddr); + + source_is_loopback = + (!skb->dev || skb->dev->flags & IFF_LOOPBACK) && + (addr_type & IPV6_ADDR_LOOPBACK); + old_rt_is_local = __ip_vs_is_local_route6( + (struct rt6_info *)skb_dst(skb)); + } else +#endif + { + source_is_loopback = ipv4_is_loopback(ip_hdr(skb)->saddr); + old_rt_is_local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; + } + + if (unlikely(new_rt_is_local)) { + if (!rt_mode_allow_local) + return true; + if (!rt_mode_allow_redirect && !old_rt_is_local) + return true; + } else { + if (!rt_mode_allow_non_local) + return true; + if (source_is_loopback) + return true; + } + return false; +} + +static inline void maybe_update_pmtu(int skb_af, struct sk_buff *skb, int mtu) +{ + struct sock *sk = skb->sk; + struct rtable *ort = skb_rtable(skb); + + if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT) + ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu); +} + +static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode, + struct ip_vs_iphdr *ipvsh, + struct sk_buff *skb, int mtu) +{ +#ifdef CONFIG_IP_VS_IPV6 + if (skb_af == AF_INET6) { + struct net *net = dev_net(skb_dst(skb)->dev); + + if (unlikely(__mtu_check_toobig_v6(skb, mtu))) { + if (!skb->dev) + skb->dev = net->loopback_dev; + /* only send ICMP too big on first fragment */ + if (!ipvsh->fragoffs) + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + IP_VS_DBG(1, "frag needed for %pI6c\n", + &ipv6_hdr(skb)->saddr); + return false; + } + } else +#endif + { + struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); + + /* If we're going to tunnel the packet and pmtu discovery + * is disabled, we'll just fragment it anyway + */ + if ((rt_mode & IP_VS_RT_MODE_TUNNEL) && !sysctl_pmtu_disc(ipvs)) + return true; + + if (unlikely(ip_hdr(skb)->frag_off & htons(IP_DF) && + skb->len > mtu && !skb_is_gso(skb))) { + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, + htonl(mtu)); + IP_VS_DBG(1, "frag needed for %pI4\n", + &ip_hdr(skb)->saddr); + return false; + } + } + + return true; +} + /* Get route to destination or remote server */ static int -__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, - __be32 daddr, int rt_mode, __be32 *ret_saddr) +__ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, + __be32 daddr, int rt_mode, __be32 *ret_saddr, + struct ip_vs_iphdr *ipvsh) { struct net *net = dev_net(skb_dst(skb)->dev); - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_dest_dst *dest_dst; struct rtable *rt; /* Route to the other host */ - struct rtable *ort; /* Original route */ - struct iphdr *iph; - __be16 df; int mtu; int local, noref = 1; @@ -217,30 +313,14 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, } local = (rt->rt_flags & RTCF_LOCAL) ? 1 : 0; - if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) & - rt_mode)) { - IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n", - (rt->rt_flags & RTCF_LOCAL) ? - "local":"non-local", &daddr); + if (unlikely(crosses_local_route_boundary(skb_af, skb, rt_mode, + local))) { + IP_VS_DBG_RL("We are crossing local and non-local addresses" + " daddr=%pI4\n", &daddr); goto err_put; } - iph = ip_hdr(skb); - if (likely(!local)) { - if (unlikely(ipv4_is_loopback(iph->saddr))) { - IP_VS_DBG_RL("Stopping traffic from loopback address " - "%pI4 to non-local address, dest: %pI4\n", - &iph->saddr, &daddr); - goto err_put; - } - } else { - ort = skb_rtable(skb); - if (!(rt_mode & IP_VS_RT_MODE_RDR) && - !(ort->rt_flags & RTCF_LOCAL)) { - IP_VS_DBG_RL("Redirect from non-local address %pI4 to " - "local requires NAT method, dest: %pI4\n", - &iph->daddr, &daddr); - goto err_put; - } + + if (unlikely(local)) { /* skb to local stack, preserve old route */ if (!noref) ip_rt_put(rt); @@ -249,28 +329,17 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) { mtu = dst_mtu(&rt->dst); - df = iph->frag_off & htons(IP_DF); } else { - struct sock *sk = skb->sk; - mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); if (mtu < 68) { IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); goto err_put; } - ort = skb_rtable(skb); - if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT) - ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu); - /* MTU check allowed? */ - df = sysctl_pmtu_disc(ipvs) ? iph->frag_off & htons(IP_DF) : 0; + maybe_update_pmtu(skb_af, skb, mtu); } - /* MTU checking */ - if (unlikely(df && skb->len > mtu && !skb_is_gso(skb))) { - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); - IP_VS_DBG(1, "frag needed for %pI4\n", &iph->saddr); + if (!ensure_mtu_is_adequate(skb_af, rt_mode, ipvsh, skb, mtu)) goto err_put; - } skb_dst_drop(skb); if (noref) { @@ -294,12 +363,6 @@ err_unreach: } #ifdef CONFIG_IP_VS_IPV6 - -static inline int __ip_vs_is_local_route6(struct rt6_info *rt) -{ - return rt->dst.dev && rt->dst.dev->flags & IFF_LOOPBACK; -} - static struct dst_entry * __ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr, struct in6_addr *ret_saddr, int do_xfrm) @@ -338,14 +401,13 @@ out_err: * Get route to destination or remote server */ static int -__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, +__ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, struct in6_addr *daddr, struct in6_addr *ret_saddr, struct ip_vs_iphdr *ipvsh, int do_xfrm, int rt_mode) { struct net *net = dev_net(skb_dst(skb)->dev); struct ip_vs_dest_dst *dest_dst; struct rt6_info *rt; /* Route to the other host */ - struct rt6_info *ort; /* Original route */ struct dst_entry *dst; int mtu; int local, noref = 1; @@ -392,32 +454,15 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, } local = __ip_vs_is_local_route6(rt); - if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) & - rt_mode)) { - IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6c\n", - local ? "local":"non-local", daddr); + + if (unlikely(crosses_local_route_boundary(skb_af, skb, rt_mode, + local))) { + IP_VS_DBG_RL("We are crossing local and non-local addresses" + " daddr=%pI6\n", daddr); goto err_put; } - if (likely(!local)) { - if (unlikely((!skb->dev || skb->dev->flags & IFF_LOOPBACK) && - ipv6_addr_type(&ipv6_hdr(skb)->saddr) & - IPV6_ADDR_LOOPBACK)) { - IP_VS_DBG_RL("Stopping traffic from loopback address " - "%pI6c to non-local address, " - "dest: %pI6c\n", - &ipv6_hdr(skb)->saddr, daddr); - goto err_put; - } - } else { - ort = (struct rt6_info *) skb_dst(skb); - if (!(rt_mode & IP_VS_RT_MODE_RDR) && - !__ip_vs_is_local_route6(ort)) { - IP_VS_DBG_RL("Redirect from non-local address %pI6c " - "to local requires NAT method, " - "dest: %pI6c\n", - &ipv6_hdr(skb)->daddr, daddr); - goto err_put; - } + + if (unlikely(local)) { /* skb to local stack, preserve old route */ if (!noref) dst_release(&rt->dst); @@ -428,28 +473,17 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) mtu = dst_mtu(&rt->dst); else { - struct sock *sk = skb->sk; - mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); if (mtu < IPV6_MIN_MTU) { IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__, IPV6_MIN_MTU); goto err_put; } - ort = (struct rt6_info *) skb_dst(skb); - if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT) - ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu); + maybe_update_pmtu(skb_af, skb, mtu); } - if (unlikely(__mtu_check_toobig_v6(skb, mtu))) { - if (!skb->dev) - skb->dev = net->loopback_dev; - /* only send ICMP too big on first fragment */ - if (!ipvsh->fragoffs) - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - IP_VS_DBG(1, "frag needed for %pI6c\n", &ipv6_hdr(skb)->saddr); + if (!ensure_mtu_is_adequate(skb_af, rt_mode, ipvsh, skb, mtu)) goto err_put; - } skb_dst_drop(skb); if (noref) { @@ -555,8 +589,8 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); rcu_read_lock(); - if (__ip_vs_get_out_rt(skb, NULL, iph->daddr, IP_VS_RT_MODE_NON_LOCAL, - NULL) < 0) + if (__ip_vs_get_out_rt(cp->af, skb, NULL, iph->daddr, + IP_VS_RT_MODE_NON_LOCAL, NULL, ipvsh) < 0) goto tx_error; ip_send_check(iph); @@ -585,7 +619,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); rcu_read_lock(); - if (__ip_vs_get_out_rt_v6(skb, NULL, &ipvsh->daddr.in6, NULL, + if (__ip_vs_get_out_rt_v6(cp->af, skb, NULL, &ipvsh->daddr.in6, NULL, ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0) goto tx_error; @@ -632,10 +666,10 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, } was_input = rt_is_input_route(skb_rtable(skb)); - local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, + local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | - IP_VS_RT_MODE_RDR, NULL); + IP_VS_RT_MODE_RDR, NULL, ipvsh); if (local < 0) goto tx_error; rt = skb_rtable(skb); @@ -720,8 +754,8 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); } - local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, - ipvsh, 0, + local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6, + NULL, ipvsh, 0, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_RDR); @@ -790,6 +824,81 @@ tx_error: } #endif +/* When forwarding a packet, we must ensure that we've got enough headroom + * for the encapsulation packet in the skb. This also gives us an + * opportunity to figure out what the payload_len, dsfield, ttl, and df + * values should be, so that we won't need to look at the old ip header + * again + */ +static struct sk_buff * +ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af, + unsigned int max_headroom, __u8 *next_protocol, + __u32 *payload_len, __u8 *dsfield, __u8 *ttl, + __be16 *df) +{ + struct sk_buff *new_skb = NULL; + struct iphdr *old_iph = NULL; +#ifdef CONFIG_IP_VS_IPV6 + struct ipv6hdr *old_ipv6h = NULL; +#endif + + if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) { + new_skb = skb_realloc_headroom(skb, max_headroom); + if (!new_skb) + goto error; + consume_skb(skb); + skb = new_skb; + } + +#ifdef CONFIG_IP_VS_IPV6 + if (skb_af == AF_INET6) { + old_ipv6h = ipv6_hdr(skb); + *next_protocol = IPPROTO_IPV6; + if (payload_len) + *payload_len = + ntohs(old_ipv6h->payload_len) + + sizeof(*old_ipv6h); + *dsfield = ipv6_get_dsfield(old_ipv6h); + *ttl = old_ipv6h->hop_limit; + if (df) + *df = 0; + } else +#endif + { + old_iph = ip_hdr(skb); + /* Copy DF, reset fragment offset and MF */ + if (df) + *df = (old_iph->frag_off & htons(IP_DF)); + *next_protocol = IPPROTO_IPIP; + + /* fix old IP header checksum */ + ip_send_check(old_iph); + *dsfield = ipv4_get_dsfield(old_iph); + *ttl = old_iph->ttl; + if (payload_len) + *payload_len = ntohs(old_iph->tot_len); + } + + return skb; +error: + kfree_skb(skb); + return ERR_PTR(-ENOMEM); +} + +static inline int __tun_gso_type_mask(int encaps_af, int orig_af) +{ + if (encaps_af == AF_INET) { + if (orig_af == AF_INET) + return SKB_GSO_IPIP; + + return SKB_GSO_SIT; + } + + /* GSO: we need to provide proper SKB_GSO_ value for IPv6: + * SKB_GSO_SIT/IPV6 + */ + return 0; +} /* * IP Tunneling transmitter @@ -818,9 +927,11 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct rtable *rt; /* Route to the other host */ __be32 saddr; /* Source for tunnel */ struct net_device *tdev; /* Device to other host */ - struct iphdr *old_iph = ip_hdr(skb); - u8 tos = old_iph->tos; - __be16 df; + __u8 next_protocol = 0; + __u8 dsfield = 0; + __u8 ttl = 0; + __be16 df = 0; + __be16 *dfp = NULL; struct iphdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ int ret, local; @@ -828,11 +939,11 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); rcu_read_lock(); - local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, + local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_CONNECT | - IP_VS_RT_MODE_TUNNEL, &saddr); + IP_VS_RT_MODE_TUNNEL, &saddr, ipvsh); if (local < 0) goto tx_error; if (local) { @@ -843,30 +954,26 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, rt = skb_rtable(skb); tdev = rt->dst.dev; - /* Copy DF, reset fragment offset and MF */ - df = sysctl_pmtu_disc(ipvs) ? old_iph->frag_off & htons(IP_DF) : 0; - /* * Okay, now see if we can stuff it in the buffer as-is. */ max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr); - if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) { - struct sk_buff *new_skb = - skb_realloc_headroom(skb, max_headroom); + /* We only care about the df field if sysctl_pmtu_disc(ipvs) is set */ + dfp = sysctl_pmtu_disc(ipvs) ? &df : NULL; + skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom, + &next_protocol, NULL, &dsfield, + &ttl, dfp); + if (IS_ERR(skb)) + goto tx_error; - if (!new_skb) - goto tx_error; - consume_skb(skb); - skb = new_skb; - old_iph = ip_hdr(skb); - } + skb = iptunnel_handle_offloads( + skb, false, __tun_gso_type_mask(AF_INET, cp->af)); + if (IS_ERR(skb)) + goto tx_error; skb->transport_header = skb->network_header; - /* fix old IP header checksum */ - ip_send_check(old_iph); - skb_push(skb, sizeof(struct iphdr)); skb_reset_network_header(skb); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); @@ -878,11 +985,11 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, iph->version = 4; iph->ihl = sizeof(struct iphdr)>>2; iph->frag_off = df; - iph->protocol = IPPROTO_IPIP; - iph->tos = tos; + iph->protocol = next_protocol; + iph->tos = dsfield; iph->daddr = cp->daddr.ip; iph->saddr = saddr; - iph->ttl = old_iph->ttl; + iph->ttl = ttl; ip_select_ident(skb, NULL); /* Another hack: avoid icmp_send in ip_fragment */ @@ -900,7 +1007,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, return NF_STOLEN; tx_error: - kfree_skb(skb); + if (!IS_ERR(skb)) + kfree_skb(skb); rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; @@ -914,7 +1022,10 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct rt6_info *rt; /* Route to the other host */ struct in6_addr saddr; /* Source for tunnel */ struct net_device *tdev; /* Device to other host */ - struct ipv6hdr *old_iph = ipv6_hdr(skb); + __u8 next_protocol = 0; + __u32 payload_len = 0; + __u8 dsfield = 0; + __u8 ttl = 0; struct ipv6hdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ int ret, local; @@ -922,7 +1033,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); rcu_read_lock(); - local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, + local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6, &saddr, ipvsh, 1, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | @@ -942,16 +1053,16 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, */ max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr); - if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) { - struct sk_buff *new_skb = - skb_realloc_headroom(skb, max_headroom); + skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom, + &next_protocol, &payload_len, + &dsfield, &ttl, NULL); + if (IS_ERR(skb)) + goto tx_error; - if (!new_skb) - goto tx_error; - consume_skb(skb); - skb = new_skb; - old_iph = ipv6_hdr(skb); - } + skb = iptunnel_handle_offloads( + skb, false, __tun_gso_type_mask(AF_INET6, cp->af)); + if (IS_ERR(skb)) + goto tx_error; skb->transport_header = skb->network_header; @@ -964,14 +1075,13 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, */ iph = ipv6_hdr(skb); iph->version = 6; - iph->nexthdr = IPPROTO_IPV6; - iph->payload_len = old_iph->payload_len; - be16_add_cpu(&iph->payload_len, sizeof(*old_iph)); + iph->nexthdr = next_protocol; + iph->payload_len = htons(payload_len); memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl)); - ipv6_change_dsfield(iph, 0, ipv6_get_dsfield(old_iph)); + ipv6_change_dsfield(iph, 0, dsfield); iph->daddr = cp->daddr.in6; iph->saddr = saddr; - iph->hop_limit = old_iph->hop_limit; + iph->hop_limit = ttl; /* Another hack: avoid icmp_send in ip_fragment */ skb->ignore_df = 1; @@ -988,7 +1098,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, return NF_STOLEN; tx_error: - kfree_skb(skb); + if (!IS_ERR(skb)) + kfree_skb(skb); rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; @@ -1009,10 +1120,10 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); rcu_read_lock(); - local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, + local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | - IP_VS_RT_MODE_KNOWN_NH, NULL); + IP_VS_RT_MODE_KNOWN_NH, NULL, ipvsh); if (local < 0) goto tx_error; if (local) { @@ -1048,8 +1159,8 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); rcu_read_lock(); - local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, - ipvsh, 0, + local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6, + NULL, ipvsh, 0, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL); if (local < 0) @@ -1116,7 +1227,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; rcu_read_lock(); - local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, rt_mode, NULL); + local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip, rt_mode, + NULL, iph); if (local < 0) goto tx_error; rt = skb_rtable(skb); @@ -1207,8 +1319,8 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; rcu_read_lock(); - local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, - ipvsh, 0, rt_mode); + local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6, + NULL, ipvsh, 0, rt_mode); if (local < 0) goto tx_error; rt = (struct rt6_info *) skb_dst(skb); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index de88c4ab514..5016a692908 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -142,7 +142,7 @@ static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, u16 zone) static u32 __hash_bucket(u32 hash, unsigned int size) { - return ((u64)hash * size) >> 32; + return reciprocal_scale(hash, size); } static u32 hash_bucket(u32 hash, const struct net *net) @@ -358,7 +358,7 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report) tstamp = nf_conn_tstamp_find(ct); if (tstamp && tstamp->stop == 0) - tstamp->stop = ktime_to_ns(ktime_get_real()); + tstamp->stop = ktime_get_real_ns(); if (nf_ct_is_dying(ct)) goto delete; diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index f87e8f68ad4..91a1837acd0 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -83,7 +83,8 @@ static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all), (((tuple->dst.protonum ^ tuple->src.l3num) << 16) | (__force __u16)tuple->dst.u.all) ^ nf_conntrack_hash_rnd); - return ((u64)hash * nf_ct_expect_hsize) >> 32; + + return reciprocal_scale(hash, nf_ct_expect_hsize); } struct nf_conntrack_expect * diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index b8a0924064e..b666959f17c 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -304,12 +304,12 @@ static int find_pattern(const char *data, size_t dlen, if (dlen <= plen) { /* Short packet: try for partial? */ - if (strnicmp(data, pattern, dlen) == 0) + if (strncasecmp(data, pattern, dlen) == 0) return -1; else return 0; } - if (strnicmp(data, pattern, plen) != 0) { + if (strncasecmp(data, pattern, plen) != 0) { #if 0 size_t i; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 355a5c4ef76..1bd9ed9e62f 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1737,7 +1737,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, } tstamp = nf_conn_tstamp_find(ct); if (tstamp) - tstamp->start = ktime_to_ns(ktime_get_real()); + tstamp->start = ktime_get_real_ns(); err = nf_conntrack_hash_check_insert(ct); if (err < 0) diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index d25f2937764..957c1db6665 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -14,6 +14,30 @@ static unsigned int nf_ct_generic_timeout __read_mostly = 600*HZ; +static bool nf_generic_should_process(u8 proto) +{ + switch (proto) { +#ifdef CONFIG_NF_CT_PROTO_SCTP_MODULE + case IPPROTO_SCTP: + return false; +#endif +#ifdef CONFIG_NF_CT_PROTO_DCCP_MODULE + case IPPROTO_DCCP: + return false; +#endif +#ifdef CONFIG_NF_CT_PROTO_GRE_MODULE + case IPPROTO_GRE: + return false; +#endif +#ifdef CONFIG_NF_CT_PROTO_UDPLITE_MODULE + case IPPROTO_UDPLITE: + return false; +#endif + default: + return true; + } +} + static inline struct nf_generic_net *generic_pernet(struct net *net) { return &net->ct.nf_ct_proto.generic; @@ -67,7 +91,7 @@ static int generic_packet(struct nf_conn *ct, static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, unsigned int *timeouts) { - return true; + return nf_generic_should_process(nf_ct_protonum(ct)); } #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 44d1ea32570..d87b6423ffb 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -213,7 +213,7 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { { /* REPLY */ /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ -/*syn*/ { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sS2 }, +/*syn*/ { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sSS, sIV, sS2 }, /* * sNO -> sIV Never reached. * sSS -> sS2 Simultaneous open @@ -223,7 +223,7 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sFW -> sIV * sCW -> sIV * sLA -> sIV - * sTW -> sIV Reopened connection, but server may not do it. + * sTW -> sSS Reopened connection, but server may have switched role * sCL -> sIV */ /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 4c3ba1c8d68..885b4aba369 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -247,7 +247,7 @@ int ct_sip_parse_request(const struct nf_conn *ct, for (; dptr < limit - strlen("sip:"); dptr++) { if (*dptr == '\r' || *dptr == '\n') return -1; - if (strnicmp(dptr, "sip:", strlen("sip:")) == 0) { + if (strncasecmp(dptr, "sip:", strlen("sip:")) == 0) { dptr += strlen("sip:"); break; } @@ -350,7 +350,7 @@ static const char *ct_sip_header_search(const char *dptr, const char *limit, continue; } - if (strnicmp(dptr, needle, len) == 0) + if (strncasecmp(dptr, needle, len) == 0) return dptr; } return NULL; @@ -383,10 +383,10 @@ int ct_sip_get_header(const struct nf_conn *ct, const char *dptr, /* Find header. Compact headers must be followed by a * non-alphabetic character to avoid mismatches. */ if (limit - dptr >= hdr->len && - strnicmp(dptr, hdr->name, hdr->len) == 0) + strncasecmp(dptr, hdr->name, hdr->len) == 0) dptr += hdr->len; else if (hdr->cname && limit - dptr >= hdr->clen + 1 && - strnicmp(dptr, hdr->cname, hdr->clen) == 0 && + strncasecmp(dptr, hdr->cname, hdr->clen) == 0 && !isalpha(*(dptr + hdr->clen))) dptr += hdr->clen; else @@ -620,9 +620,9 @@ static int ct_sip_parse_transport(struct nf_conn *ct, const char *dptr, if (ct_sip_parse_param(ct, dptr, dataoff, datalen, "transport=", &matchoff, &matchlen)) { - if (!strnicmp(dptr + matchoff, "TCP", strlen("TCP"))) + if (!strncasecmp(dptr + matchoff, "TCP", strlen("TCP"))) *proto = IPPROTO_TCP; - else if (!strnicmp(dptr + matchoff, "UDP", strlen("UDP"))) + else if (!strncasecmp(dptr + matchoff, "UDP", strlen("UDP"))) *proto = IPPROTO_UDP; else return 0; @@ -743,10 +743,10 @@ int ct_sip_get_sdp_header(const struct nf_conn *ct, const char *dptr, if (term != SDP_HDR_UNSPEC && limit - dptr >= thdr->len && - strnicmp(dptr, thdr->name, thdr->len) == 0) + strncasecmp(dptr, thdr->name, thdr->len) == 0) break; else if (limit - dptr >= hdr->len && - strnicmp(dptr, hdr->name, hdr->len) == 0) + strncasecmp(dptr, hdr->name, hdr->len) == 0) dptr += hdr->len; else continue; @@ -1394,7 +1394,7 @@ static int process_sip_response(struct sk_buff *skb, unsigned int protoff, if (handler->response == NULL) continue; if (*datalen < matchend + handler->len || - strnicmp(*dptr + matchend, handler->method, handler->len)) + strncasecmp(*dptr + matchend, handler->method, handler->len)) continue; return handler->response(skb, protoff, dataoff, dptr, datalen, cseq, code); @@ -1435,7 +1435,7 @@ static int process_sip_request(struct sk_buff *skb, unsigned int protoff, if (handler->request == NULL) continue; if (*datalen < handler->len || - strnicmp(*dptr, handler->method, handler->len)) + strncasecmp(*dptr, handler->method, handler->len)) continue; if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_CSEQ, @@ -1462,7 +1462,7 @@ static int process_sip_msg(struct sk_buff *skb, struct nf_conn *ct, const struct nf_nat_sip_hooks *hooks; int ret; - if (strnicmp(*dptr, "SIP/2.0 ", strlen("SIP/2.0 ")) != 0) + if (strncasecmp(*dptr, "SIP/2.0 ", strlen("SIP/2.0 ")) != 0) ret = process_sip_request(skb, protoff, dataoff, dptr, datalen); else ret = process_sip_response(skb, protoff, dataoff, dptr, datalen); diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index f641751dba9..cf65a1e040d 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -101,7 +101,7 @@ static void *ct_seq_start(struct seq_file *seq, loff_t *pos) { struct ct_iter_state *st = seq->private; - st->time_now = ktime_to_ns(ktime_get_real()); + st->time_now = ktime_get_real_ns(); rcu_read_lock(); return ct_get_idx(seq, *pos); } diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index daad6022c68..d7197649dba 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -30,7 +30,7 @@ static struct nf_logger *__find_logger(int pf, const char *str_logger) log = rcu_dereference_protected(loggers[pf][i], lockdep_is_held(&nf_log_mutex)); - if (!strnicmp(str_logger, log->name, strlen(log->name))) + if (!strncasecmp(str_logger, log->name, strlen(log->name))) return log; } diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c index eeb8ef4ff1a..a2233e77cf3 100644 --- a/net/netfilter/nf_log_common.c +++ b/net/netfilter/nf_log_common.c @@ -158,7 +158,7 @@ nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf, '0' + loginfo->u.log.level, prefix, in ? in->name : "", out ? out->name : ""); -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) if (skb->nf_bridge) { const struct net_device *physindev; const struct net_device *physoutdev; diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 552f97cd9fd..4e0b47831d4 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -126,7 +126,8 @@ hash_by_src(const struct net *net, u16 zone, /* Original src, to ensure we map it consistently if poss. */ hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32), tuple->dst.protonum ^ zone ^ nf_conntrack_hash_rnd); - return ((u64)hash * net->ct.nat_htable_size) >> 32; + + return reciprocal_scale(hash, net->ct.nat_htable_size); } /* Is this tuple already taken? (not by us) */ @@ -274,7 +275,7 @@ find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple, } var_ipp->all[i] = (__force __u32) - htonl(minip + (((u64)j * dist) >> 32)); + htonl(minip + reciprocal_scale(j, dist)); if (var_ipp->all[i] != range->max_addr.all[i]) full_range = true; diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c index b4d691db955..791fac4fd74 100644 --- a/net/netfilter/nf_nat_sip.c +++ b/net/netfilter/nf_nat_sip.c @@ -155,7 +155,7 @@ static unsigned int nf_nat_sip(struct sk_buff *skb, unsigned int protoff, int request, in_header; /* Basic rules: requests and responses. */ - if (strnicmp(*dptr, "SIP/2.0", strlen("SIP/2.0")) != 0) { + if (strncasecmp(*dptr, "SIP/2.0", strlen("SIP/2.0")) != 0) { if (ct_sip_parse_request(ct, *dptr, *datalen, &matchoff, &matchlen, &addr, &port) > 0 && diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 5d24b1fdb59..4c8b68e5fa1 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -52,7 +52,7 @@ void nf_queue_entry_release_refs(struct nf_queue_entry *entry) dev_put(entry->indev); if (entry->outdev) dev_put(entry->outdev); -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) if (entry->skb->nf_bridge) { struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge; @@ -77,7 +77,7 @@ bool nf_queue_entry_get_refs(struct nf_queue_entry *entry) dev_hold(entry->indev); if (entry->outdev) dev_hold(entry->outdev); -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) if (entry->skb->nf_bridge) { struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge; struct net_device *physdev; diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c index f042ae52155..c68c1e58b36 100644 --- a/net/netfilter/nf_sockopt.c +++ b/net/netfilter/nf_sockopt.c @@ -26,9 +26,7 @@ int nf_register_sockopt(struct nf_sockopt_ops *reg) struct nf_sockopt_ops *ops; int ret = 0; - if (mutex_lock_interruptible(&nf_sockopt_mutex) != 0) - return -EINTR; - + mutex_lock(&nf_sockopt_mutex); list_for_each_entry(ops, &nf_sockopts, list) { if (ops->pf == reg->pf && (overlap(ops->set_optmin, ops->set_optmax, @@ -65,9 +63,7 @@ static struct nf_sockopt_ops *nf_sockopt_find(struct sock *sk, u_int8_t pf, { struct nf_sockopt_ops *ops; - if (mutex_lock_interruptible(&nf_sockopt_mutex) != 0) - return ERR_PTR(-EINTR); - + mutex_lock(&nf_sockopt_mutex); list_for_each_entry(ops, &nf_sockopts, list) { if (ops->pf == pf) { if (!try_module_get(ops->owner)) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index b8035c2d666..11ab4b078f3 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -127,6 +127,204 @@ static void nft_trans_destroy(struct nft_trans *trans) kfree(trans); } +static void nf_tables_unregister_hooks(const struct nft_table *table, + const struct nft_chain *chain, + unsigned int hook_nops) +{ + if (!(table->flags & NFT_TABLE_F_DORMANT) && + chain->flags & NFT_BASE_CHAIN) + nf_unregister_hooks(nft_base_chain(chain)->ops, hook_nops); +} + +/* Internal table flags */ +#define NFT_TABLE_INACTIVE (1 << 15) + +static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type) +{ + struct nft_trans *trans; + + trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_table)); + if (trans == NULL) + return -ENOMEM; + + if (msg_type == NFT_MSG_NEWTABLE) + ctx->table->flags |= NFT_TABLE_INACTIVE; + + list_add_tail(&trans->list, &ctx->net->nft.commit_list); + return 0; +} + +static int nft_deltable(struct nft_ctx *ctx) +{ + int err; + + err = nft_trans_table_add(ctx, NFT_MSG_DELTABLE); + if (err < 0) + return err; + + list_del_rcu(&ctx->table->list); + return err; +} + +static int nft_trans_chain_add(struct nft_ctx *ctx, int msg_type) +{ + struct nft_trans *trans; + + trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_chain)); + if (trans == NULL) + return -ENOMEM; + + if (msg_type == NFT_MSG_NEWCHAIN) + ctx->chain->flags |= NFT_CHAIN_INACTIVE; + + list_add_tail(&trans->list, &ctx->net->nft.commit_list); + return 0; +} + +static int nft_delchain(struct nft_ctx *ctx) +{ + int err; + + err = nft_trans_chain_add(ctx, NFT_MSG_DELCHAIN); + if (err < 0) + return err; + + ctx->table->use--; + list_del_rcu(&ctx->chain->list); + + return err; +} + +static inline bool +nft_rule_is_active(struct net *net, const struct nft_rule *rule) +{ + return (rule->genmask & (1 << net->nft.gencursor)) == 0; +} + +static inline int gencursor_next(struct net *net) +{ + return net->nft.gencursor+1 == 1 ? 1 : 0; +} + +static inline int +nft_rule_is_active_next(struct net *net, const struct nft_rule *rule) +{ + return (rule->genmask & (1 << gencursor_next(net))) == 0; +} + +static inline void +nft_rule_activate_next(struct net *net, struct nft_rule *rule) +{ + /* Now inactive, will be active in the future */ + rule->genmask = (1 << net->nft.gencursor); +} + +static inline void +nft_rule_deactivate_next(struct net *net, struct nft_rule *rule) +{ + rule->genmask = (1 << gencursor_next(net)); +} + +static inline void nft_rule_clear(struct net *net, struct nft_rule *rule) +{ + rule->genmask = 0; +} + +static int +nf_tables_delrule_deactivate(struct nft_ctx *ctx, struct nft_rule *rule) +{ + /* You cannot delete the same rule twice */ + if (nft_rule_is_active_next(ctx->net, rule)) { + nft_rule_deactivate_next(ctx->net, rule); + ctx->chain->use--; + return 0; + } + return -ENOENT; +} + +static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type, + struct nft_rule *rule) +{ + struct nft_trans *trans; + + trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_rule)); + if (trans == NULL) + return NULL; + + nft_trans_rule(trans) = rule; + list_add_tail(&trans->list, &ctx->net->nft.commit_list); + + return trans; +} + +static int nft_delrule(struct nft_ctx *ctx, struct nft_rule *rule) +{ + struct nft_trans *trans; + int err; + + trans = nft_trans_rule_add(ctx, NFT_MSG_DELRULE, rule); + if (trans == NULL) + return -ENOMEM; + + err = nf_tables_delrule_deactivate(ctx, rule); + if (err < 0) { + nft_trans_destroy(trans); + return err; + } + + return 0; +} + +static int nft_delrule_by_chain(struct nft_ctx *ctx) +{ + struct nft_rule *rule; + int err; + + list_for_each_entry(rule, &ctx->chain->rules, list) { + err = nft_delrule(ctx, rule); + if (err < 0) + return err; + } + return 0; +} + +/* Internal set flag */ +#define NFT_SET_INACTIVE (1 << 15) + +static int nft_trans_set_add(struct nft_ctx *ctx, int msg_type, + struct nft_set *set) +{ + struct nft_trans *trans; + + trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_set)); + if (trans == NULL) + return -ENOMEM; + + if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] != NULL) { + nft_trans_set_id(trans) = + ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID])); + set->flags |= NFT_SET_INACTIVE; + } + nft_trans_set(trans) = set; + list_add_tail(&trans->list, &ctx->net->nft.commit_list); + + return 0; +} + +static int nft_delset(struct nft_ctx *ctx, struct nft_set *set) +{ + int err; + + err = nft_trans_set_add(ctx, NFT_MSG_DELSET, set); + if (err < 0) + return err; + + list_del_rcu(&set->list); + ctx->table->use--; + + return err; +} + /* * Tables */ @@ -207,9 +405,9 @@ static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = { [NFTA_TABLE_FLAGS] = { .type = NLA_U32 }, }; -static int nf_tables_fill_table_info(struct sk_buff *skb, u32 portid, u32 seq, - int event, u32 flags, int family, - const struct nft_table *table) +static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, + u32 portid, u32 seq, int event, u32 flags, + int family, const struct nft_table *table) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; @@ -222,7 +420,7 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, u32 portid, u32 seq, nfmsg = nlmsg_data(nlh); nfmsg->nfgen_family = family; nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; + nfmsg->res_id = htons(net->nft.base_seq & 0xffff); if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) || nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) || @@ -250,8 +448,8 @@ static int nf_tables_table_notify(const struct nft_ctx *ctx, int event) if (skb == NULL) goto err; - err = nf_tables_fill_table_info(skb, ctx->portid, ctx->seq, event, 0, - ctx->afi->family, ctx->table); + err = nf_tables_fill_table_info(skb, ctx->net, ctx->portid, ctx->seq, + event, 0, ctx->afi->family, ctx->table); if (err < 0) { kfree_skb(skb); goto err; @@ -290,7 +488,7 @@ static int nf_tables_dump_tables(struct sk_buff *skb, if (idx > s_idx) memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0])); - if (nf_tables_fill_table_info(skb, + if (nf_tables_fill_table_info(skb, net, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFT_MSG_NEWTABLE, @@ -309,9 +507,6 @@ done: return skb->len; } -/* Internal table flags */ -#define NFT_TABLE_INACTIVE (1 << 15) - static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -345,7 +540,7 @@ static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb, if (!skb2) return -ENOMEM; - err = nf_tables_fill_table_info(skb2, NETLINK_CB(skb).portid, + err = nf_tables_fill_table_info(skb2, net, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NFT_MSG_NEWTABLE, 0, family, table); if (err < 0) @@ -443,21 +638,6 @@ err: return ret; } -static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type) -{ - struct nft_trans *trans; - - trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_table)); - if (trans == NULL) - return -ENOMEM; - - if (msg_type == NFT_MSG_NEWTABLE) - ctx->table->flags |= NFT_TABLE_INACTIVE; - - list_add_tail(&trans->list, &ctx->net->nft.commit_list); - return 0; -} - static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -527,6 +707,67 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, return 0; } +static int nft_flush_table(struct nft_ctx *ctx) +{ + int err; + struct nft_chain *chain, *nc; + struct nft_set *set, *ns; + + list_for_each_entry_safe(chain, nc, &ctx->table->chains, list) { + ctx->chain = chain; + + err = nft_delrule_by_chain(ctx); + if (err < 0) + goto out; + + err = nft_delchain(ctx); + if (err < 0) + goto out; + } + + list_for_each_entry_safe(set, ns, &ctx->table->sets, list) { + if (set->flags & NFT_SET_ANONYMOUS && + !list_empty(&set->bindings)) + continue; + + err = nft_delset(ctx, set); + if (err < 0) + goto out; + } + + err = nft_deltable(ctx); +out: + return err; +} + +static int nft_flush(struct nft_ctx *ctx, int family) +{ + struct nft_af_info *afi; + struct nft_table *table, *nt; + const struct nlattr * const *nla = ctx->nla; + int err = 0; + + list_for_each_entry(afi, &ctx->net->nft.af_info, list) { + if (family != AF_UNSPEC && afi->family != family) + continue; + + ctx->afi = afi; + list_for_each_entry_safe(table, nt, &afi->tables, list) { + if (nla[NFTA_TABLE_NAME] && + nla_strcmp(nla[NFTA_TABLE_NAME], table->name) != 0) + continue; + + ctx->table = table; + + err = nft_flush_table(ctx); + if (err < 0) + goto out; + } + } +out: + return err; +} + static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -535,9 +776,13 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb, struct nft_af_info *afi; struct nft_table *table; struct net *net = sock_net(skb->sk); - int family = nfmsg->nfgen_family, err; + int family = nfmsg->nfgen_family; struct nft_ctx ctx; + nft_ctx_init(&ctx, skb, nlh, NULL, NULL, NULL, nla); + if (family == AF_UNSPEC || nla[NFTA_TABLE_NAME] == NULL) + return nft_flush(&ctx, family); + afi = nf_tables_afinfo_lookup(net, family, false); if (IS_ERR(afi)) return PTR_ERR(afi); @@ -547,16 +792,11 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb, return PTR_ERR(table); if (table->flags & NFT_TABLE_INACTIVE) return -ENOENT; - if (table->use > 0) - return -EBUSY; - nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla); - err = nft_trans_table_add(&ctx, NFT_MSG_DELTABLE); - if (err < 0) - return err; + ctx.afi = afi; + ctx.table = table; - list_del_rcu(&table->list); - return 0; + return nft_flush_table(&ctx); } static void nf_tables_table_destroy(struct nft_ctx *ctx) @@ -674,9 +914,9 @@ nla_put_failure: return -ENOSPC; } -static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq, - int event, u32 flags, int family, - const struct nft_table *table, +static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, + u32 portid, u32 seq, int event, u32 flags, + int family, const struct nft_table *table, const struct nft_chain *chain) { struct nlmsghdr *nlh; @@ -690,7 +930,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq, nfmsg = nlmsg_data(nlh); nfmsg->nfgen_family = family; nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; + nfmsg->res_id = htons(net->nft.base_seq & 0xffff); if (nla_put_string(skb, NFTA_CHAIN_TABLE, table->name)) goto nla_put_failure; @@ -748,8 +988,8 @@ static int nf_tables_chain_notify(const struct nft_ctx *ctx, int event) if (skb == NULL) goto err; - err = nf_tables_fill_chain_info(skb, ctx->portid, ctx->seq, event, 0, - ctx->afi->family, ctx->table, + err = nf_tables_fill_chain_info(skb, ctx->net, ctx->portid, ctx->seq, + event, 0, ctx->afi->family, ctx->table, ctx->chain); if (err < 0) { kfree_skb(skb); @@ -791,7 +1031,8 @@ static int nf_tables_dump_chains(struct sk_buff *skb, if (idx > s_idx) memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0])); - if (nf_tables_fill_chain_info(skb, NETLINK_CB(cb->skb).portid, + if (nf_tables_fill_chain_info(skb, net, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFT_MSG_NEWCHAIN, NLM_F_MULTI, @@ -850,7 +1091,7 @@ static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb, if (!skb2) return -ENOMEM; - err = nf_tables_fill_chain_info(skb2, NETLINK_CB(skb).portid, + err = nf_tables_fill_chain_info(skb2, net, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NFT_MSG_NEWCHAIN, 0, family, table, chain); if (err < 0) @@ -899,6 +1140,9 @@ static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr) static void nft_chain_stats_replace(struct nft_base_chain *chain, struct nft_stats __percpu *newstats) { + if (newstats == NULL) + return; + if (chain->stats) { struct nft_stats __percpu *oldstats = nft_dereference(chain->stats); @@ -910,21 +1154,6 @@ static void nft_chain_stats_replace(struct nft_base_chain *chain, rcu_assign_pointer(chain->stats, newstats); } -static int nft_trans_chain_add(struct nft_ctx *ctx, int msg_type) -{ - struct nft_trans *trans; - - trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_chain)); - if (trans == NULL) - return -ENOMEM; - - if (msg_type == NFT_MSG_NEWCHAIN) - ctx->chain->flags |= NFT_CHAIN_INACTIVE; - - list_add_tail(&trans->list, &ctx->net->nft.commit_list); - return 0; -} - static void nf_tables_chain_destroy(struct nft_chain *chain) { BUG_ON(chain->use > 0); @@ -1099,10 +1328,10 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, basechain->stats = stats; } else { stats = netdev_alloc_pcpu_stats(struct nft_stats); - if (IS_ERR(stats)) { + if (stats == NULL) { module_put(type->owner); kfree(basechain); - return PTR_ERR(stats); + return -ENOMEM; } rcu_assign_pointer(basechain->stats, stats); } @@ -1154,11 +1383,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, list_add_tail_rcu(&chain->list, &table->chains); return 0; err2: - if (!(table->flags & NFT_TABLE_F_DORMANT) && - chain->flags & NFT_BASE_CHAIN) { - nf_unregister_hooks(nft_base_chain(chain)->ops, - afi->nops); - } + nf_tables_unregister_hooks(table, chain, afi->nops); err1: nf_tables_chain_destroy(chain); return err; @@ -1175,7 +1400,6 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; struct nft_ctx ctx; - int err; afi = nf_tables_afinfo_lookup(net, family, false); if (IS_ERR(afi)) @@ -1196,13 +1420,8 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, return -EBUSY; nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); - err = nft_trans_chain_add(&ctx, NFT_MSG_DELCHAIN); - if (err < 0) - return err; - table->use--; - list_del_rcu(&chain->list); - return 0; + return nft_delchain(&ctx); } /* @@ -1429,8 +1648,9 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = { .len = NFT_USERDATA_MAXLEN }, }; -static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq, - int event, u32 flags, int family, +static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, + u32 portid, u32 seq, int event, + u32 flags, int family, const struct nft_table *table, const struct nft_chain *chain, const struct nft_rule *rule) @@ -1450,7 +1670,7 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq, nfmsg = nlmsg_data(nlh); nfmsg->nfgen_family = family; nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; + nfmsg->res_id = htons(net->nft.base_seq & 0xffff); if (nla_put_string(skb, NFTA_RULE_TABLE, table->name)) goto nla_put_failure; @@ -1506,8 +1726,8 @@ static int nf_tables_rule_notify(const struct nft_ctx *ctx, if (skb == NULL) goto err; - err = nf_tables_fill_rule_info(skb, ctx->portid, ctx->seq, event, 0, - ctx->afi->family, ctx->table, + err = nf_tables_fill_rule_info(skb, ctx->net, ctx->portid, ctx->seq, + event, 0, ctx->afi->family, ctx->table, ctx->chain, rule); if (err < 0) { kfree_skb(skb); @@ -1524,41 +1744,6 @@ err: return err; } -static inline bool -nft_rule_is_active(struct net *net, const struct nft_rule *rule) -{ - return (rule->genmask & (1 << net->nft.gencursor)) == 0; -} - -static inline int gencursor_next(struct net *net) -{ - return net->nft.gencursor+1 == 1 ? 1 : 0; -} - -static inline int -nft_rule_is_active_next(struct net *net, const struct nft_rule *rule) -{ - return (rule->genmask & (1 << gencursor_next(net))) == 0; -} - -static inline void -nft_rule_activate_next(struct net *net, struct nft_rule *rule) -{ - /* Now inactive, will be active in the future */ - rule->genmask = (1 << net->nft.gencursor); -} - -static inline void -nft_rule_disactivate_next(struct net *net, struct nft_rule *rule) -{ - rule->genmask = (1 << gencursor_next(net)); -} - -static inline void nft_rule_clear(struct net *net, struct nft_rule *rule) -{ - rule->genmask = 0; -} - static int nf_tables_dump_rules(struct sk_buff *skb, struct netlink_callback *cb) { @@ -1588,7 +1773,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb, if (idx > s_idx) memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0])); - if (nf_tables_fill_rule_info(skb, NETLINK_CB(cb->skb).portid, + if (nf_tables_fill_rule_info(skb, net, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFT_MSG_NEWRULE, NLM_F_MULTI | NLM_F_APPEND, @@ -1654,7 +1839,7 @@ static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb, if (!skb2) return -ENOMEM; - err = nf_tables_fill_rule_info(skb2, NETLINK_CB(skb).portid, + err = nf_tables_fill_rule_info(skb2, net, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0, family, table, chain, rule); if (err < 0) @@ -1684,21 +1869,6 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx, kfree(rule); } -static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type, - struct nft_rule *rule) -{ - struct nft_trans *trans; - - trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_rule)); - if (trans == NULL) - return NULL; - - nft_trans_rule(trans) = rule; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); - - return trans; -} - #define NFT_RULE_MAXEXPRS 128 static struct nft_expr_info *info; @@ -1820,7 +1990,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, err = -ENOMEM; goto err2; } - nft_rule_disactivate_next(net, old_rule); + nft_rule_deactivate_next(net, old_rule); chain->use--; list_add_tail_rcu(&rule->list, &old_rule->list); } else { @@ -1864,33 +2034,6 @@ err1: return err; } -static int -nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule) -{ - /* You cannot delete the same rule twice */ - if (nft_rule_is_active_next(ctx->net, rule)) { - if (nft_trans_rule_add(ctx, NFT_MSG_DELRULE, rule) == NULL) - return -ENOMEM; - nft_rule_disactivate_next(ctx->net, rule); - ctx->chain->use--; - return 0; - } - return -ENOENT; -} - -static int nf_table_delrule_by_chain(struct nft_ctx *ctx) -{ - struct nft_rule *rule; - int err; - - list_for_each_entry(rule, &ctx->chain->rules, list) { - err = nf_tables_delrule_one(ctx, rule); - if (err < 0) - return err; - } - return 0; -} - static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -1929,14 +2072,14 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(rule)) return PTR_ERR(rule); - err = nf_tables_delrule_one(&ctx, rule); + err = nft_delrule(&ctx, rule); } else { - err = nf_table_delrule_by_chain(&ctx); + err = nft_delrule_by_chain(&ctx); } } else { list_for_each_entry(chain, &table->chains, list) { ctx.chain = chain; - err = nf_table_delrule_by_chain(&ctx); + err = nft_delrule_by_chain(&ctx); if (err < 0) break; } @@ -2180,7 +2323,7 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, nfmsg = nlmsg_data(nlh); nfmsg->nfgen_family = ctx->afi->family; nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; + nfmsg->res_id = htons(ctx->net->nft.base_seq & 0xffff); if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name)) goto nla_put_failure; @@ -2201,6 +2344,11 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, goto nla_put_failure; } + if (set->policy != NFT_SET_POL_PERFORMANCE) { + if (nla_put_be32(skb, NFTA_SET_POLICY, htonl(set->policy))) + goto nla_put_failure; + } + desc = nla_nest_start(skb, NFTA_SET_DESC); if (desc == NULL) goto nla_put_failure; @@ -2319,8 +2467,6 @@ static int nf_tables_dump_sets_done(struct netlink_callback *cb) return 0; } -#define NFT_SET_INACTIVE (1 << 15) /* Internal set flag */ - static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -2395,26 +2541,6 @@ static int nf_tables_set_desc_parse(const struct nft_ctx *ctx, return 0; } -static int nft_trans_set_add(struct nft_ctx *ctx, int msg_type, - struct nft_set *set) -{ - struct nft_trans *trans; - - trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_set)); - if (trans == NULL) - return -ENOMEM; - - if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] != NULL) { - nft_trans_set_id(trans) = - ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID])); - set->flags |= NFT_SET_INACTIVE; - } - nft_trans_set(trans) = set; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); - - return 0; -} - static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -2548,6 +2674,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, set->dlen = desc.dlen; set->flags = flags; set->size = desc.size; + set->policy = policy; err = ops->init(set, &desc, nla); if (err < 0) @@ -2608,13 +2735,7 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb, if (!list_empty(&set->bindings)) return -EBUSY; - err = nft_trans_set_add(&ctx, NFT_MSG_DELSET, set); - if (err < 0) - return err; - - list_del_rcu(&set->list); - ctx.table->use--; - return 0; + return nft_delset(&ctx, set); } static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx, @@ -2812,7 +2933,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) nfmsg = nlmsg_data(nlh); nfmsg->nfgen_family = ctx.afi->family; nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; + nfmsg->res_id = htons(ctx.net->nft.base_seq & 0xffff); if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, ctx.table->name)) goto nla_put_failure; @@ -2893,7 +3014,7 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb, nfmsg = nlmsg_data(nlh); nfmsg->nfgen_family = ctx->afi->family; nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; + nfmsg->res_id = htons(ctx->net->nft.base_seq & 0xffff); if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name)) goto nla_put_failure; @@ -3134,16 +3255,13 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, goto err2; trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set); - if (trans == NULL) + if (trans == NULL) { + err = -ENOMEM; goto err2; + } nft_trans_elem(trans) = elem; list_add_tail(&trans->list, &ctx->net->nft.commit_list); - - nft_data_uninit(&elem.key, NFT_DATA_VALUE); - if (set->flags & NFT_SET_MAP) - nft_data_uninit(&elem.data, set->dtype); - return 0; err2: nft_data_uninit(&elem.key, desc.type); @@ -3183,6 +3301,87 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb, return err; } +static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, + u32 portid, u32 seq) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + int event = (NFNL_SUBSYS_NFTABLES << 8) | NFT_MSG_NEWGEN; + + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), 0); + if (nlh == NULL) + goto nla_put_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = AF_UNSPEC; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = htons(net->nft.base_seq & 0xffff); + + if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq))) + goto nla_put_failure; + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_trim(skb, nlh); + return -EMSGSIZE; +} + +static int nf_tables_gen_notify(struct net *net, struct sk_buff *skb, int event) +{ + struct nlmsghdr *nlh = nlmsg_hdr(skb); + struct sk_buff *skb2; + int err; + + if (nlmsg_report(nlh) && + !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) + return 0; + + err = -ENOBUFS; + skb2 = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb2 == NULL) + goto err; + + err = nf_tables_fill_gen_info(skb2, net, NETLINK_CB(skb).portid, + nlh->nlmsg_seq); + if (err < 0) { + kfree_skb(skb2); + goto err; + } + + err = nfnetlink_send(skb2, net, NETLINK_CB(skb).portid, + NFNLGRP_NFTABLES, nlmsg_report(nlh), GFP_KERNEL); +err: + if (err < 0) { + nfnetlink_set_err(net, NETLINK_CB(skb).portid, NFNLGRP_NFTABLES, + err); + } + return err; +} + +static int nf_tables_getgen(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + struct net *net = sock_net(skb->sk); + struct sk_buff *skb2; + int err; + + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb2 == NULL) + return -ENOMEM; + + err = nf_tables_fill_gen_info(skb2, net, NETLINK_CB(skb).portid, + nlh->nlmsg_seq); + if (err < 0) + goto err; + + return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid); +err: + kfree_skb(skb2); + return err; +} + static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { [NFT_MSG_NEWTABLE] = { .call_batch = nf_tables_newtable, @@ -3259,6 +3458,9 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .attr_count = NFTA_SET_ELEM_LIST_MAX, .policy = nft_set_elem_list_policy, }, + [NFT_MSG_GETGEN] = { + .call = nf_tables_getgen, + }, }; static void nft_chain_commit_update(struct nft_trans *trans) @@ -3310,7 +3512,7 @@ static int nf_tables_commit(struct sk_buff *skb) { struct net *net = sock_net(skb->sk); struct nft_trans *trans, *next; - struct nft_set *set; + struct nft_trans_elem *te; /* Bump generation counter, invalidate any dump in progress */ while (++net->nft.base_seq == 0); @@ -3352,11 +3554,9 @@ static int nf_tables_commit(struct sk_buff *skb) break; case NFT_MSG_DELCHAIN: nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN); - if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT) && - trans->ctx.chain->flags & NFT_BASE_CHAIN) { - nf_unregister_hooks(nft_base_chain(trans->ctx.chain)->ops, - trans->ctx.afi->nops); - } + nf_tables_unregister_hooks(trans->ctx.table, + trans->ctx.chain, + trans->ctx.afi->nops); break; case NFT_MSG_NEWRULE: nft_rule_clear(trans->ctx.net, nft_trans_rule(trans)); @@ -3396,13 +3596,17 @@ static int nf_tables_commit(struct sk_buff *skb) nft_trans_destroy(trans); break; case NFT_MSG_DELSETELEM: - nf_tables_setelem_notify(&trans->ctx, - nft_trans_elem_set(trans), - &nft_trans_elem(trans), + te = (struct nft_trans_elem *)trans->data; + nf_tables_setelem_notify(&trans->ctx, te->set, + &te->elem, NFT_MSG_DELSETELEM, 0); - set = nft_trans_elem_set(trans); - set->ops->get(set, &nft_trans_elem(trans)); - set->ops->remove(set, &nft_trans_elem(trans)); + te->set->ops->get(te->set, &te->elem); + te->set->ops->remove(te->set, &te->elem); + nft_data_uninit(&te->elem.key, NFT_DATA_VALUE); + if (te->elem.flags & NFT_SET_MAP) { + nft_data_uninit(&te->elem.data, + te->set->dtype); + } nft_trans_destroy(trans); break; } @@ -3414,6 +3618,8 @@ static int nf_tables_commit(struct sk_buff *skb) call_rcu(&trans->rcu_head, nf_tables_commit_release_rcu); } + nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); + return 0; } @@ -3475,11 +3681,9 @@ static int nf_tables_abort(struct sk_buff *skb) } else { trans->ctx.table->use--; list_del_rcu(&trans->ctx.chain->list); - if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT) && - trans->ctx.chain->flags & NFT_BASE_CHAIN) { - nf_unregister_hooks(nft_base_chain(trans->ctx.chain)->ops, - trans->ctx.afi->nops); - } + nf_tables_unregister_hooks(trans->ctx.table, + trans->ctx.chain, + trans->ctx.afi->nops); } break; case NFT_MSG_DELCHAIN: @@ -3540,6 +3744,20 @@ static const struct nfnetlink_subsystem nf_tables_subsys = { .abort = nf_tables_abort, }; +int nft_chain_validate_dependency(const struct nft_chain *chain, + enum nft_chain_type type) +{ + const struct nft_base_chain *basechain; + + if (chain->flags & NFT_BASE_CHAIN) { + basechain = nft_base_chain(chain); + if (basechain->type->type != type) + return -EOPNOTSUPP; + } + return 0; +} +EXPORT_SYMBOL_GPL(nft_chain_validate_dependency); + /* * Loop detection - walk through the ruleset beginning at the destination chain * of a new jump until either the source chain is reached (loop) or all @@ -3959,6 +4177,7 @@ static void __exit nf_tables_module_exit(void) { unregister_pernet_subsys(&nf_tables_net_ops); nfnetlink_subsys_unregister(&nf_tables_subsys); + rcu_barrier(); nf_tables_core_module_exit(); kfree(info); } diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index c138b8fbe28..6c5a915cfa7 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -222,6 +222,51 @@ replay: } } +struct nfnl_err { + struct list_head head; + struct nlmsghdr *nlh; + int err; +}; + +static int nfnl_err_add(struct list_head *list, struct nlmsghdr *nlh, int err) +{ + struct nfnl_err *nfnl_err; + + nfnl_err = kmalloc(sizeof(struct nfnl_err), GFP_KERNEL); + if (nfnl_err == NULL) + return -ENOMEM; + + nfnl_err->nlh = nlh; + nfnl_err->err = err; + list_add_tail(&nfnl_err->head, list); + + return 0; +} + +static void nfnl_err_del(struct nfnl_err *nfnl_err) +{ + list_del(&nfnl_err->head); + kfree(nfnl_err); +} + +static void nfnl_err_reset(struct list_head *err_list) +{ + struct nfnl_err *nfnl_err, *next; + + list_for_each_entry_safe(nfnl_err, next, err_list, head) + nfnl_err_del(nfnl_err); +} + +static void nfnl_err_deliver(struct list_head *err_list, struct sk_buff *skb) +{ + struct nfnl_err *nfnl_err, *next; + + list_for_each_entry_safe(nfnl_err, next, err_list, head) { + netlink_ack(skb, nfnl_err->nlh, nfnl_err->err); + nfnl_err_del(nfnl_err); + } +} + static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, u_int16_t subsys_id) { @@ -230,6 +275,7 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, const struct nfnetlink_subsystem *ss; const struct nfnl_callback *nc; bool success = true, done = false; + static LIST_HEAD(err_list); int err; if (subsys_id >= NFNL_SUBSYS_COUNT) @@ -287,6 +333,7 @@ replay: type = nlh->nlmsg_type; if (type == NFNL_MSG_BATCH_BEGIN) { /* Malformed: Batch begin twice */ + nfnl_err_reset(&err_list); success = false; goto done; } else if (type == NFNL_MSG_BATCH_END) { @@ -333,7 +380,8 @@ replay: * original skb. */ if (err == -EAGAIN) { - ss->abort(skb); + nfnl_err_reset(&err_list); + ss->abort(oskb); nfnl_unlock(subsys_id); kfree_skb(nskb); goto replay; @@ -341,11 +389,24 @@ replay: } ack: if (nlh->nlmsg_flags & NLM_F_ACK || err) { + /* Errors are delivered once the full batch has been + * processed, this avoids that the same error is + * reported several times when replaying the batch. + */ + if (nfnl_err_add(&err_list, nlh, err) < 0) { + /* We failed to enqueue an error, reset the + * list of errors and send OOM to userspace + * pointing to the batch header. + */ + nfnl_err_reset(&err_list); + netlink_ack(skb, nlmsg_hdr(oskb), -ENOMEM); + success = false; + goto done; + } /* We don't stop processing the batch on errors, thus, * userspace gets all the errors that the batch * triggers. */ - netlink_ack(skb, nlh, err); if (err) success = false; } @@ -357,10 +418,11 @@ ack: } done: if (success && done) - ss->commit(skb); + ss->commit(oskb); else - ss->abort(skb); + ss->abort(oskb); + nfnl_err_deliver(&err_list, oskb); nfnl_unlock(subsys_id); kfree_skb(nskb); } diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index 3ea0eacbd97..c18af2f63ee 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -40,6 +40,11 @@ struct nf_acct { char data[0]; }; +struct nfacct_filter { + u32 value; + u32 mask; +}; + #define NFACCT_F_QUOTA (NFACCT_F_QUOTA_PKTS | NFACCT_F_QUOTA_BYTES) #define NFACCT_OVERQUOTA_BIT 2 /* NFACCT_F_OVERQUOTA */ @@ -181,6 +186,7 @@ static int nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct nf_acct *cur, *last; + const struct nfacct_filter *filter = cb->data; if (cb->args[2]) return 0; @@ -197,6 +203,10 @@ nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb) last = NULL; } + + if (filter && (cur->flags & filter->mask) != filter->value) + continue; + if (nfnl_acct_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFNL_MSG_TYPE(cb->nlh->nlmsg_type), @@ -211,6 +221,38 @@ nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } +static int nfnl_acct_done(struct netlink_callback *cb) +{ + kfree(cb->data); + return 0; +} + +static const struct nla_policy filter_policy[NFACCT_FILTER_MAX + 1] = { + [NFACCT_FILTER_MASK] = { .type = NLA_U32 }, + [NFACCT_FILTER_VALUE] = { .type = NLA_U32 }, +}; + +static struct nfacct_filter * +nfacct_filter_alloc(const struct nlattr * const attr) +{ + struct nfacct_filter *filter; + struct nlattr *tb[NFACCT_FILTER_MAX + 1]; + int err; + + err = nla_parse_nested(tb, NFACCT_FILTER_MAX, attr, filter_policy); + if (err < 0) + return ERR_PTR(err); + + filter = kzalloc(sizeof(struct nfacct_filter), GFP_KERNEL); + if (!filter) + return ERR_PTR(-ENOMEM); + + filter->mask = ntohl(nla_get_be32(tb[NFACCT_FILTER_MASK])); + filter->value = ntohl(nla_get_be32(tb[NFACCT_FILTER_VALUE])); + + return filter; +} + static int nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const tb[]) @@ -222,7 +264,18 @@ nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb, if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .dump = nfnl_acct_dump, + .done = nfnl_acct_done, }; + + if (tb[NFACCT_FILTER]) { + struct nfacct_filter *filter; + + filter = nfacct_filter_alloc(tb[NFACCT_FILTER]); + if (IS_ERR(filter)) + return PTR_ERR(filter); + + c.data = filter; + } return netlink_dump_start(nfnl, skb, nlh, &c); } @@ -314,6 +367,7 @@ static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = { [NFACCT_PKTS] = { .type = NLA_U64 }, [NFACCT_FLAGS] = { .type = NLA_U32 }, [NFACCT_QUOTA] = { .type = NLA_U64 }, + [NFACCT_FILTER] = {.type = NLA_NESTED }, }; static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = { diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index a11c5ff2f72..5f1be5ba355 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -36,14 +36,15 @@ #include <linux/atomic.h> -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) #include "../bridge/br_private.h" #endif #define NFULNL_NLBUFSIZ_DEFAULT NLMSG_GOODSIZE #define NFULNL_TIMEOUT_DEFAULT 100 /* every second */ #define NFULNL_QTHRESH_DEFAULT 100 /* 100 packets */ -#define NFULNL_COPY_RANGE_MAX 0xFFFF /* max packet size is limited by 16-bit struct nfattr nfa_len field */ +/* max packet size is limited by 16-bit struct nfattr nfa_len field */ +#define NFULNL_COPY_RANGE_MAX (0xFFFF - NLA_HDRLEN) #define PRINTR(x, args...) do { if (net_ratelimit()) \ printk(x, ## args); } while (0); @@ -252,6 +253,8 @@ nfulnl_set_mode(struct nfulnl_instance *inst, u_int8_t mode, case NFULNL_COPY_PACKET: inst->copy_mode = mode; + if (range == 0) + range = NFULNL_COPY_RANGE_MAX; inst->copy_range = min_t(unsigned int, range, NFULNL_COPY_RANGE_MAX); break; @@ -343,26 +346,25 @@ nfulnl_alloc_skb(struct net *net, u32 peer_portid, unsigned int inst_size, return skb; } -static int +static void __nfulnl_send(struct nfulnl_instance *inst) { - int status = -1; - if (inst->qlen > 1) { struct nlmsghdr *nlh = nlmsg_put(inst->skb, 0, 0, NLMSG_DONE, sizeof(struct nfgenmsg), 0); - if (!nlh) + if (WARN_ONCE(!nlh, "bad nlskb size: %u, tailroom %d\n", + inst->skb->len, skb_tailroom(inst->skb))) { + kfree_skb(inst->skb); goto out; + } } - status = nfnetlink_unicast(inst->skb, inst->net, inst->peer_portid, - MSG_DONTWAIT); - + nfnetlink_unicast(inst->skb, inst->net, inst->peer_portid, + MSG_DONTWAIT); +out: inst->qlen = 0; inst->skb = NULL; -out: - return status; } static void @@ -429,7 +431,7 @@ __build_packet_message(struct nfnl_log_net *log, goto nla_put_failure; if (indev) { -#ifndef CONFIG_BRIDGE_NETFILTER +#if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER) if (nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV, htonl(indev->ifindex))) goto nla_put_failure; @@ -460,7 +462,7 @@ __build_packet_message(struct nfnl_log_net *log, } if (outdev) { -#ifndef CONFIG_BRIDGE_NETFILTER +#if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER) if (nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV, htonl(outdev->ifindex))) goto nla_put_failure; @@ -640,7 +642,7 @@ nfulnl_log_packet(struct net *net, + nla_total_size(sizeof(struct nfulnl_msg_packet_hdr)) + nla_total_size(sizeof(u_int32_t)) /* ifindex */ + nla_total_size(sizeof(u_int32_t)) /* ifindex */ -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + nla_total_size(sizeof(u_int32_t)) /* ifindex */ + nla_total_size(sizeof(u_int32_t)) /* ifindex */ #endif @@ -649,7 +651,8 @@ nfulnl_log_packet(struct net *net, + nla_total_size(sizeof(u_int32_t)) /* gid */ + nla_total_size(plen) /* prefix */ + nla_total_size(sizeof(struct nfulnl_msg_packet_hw)) - + nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp)); + + nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp)) + + nla_total_size(sizeof(struct nfgenmsg)); /* NLMSG_DONE */ if (in && skb_mac_header_was_set(skb)) { size += nla_total_size(skb->dev->hard_header_len) @@ -678,8 +681,7 @@ nfulnl_log_packet(struct net *net, break; case NFULNL_COPY_PACKET: - if (inst->copy_range == 0 - || inst->copy_range > skb->len) + if (inst->copy_range > skb->len) data_len = skb->len; else data_len = inst->copy_range; @@ -692,8 +694,7 @@ nfulnl_log_packet(struct net *net, goto unlock_and_release; } - if (inst->skb && - size > skb_tailroom(inst->skb) - sizeof(struct nfgenmsg)) { + if (inst->skb && size > skb_tailroom(inst->skb)) { /* either the queue len is too high or we don't have * enough room in the skb left. flush to userspace. */ __nfulnl_flush(inst); diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 108120f216b..7c60ccd61a3 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -36,7 +36,7 @@ #include <linux/atomic.h> -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) #include "../bridge/br_private.h" #endif @@ -302,7 +302,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr)) + nla_total_size(sizeof(u_int32_t)) /* ifindex */ + nla_total_size(sizeof(u_int32_t)) /* ifindex */ -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + nla_total_size(sizeof(u_int32_t)) /* ifindex */ + nla_total_size(sizeof(u_int32_t)) /* ifindex */ #endif @@ -380,7 +380,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, indev = entry->indev; if (indev) { -#ifndef CONFIG_BRIDGE_NETFILTER +#if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER) if (nla_put_be32(skb, NFQA_IFINDEX_INDEV, htonl(indev->ifindex))) goto nla_put_failure; #else @@ -410,7 +410,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, } if (outdev) { -#ifndef CONFIG_BRIDGE_NETFILTER +#if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER) if (nla_put_be32(skb, NFQA_IFINDEX_OUTDEV, htonl(outdev->ifindex))) goto nla_put_failure; #else @@ -569,7 +569,7 @@ nf_queue_entry_dup(struct nf_queue_entry *e) return NULL; } -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) /* When called from bridge netfilter, skb->data must point to MAC header * before calling skb_gso_segment(). Else, original MAC header is lost * and segmented skbs will be sent to wrong destination. @@ -665,7 +665,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) * returned by nf_queue. For instance, callers rely on -ECANCELED to * mean 'ignore this hook'. */ - if (IS_ERR(segs)) + if (IS_ERR_OR_NULL(segs)) goto out_err; queued = 0; err = 0; @@ -763,7 +763,7 @@ dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex) if (entry->outdev) if (entry->outdev->ifindex == ifindex) return 1; -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) if (entry->skb->nf_bridge) { if (entry->skb->nf_bridge->physindev && entry->skb->nf_bridge->physindev->ifindex == ifindex) diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 1840989092e..9d6d6f60a80 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -19,9 +19,52 @@ #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> -#include <asm/uaccess.h> /* for set_fs */ #include <net/netfilter/nf_tables.h> +static const struct { + const char *name; + u8 type; +} table_to_chaintype[] = { + { "filter", NFT_CHAIN_T_DEFAULT }, + { "raw", NFT_CHAIN_T_DEFAULT }, + { "security", NFT_CHAIN_T_DEFAULT }, + { "mangle", NFT_CHAIN_T_ROUTE }, + { "nat", NFT_CHAIN_T_NAT }, + { }, +}; + +static int nft_compat_table_to_chaintype(const char *table) +{ + int i; + + for (i = 0; table_to_chaintype[i].name != NULL; i++) { + if (strcmp(table_to_chaintype[i].name, table) == 0) + return table_to_chaintype[i].type; + } + + return -1; +} + +static int nft_compat_chain_validate_dependency(const char *tablename, + const struct nft_chain *chain) +{ + enum nft_chain_type type; + const struct nft_base_chain *basechain; + + if (!tablename || !(chain->flags & NFT_BASE_CHAIN)) + return 0; + + type = nft_compat_table_to_chaintype(tablename); + if (type < 0) + return -EINVAL; + + basechain = nft_base_chain(chain); + if (basechain->type->type != type) + return -EINVAL; + + return 0; +} + union nft_entry { struct ipt_entry e4; struct ip6t_entry e6; @@ -95,32 +138,20 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par, const struct nf_hook_ops *ops = &basechain->ops[0]; par->hook_mask = 1 << ops->hooknum; + } else { + par->hook_mask = 0; } par->family = ctx->afi->family; } static void target_compat_from_user(struct xt_target *t, void *in, void *out) { -#ifdef CONFIG_COMPAT - if (t->compat_from_user) { - int pad; - - t->compat_from_user(out, in); - pad = XT_ALIGN(t->targetsize) - t->targetsize; - if (pad > 0) - memset(out + t->targetsize, 0, pad); - } else -#endif - memcpy(out, in, XT_ALIGN(t->targetsize)); -} + int pad; -static inline int nft_compat_target_offset(struct xt_target *target) -{ -#ifdef CONFIG_COMPAT - return xt_compat_target_offset(target); -#else - return 0; -#endif + memcpy(out, in, t->targetsize); + pad = XT_ALIGN(t->targetsize) - t->targetsize; + if (pad > 0) + memset(out + t->targetsize, 0, pad); } static const struct nla_policy nft_rule_compat_policy[NFTA_RULE_COMPAT_MAX + 1] = { @@ -165,6 +196,10 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, union nft_entry e = {}; int ret; + ret = nft_compat_chain_validate_dependency(target->table, ctx->chain); + if (ret < 0) + goto err; + target_compat_from_user(target, nla_data(tb[NFTA_TARGET_INFO]), info); if (ctx->nla[NFTA_RULE_COMPAT]) { @@ -208,34 +243,6 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) module_put(target->me); } -static int -target_dump_info(struct sk_buff *skb, const struct xt_target *t, const void *in) -{ - int ret; - -#ifdef CONFIG_COMPAT - if (t->compat_to_user) { - mm_segment_t old_fs; - void *out; - - out = kmalloc(XT_ALIGN(t->targetsize), GFP_ATOMIC); - if (out == NULL) - return -ENOMEM; - - /* We want to reuse existing compat_to_user */ - old_fs = get_fs(); - set_fs(KERNEL_DS); - t->compat_to_user(out, in); - set_fs(old_fs); - ret = nla_put(skb, NFTA_TARGET_INFO, XT_ALIGN(t->targetsize), out); - kfree(out); - } else -#endif - ret = nla_put(skb, NFTA_TARGET_INFO, XT_ALIGN(t->targetsize), in); - - return ret; -} - static int nft_target_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct xt_target *target = expr->ops->data; @@ -243,7 +250,7 @@ static int nft_target_dump(struct sk_buff *skb, const struct nft_expr *expr) if (nla_put_string(skb, NFTA_TARGET_NAME, target->name) || nla_put_be32(skb, NFTA_TARGET_REV, htonl(target->revision)) || - target_dump_info(skb, target, info)) + nla_put(skb, NFTA_TARGET_INFO, XT_ALIGN(target->targetsize), info)) goto nla_put_failure; return 0; @@ -258,6 +265,7 @@ static int nft_target_validate(const struct nft_ctx *ctx, { struct xt_target *target = expr->ops->data; unsigned int hook_mask = 0; + int ret; if (ctx->chain->flags & NFT_BASE_CHAIN) { const struct nft_base_chain *basechain = @@ -265,11 +273,13 @@ static int nft_target_validate(const struct nft_ctx *ctx, const struct nf_hook_ops *ops = &basechain->ops[0]; hook_mask = 1 << ops->hooknum; - if (hook_mask & target->hooks) - return 0; + if (!(hook_mask & target->hooks)) + return -EINVAL; - /* This target is being called from an invalid chain */ - return -EINVAL; + ret = nft_compat_chain_validate_dependency(target->table, + ctx->chain); + if (ret < 0) + return ret; } return 0; } @@ -335,23 +345,20 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx, const struct nf_hook_ops *ops = &basechain->ops[0]; par->hook_mask = 1 << ops->hooknum; + } else { + par->hook_mask = 0; } par->family = ctx->afi->family; } static void match_compat_from_user(struct xt_match *m, void *in, void *out) { -#ifdef CONFIG_COMPAT - if (m->compat_from_user) { - int pad; - - m->compat_from_user(out, in); - pad = XT_ALIGN(m->matchsize) - m->matchsize; - if (pad > 0) - memset(out + m->matchsize, 0, pad); - } else -#endif - memcpy(out, in, XT_ALIGN(m->matchsize)); + int pad; + + memcpy(out, in, m->matchsize); + pad = XT_ALIGN(m->matchsize) - m->matchsize; + if (pad > 0) + memset(out + m->matchsize, 0, pad); } static int @@ -367,6 +374,10 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, union nft_entry e = {}; int ret; + ret = nft_compat_chain_validate_dependency(match->name, ctx->chain); + if (ret < 0) + goto err; + match_compat_from_user(match, nla_data(tb[NFTA_MATCH_INFO]), info); if (ctx->nla[NFTA_RULE_COMPAT]) { @@ -404,43 +415,6 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) module_put(match->me); } -static int -match_dump_info(struct sk_buff *skb, const struct xt_match *m, const void *in) -{ - int ret; - -#ifdef CONFIG_COMPAT - if (m->compat_to_user) { - mm_segment_t old_fs; - void *out; - - out = kmalloc(XT_ALIGN(m->matchsize), GFP_ATOMIC); - if (out == NULL) - return -ENOMEM; - - /* We want to reuse existing compat_to_user */ - old_fs = get_fs(); - set_fs(KERNEL_DS); - m->compat_to_user(out, in); - set_fs(old_fs); - ret = nla_put(skb, NFTA_MATCH_INFO, XT_ALIGN(m->matchsize), out); - kfree(out); - } else -#endif - ret = nla_put(skb, NFTA_MATCH_INFO, XT_ALIGN(m->matchsize), in); - - return ret; -} - -static inline int nft_compat_match_offset(struct xt_match *match) -{ -#ifdef CONFIG_COMPAT - return xt_compat_match_offset(match); -#else - return 0; -#endif -} - static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr) { void *info = nft_expr_priv(expr); @@ -448,7 +422,7 @@ static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr) if (nla_put_string(skb, NFTA_MATCH_NAME, match->name) || nla_put_be32(skb, NFTA_MATCH_REV, htonl(match->revision)) || - match_dump_info(skb, match, info)) + nla_put(skb, NFTA_MATCH_INFO, XT_ALIGN(match->matchsize), info)) goto nla_put_failure; return 0; @@ -463,6 +437,7 @@ static int nft_match_validate(const struct nft_ctx *ctx, { struct xt_match *match = expr->ops->data; unsigned int hook_mask = 0; + int ret; if (ctx->chain->flags & NFT_BASE_CHAIN) { const struct nft_base_chain *basechain = @@ -470,11 +445,13 @@ static int nft_match_validate(const struct nft_ctx *ctx, const struct nf_hook_ops *ops = &basechain->ops[0]; hook_mask = 1 << ops->hooknum; - if (hook_mask & match->hooks) - return 0; + if (!(hook_mask & match->hooks)) + return -EINVAL; - /* This match is being called from an invalid chain */ - return -EINVAL; + ret = nft_compat_chain_validate_dependency(match->name, + ctx->chain); + if (ret < 0) + return ret; } return 0; } @@ -643,8 +620,7 @@ nft_match_select_ops(const struct nft_ctx *ctx, return ERR_PTR(-ENOMEM); nft_match->ops.type = &nft_match_type; - nft_match->ops.size = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize) + - nft_compat_match_offset(match)); + nft_match->ops.size = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize)); nft_match->ops.eval = nft_match_eval; nft_match->ops.init = nft_match_init; nft_match->ops.destroy = nft_match_destroy; @@ -696,7 +672,7 @@ nft_target_select_ops(const struct nft_ctx *ctx, family = ctx->afi->family; /* Re-use the existing target if it's already loaded. */ - list_for_each_entry(nft_target, &nft_match_list, head) { + list_for_each_entry(nft_target, &nft_target_list, head) { struct xt_target *target = nft_target->ops.data; if (strcmp(target->name, tg_name) == 0 && @@ -714,8 +690,7 @@ nft_target_select_ops(const struct nft_ctx *ctx, return ERR_PTR(-ENOMEM); nft_target->ops.type = &nft_target_type; - nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize) + - nft_compat_target_offset(target)); + nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize)); nft_target->ops.eval = nft_target_eval; nft_target->ops.init = nft_target_init; nft_target->ops.destroy = nft_target_destroy; diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 28fb8f38e6b..8892b7b6184 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -180,15 +180,17 @@ static int nft_hash_init(const struct nft_set *set, static void nft_hash_destroy(const struct nft_set *set) { const struct rhashtable *priv = nft_set_priv(set); - const struct bucket_table *tbl; + const struct bucket_table *tbl = priv->tbl; struct nft_hash_elem *he, *next; unsigned int i; - tbl = rht_dereference(priv->tbl, priv); - for (i = 0; i < tbl->size; i++) - rht_for_each_entry_safe(he, next, tbl->buckets[i], priv, node) + for (i = 0; i < tbl->size; i++) { + for (he = rht_entry(tbl->buckets[i], struct nft_hash_elem, node); + he != NULL; he = next) { + next = rht_entry(he->node.next, struct nft_hash_elem, node); nft_hash_elem_destroy(set, he); - + } + } rhashtable_destroy(priv); } diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c new file mode 100644 index 00000000000..d1ffd5eb3a9 --- /dev/null +++ b/net/netfilter/nft_masq.c @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nft_masq.h> + +const struct nla_policy nft_masq_policy[NFTA_MASQ_MAX + 1] = { + [NFTA_MASQ_FLAGS] = { .type = NLA_U32 }, +}; +EXPORT_SYMBOL_GPL(nft_masq_policy); + +int nft_masq_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_masq *priv = nft_expr_priv(expr); + int err; + + err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); + if (err < 0) + return err; + + if (tb[NFTA_MASQ_FLAGS] == NULL) + return 0; + + priv->flags = ntohl(nla_get_be32(tb[NFTA_MASQ_FLAGS])); + if (priv->flags & ~NF_NAT_RANGE_MASK) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL_GPL(nft_masq_init); + +int nft_masq_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_masq *priv = nft_expr_priv(expr); + + if (priv->flags == 0) + return 0; + + if (nla_put_be32(skb, NFTA_MASQ_FLAGS, htonl(priv->flags))) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} +EXPORT_SYMBOL_GPL(nft_masq_dump); + +int nft_masq_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nft_data **data) +{ + return nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); +} +EXPORT_SYMBOL_GPL(nft_masq_validate); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>"); diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 852b178c6ae..1e7c076ca63 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -14,6 +14,10 @@ #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/smp.h> #include <net/dst.h> #include <net/sock.h> #include <net/tcp_states.h> /* for TCP_TIME_WAIT */ @@ -124,6 +128,43 @@ void nft_meta_get_eval(const struct nft_expr *expr, dest->data[0] = skb->secmark; break; #endif + case NFT_META_PKTTYPE: + if (skb->pkt_type != PACKET_LOOPBACK) { + dest->data[0] = skb->pkt_type; + break; + } + + switch (pkt->ops->pf) { + case NFPROTO_IPV4: + if (ipv4_is_multicast(ip_hdr(skb)->daddr)) + dest->data[0] = PACKET_MULTICAST; + else + dest->data[0] = PACKET_BROADCAST; + break; + case NFPROTO_IPV6: + if (ipv6_hdr(skb)->daddr.s6_addr[0] == 0xFF) + dest->data[0] = PACKET_MULTICAST; + else + dest->data[0] = PACKET_BROADCAST; + break; + default: + WARN_ON(1); + goto err; + } + break; + case NFT_META_CPU: + dest->data[0] = smp_processor_id(); + break; + case NFT_META_IIFGROUP: + if (in == NULL) + goto err; + dest->data[0] = in->group; + break; + case NFT_META_OIFGROUP: + if (out == NULL) + goto err; + dest->data[0] = out->group; + break; default: WARN_ON(1); goto err; @@ -195,6 +236,10 @@ int nft_meta_get_init(const struct nft_ctx *ctx, #ifdef CONFIG_NETWORK_SECMARK case NFT_META_SECMARK: #endif + case NFT_META_PKTTYPE: + case NFT_META_CPU: + case NFT_META_IIFGROUP: + case NFT_META_OIFGROUP: break; default: return -EOPNOTSUPP; diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index 79ff58cd36d..afe2b0b45ec 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -33,6 +33,7 @@ struct nft_nat { enum nft_registers sreg_proto_max:8; enum nf_nat_manip_type type:8; u8 family; + u16 flags; }; static void nft_nat_eval(const struct nft_expr *expr, @@ -71,6 +72,8 @@ static void nft_nat_eval(const struct nft_expr *expr, range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } + range.flags |= priv->flags; + data[NFT_REG_VERDICT].verdict = nf_nat_setup_info(ct, &range, priv->type); } @@ -82,6 +85,7 @@ static const struct nla_policy nft_nat_policy[NFTA_NAT_MAX + 1] = { [NFTA_NAT_REG_ADDR_MAX] = { .type = NLA_U32 }, [NFTA_NAT_REG_PROTO_MIN] = { .type = NLA_U32 }, [NFTA_NAT_REG_PROTO_MAX] = { .type = NLA_U32 }, + [NFTA_NAT_FLAGS] = { .type = NLA_U32 }, }; static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, @@ -91,7 +95,13 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, u32 family; int err; - if (tb[NFTA_NAT_TYPE] == NULL) + err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); + if (err < 0) + return err; + + if (tb[NFTA_NAT_TYPE] == NULL || + (tb[NFTA_NAT_REG_ADDR_MIN] == NULL && + tb[NFTA_NAT_REG_PROTO_MIN] == NULL)) return -EINVAL; switch (ntohl(nla_get_be32(tb[NFTA_NAT_TYPE]))) { @@ -116,38 +126,50 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, priv->family = family; if (tb[NFTA_NAT_REG_ADDR_MIN]) { - priv->sreg_addr_min = ntohl(nla_get_be32( - tb[NFTA_NAT_REG_ADDR_MIN])); + priv->sreg_addr_min = + ntohl(nla_get_be32(tb[NFTA_NAT_REG_ADDR_MIN])); + err = nft_validate_input_register(priv->sreg_addr_min); if (err < 0) return err; - } - if (tb[NFTA_NAT_REG_ADDR_MAX]) { - priv->sreg_addr_max = ntohl(nla_get_be32( - tb[NFTA_NAT_REG_ADDR_MAX])); - err = nft_validate_input_register(priv->sreg_addr_max); - if (err < 0) - return err; - } else - priv->sreg_addr_max = priv->sreg_addr_min; + if (tb[NFTA_NAT_REG_ADDR_MAX]) { + priv->sreg_addr_max = + ntohl(nla_get_be32(tb[NFTA_NAT_REG_ADDR_MAX])); + + err = nft_validate_input_register(priv->sreg_addr_max); + if (err < 0) + return err; + } else { + priv->sreg_addr_max = priv->sreg_addr_min; + } + } if (tb[NFTA_NAT_REG_PROTO_MIN]) { - priv->sreg_proto_min = ntohl(nla_get_be32( - tb[NFTA_NAT_REG_PROTO_MIN])); + priv->sreg_proto_min = + ntohl(nla_get_be32(tb[NFTA_NAT_REG_PROTO_MIN])); + err = nft_validate_input_register(priv->sreg_proto_min); if (err < 0) return err; + + if (tb[NFTA_NAT_REG_PROTO_MAX]) { + priv->sreg_proto_max = + ntohl(nla_get_be32(tb[NFTA_NAT_REG_PROTO_MAX])); + + err = nft_validate_input_register(priv->sreg_proto_max); + if (err < 0) + return err; + } else { + priv->sreg_proto_max = priv->sreg_proto_min; + } } - if (tb[NFTA_NAT_REG_PROTO_MAX]) { - priv->sreg_proto_max = ntohl(nla_get_be32( - tb[NFTA_NAT_REG_PROTO_MAX])); - err = nft_validate_input_register(priv->sreg_proto_max); - if (err < 0) - return err; - } else - priv->sreg_proto_max = priv->sreg_proto_min; + if (tb[NFTA_NAT_FLAGS]) { + priv->flags = ntohl(nla_get_be32(tb[NFTA_NAT_FLAGS])); + if (priv->flags & ~NF_NAT_RANGE_MASK) + return -EINVAL; + } return 0; } @@ -169,26 +191,41 @@ static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr) if (nla_put_be32(skb, NFTA_NAT_FAMILY, htonl(priv->family))) goto nla_put_failure; - if (nla_put_be32(skb, - NFTA_NAT_REG_ADDR_MIN, htonl(priv->sreg_addr_min))) - goto nla_put_failure; - if (nla_put_be32(skb, - NFTA_NAT_REG_ADDR_MAX, htonl(priv->sreg_addr_max))) - goto nla_put_failure; + + if (priv->sreg_addr_min) { + if (nla_put_be32(skb, NFTA_NAT_REG_ADDR_MIN, + htonl(priv->sreg_addr_min)) || + nla_put_be32(skb, NFTA_NAT_REG_ADDR_MAX, + htonl(priv->sreg_addr_max))) + goto nla_put_failure; + } + if (priv->sreg_proto_min) { if (nla_put_be32(skb, NFTA_NAT_REG_PROTO_MIN, - htonl(priv->sreg_proto_min))) - goto nla_put_failure; - if (nla_put_be32(skb, NFTA_NAT_REG_PROTO_MAX, + htonl(priv->sreg_proto_min)) || + nla_put_be32(skb, NFTA_NAT_REG_PROTO_MAX, htonl(priv->sreg_proto_max))) goto nla_put_failure; } + + if (priv->flags != 0) { + if (nla_put_be32(skb, NFTA_NAT_FLAGS, htonl(priv->flags))) + goto nla_put_failure; + } + return 0; nla_put_failure: return -1; } +static int nft_nat_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + return nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); +} + static struct nft_expr_type nft_nat_type; static const struct nft_expr_ops nft_nat_ops = { .type = &nft_nat_type, @@ -196,6 +233,7 @@ static const struct nft_expr_ops nft_nat_ops = { .eval = nft_nat_eval, .init = nft_nat_init, .dump = nft_nat_dump, + .validate = nft_nat_validate, }; static struct nft_expr_type nft_nat_type __read_mostly = { diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c index e1836ff8819..46214f24566 100644 --- a/net/netfilter/nft_rbtree.c +++ b/net/netfilter/nft_rbtree.c @@ -234,13 +234,11 @@ static void nft_rbtree_destroy(const struct nft_set *set) struct nft_rbtree_elem *rbe; struct rb_node *node; - spin_lock_bh(&nft_rbtree_lock); while ((node = priv->root.rb_node) != NULL) { rb_erase(node, &priv->root); rbe = rb_entry(node, struct nft_rbtree_elem, node); nft_rbtree_elem_destroy(set, rbe); } - spin_unlock_bh(&nft_rbtree_lock); } static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features, diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c index f3448c29644..57d3e1af563 100644 --- a/net/netfilter/nft_reject.c +++ b/net/netfilter/nft_reject.c @@ -17,6 +17,8 @@ #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nft_reject.h> +#include <linux/icmp.h> +#include <linux/icmpv6.h> const struct nla_policy nft_reject_policy[NFTA_REJECT_MAX + 1] = { [NFTA_REJECT_TYPE] = { .type = NLA_U32 }, @@ -70,5 +72,38 @@ nla_put_failure: } EXPORT_SYMBOL_GPL(nft_reject_dump); +static u8 icmp_code_v4[NFT_REJECT_ICMPX_MAX + 1] = { + [NFT_REJECT_ICMPX_NO_ROUTE] = ICMP_NET_UNREACH, + [NFT_REJECT_ICMPX_PORT_UNREACH] = ICMP_PORT_UNREACH, + [NFT_REJECT_ICMPX_HOST_UNREACH] = ICMP_HOST_UNREACH, + [NFT_REJECT_ICMPX_ADMIN_PROHIBITED] = ICMP_PKT_FILTERED, +}; + +int nft_reject_icmp_code(u8 code) +{ + BUG_ON(code > NFT_REJECT_ICMPX_MAX); + + return icmp_code_v4[code]; +} + +EXPORT_SYMBOL_GPL(nft_reject_icmp_code); + + +static u8 icmp_code_v6[NFT_REJECT_ICMPX_MAX + 1] = { + [NFT_REJECT_ICMPX_NO_ROUTE] = ICMPV6_NOROUTE, + [NFT_REJECT_ICMPX_PORT_UNREACH] = ICMPV6_PORT_UNREACH, + [NFT_REJECT_ICMPX_HOST_UNREACH] = ICMPV6_ADDR_UNREACH, + [NFT_REJECT_ICMPX_ADMIN_PROHIBITED] = ICMPV6_ADM_PROHIBITED, +}; + +int nft_reject_icmpv6_code(u8 code) +{ + BUG_ON(code > NFT_REJECT_ICMPX_MAX); + + return icmp_code_v6[code]; +} + +EXPORT_SYMBOL_GPL(nft_reject_icmpv6_code); + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c index b718a52a465..7b5f9d58680 100644 --- a/net/netfilter/nft_reject_inet.c +++ b/net/netfilter/nft_reject_inet.c @@ -14,17 +14,103 @@ #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nft_reject.h> +#include <net/netfilter/ipv4/nf_reject.h> +#include <net/netfilter/ipv6/nf_reject.h> static void nft_reject_inet_eval(const struct nft_expr *expr, struct nft_data data[NFT_REG_MAX + 1], const struct nft_pktinfo *pkt) { + struct nft_reject *priv = nft_expr_priv(expr); + struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out); + switch (pkt->ops->pf) { case NFPROTO_IPV4: - return nft_reject_ipv4_eval(expr, data, pkt); + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + nf_send_unreach(pkt->skb, priv->icmp_code); + break; + case NFT_REJECT_TCP_RST: + nf_send_reset(pkt->skb, pkt->ops->hooknum); + break; + case NFT_REJECT_ICMPX_UNREACH: + nf_send_unreach(pkt->skb, + nft_reject_icmp_code(priv->icmp_code)); + break; + } + break; case NFPROTO_IPV6: - return nft_reject_ipv6_eval(expr, data, pkt); + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + nf_send_unreach6(net, pkt->skb, priv->icmp_code, + pkt->ops->hooknum); + break; + case NFT_REJECT_TCP_RST: + nf_send_reset6(net, pkt->skb, pkt->ops->hooknum); + break; + case NFT_REJECT_ICMPX_UNREACH: + nf_send_unreach6(net, pkt->skb, + nft_reject_icmpv6_code(priv->icmp_code), + pkt->ops->hooknum); + break; + } + break; + } + data[NFT_REG_VERDICT].verdict = NF_DROP; +} + +static int nft_reject_inet_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_reject *priv = nft_expr_priv(expr); + int icmp_code; + + if (tb[NFTA_REJECT_TYPE] == NULL) + return -EINVAL; + + priv->type = ntohl(nla_get_be32(tb[NFTA_REJECT_TYPE])); + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + case NFT_REJECT_ICMPX_UNREACH: + if (tb[NFTA_REJECT_ICMP_CODE] == NULL) + return -EINVAL; + + icmp_code = nla_get_u8(tb[NFTA_REJECT_ICMP_CODE]); + if (priv->type == NFT_REJECT_ICMPX_UNREACH && + icmp_code > NFT_REJECT_ICMPX_MAX) + return -EINVAL; + + priv->icmp_code = icmp_code; + break; + case NFT_REJECT_TCP_RST: + break; + default: + return -EINVAL; } + return 0; +} + +static int nft_reject_inet_dump(struct sk_buff *skb, + const struct nft_expr *expr) +{ + const struct nft_reject *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_REJECT_TYPE, htonl(priv->type))) + goto nla_put_failure; + + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + case NFT_REJECT_ICMPX_UNREACH: + if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code)) + goto nla_put_failure; + break; + } + + return 0; + +nla_put_failure: + return -1; } static struct nft_expr_type nft_reject_inet_type; @@ -32,8 +118,8 @@ static const struct nft_expr_ops nft_reject_inet_ops = { .type = &nft_reject_inet_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)), .eval = nft_reject_inet_eval, - .init = nft_reject_init, - .dump = nft_reject_dump, + .init = nft_reject_inet_init, + .dump = nft_reject_inet_dump, }; static struct nft_expr_type nft_reject_inet_type __read_mostly = { diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 47b978bc310..133eb4772f1 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -71,18 +71,14 @@ static const char *const xt_prefix[NFPROTO_NUMPROTO] = { static const unsigned int xt_jumpstack_multiplier = 2; /* Registration hooks for targets. */ -int -xt_register_target(struct xt_target *target) +int xt_register_target(struct xt_target *target) { u_int8_t af = target->family; - int ret; - ret = mutex_lock_interruptible(&xt[af].mutex); - if (ret != 0) - return ret; + mutex_lock(&xt[af].mutex); list_add(&target->list, &xt[af].target); mutex_unlock(&xt[af].mutex); - return ret; + return 0; } EXPORT_SYMBOL(xt_register_target); @@ -125,20 +121,14 @@ xt_unregister_targets(struct xt_target *target, unsigned int n) } EXPORT_SYMBOL(xt_unregister_targets); -int -xt_register_match(struct xt_match *match) +int xt_register_match(struct xt_match *match) { u_int8_t af = match->family; - int ret; - - ret = mutex_lock_interruptible(&xt[af].mutex); - if (ret != 0) - return ret; + mutex_lock(&xt[af].mutex); list_add(&match->list, &xt[af].match); mutex_unlock(&xt[af].mutex); - - return ret; + return 0; } EXPORT_SYMBOL(xt_register_match); @@ -194,9 +184,7 @@ struct xt_match *xt_find_match(u8 af, const char *name, u8 revision) struct xt_match *m; int err = -ENOENT; - if (mutex_lock_interruptible(&xt[af].mutex) != 0) - return ERR_PTR(-EINTR); - + mutex_lock(&xt[af].mutex); list_for_each_entry(m, &xt[af].match, list) { if (strcmp(m->name, name) == 0) { if (m->revision == revision) { @@ -239,9 +227,7 @@ struct xt_target *xt_find_target(u8 af, const char *name, u8 revision) struct xt_target *t; int err = -ENOENT; - if (mutex_lock_interruptible(&xt[af].mutex) != 0) - return ERR_PTR(-EINTR); - + mutex_lock(&xt[af].mutex); list_for_each_entry(t, &xt[af].target, list) { if (strcmp(t->name, name) == 0) { if (t->revision == revision) { @@ -323,10 +309,7 @@ int xt_find_revision(u8 af, const char *name, u8 revision, int target, { int have_rev, best = -1; - if (mutex_lock_interruptible(&xt[af].mutex) != 0) { - *err = -EINTR; - return 1; - } + mutex_lock(&xt[af].mutex); if (target == 1) have_rev = target_revfn(af, name, revision, &best); else @@ -732,9 +715,7 @@ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, { struct xt_table *t; - if (mutex_lock_interruptible(&xt[af].mutex) != 0) - return ERR_PTR(-EINTR); - + mutex_lock(&xt[af].mutex); list_for_each_entry(t, &net->xt.tables[af], list) if (strcmp(t->name, name) == 0 && try_module_get(t->me)) return t; @@ -883,10 +864,7 @@ struct xt_table *xt_register_table(struct net *net, goto out; } - ret = mutex_lock_interruptible(&xt[table->af].mutex); - if (ret != 0) - goto out_free; - + mutex_lock(&xt[table->af].mutex); /* Don't autoload: we'd eat our tail... */ list_for_each_entry(t, &net->xt.tables[table->af], list) { if (strcmp(t->name, table->name) == 0) { @@ -911,9 +889,8 @@ struct xt_table *xt_register_table(struct net *net, mutex_unlock(&xt[table->af].mutex); return table; - unlock: +unlock: mutex_unlock(&xt[table->af].mutex); -out_free: kfree(table); out: return ERR_PTR(ret); @@ -1124,22 +1101,11 @@ static const struct seq_operations xt_match_seq_ops = { static int xt_match_open(struct inode *inode, struct file *file) { - struct seq_file *seq; struct nf_mttg_trav *trav; - int ret; - - trav = kmalloc(sizeof(*trav), GFP_KERNEL); - if (trav == NULL) + trav = __seq_open_private(file, &xt_match_seq_ops, sizeof(*trav)); + if (!trav) return -ENOMEM; - ret = seq_open(file, &xt_match_seq_ops); - if (ret < 0) { - kfree(trav); - return ret; - } - - seq = file->private_data; - seq->private = trav; trav->nfproto = (unsigned long)PDE_DATA(inode); return 0; } @@ -1188,22 +1154,11 @@ static const struct seq_operations xt_target_seq_ops = { static int xt_target_open(struct inode *inode, struct file *file) { - struct seq_file *seq; struct nf_mttg_trav *trav; - int ret; - - trav = kmalloc(sizeof(*trav), GFP_KERNEL); - if (trav == NULL) + trav = __seq_open_private(file, &xt_target_seq_ops, sizeof(*trav)); + if (!trav) return -ENOMEM; - ret = seq_open(file, &xt_target_seq_ops); - if (ret < 0) { - kfree(trav); - return ret; - } - - seq = file->private_data; - seq->private = trav; trav->nfproto = (unsigned long)PDE_DATA(inode); return 0; } diff --git a/net/netfilter/xt_HMARK.c b/net/netfilter/xt_HMARK.c index 73b73f687c5..02afaf48a72 100644 --- a/net/netfilter/xt_HMARK.c +++ b/net/netfilter/xt_HMARK.c @@ -126,7 +126,7 @@ hmark_hash(struct hmark_tuple *t, const struct xt_hmark_info *info) hash = jhash_3words(src, dst, t->uports.v32, info->hashrnd); hash = hash ^ (t->proto & info->proto_mask); - return (((u64)hash * info->hmodulus) >> 32) + info->hoffset; + return reciprocal_scale(hash, info->hmodulus) + info->hoffset; } static void diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index 370adf622ce..604df6fae6f 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -136,7 +136,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) cfg.est.interval = info->interval; cfg.est.ewma_log = info->ewma_log; - ret = gen_new_estimator(&est->bstats, &est->rstats, + ret = gen_new_estimator(&est->bstats, NULL, &est->rstats, &est->lock, &cfg.opt); if (ret < 0) goto err2; diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c index f4e83300532..7198d660b4d 100644 --- a/net/netfilter/xt_cgroup.c +++ b/net/netfilter/xt_cgroup.c @@ -31,7 +31,7 @@ static int cgroup_mt_check(const struct xt_mtchk_param *par) if (info->invert & ~1) return -EINVAL; - return info->id ? 0 : -EINVAL; + return 0; } static bool diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c index f4af1bfafb1..96fa26b20b6 100644 --- a/net/netfilter/xt_cluster.c +++ b/net/netfilter/xt_cluster.c @@ -55,7 +55,8 @@ xt_cluster_hash(const struct nf_conn *ct, WARN_ON(1); break; } - return (((u64)hash * info->total_nodes) >> 32); + + return reciprocal_scale(hash, info->total_nodes); } static inline bool diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index 1e634615ab9..d4bec261e74 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -120,7 +120,7 @@ static int connbytes_mt_check(const struct xt_mtchk_param *par) * accounting is enabled, so complain in the hope that someone notices. */ if (!nf_ct_acct_enabled(par->net)) { - pr_warning("Forcing CT accounting to be enabled\n"); + pr_warn("Forcing CT accounting to be enabled\n"); nf_ct_set_acct(par->net, true); } diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 47dc6836830..05fbc2a0be4 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -135,7 +135,7 @@ hash_dst(const struct xt_hashlimit_htable *ht, const struct dsthash_dst *dst) * give results between [0 and cfg.size-1] and same hash distribution, * but using a multiply, less expensive than a divide */ - return ((u64)hash * ht->cfg.size) >> 32; + return reciprocal_scale(hash, ht->cfg.size); } static struct dsthash_ent * @@ -943,7 +943,7 @@ static int __init hashlimit_mt_init(void) sizeof(struct dsthash_ent), 0, 0, NULL); if (!hashlimit_cachep) { - pr_warning("unable to create slab cache\n"); + pr_warn("unable to create slab cache\n"); goto err2; } return 0; diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index d7ca16b8b8d..f440f57a452 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -13,6 +13,7 @@ #include <linux/netfilter_bridge.h> #include <linux/netfilter/xt_physdev.h> #include <linux/netfilter/x_tables.h> +#include <net/netfilter/br_netfilter.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>"); @@ -87,6 +88,8 @@ static int physdev_mt_check(const struct xt_mtchk_param *par) { const struct xt_physdev_info *info = par->matchinfo; + br_netfilter_enable(); + if (!(info->bitmask & XT_PHYSDEV_OP_MASK) || info->bitmask & ~XT_PHYSDEV_OP_MASK) return -EINVAL; diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index 80c2e2d603e..5732cd64acc 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -84,13 +84,12 @@ set_match_v0_checkentry(const struct xt_mtchk_param *par) index = ip_set_nfnl_get_byindex(par->net, info->match_set.index); if (index == IPSET_INVALID_ID) { - pr_warning("Cannot find set identified by id %u to match\n", - info->match_set.index); + pr_warn("Cannot find set identified by id %u to match\n", + info->match_set.index); return -ENOENT; } if (info->match_set.u.flags[IPSET_DIM_MAX-1] != 0) { - pr_warning("Protocol error: set match dimension " - "is over the limit!\n"); + pr_warn("Protocol error: set match dimension is over the limit!\n"); ip_set_nfnl_put(par->net, info->match_set.index); return -ERANGE; } @@ -134,13 +133,12 @@ set_match_v1_checkentry(const struct xt_mtchk_param *par) index = ip_set_nfnl_get_byindex(par->net, info->match_set.index); if (index == IPSET_INVALID_ID) { - pr_warning("Cannot find set identified by id %u to match\n", - info->match_set.index); + pr_warn("Cannot find set identified by id %u to match\n", + info->match_set.index); return -ENOENT; } if (info->match_set.dim > IPSET_DIM_MAX) { - pr_warning("Protocol error: set match dimension " - "is over the limit!\n"); + pr_warn("Protocol error: set match dimension is over the limit!\n"); ip_set_nfnl_put(par->net, info->match_set.index); return -ERANGE; } @@ -230,8 +228,8 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par) if (info->add_set.index != IPSET_INVALID_ID) { index = ip_set_nfnl_get_byindex(par->net, info->add_set.index); if (index == IPSET_INVALID_ID) { - pr_warning("Cannot find add_set index %u as target\n", - info->add_set.index); + pr_warn("Cannot find add_set index %u as target\n", + info->add_set.index); return -ENOENT; } } @@ -239,8 +237,8 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par) if (info->del_set.index != IPSET_INVALID_ID) { index = ip_set_nfnl_get_byindex(par->net, info->del_set.index); if (index == IPSET_INVALID_ID) { - pr_warning("Cannot find del_set index %u as target\n", - info->del_set.index); + pr_warn("Cannot find del_set index %u as target\n", + info->del_set.index); if (info->add_set.index != IPSET_INVALID_ID) ip_set_nfnl_put(par->net, info->add_set.index); return -ENOENT; @@ -248,8 +246,7 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par) } if (info->add_set.u.flags[IPSET_DIM_MAX-1] != 0 || info->del_set.u.flags[IPSET_DIM_MAX-1] != 0) { - pr_warning("Protocol error: SET target dimension " - "is over the limit!\n"); + pr_warn("Protocol error: SET target dimension is over the limit!\n"); if (info->add_set.index != IPSET_INVALID_ID) ip_set_nfnl_put(par->net, info->add_set.index); if (info->del_set.index != IPSET_INVALID_ID) @@ -303,8 +300,8 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par) if (info->add_set.index != IPSET_INVALID_ID) { index = ip_set_nfnl_get_byindex(par->net, info->add_set.index); if (index == IPSET_INVALID_ID) { - pr_warning("Cannot find add_set index %u as target\n", - info->add_set.index); + pr_warn("Cannot find add_set index %u as target\n", + info->add_set.index); return -ENOENT; } } @@ -312,8 +309,8 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par) if (info->del_set.index != IPSET_INVALID_ID) { index = ip_set_nfnl_get_byindex(par->net, info->del_set.index); if (index == IPSET_INVALID_ID) { - pr_warning("Cannot find del_set index %u as target\n", - info->del_set.index); + pr_warn("Cannot find del_set index %u as target\n", + info->del_set.index); if (info->add_set.index != IPSET_INVALID_ID) ip_set_nfnl_put(par->net, info->add_set.index); return -ENOENT; @@ -321,8 +318,7 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par) } if (info->add_set.dim > IPSET_DIM_MAX || info->del_set.dim > IPSET_DIM_MAX) { - pr_warning("Protocol error: SET target dimension " - "is over the limit!\n"); + pr_warn("Protocol error: SET target dimension is over the limit!\n"); if (info->add_set.index != IPSET_INVALID_ID) ip_set_nfnl_put(par->net, info->add_set.index); if (info->del_set.index != IPSET_INVALID_ID) @@ -370,6 +366,140 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par) #define set_target_v2_checkentry set_target_v1_checkentry #define set_target_v2_destroy set_target_v1_destroy +/* Revision 3 target */ + +static unsigned int +set_target_v3(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct xt_set_info_target_v3 *info = par->targinfo; + ADT_OPT(add_opt, par->family, info->add_set.dim, + info->add_set.flags, info->flags, info->timeout); + ADT_OPT(del_opt, par->family, info->del_set.dim, + info->del_set.flags, 0, UINT_MAX); + ADT_OPT(map_opt, par->family, info->map_set.dim, + info->map_set.flags, 0, UINT_MAX); + + int ret; + + /* Normalize to fit into jiffies */ + if (add_opt.ext.timeout != IPSET_NO_TIMEOUT && + add_opt.ext.timeout > UINT_MAX/MSEC_PER_SEC) + add_opt.ext.timeout = UINT_MAX/MSEC_PER_SEC; + if (info->add_set.index != IPSET_INVALID_ID) + ip_set_add(info->add_set.index, skb, par, &add_opt); + if (info->del_set.index != IPSET_INVALID_ID) + ip_set_del(info->del_set.index, skb, par, &del_opt); + if (info->map_set.index != IPSET_INVALID_ID) { + map_opt.cmdflags |= info->flags & (IPSET_FLAG_MAP_SKBMARK | + IPSET_FLAG_MAP_SKBPRIO | + IPSET_FLAG_MAP_SKBQUEUE); + ret = match_set(info->map_set.index, skb, par, &map_opt, + info->map_set.flags & IPSET_INV_MATCH); + if (!ret) + return XT_CONTINUE; + if (map_opt.cmdflags & IPSET_FLAG_MAP_SKBMARK) + skb->mark = (skb->mark & ~(map_opt.ext.skbmarkmask)) + ^ (map_opt.ext.skbmark); + if (map_opt.cmdflags & IPSET_FLAG_MAP_SKBPRIO) + skb->priority = map_opt.ext.skbprio; + if ((map_opt.cmdflags & IPSET_FLAG_MAP_SKBQUEUE) && + skb->dev && + skb->dev->real_num_tx_queues > map_opt.ext.skbqueue) + skb_set_queue_mapping(skb, map_opt.ext.skbqueue); + } + return XT_CONTINUE; +} + + +static int +set_target_v3_checkentry(const struct xt_tgchk_param *par) +{ + const struct xt_set_info_target_v3 *info = par->targinfo; + ip_set_id_t index; + + if (info->add_set.index != IPSET_INVALID_ID) { + index = ip_set_nfnl_get_byindex(par->net, + info->add_set.index); + if (index == IPSET_INVALID_ID) { + pr_warn("Cannot find add_set index %u as target\n", + info->add_set.index); + return -ENOENT; + } + } + + if (info->del_set.index != IPSET_INVALID_ID) { + index = ip_set_nfnl_get_byindex(par->net, + info->del_set.index); + if (index == IPSET_INVALID_ID) { + pr_warn("Cannot find del_set index %u as target\n", + info->del_set.index); + if (info->add_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(par->net, + info->add_set.index); + return -ENOENT; + } + } + + if (info->map_set.index != IPSET_INVALID_ID) { + if (strncmp(par->table, "mangle", 7)) { + pr_warn("--map-set only usable from mangle table\n"); + return -EINVAL; + } + if (((info->flags & IPSET_FLAG_MAP_SKBPRIO) | + (info->flags & IPSET_FLAG_MAP_SKBQUEUE)) && + !(par->hook_mask & (1 << NF_INET_FORWARD | + 1 << NF_INET_LOCAL_OUT | + 1 << NF_INET_POST_ROUTING))) { + pr_warn("mapping of prio or/and queue is allowed only" + "from OUTPUT/FORWARD/POSTROUTING chains\n"); + return -EINVAL; + } + index = ip_set_nfnl_get_byindex(par->net, + info->map_set.index); + if (index == IPSET_INVALID_ID) { + pr_warn("Cannot find map_set index %u as target\n", + info->map_set.index); + if (info->add_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(par->net, + info->add_set.index); + if (info->del_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(par->net, + info->del_set.index); + return -ENOENT; + } + } + + if (info->add_set.dim > IPSET_DIM_MAX || + info->del_set.dim > IPSET_DIM_MAX || + info->map_set.dim > IPSET_DIM_MAX) { + pr_warn("Protocol error: SET target dimension " + "is over the limit!\n"); + if (info->add_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(par->net, info->add_set.index); + if (info->del_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(par->net, info->del_set.index); + if (info->map_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(par->net, info->map_set.index); + return -ERANGE; + } + + return 0; +} + +static void +set_target_v3_destroy(const struct xt_tgdtor_param *par) +{ + const struct xt_set_info_target_v3 *info = par->targinfo; + + if (info->add_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(par->net, info->add_set.index); + if (info->del_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(par->net, info->del_set.index); + if (info->map_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(par->net, info->map_set.index); +} + + static struct xt_match set_matches[] __read_mostly = { { .name = "set", @@ -497,6 +627,27 @@ static struct xt_target set_targets[] __read_mostly = { .destroy = set_target_v2_destroy, .me = THIS_MODULE }, + /* --map-set support */ + { + .name = "SET", + .revision = 3, + .family = NFPROTO_IPV4, + .target = set_target_v3, + .targetsize = sizeof(struct xt_set_info_target_v3), + .checkentry = set_target_v3_checkentry, + .destroy = set_target_v3_destroy, + .me = THIS_MODULE + }, + { + .name = "SET", + .revision = 3, + .family = NFPROTO_IPV6, + .target = set_target_v3, + .targetsize = sizeof(struct xt_set_info_target_v3), + .checkentry = set_target_v3_checkentry, + .destroy = set_target_v3_destroy, + .me = THIS_MODULE + }, }; static int __init xt_set_init(void) diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c index d3c48b14ab9..5699adb9765 100644 --- a/net/netfilter/xt_string.c +++ b/net/netfilter/xt_string.c @@ -29,7 +29,6 @@ string_mt(const struct sk_buff *skb, struct xt_action_param *par) struct ts_state state; bool invert; - memset(&state, 0, sizeof(struct ts_state)); invert = conf->u.v1.flags & XT_STRING_FLAG_INVERT; return (skb_find_text((struct sk_buff *)skb, conf->from_offset, |