diff options
Diffstat (limited to 'net/netfilter')
33 files changed, 785 insertions, 1559 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index e9410d17619..6d77cce481d 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -46,6 +46,9 @@ config NF_CONNTRACK To compile it as a module, choose M here. If unsure, say N. +config NF_LOG_COMMON + tristate + if NF_CONNTRACK config NF_CONNTRACK_MARK @@ -496,7 +499,7 @@ config NFT_LIMIT config NFT_NAT depends on NF_TABLES depends on NF_CONNTRACK - depends on NF_NAT + select NF_NAT tristate "Netfilter nf_tables nat module" help This option adds the "nat" expression that you can use to perform @@ -744,6 +747,9 @@ config NETFILTER_XT_TARGET_LED config NETFILTER_XT_TARGET_LOG tristate "LOG target support" + select NF_LOG_COMMON + select NF_LOG_IPV4 + select NF_LOG_IPV6 if IPV6 default m if NETFILTER_ADVANCED=n help This option adds a `LOG' target, which allows you to create rules in @@ -760,6 +766,14 @@ config NETFILTER_XT_TARGET_MARK (e.g. when running oldconfig). It selects CONFIG_NETFILTER_XT_MARK (combined mark/MARK module). +config NETFILTER_XT_NAT + tristate '"SNAT and DNAT" targets support' + depends on NF_NAT + ---help--- + This option enables the SNAT and DNAT targets. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_TARGET_NETMAP tristate '"NETMAP" target support' depends on NF_NAT @@ -833,6 +847,7 @@ config NETFILTER_XT_TARGET_TPROXY tristate '"TPROXY" target transparent proxying support' depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED + depends on (IPV6 || IPV6=n) depends on IP_NF_MANGLE select NF_DEFRAG_IPV4 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index bffdad774da..fad5fdba34e 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -47,6 +47,9 @@ obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o nf_nat-y := nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \ nf_nat_proto_udp.o nf_nat_proto_tcp.o nf_nat_helper.o +# generic transport layer logging +obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o + obj-$(CONFIG_NF_NAT) += nf_nat.o # NAT protocols (nf_nat) @@ -92,7 +95,7 @@ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o obj-$(CONFIG_NETFILTER_XT_SET) += xt_set.o -obj-$(CONFIG_NF_NAT) += xt_nat.o +obj-$(CONFIG_NETFILTER_XT_NAT) += xt_nat.o # targets obj-$(CONFIG_NETFILTER_XT_TARGET_AUDIT) += xt_AUDIT.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 1fbab0cdd30..024a2e25c8a 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -35,11 +35,7 @@ EXPORT_SYMBOL_GPL(nf_ipv6_ops); int nf_register_afinfo(const struct nf_afinfo *afinfo) { - int err; - - err = mutex_lock_interruptible(&afinfo_mutex); - if (err < 0) - return err; + mutex_lock(&afinfo_mutex); RCU_INIT_POINTER(nf_afinfo[afinfo->family], afinfo); mutex_unlock(&afinfo_mutex); return 0; @@ -58,7 +54,7 @@ EXPORT_SYMBOL_GPL(nf_unregister_afinfo); struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly; EXPORT_SYMBOL(nf_hooks); -#if defined(CONFIG_JUMP_LABEL) +#ifdef HAVE_JUMP_LABEL struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; EXPORT_SYMBOL(nf_hooks_needed); #endif @@ -68,18 +64,15 @@ static DEFINE_MUTEX(nf_hook_mutex); int nf_register_hook(struct nf_hook_ops *reg) { struct nf_hook_ops *elem; - int err; - err = mutex_lock_interruptible(&nf_hook_mutex); - if (err < 0) - return err; + mutex_lock(&nf_hook_mutex); list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) { if (reg->priority < elem->priority) break; } list_add_rcu(®->list, elem->list.prev); mutex_unlock(&nf_hook_mutex); -#if defined(CONFIG_JUMP_LABEL) +#ifdef HAVE_JUMP_LABEL static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]); #endif return 0; @@ -91,7 +84,7 @@ void nf_unregister_hook(struct nf_hook_ops *reg) mutex_lock(&nf_hook_mutex); list_del_rcu(®->list); mutex_unlock(&nf_hook_mutex); -#if defined(CONFIG_JUMP_LABEL) +#ifdef HAVE_JUMP_LABEL static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); #endif synchronize_net(); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index e6836755c45..5c34e8d42e0 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1906,7 +1906,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { { .hook = ip_vs_local_reply6, .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, + .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_NAT_DST + 1, }, diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 581a6584ed0..fd3f444a4f9 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1807,92 +1807,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = proc_dointvec, }, #endif -#if 0 - { - .procname = "timeout_established", - .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED], - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "timeout_synsent", - .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT], - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "timeout_synrecv", - .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV], - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "timeout_finwait", - .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT], - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "timeout_timewait", - .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT], - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "timeout_close", - .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE], - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "timeout_closewait", - .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT], - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "timeout_lastack", - .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK], - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "timeout_listen", - .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN], - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "timeout_synack", - .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK], - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "timeout_udp", - .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP], - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "timeout_icmp", - .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP], - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, -#endif { } }; @@ -2357,10 +2271,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) cmd == IP_VS_SO_SET_STOPDAEMON) { struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; - if (mutex_lock_interruptible(&ipvs->sync_mutex)) { - ret = -ERESTARTSYS; - goto out_dec; - } + mutex_lock(&ipvs->sync_mutex); if (cmd == IP_VS_SO_SET_STARTDAEMON) ret = start_sync_thread(net, dm->state, dm->mcast_ifn, dm->syncid); @@ -2370,11 +2281,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) goto out_dec; } - if (mutex_lock_interruptible(&__ip_vs_mutex)) { - ret = -ERESTARTSYS; - goto out_dec; - } - + mutex_lock(&__ip_vs_mutex); if (cmd == IP_VS_SO_SET_FLUSH) { /* Flush the virtual service */ ret = ip_vs_flush(net, false); @@ -2659,9 +2566,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) struct ip_vs_daemon_user d[2]; memset(&d, 0, sizeof(d)); - if (mutex_lock_interruptible(&ipvs->sync_mutex)) - return -ERESTARTSYS; - + mutex_lock(&ipvs->sync_mutex); if (ipvs->sync_state & IP_VS_STATE_MASTER) { d[0].state = IP_VS_STATE_MASTER; strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn, @@ -2680,9 +2585,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) return ret; } - if (mutex_lock_interruptible(&__ip_vs_mutex)) - return -ERESTARTSYS; - + mutex_lock(&__ip_vs_mutex); switch (cmd) { case IP_VS_SO_GET_VERSION: { diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index db801263ee9..eadffb29dec 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -886,8 +886,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark); rcu_read_unlock(); if (!cp) { - if (param->pe_data) - kfree(param->pe_data); + kfree(param->pe_data); IP_VS_DBG(2, "BACKUP, add new conn. failed\n"); return; } diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 73ba1cc7a88..56896a412bc 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -38,6 +38,7 @@ #include <net/route.h> /* for ip_route_output */ #include <net/ipv6.h> #include <net/ip6_route.h> +#include <net/ip_tunnels.h> #include <net/addrconf.h> #include <linux/icmpv6.h> #include <linux/netfilter.h> @@ -862,11 +863,15 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, old_iph = ip_hdr(skb); } - skb->transport_header = skb->network_header; - /* fix old IP header checksum */ ip_send_check(old_iph); + skb = iptunnel_handle_offloads(skb, false, SKB_GSO_IPIP); + if (IS_ERR(skb)) + goto tx_error; + + skb->transport_header = skb->network_header; + skb_push(skb, sizeof(struct iphdr)); skb_reset_network_header(skb); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); @@ -900,7 +905,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, return NF_STOLEN; tx_error: - kfree_skb(skb); + if (!IS_ERR(skb)) + kfree_skb(skb); rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; @@ -953,6 +959,11 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, old_iph = ipv6_hdr(skb); } + /* GSO: we need to provide proper SKB_GSO_ value for IPv6 */ + skb = iptunnel_handle_offloads(skb, false, 0); /* SKB_GSO_SIT/IPV6 */ + if (IS_ERR(skb)) + goto tx_error; + skb->transport_header = skb->network_header; skb_push(skb, sizeof(struct ipv6hdr)); @@ -967,8 +978,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, iph->nexthdr = IPPROTO_IPV6; iph->payload_len = old_iph->payload_len; be16_add_cpu(&iph->payload_len, sizeof(*old_iph)); - iph->priority = old_iph->priority; memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl)); + ipv6_change_dsfield(iph, 0, ipv6_get_dsfield(old_iph)); iph->daddr = cp->daddr.in6; iph->saddr = saddr; iph->hop_limit = old_iph->hop_limit; @@ -988,7 +999,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, return NF_STOLEN; tx_error: - kfree_skb(skb); + if (!IS_ERR(skb)) + kfree_skb(skb); rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 1f4f954c4b4..de88c4ab514 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -352,40 +352,6 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct) local_bh_enable(); } -static void death_by_event(unsigned long ul_conntrack) -{ - struct nf_conn *ct = (void *)ul_conntrack; - struct net *net = nf_ct_net(ct); - struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct); - - BUG_ON(ecache == NULL); - - if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) { - /* bad luck, let's retry again */ - ecache->timeout.expires = jiffies + - (prandom_u32() % net->ct.sysctl_events_retry_timeout); - add_timer(&ecache->timeout); - return; - } - /* we've got the event delivered, now it's dying */ - set_bit(IPS_DYING_BIT, &ct->status); - nf_ct_put(ct); -} - -static void nf_ct_dying_timeout(struct nf_conn *ct) -{ - struct net *net = nf_ct_net(ct); - struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct); - - BUG_ON(ecache == NULL); - - /* set a new timer to retry event delivery */ - setup_timer(&ecache->timeout, death_by_event, (unsigned long)ct); - ecache->timeout.expires = jiffies + - (prandom_u32() % net->ct.sysctl_events_retry_timeout); - add_timer(&ecache->timeout); -} - bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report) { struct nf_conn_tstamp *tstamp; @@ -394,15 +360,20 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report) if (tstamp && tstamp->stop == 0) tstamp->stop = ktime_to_ns(ktime_get_real()); - if (!nf_ct_is_dying(ct) && - unlikely(nf_conntrack_event_report(IPCT_DESTROY, ct, - portid, report) < 0)) { + if (nf_ct_is_dying(ct)) + goto delete; + + if (nf_conntrack_event_report(IPCT_DESTROY, ct, + portid, report) < 0) { /* destroy event was not delivered */ nf_ct_delete_from_lists(ct); - nf_ct_dying_timeout(ct); + nf_conntrack_ecache_delayed_work(nf_ct_net(ct)); return false; } + + nf_conntrack_ecache_work(nf_ct_net(ct)); set_bit(IPS_DYING_BIT, &ct->status); + delete: nf_ct_delete_from_lists(ct); nf_ct_put(ct); return true; @@ -1464,26 +1435,6 @@ void nf_conntrack_flush_report(struct net *net, u32 portid, int report) } EXPORT_SYMBOL_GPL(nf_conntrack_flush_report); -static void nf_ct_release_dying_list(struct net *net) -{ - struct nf_conntrack_tuple_hash *h; - struct nf_conn *ct; - struct hlist_nulls_node *n; - int cpu; - - for_each_possible_cpu(cpu) { - struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); - - spin_lock_bh(&pcpu->lock); - hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) { - ct = nf_ct_tuplehash_to_ctrack(h); - /* never fails to remove them, no listeners at this point */ - nf_ct_kill(ct); - } - spin_unlock_bh(&pcpu->lock); - } -} - static int untrack_refs(void) { int cnt = 0, cpu; @@ -1548,7 +1499,6 @@ i_see_dead_people: busy = 0; list_for_each_entry(net, net_exit_list, exit_list) { nf_ct_iterate_cleanup(net, kill_all, NULL, 0, 0); - nf_ct_release_dying_list(net); if (atomic_read(&net->ct.count) != 0) busy = 1; } diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 1df17614656..4e78c57b818 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -29,6 +29,90 @@ static DEFINE_MUTEX(nf_ct_ecache_mutex); +#define ECACHE_RETRY_WAIT (HZ/10) + +enum retry_state { + STATE_CONGESTED, + STATE_RESTART, + STATE_DONE, +}; + +static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu) +{ + struct nf_conn *refs[16]; + struct nf_conntrack_tuple_hash *h; + struct hlist_nulls_node *n; + unsigned int evicted = 0; + enum retry_state ret = STATE_DONE; + + spin_lock(&pcpu->lock); + + hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) { + struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); + + if (nf_ct_is_dying(ct)) + continue; + + if (nf_conntrack_event(IPCT_DESTROY, ct)) { + ret = STATE_CONGESTED; + break; + } + + /* we've got the event delivered, now it's dying */ + set_bit(IPS_DYING_BIT, &ct->status); + refs[evicted] = ct; + + if (++evicted >= ARRAY_SIZE(refs)) { + ret = STATE_RESTART; + break; + } + } + + spin_unlock(&pcpu->lock); + + /* can't _put while holding lock */ + while (evicted) + nf_ct_put(refs[--evicted]); + + return ret; +} + +static void ecache_work(struct work_struct *work) +{ + struct netns_ct *ctnet = + container_of(work, struct netns_ct, ecache_dwork.work); + int cpu, delay = -1; + struct ct_pcpu *pcpu; + + local_bh_disable(); + + for_each_possible_cpu(cpu) { + enum retry_state ret; + + pcpu = per_cpu_ptr(ctnet->pcpu_lists, cpu); + + ret = ecache_work_evict_list(pcpu); + + switch (ret) { + case STATE_CONGESTED: + delay = ECACHE_RETRY_WAIT; + goto out; + case STATE_RESTART: + delay = 0; + break; + case STATE_DONE: + break; + } + } + + out: + local_bh_enable(); + + ctnet->ecache_dwork_pending = delay > 0; + if (delay >= 0) + schedule_delayed_work(&ctnet->ecache_dwork, delay); +} + /* deliver cached events and clear cache entry - must be called with locally * disabled softirqs */ void nf_ct_deliver_cached_events(struct nf_conn *ct) @@ -157,7 +241,6 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier); #define NF_CT_EVENTS_DEFAULT 1 static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT; -static int nf_ct_events_retry_timeout __read_mostly = 15*HZ; #ifdef CONFIG_SYSCTL static struct ctl_table event_sysctl_table[] = { @@ -168,13 +251,6 @@ static struct ctl_table event_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { - .procname = "nf_conntrack_events_retry_timeout", - .data = &init_net.ct.sysctl_events_retry_timeout, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, {} }; #endif /* CONFIG_SYSCTL */ @@ -196,7 +272,6 @@ static int nf_conntrack_event_init_sysctl(struct net *net) goto out; table[0].data = &net->ct.sysctl_events; - table[1].data = &net->ct.sysctl_events_retry_timeout; /* Don't export sysctls to unprivileged users */ if (net->user_ns != &init_user_ns) @@ -238,12 +313,13 @@ static void nf_conntrack_event_fini_sysctl(struct net *net) int nf_conntrack_ecache_pernet_init(struct net *net) { net->ct.sysctl_events = nf_ct_events; - net->ct.sysctl_events_retry_timeout = nf_ct_events_retry_timeout; + INIT_DELAYED_WORK(&net->ct.ecache_dwork, ecache_work); return nf_conntrack_event_init_sysctl(net); } void nf_conntrack_ecache_pernet_fini(struct net *net) { + cancel_delayed_work_sync(&net->ct.ecache_dwork); nf_conntrack_event_fini_sysctl(net); } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 300ed1eec72..355a5c4ef76 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -745,8 +745,7 @@ static int ctnetlink_done(struct netlink_callback *cb) { if (cb->args[1]) nf_ct_put((struct nf_conn *)cb->args[1]); - if (cb->data) - kfree(cb->data); + kfree(cb->data); return 0; } diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 85296d4eac0..daad6022c68 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -16,16 +16,22 @@ #define NF_LOG_PREFIXLEN 128 #define NFLOGGER_NAME_LEN 64 -static struct list_head nf_loggers_l[NFPROTO_NUMPROTO] __read_mostly; +static struct nf_logger __rcu *loggers[NFPROTO_NUMPROTO][NF_LOG_TYPE_MAX] __read_mostly; static DEFINE_MUTEX(nf_log_mutex); static struct nf_logger *__find_logger(int pf, const char *str_logger) { - struct nf_logger *t; + struct nf_logger *log; + int i; + + for (i = 0; i < NF_LOG_TYPE_MAX; i++) { + if (loggers[pf][i] == NULL) + continue; - list_for_each_entry(t, &nf_loggers_l[pf], list[pf]) { - if (!strnicmp(str_logger, t->name, strlen(t->name))) - return t; + log = rcu_dereference_protected(loggers[pf][i], + lockdep_is_held(&nf_log_mutex)); + if (!strnicmp(str_logger, log->name, strlen(log->name))) + return log; } return NULL; @@ -73,17 +79,14 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger) if (pf >= ARRAY_SIZE(init_net.nf.nf_loggers)) return -EINVAL; - for (i = 0; i < ARRAY_SIZE(logger->list); i++) - INIT_LIST_HEAD(&logger->list[i]); - mutex_lock(&nf_log_mutex); if (pf == NFPROTO_UNSPEC) { for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) - list_add_tail(&(logger->list[i]), &(nf_loggers_l[i])); + rcu_assign_pointer(loggers[i][logger->type], logger); } else { /* register at end of list to honor first register win */ - list_add_tail(&logger->list[pf], &nf_loggers_l[pf]); + rcu_assign_pointer(loggers[pf][logger->type], logger); } mutex_unlock(&nf_log_mutex); @@ -98,7 +101,7 @@ void nf_log_unregister(struct nf_logger *logger) mutex_lock(&nf_log_mutex); for (i = 0; i < NFPROTO_NUMPROTO; i++) - list_del(&logger->list[i]); + RCU_INIT_POINTER(loggers[i][logger->type], NULL); mutex_unlock(&nf_log_mutex); } EXPORT_SYMBOL(nf_log_unregister); @@ -129,6 +132,48 @@ void nf_log_unbind_pf(struct net *net, u_int8_t pf) } EXPORT_SYMBOL(nf_log_unbind_pf); +void nf_logger_request_module(int pf, enum nf_log_type type) +{ + if (loggers[pf][type] == NULL) + request_module("nf-logger-%u-%u", pf, type); +} +EXPORT_SYMBOL_GPL(nf_logger_request_module); + +int nf_logger_find_get(int pf, enum nf_log_type type) +{ + struct nf_logger *logger; + int ret = -ENOENT; + + logger = loggers[pf][type]; + if (logger == NULL) + request_module("nf-logger-%u-%u", pf, type); + + rcu_read_lock(); + logger = rcu_dereference(loggers[pf][type]); + if (logger == NULL) + goto out; + + if (logger && try_module_get(logger->me)) + ret = 0; +out: + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL_GPL(nf_logger_find_get); + +void nf_logger_put(int pf, enum nf_log_type type) +{ + struct nf_logger *logger; + + BUG_ON(loggers[pf][type] == NULL); + + rcu_read_lock(); + logger = rcu_dereference(loggers[pf][type]); + module_put(logger->me); + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(nf_logger_put); + void nf_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum, @@ -143,7 +188,11 @@ void nf_log_packet(struct net *net, const struct nf_logger *logger; rcu_read_lock(); - logger = rcu_dereference(net->nf.nf_loggers[pf]); + if (loginfo != NULL) + logger = rcu_dereference(loggers[pf][loginfo->type]); + else + logger = rcu_dereference(net->nf.nf_loggers[pf]); + if (logger) { va_start(args, fmt); vsnprintf(prefix, sizeof(prefix), fmt, args); @@ -154,6 +203,63 @@ void nf_log_packet(struct net *net, } EXPORT_SYMBOL(nf_log_packet); +#define S_SIZE (1024 - (sizeof(unsigned int) + 1)) + +struct nf_log_buf { + unsigned int count; + char buf[S_SIZE + 1]; +}; +static struct nf_log_buf emergency, *emergency_ptr = &emergency; + +__printf(2, 3) int nf_log_buf_add(struct nf_log_buf *m, const char *f, ...) +{ + va_list args; + int len; + + if (likely(m->count < S_SIZE)) { + va_start(args, f); + len = vsnprintf(m->buf + m->count, S_SIZE - m->count, f, args); + va_end(args); + if (likely(m->count + len < S_SIZE)) { + m->count += len; + return 0; + } + } + m->count = S_SIZE; + printk_once(KERN_ERR KBUILD_MODNAME " please increase S_SIZE\n"); + return -1; +} +EXPORT_SYMBOL_GPL(nf_log_buf_add); + +struct nf_log_buf *nf_log_buf_open(void) +{ + struct nf_log_buf *m = kmalloc(sizeof(*m), GFP_ATOMIC); + + if (unlikely(!m)) { + local_bh_disable(); + do { + m = xchg(&emergency_ptr, NULL); + } while (!m); + } + m->count = 0; + return m; +} +EXPORT_SYMBOL_GPL(nf_log_buf_open); + +void nf_log_buf_close(struct nf_log_buf *m) +{ + m->buf[m->count] = 0; + printk("%s\n", m->buf); + + if (likely(m != &emergency)) + kfree(m); + else { + emergency_ptr = m; + local_bh_enable(); + } +} +EXPORT_SYMBOL_GPL(nf_log_buf_close); + #ifdef CONFIG_PROC_FS static void *seq_start(struct seq_file *seq, loff_t *pos) { @@ -188,8 +294,7 @@ static int seq_show(struct seq_file *s, void *v) { loff_t *pos = v; const struct nf_logger *logger; - struct nf_logger *t; - int ret; + int i, ret; struct net *net = seq_file_net(s); logger = rcu_dereference_protected(net->nf.nf_loggers[*pos], @@ -203,11 +308,16 @@ static int seq_show(struct seq_file *s, void *v) if (ret < 0) return ret; - list_for_each_entry(t, &nf_loggers_l[*pos], list[*pos]) { - ret = seq_printf(s, "%s", t->name); + for (i = 0; i < NF_LOG_TYPE_MAX; i++) { + if (loggers[*pos][i] == NULL) + continue; + + logger = rcu_dereference_protected(loggers[*pos][i], + lockdep_is_held(&nf_log_mutex)); + ret = seq_printf(s, "%s", logger->name); if (ret < 0) return ret; - if (&t->list[*pos] != nf_loggers_l[*pos].prev) { + if (i == 0 && loggers[*pos][i + 1] != NULL) { ret = seq_printf(s, ","); if (ret < 0) return ret; @@ -389,14 +499,5 @@ static struct pernet_operations nf_log_net_ops = { int __init netfilter_log_init(void) { - int i, ret; - - ret = register_pernet_subsys(&nf_log_net_ops); - if (ret < 0) - return ret; - - for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) - INIT_LIST_HEAD(&(nf_loggers_l[i])); - - return 0; + return register_pernet_subsys(&nf_log_net_ops); } diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c new file mode 100644 index 00000000000..eeb8ef4ff1a --- /dev/null +++ b/net/netfilter/nf_log_common.c @@ -0,0 +1,187 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/spinlock.h> +#include <linux/skbuff.h> +#include <linux/if_arp.h> +#include <linux/ip.h> +#include <net/icmp.h> +#include <net/udp.h> +#include <net/tcp.h> +#include <net/route.h> + +#include <linux/netfilter.h> +#include <linux/netfilter/xt_LOG.h> +#include <net/netfilter/nf_log.h> + +int nf_log_dump_udp_header(struct nf_log_buf *m, const struct sk_buff *skb, + u8 proto, int fragment, unsigned int offset) +{ + struct udphdr _udph; + const struct udphdr *uh; + + if (proto == IPPROTO_UDP) + /* Max length: 10 "PROTO=UDP " */ + nf_log_buf_add(m, "PROTO=UDP "); + else /* Max length: 14 "PROTO=UDPLITE " */ + nf_log_buf_add(m, "PROTO=UDPLITE "); + + if (fragment) + goto out; + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + uh = skb_header_pointer(skb, offset, sizeof(_udph), &_udph); + if (uh == NULL) { + nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", skb->len - offset); + + return 1; + } + + /* Max length: 20 "SPT=65535 DPT=65535 " */ + nf_log_buf_add(m, "SPT=%u DPT=%u LEN=%u ", + ntohs(uh->source), ntohs(uh->dest), ntohs(uh->len)); + +out: + return 0; +} +EXPORT_SYMBOL_GPL(nf_log_dump_udp_header); + +int nf_log_dump_tcp_header(struct nf_log_buf *m, const struct sk_buff *skb, + u8 proto, int fragment, unsigned int offset, + unsigned int logflags) +{ + struct tcphdr _tcph; + const struct tcphdr *th; + + /* Max length: 10 "PROTO=TCP " */ + nf_log_buf_add(m, "PROTO=TCP "); + + if (fragment) + return 0; + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + th = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph); + if (th == NULL) { + nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", skb->len - offset); + return 1; + } + + /* Max length: 20 "SPT=65535 DPT=65535 " */ + nf_log_buf_add(m, "SPT=%u DPT=%u ", + ntohs(th->source), ntohs(th->dest)); + /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */ + if (logflags & XT_LOG_TCPSEQ) { + nf_log_buf_add(m, "SEQ=%u ACK=%u ", + ntohl(th->seq), ntohl(th->ack_seq)); + } + + /* Max length: 13 "WINDOW=65535 " */ + nf_log_buf_add(m, "WINDOW=%u ", ntohs(th->window)); + /* Max length: 9 "RES=0x3C " */ + nf_log_buf_add(m, "RES=0x%02x ", (u_int8_t)(ntohl(tcp_flag_word(th) & + TCP_RESERVED_BITS) >> 22)); + /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */ + if (th->cwr) + nf_log_buf_add(m, "CWR "); + if (th->ece) + nf_log_buf_add(m, "ECE "); + if (th->urg) + nf_log_buf_add(m, "URG "); + if (th->ack) + nf_log_buf_add(m, "ACK "); + if (th->psh) + nf_log_buf_add(m, "PSH "); + if (th->rst) + nf_log_buf_add(m, "RST "); + if (th->syn) + nf_log_buf_add(m, "SYN "); + if (th->fin) + nf_log_buf_add(m, "FIN "); + /* Max length: 11 "URGP=65535 " */ + nf_log_buf_add(m, "URGP=%u ", ntohs(th->urg_ptr)); + + if ((logflags & XT_LOG_TCPOPT) && th->doff*4 > sizeof(struct tcphdr)) { + u_int8_t _opt[60 - sizeof(struct tcphdr)]; + const u_int8_t *op; + unsigned int i; + unsigned int optsize = th->doff*4 - sizeof(struct tcphdr); + + op = skb_header_pointer(skb, offset + sizeof(struct tcphdr), + optsize, _opt); + if (op == NULL) { + nf_log_buf_add(m, "OPT (TRUNCATED)"); + return 1; + } + + /* Max length: 127 "OPT (" 15*4*2chars ") " */ + nf_log_buf_add(m, "OPT ("); + for (i = 0; i < optsize; i++) + nf_log_buf_add(m, "%02X", op[i]); + + nf_log_buf_add(m, ") "); + } + + return 0; +} +EXPORT_SYMBOL_GPL(nf_log_dump_tcp_header); + +void nf_log_dump_sk_uid_gid(struct nf_log_buf *m, struct sock *sk) +{ + if (!sk || sk->sk_state == TCP_TIME_WAIT) + return; + + read_lock_bh(&sk->sk_callback_lock); + if (sk->sk_socket && sk->sk_socket->file) { + const struct cred *cred = sk->sk_socket->file->f_cred; + nf_log_buf_add(m, "UID=%u GID=%u ", + from_kuid_munged(&init_user_ns, cred->fsuid), + from_kgid_munged(&init_user_ns, cred->fsgid)); + } + read_unlock_bh(&sk->sk_callback_lock); +} +EXPORT_SYMBOL_GPL(nf_log_dump_sk_uid_gid); + +void +nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf, + unsigned int hooknum, const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, const char *prefix) +{ + nf_log_buf_add(m, KERN_SOH "%c%sIN=%s OUT=%s ", + '0' + loginfo->u.log.level, prefix, + in ? in->name : "", + out ? out->name : ""); +#ifdef CONFIG_BRIDGE_NETFILTER + if (skb->nf_bridge) { + const struct net_device *physindev; + const struct net_device *physoutdev; + + physindev = skb->nf_bridge->physindev; + if (physindev && in != physindev) + nf_log_buf_add(m, "PHYSIN=%s ", physindev->name); + physoutdev = skb->nf_bridge->physoutdev; + if (physoutdev && out != physoutdev) + nf_log_buf_add(m, "PHYSOUT=%s ", physoutdev->name); + } +#endif +} +EXPORT_SYMBOL_GPL(nf_log_dump_packet_common); + +static int __init nf_log_common_init(void) +{ + return 0; +} + +static void __exit nf_log_common_exit(void) {} + +module_init(nf_log_common_init); +module_exit(nf_log_common_exit); + +MODULE_LICENSE("GPL"); diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index a49907b1dab..552f97cd9fd 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -710,7 +710,7 @@ static struct nf_ct_ext_type nat_extend __read_mostly = { .flags = NF_CT_EXT_F_PREALLOC, }; -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_conntrack.h> diff --git a/net/netfilter/nf_nat_proto_common.c b/net/netfilter/nf_nat_proto_common.c index 83a72a235ca..fbce552a796 100644 --- a/net/netfilter/nf_nat_proto_common.c +++ b/net/netfilter/nf_nat_proto_common.c @@ -95,7 +95,7 @@ void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto, } EXPORT_SYMBOL_GPL(nf_nat_l4proto_unique_tuple); -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[], struct nf_nat_range *range) { diff --git a/net/netfilter/nf_nat_proto_dccp.c b/net/netfilter/nf_nat_proto_dccp.c index c8be2cdac0b..b8067b53ff3 100644 --- a/net/netfilter/nf_nat_proto_dccp.c +++ b/net/netfilter/nf_nat_proto_dccp.c @@ -78,7 +78,7 @@ static const struct nf_nat_l4proto nf_nat_l4proto_dccp = { .manip_pkt = dccp_manip_pkt, .in_range = nf_nat_l4proto_in_range, .unique_tuple = dccp_unique_tuple, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, #endif }; diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c index 754536f2c67..cbc7ade1487 100644 --- a/net/netfilter/nf_nat_proto_sctp.c +++ b/net/netfilter/nf_nat_proto_sctp.c @@ -59,7 +59,7 @@ static const struct nf_nat_l4proto nf_nat_l4proto_sctp = { .manip_pkt = sctp_manip_pkt, .in_range = nf_nat_l4proto_in_range, .unique_tuple = sctp_unique_tuple, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, #endif }; diff --git a/net/netfilter/nf_nat_proto_tcp.c b/net/netfilter/nf_nat_proto_tcp.c index 83ec8a6e4c3..37f5505f452 100644 --- a/net/netfilter/nf_nat_proto_tcp.c +++ b/net/netfilter/nf_nat_proto_tcp.c @@ -79,7 +79,7 @@ const struct nf_nat_l4proto nf_nat_l4proto_tcp = { .manip_pkt = tcp_manip_pkt, .in_range = nf_nat_l4proto_in_range, .unique_tuple = tcp_unique_tuple, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, #endif }; diff --git a/net/netfilter/nf_nat_proto_udp.c b/net/netfilter/nf_nat_proto_udp.c index 7df613fb34a..b0ede2f0d8b 100644 --- a/net/netfilter/nf_nat_proto_udp.c +++ b/net/netfilter/nf_nat_proto_udp.c @@ -70,7 +70,7 @@ const struct nf_nat_l4proto nf_nat_l4proto_udp = { .manip_pkt = udp_manip_pkt, .in_range = nf_nat_l4proto_in_range, .unique_tuple = udp_unique_tuple, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, #endif }; diff --git a/net/netfilter/nf_nat_proto_udplite.c b/net/netfilter/nf_nat_proto_udplite.c index 776a0d1317b..368f14e01e7 100644 --- a/net/netfilter/nf_nat_proto_udplite.c +++ b/net/netfilter/nf_nat_proto_udplite.c @@ -69,7 +69,7 @@ static const struct nf_nat_l4proto nf_nat_l4proto_udplite = { .manip_pkt = udplite_manip_pkt, .in_range = nf_nat_l4proto_in_range, .unique_tuple = udplite_unique_tuple, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, #endif }; diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c index f042ae52155..c68c1e58b36 100644 --- a/net/netfilter/nf_sockopt.c +++ b/net/netfilter/nf_sockopt.c @@ -26,9 +26,7 @@ int nf_register_sockopt(struct nf_sockopt_ops *reg) struct nf_sockopt_ops *ops; int ret = 0; - if (mutex_lock_interruptible(&nf_sockopt_mutex) != 0) - return -EINTR; - + mutex_lock(&nf_sockopt_mutex); list_for_each_entry(ops, &nf_sockopts, list) { if (ops->pf == reg->pf && (overlap(ops->set_optmin, ops->set_optmax, @@ -65,9 +63,7 @@ static struct nf_sockopt_ops *nf_sockopt_find(struct sock *sk, u_int8_t pf, { struct nf_sockopt_ops *ops; - if (mutex_lock_interruptible(&nf_sockopt_mutex) != 0) - return ERR_PTR(-EINTR); - + mutex_lock(&nf_sockopt_mutex); list_for_each_entry(ops, &nf_sockopts, list) { if (ops->pf == pf) { if (!try_module_get(ops->owner)) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 8746ff9a835..deeb95fb702 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -899,6 +899,9 @@ static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr) static void nft_chain_stats_replace(struct nft_base_chain *chain, struct nft_stats __percpu *newstats) { + if (newstats == NULL) + return; + if (chain->stats) { struct nft_stats __percpu *oldstats = nft_dereference(chain->stats); @@ -2247,80 +2250,7 @@ err: return err; } -static int nf_tables_dump_sets_table(struct nft_ctx *ctx, struct sk_buff *skb, - struct netlink_callback *cb) -{ - const struct nft_set *set; - unsigned int idx = 0, s_idx = cb->args[0]; - - if (cb->args[1]) - return skb->len; - - rcu_read_lock(); - cb->seq = ctx->net->nft.base_seq; - - list_for_each_entry_rcu(set, &ctx->table->sets, list) { - if (idx < s_idx) - goto cont; - if (nf_tables_fill_set(skb, ctx, set, NFT_MSG_NEWSET, - NLM_F_MULTI) < 0) { - cb->args[0] = idx; - goto done; - } - nl_dump_check_consistent(cb, nlmsg_hdr(skb)); -cont: - idx++; - } - cb->args[1] = 1; -done: - rcu_read_unlock(); - return skb->len; -} - -static int nf_tables_dump_sets_family(struct nft_ctx *ctx, struct sk_buff *skb, - struct netlink_callback *cb) -{ - const struct nft_set *set; - unsigned int idx, s_idx = cb->args[0]; - struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2]; - - if (cb->args[1]) - return skb->len; - - rcu_read_lock(); - cb->seq = ctx->net->nft.base_seq; - - list_for_each_entry_rcu(table, &ctx->afi->tables, list) { - if (cur_table) { - if (cur_table != table) - continue; - - cur_table = NULL; - } - ctx->table = table; - idx = 0; - list_for_each_entry_rcu(set, &ctx->table->sets, list) { - if (idx < s_idx) - goto cont; - if (nf_tables_fill_set(skb, ctx, set, NFT_MSG_NEWSET, - NLM_F_MULTI) < 0) { - cb->args[0] = idx; - cb->args[2] = (unsigned long) table; - goto done; - } - nl_dump_check_consistent(cb, nlmsg_hdr(skb)); -cont: - idx++; - } - } - cb->args[1] = 1; -done: - rcu_read_unlock(); - return skb->len; -} - -static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb, - struct netlink_callback *cb) +static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb) { const struct nft_set *set; unsigned int idx, s_idx = cb->args[0]; @@ -2328,6 +2258,7 @@ static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb, struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2]; struct net *net = sock_net(skb->sk); int cur_family = cb->args[3]; + struct nft_ctx *ctx = cb->data, ctx_set; if (cb->args[1]) return skb->len; @@ -2336,28 +2267,34 @@ static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb, cb->seq = net->nft.base_seq; list_for_each_entry_rcu(afi, &net->nft.af_info, list) { + if (ctx->afi && ctx->afi != afi) + continue; + if (cur_family) { if (afi->family != cur_family) continue; cur_family = 0; } - list_for_each_entry_rcu(table, &afi->tables, list) { + if (ctx->table && ctx->table != table) + continue; + if (cur_table) { if (cur_table != table) continue; cur_table = NULL; } - - ctx->table = table; - ctx->afi = afi; idx = 0; - list_for_each_entry_rcu(set, &ctx->table->sets, list) { + list_for_each_entry_rcu(set, &table->sets, list) { if (idx < s_idx) goto cont; - if (nf_tables_fill_set(skb, ctx, set, + + ctx_set = *ctx; + ctx_set.table = table; + ctx_set.afi = afi; + if (nf_tables_fill_set(skb, &ctx_set, set, NFT_MSG_NEWSET, NLM_F_MULTI) < 0) { cb->args[0] = idx; @@ -2379,31 +2316,10 @@ done: return skb->len; } -static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb) +static int nf_tables_dump_sets_done(struct netlink_callback *cb) { - const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); - struct nlattr *nla[NFTA_SET_MAX + 1]; - struct nft_ctx ctx; - int err, ret; - - err = nlmsg_parse(cb->nlh, sizeof(*nfmsg), nla, NFTA_SET_MAX, - nft_set_policy); - if (err < 0) - return err; - - err = nft_ctx_init_from_setattr(&ctx, cb->skb, cb->nlh, (void *)nla); - if (err < 0) - return err; - - if (ctx.table == NULL) { - if (ctx.afi == NULL) - ret = nf_tables_dump_sets_all(&ctx, skb, cb); - else - ret = nf_tables_dump_sets_family(&ctx, skb, cb); - } else - ret = nf_tables_dump_sets_table(&ctx, skb, cb); - - return ret; + kfree(cb->data); + return 0; } #define NFT_SET_INACTIVE (1 << 15) /* Internal set flag */ @@ -2426,7 +2342,17 @@ static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb, if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .dump = nf_tables_dump_sets, + .done = nf_tables_dump_sets_done, }; + struct nft_ctx *ctx_dump; + + ctx_dump = kmalloc(sizeof(*ctx_dump), GFP_KERNEL); + if (ctx_dump == NULL) + return -ENOMEM; + + *ctx_dump = ctx; + c.data = ctx_dump; + return netlink_dump_start(nlsk, skb, nlh, &c); } @@ -3150,6 +3076,9 @@ static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb, struct nft_ctx ctx; int rem, err = 0; + if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) + return -EINVAL; + err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, true); if (err < 0) return err; @@ -3208,16 +3137,14 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, goto err2; trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set); - if (trans == NULL) + if (trans == NULL) { + err = -ENOMEM; goto err2; + } nft_trans_elem(trans) = elem; list_add_tail(&trans->list, &ctx->net->nft.commit_list); - - nft_data_uninit(&elem.key, NFT_DATA_VALUE); - if (set->flags & NFT_SET_MAP) - nft_data_uninit(&elem.data, set->dtype); - + return 0; err2: nft_data_uninit(&elem.key, desc.type); err1: @@ -3233,6 +3160,9 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb, struct nft_ctx ctx; int rem, err = 0; + if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) + return -EINVAL; + err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, false); if (err < 0) return err; @@ -3380,7 +3310,7 @@ static int nf_tables_commit(struct sk_buff *skb) { struct net *net = sock_net(skb->sk); struct nft_trans *trans, *next; - struct nft_set *set; + struct nft_trans_elem *te; /* Bump generation counter, invalidate any dump in progress */ while (++net->nft.base_seq == 0); @@ -3466,13 +3396,17 @@ static int nf_tables_commit(struct sk_buff *skb) nft_trans_destroy(trans); break; case NFT_MSG_DELSETELEM: - nf_tables_setelem_notify(&trans->ctx, - nft_trans_elem_set(trans), - &nft_trans_elem(trans), + te = (struct nft_trans_elem *)trans->data; + nf_tables_setelem_notify(&trans->ctx, te->set, + &te->elem, NFT_MSG_DELSETELEM, 0); - set = nft_trans_elem_set(trans); - set->ops->get(set, &nft_trans_elem(trans)); - set->ops->remove(set, &nft_trans_elem(trans)); + te->set->ops->get(te->set, &te->elem); + te->set->ops->remove(te->set, &te->elem); + nft_data_uninit(&te->elem.key, NFT_DATA_VALUE); + if (te->elem.flags & NFT_SET_MAP) { + nft_data_uninit(&te->elem.data, + te->set->dtype); + } nft_trans_destroy(trans); break; } diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index c138b8fbe28..f37f0716a9f 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -222,6 +222,51 @@ replay: } } +struct nfnl_err { + struct list_head head; + struct nlmsghdr *nlh; + int err; +}; + +static int nfnl_err_add(struct list_head *list, struct nlmsghdr *nlh, int err) +{ + struct nfnl_err *nfnl_err; + + nfnl_err = kmalloc(sizeof(struct nfnl_err), GFP_KERNEL); + if (nfnl_err == NULL) + return -ENOMEM; + + nfnl_err->nlh = nlh; + nfnl_err->err = err; + list_add_tail(&nfnl_err->head, list); + + return 0; +} + +static void nfnl_err_del(struct nfnl_err *nfnl_err) +{ + list_del(&nfnl_err->head); + kfree(nfnl_err); +} + +static void nfnl_err_reset(struct list_head *err_list) +{ + struct nfnl_err *nfnl_err, *next; + + list_for_each_entry_safe(nfnl_err, next, err_list, head) + nfnl_err_del(nfnl_err); +} + +static void nfnl_err_deliver(struct list_head *err_list, struct sk_buff *skb) +{ + struct nfnl_err *nfnl_err, *next; + + list_for_each_entry_safe(nfnl_err, next, err_list, head) { + netlink_ack(skb, nfnl_err->nlh, nfnl_err->err); + nfnl_err_del(nfnl_err); + } +} + static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, u_int16_t subsys_id) { @@ -230,6 +275,7 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, const struct nfnetlink_subsystem *ss; const struct nfnl_callback *nc; bool success = true, done = false; + static LIST_HEAD(err_list); int err; if (subsys_id >= NFNL_SUBSYS_COUNT) @@ -287,6 +333,7 @@ replay: type = nlh->nlmsg_type; if (type == NFNL_MSG_BATCH_BEGIN) { /* Malformed: Batch begin twice */ + nfnl_err_reset(&err_list); success = false; goto done; } else if (type == NFNL_MSG_BATCH_END) { @@ -333,6 +380,7 @@ replay: * original skb. */ if (err == -EAGAIN) { + nfnl_err_reset(&err_list); ss->abort(skb); nfnl_unlock(subsys_id); kfree_skb(nskb); @@ -341,11 +389,24 @@ replay: } ack: if (nlh->nlmsg_flags & NLM_F_ACK || err) { + /* Errors are delivered once the full batch has been + * processed, this avoids that the same error is + * reported several times when replaying the batch. + */ + if (nfnl_err_add(&err_list, nlh, err) < 0) { + /* We failed to enqueue an error, reset the + * list of errors and send OOM to userspace + * pointing to the batch header. + */ + nfnl_err_reset(&err_list); + netlink_ack(skb, nlmsg_hdr(oskb), -ENOMEM); + success = false; + goto done; + } /* We don't stop processing the batch on errors, thus, * userspace gets all the errors that the batch * triggers. */ - netlink_ack(skb, nlh, err); if (err) success = false; } @@ -361,6 +422,7 @@ done: else ss->abort(skb); + nfnl_err_deliver(&err_list, oskb); nfnl_unlock(subsys_id); kfree_skb(nskb); } diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index 2baa125c2e8..3ea0eacbd97 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -41,6 +41,7 @@ struct nf_acct { }; #define NFACCT_F_QUOTA (NFACCT_F_QUOTA_PKTS | NFACCT_F_QUOTA_BYTES) +#define NFACCT_OVERQUOTA_BIT 2 /* NFACCT_F_OVERQUOTA */ static int nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, @@ -77,7 +78,8 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, smp_mb__before_atomic(); /* reset overquota flag if quota is enabled. */ if ((matching->flags & NFACCT_F_QUOTA)) - clear_bit(NFACCT_F_OVERQUOTA, &matching->flags); + clear_bit(NFACCT_OVERQUOTA_BIT, + &matching->flags); return 0; } return -EBUSY; @@ -129,6 +131,7 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0; u64 pkts, bytes; + u32 old_flags; event |= NFNL_SUBSYS_ACCT << 8; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); @@ -143,12 +146,13 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, if (nla_put_string(skb, NFACCT_NAME, acct->name)) goto nla_put_failure; + old_flags = acct->flags; if (type == NFNL_MSG_ACCT_GET_CTRZERO) { pkts = atomic64_xchg(&acct->pkts, 0); bytes = atomic64_xchg(&acct->bytes, 0); smp_mb__before_atomic(); if (acct->flags & NFACCT_F_QUOTA) - clear_bit(NFACCT_F_OVERQUOTA, &acct->flags); + clear_bit(NFACCT_OVERQUOTA_BIT, &acct->flags); } else { pkts = atomic64_read(&acct->pkts); bytes = atomic64_read(&acct->bytes); @@ -160,7 +164,7 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, if (acct->flags & NFACCT_F_QUOTA) { u64 *quota = (u64 *)acct->data; - if (nla_put_be32(skb, NFACCT_FLAGS, htonl(acct->flags)) || + if (nla_put_be32(skb, NFACCT_FLAGS, htonl(old_flags)) || nla_put_be64(skb, NFACCT_QUOTA, cpu_to_be64(*quota))) goto nla_put_failure; } @@ -412,7 +416,7 @@ int nfnl_acct_overquota(const struct sk_buff *skb, struct nf_acct *nfacct) ret = now > *quota; if (now >= *quota && - !test_and_set_bit(NFACCT_F_OVERQUOTA, &nfacct->flags)) { + !test_and_set_bit(NFACCT_OVERQUOTA_BIT, &nfacct->flags)) { nfnl_overquota_report(nfacct); } diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index d292c8d286e..a11c5ff2f72 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -773,6 +773,7 @@ nfulnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb, static struct nf_logger nfulnl_logger __read_mostly = { .name = "nfnetlink_log", + .type = NF_LOG_TYPE_ULOG, .logfn = &nfulnl_log_packet, .me = THIS_MODULE, }; @@ -1105,6 +1106,9 @@ MODULE_DESCRIPTION("netfilter userspace logging"); MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_LICENSE("GPL"); MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ULOG); +MODULE_ALIAS_NF_LOGGER(AF_INET, 1); +MODULE_ALIAS_NF_LOGGER(AF_INET6, 1); +MODULE_ALIAS_NF_LOGGER(AF_BRIDGE, 1); module_init(nfnetlink_log_init); module_exit(nfnetlink_log_fini); diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 4080ed6a072..8892b7b6184 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -15,209 +15,40 @@ #include <linux/log2.h> #include <linux/jhash.h> #include <linux/netlink.h> -#include <linux/vmalloc.h> +#include <linux/rhashtable.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> -#define NFT_HASH_MIN_SIZE 4UL - -struct nft_hash { - struct nft_hash_table __rcu *tbl; -}; - -struct nft_hash_table { - unsigned int size; - struct nft_hash_elem __rcu *buckets[]; -}; +/* We target a hash table size of 4, element hint is 75% of final size */ +#define NFT_HASH_ELEMENT_HINT 3 struct nft_hash_elem { - struct nft_hash_elem __rcu *next; + struct rhash_head node; struct nft_data key; struct nft_data data[]; }; -#define nft_hash_for_each_entry(i, head) \ - for (i = nft_dereference(head); i != NULL; i = nft_dereference(i->next)) -#define nft_hash_for_each_entry_rcu(i, head) \ - for (i = rcu_dereference(head); i != NULL; i = rcu_dereference(i->next)) - -static u32 nft_hash_rnd __read_mostly; -static bool nft_hash_rnd_initted __read_mostly; - -static unsigned int nft_hash_data(const struct nft_data *data, - unsigned int hsize, unsigned int len) -{ - unsigned int h; - - h = jhash(data->data, len, nft_hash_rnd); - return h & (hsize - 1); -} - static bool nft_hash_lookup(const struct nft_set *set, const struct nft_data *key, struct nft_data *data) { - const struct nft_hash *priv = nft_set_priv(set); - const struct nft_hash_table *tbl = rcu_dereference(priv->tbl); + const struct rhashtable *priv = nft_set_priv(set); const struct nft_hash_elem *he; - unsigned int h; - - h = nft_hash_data(key, tbl->size, set->klen); - nft_hash_for_each_entry_rcu(he, tbl->buckets[h]) { - if (nft_data_cmp(&he->key, key, set->klen)) - continue; - if (set->flags & NFT_SET_MAP) - nft_data_copy(data, he->data); - return true; - } - return false; -} - -static void nft_hash_tbl_free(const struct nft_hash_table *tbl) -{ - kvfree(tbl); -} - -static unsigned int nft_hash_tbl_size(unsigned int nelem) -{ - return max(roundup_pow_of_two(nelem * 4 / 3), NFT_HASH_MIN_SIZE); -} - -static struct nft_hash_table *nft_hash_tbl_alloc(unsigned int nbuckets) -{ - struct nft_hash_table *tbl; - size_t size; - - size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]); - tbl = kzalloc(size, GFP_KERNEL | __GFP_REPEAT | __GFP_NOWARN); - if (tbl == NULL) - tbl = vzalloc(size); - if (tbl == NULL) - return NULL; - tbl->size = nbuckets; - - return tbl; -} - -static void nft_hash_chain_unzip(const struct nft_set *set, - const struct nft_hash_table *ntbl, - struct nft_hash_table *tbl, unsigned int n) -{ - struct nft_hash_elem *he, *last, *next; - unsigned int h; - - he = nft_dereference(tbl->buckets[n]); - if (he == NULL) - return; - h = nft_hash_data(&he->key, ntbl->size, set->klen); - - /* Find last element of first chain hashing to bucket h */ - last = he; - nft_hash_for_each_entry(he, he->next) { - if (nft_hash_data(&he->key, ntbl->size, set->klen) != h) - break; - last = he; - } - /* Unlink first chain from the old table */ - RCU_INIT_POINTER(tbl->buckets[n], last->next); + he = rhashtable_lookup(priv, key); + if (he && set->flags & NFT_SET_MAP) + nft_data_copy(data, he->data); - /* If end of chain reached, done */ - if (he == NULL) - return; - - /* Find first element of second chain hashing to bucket h */ - next = NULL; - nft_hash_for_each_entry(he, he->next) { - if (nft_hash_data(&he->key, ntbl->size, set->klen) != h) - continue; - next = he; - break; - } - - /* Link the two chains */ - RCU_INIT_POINTER(last->next, next); -} - -static int nft_hash_tbl_expand(const struct nft_set *set, struct nft_hash *priv) -{ - struct nft_hash_table *tbl = nft_dereference(priv->tbl), *ntbl; - struct nft_hash_elem *he; - unsigned int i, h; - bool complete; - - ntbl = nft_hash_tbl_alloc(tbl->size * 2); - if (ntbl == NULL) - return -ENOMEM; - - /* Link new table's buckets to first element in the old table - * hashing to the new bucket. - */ - for (i = 0; i < ntbl->size; i++) { - h = i < tbl->size ? i : i - tbl->size; - nft_hash_for_each_entry(he, tbl->buckets[h]) { - if (nft_hash_data(&he->key, ntbl->size, set->klen) != i) - continue; - RCU_INIT_POINTER(ntbl->buckets[i], he); - break; - } - } - - /* Publish new table */ - rcu_assign_pointer(priv->tbl, ntbl); - - /* Unzip interleaved hash chains */ - do { - /* Wait for readers to use new table/unzipped chains */ - synchronize_rcu(); - - complete = true; - for (i = 0; i < tbl->size; i++) { - nft_hash_chain_unzip(set, ntbl, tbl, i); - if (tbl->buckets[i] != NULL) - complete = false; - } - } while (!complete); - - nft_hash_tbl_free(tbl); - return 0; -} - -static int nft_hash_tbl_shrink(const struct nft_set *set, struct nft_hash *priv) -{ - struct nft_hash_table *tbl = nft_dereference(priv->tbl), *ntbl; - struct nft_hash_elem __rcu **pprev; - unsigned int i; - - ntbl = nft_hash_tbl_alloc(tbl->size / 2); - if (ntbl == NULL) - return -ENOMEM; - - for (i = 0; i < ntbl->size; i++) { - ntbl->buckets[i] = tbl->buckets[i]; - - for (pprev = &ntbl->buckets[i]; *pprev != NULL; - pprev = &nft_dereference(*pprev)->next) - ; - RCU_INIT_POINTER(*pprev, tbl->buckets[i + ntbl->size]); - } - - /* Publish new table */ - rcu_assign_pointer(priv->tbl, ntbl); - synchronize_rcu(); - - nft_hash_tbl_free(tbl); - return 0; + return !!he; } static int nft_hash_insert(const struct nft_set *set, const struct nft_set_elem *elem) { - struct nft_hash *priv = nft_set_priv(set); - struct nft_hash_table *tbl = nft_dereference(priv->tbl); + struct rhashtable *priv = nft_set_priv(set); struct nft_hash_elem *he; - unsigned int size, h; + unsigned int size; if (elem->flags != 0) return -EINVAL; @@ -234,13 +65,7 @@ static int nft_hash_insert(const struct nft_set *set, if (set->flags & NFT_SET_MAP) nft_data_copy(he->data, &elem->data); - h = nft_hash_data(&he->key, tbl->size, set->klen); - RCU_INIT_POINTER(he->next, tbl->buckets[h]); - rcu_assign_pointer(tbl->buckets[h], he); - - /* Expand table when exceeding 75% load */ - if (set->nelems + 1 > tbl->size / 4 * 3) - nft_hash_tbl_expand(set, priv); + rhashtable_insert(priv, &he->node, GFP_KERNEL); return 0; } @@ -257,36 +82,31 @@ static void nft_hash_elem_destroy(const struct nft_set *set, static void nft_hash_remove(const struct nft_set *set, const struct nft_set_elem *elem) { - struct nft_hash *priv = nft_set_priv(set); - struct nft_hash_table *tbl = nft_dereference(priv->tbl); - struct nft_hash_elem *he, __rcu **pprev; + struct rhashtable *priv = nft_set_priv(set); + struct rhash_head *he, __rcu **pprev; pprev = elem->cookie; - he = nft_dereference((*pprev)); + he = rht_dereference((*pprev), priv); + + rhashtable_remove_pprev(priv, he, pprev, GFP_KERNEL); - RCU_INIT_POINTER(*pprev, he->next); synchronize_rcu(); kfree(he); - - /* Shrink table beneath 30% load */ - if (set->nelems - 1 < tbl->size * 3 / 10 && - tbl->size > NFT_HASH_MIN_SIZE) - nft_hash_tbl_shrink(set, priv); } static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem) { - const struct nft_hash *priv = nft_set_priv(set); - const struct nft_hash_table *tbl = nft_dereference(priv->tbl); - struct nft_hash_elem __rcu * const *pprev; + const struct rhashtable *priv = nft_set_priv(set); + const struct bucket_table *tbl = rht_dereference_rcu(priv->tbl, priv); + struct rhash_head __rcu * const *pprev; struct nft_hash_elem *he; - unsigned int h; + u32 h; - h = nft_hash_data(&elem->key, tbl->size, set->klen); + h = rhashtable_hashfn(priv, &elem->key, set->klen); pprev = &tbl->buckets[h]; - nft_hash_for_each_entry(he, tbl->buckets[h]) { + rht_for_each_entry_rcu(he, tbl->buckets[h], node) { if (nft_data_cmp(&he->key, &elem->key, set->klen)) { - pprev = &he->next; + pprev = &he->node.next; continue; } @@ -302,14 +122,15 @@ static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem) static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, struct nft_set_iter *iter) { - const struct nft_hash *priv = nft_set_priv(set); - const struct nft_hash_table *tbl = nft_dereference(priv->tbl); + const struct rhashtable *priv = nft_set_priv(set); + const struct bucket_table *tbl; const struct nft_hash_elem *he; struct nft_set_elem elem; unsigned int i; + tbl = rht_dereference_rcu(priv->tbl, priv); for (i = 0; i < tbl->size; i++) { - nft_hash_for_each_entry(he, tbl->buckets[i]) { + rht_for_each_entry_rcu(he, tbl->buckets[i], node) { if (iter->count < iter->skip) goto cont; @@ -329,48 +150,48 @@ cont: static unsigned int nft_hash_privsize(const struct nlattr * const nla[]) { - return sizeof(struct nft_hash); + return sizeof(struct rhashtable); +} + +static int lockdep_nfnl_lock_is_held(void) +{ + return lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES); } static int nft_hash_init(const struct nft_set *set, const struct nft_set_desc *desc, const struct nlattr * const tb[]) { - struct nft_hash *priv = nft_set_priv(set); - struct nft_hash_table *tbl; - unsigned int size; + struct rhashtable *priv = nft_set_priv(set); + struct rhashtable_params params = { + .nelem_hint = desc->size ? : NFT_HASH_ELEMENT_HINT, + .head_offset = offsetof(struct nft_hash_elem, node), + .key_offset = offsetof(struct nft_hash_elem, key), + .key_len = set->klen, + .hashfn = jhash, + .grow_decision = rht_grow_above_75, + .shrink_decision = rht_shrink_below_30, + .mutex_is_held = lockdep_nfnl_lock_is_held, + }; - if (unlikely(!nft_hash_rnd_initted)) { - get_random_bytes(&nft_hash_rnd, 4); - nft_hash_rnd_initted = true; - } - - size = NFT_HASH_MIN_SIZE; - if (desc->size) - size = nft_hash_tbl_size(desc->size); - - tbl = nft_hash_tbl_alloc(size); - if (tbl == NULL) - return -ENOMEM; - RCU_INIT_POINTER(priv->tbl, tbl); - return 0; + return rhashtable_init(priv, ¶ms); } static void nft_hash_destroy(const struct nft_set *set) { - const struct nft_hash *priv = nft_set_priv(set); - const struct nft_hash_table *tbl = nft_dereference(priv->tbl); + const struct rhashtable *priv = nft_set_priv(set); + const struct bucket_table *tbl = priv->tbl; struct nft_hash_elem *he, *next; unsigned int i; for (i = 0; i < tbl->size; i++) { - for (he = nft_dereference(tbl->buckets[i]); he != NULL; - he = next) { - next = nft_dereference(he->next); + for (he = rht_entry(tbl->buckets[i], struct nft_hash_elem, node); + he != NULL; he = next) { + next = rht_entry(he->node.next, struct nft_hash_elem, node); nft_hash_elem_destroy(set, he); } } - kfree(tbl); + rhashtable_destroy(priv); } static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, @@ -383,8 +204,8 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, esize += FIELD_SIZEOF(struct nft_hash_elem, data[0]); if (desc->size) { - est->size = sizeof(struct nft_hash) + - nft_hash_tbl_size(desc->size) * + est->size = sizeof(struct rhashtable) + + roundup_pow_of_two(desc->size * 4 / 3) * sizeof(struct nft_hash_elem *) + desc->size * esize; } else { diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index 10cfb156cdf..bde05f28cf1 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2012-2014 Pablo Neira Ayuso <pablo@netfilter.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -41,6 +42,8 @@ static const struct nla_policy nft_log_policy[NFTA_LOG_MAX + 1] = { [NFTA_LOG_PREFIX] = { .type = NLA_STRING }, [NFTA_LOG_SNAPLEN] = { .type = NLA_U32 }, [NFTA_LOG_QTHRESHOLD] = { .type = NLA_U16 }, + [NFTA_LOG_LEVEL] = { .type = NLA_U32 }, + [NFTA_LOG_FLAGS] = { .type = NLA_U32 }, }; static int nft_log_init(const struct nft_ctx *ctx, @@ -50,6 +53,7 @@ static int nft_log_init(const struct nft_ctx *ctx, struct nft_log *priv = nft_expr_priv(expr); struct nf_loginfo *li = &priv->loginfo; const struct nlattr *nla; + int ret; nla = tb[NFTA_LOG_PREFIX]; if (nla != NULL) { @@ -57,30 +61,74 @@ static int nft_log_init(const struct nft_ctx *ctx, if (priv->prefix == NULL) return -ENOMEM; nla_strlcpy(priv->prefix, nla, nla_len(nla) + 1); - } else + } else { priv->prefix = (char *)nft_log_null_prefix; + } - li->type = NF_LOG_TYPE_ULOG; + li->type = NF_LOG_TYPE_LOG; + if (tb[NFTA_LOG_LEVEL] != NULL && + tb[NFTA_LOG_GROUP] != NULL) + return -EINVAL; if (tb[NFTA_LOG_GROUP] != NULL) + li->type = NF_LOG_TYPE_ULOG; + + switch (li->type) { + case NF_LOG_TYPE_LOG: + if (tb[NFTA_LOG_LEVEL] != NULL) { + li->u.log.level = + ntohl(nla_get_be32(tb[NFTA_LOG_LEVEL])); + } else { + li->u.log.level = 4; + } + if (tb[NFTA_LOG_FLAGS] != NULL) { + li->u.log.logflags = + ntohl(nla_get_be32(tb[NFTA_LOG_FLAGS])); + } + break; + case NF_LOG_TYPE_ULOG: li->u.ulog.group = ntohs(nla_get_be16(tb[NFTA_LOG_GROUP])); + if (tb[NFTA_LOG_SNAPLEN] != NULL) { + li->u.ulog.copy_len = + ntohl(nla_get_be32(tb[NFTA_LOG_SNAPLEN])); + } + if (tb[NFTA_LOG_QTHRESHOLD] != NULL) { + li->u.ulog.qthreshold = + ntohs(nla_get_be16(tb[NFTA_LOG_QTHRESHOLD])); + } + break; + } - if (tb[NFTA_LOG_SNAPLEN] != NULL) - li->u.ulog.copy_len = ntohl(nla_get_be32(tb[NFTA_LOG_SNAPLEN])); - if (tb[NFTA_LOG_QTHRESHOLD] != NULL) { - li->u.ulog.qthreshold = - ntohs(nla_get_be16(tb[NFTA_LOG_QTHRESHOLD])); + if (ctx->afi->family == NFPROTO_INET) { + ret = nf_logger_find_get(NFPROTO_IPV4, li->type); + if (ret < 0) + return ret; + + ret = nf_logger_find_get(NFPROTO_IPV6, li->type); + if (ret < 0) { + nf_logger_put(NFPROTO_IPV4, li->type); + return ret; + } + return 0; } - return 0; + return nf_logger_find_get(ctx->afi->family, li->type); } static void nft_log_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { struct nft_log *priv = nft_expr_priv(expr); + struct nf_loginfo *li = &priv->loginfo; if (priv->prefix != nft_log_null_prefix) kfree(priv->prefix); + + if (ctx->afi->family == NFPROTO_INET) { + nf_logger_put(NFPROTO_IPV4, li->type); + nf_logger_put(NFPROTO_IPV6, li->type); + } else { + nf_logger_put(ctx->afi->family, li->type); + } } static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr) @@ -91,17 +139,33 @@ static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr) if (priv->prefix != nft_log_null_prefix) if (nla_put_string(skb, NFTA_LOG_PREFIX, priv->prefix)) goto nla_put_failure; - if (li->u.ulog.group) - if (nla_put_be16(skb, NFTA_LOG_GROUP, htons(li->u.ulog.group))) - goto nla_put_failure; - if (li->u.ulog.copy_len) - if (nla_put_be32(skb, NFTA_LOG_SNAPLEN, - htonl(li->u.ulog.copy_len))) + switch (li->type) { + case NF_LOG_TYPE_LOG: + if (nla_put_be32(skb, NFTA_LOG_LEVEL, htonl(li->u.log.level))) goto nla_put_failure; - if (li->u.ulog.qthreshold) - if (nla_put_be16(skb, NFTA_LOG_QTHRESHOLD, - htons(li->u.ulog.qthreshold))) + + if (li->u.log.logflags) { + if (nla_put_be32(skb, NFTA_LOG_FLAGS, + htonl(li->u.log.logflags))) + goto nla_put_failure; + } + break; + case NF_LOG_TYPE_ULOG: + if (nla_put_be16(skb, NFTA_LOG_GROUP, htons(li->u.ulog.group))) goto nla_put_failure; + + if (li->u.ulog.copy_len) { + if (nla_put_be32(skb, NFTA_LOG_SNAPLEN, + htonl(li->u.ulog.copy_len))) + goto nla_put_failure; + } + if (li->u.ulog.qthreshold) { + if (nla_put_be16(skb, NFTA_LOG_QTHRESHOLD, + htons(li->u.ulog.qthreshold))) + goto nla_put_failure; + } + break; + } return 0; nla_put_failure: diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c index e1836ff8819..46214f24566 100644 --- a/net/netfilter/nft_rbtree.c +++ b/net/netfilter/nft_rbtree.c @@ -234,13 +234,11 @@ static void nft_rbtree_destroy(const struct nft_set *set) struct nft_rbtree_elem *rbe; struct rb_node *node; - spin_lock_bh(&nft_rbtree_lock); while ((node = priv->root.rb_node) != NULL) { rb_erase(node, &priv->root); rbe = rb_entry(node, struct nft_rbtree_elem, node); nft_rbtree_elem_destroy(set, rbe); } - spin_unlock_bh(&nft_rbtree_lock); } static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features, diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 227aa11e840..272ae4d6fdf 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -71,18 +71,14 @@ static const char *const xt_prefix[NFPROTO_NUMPROTO] = { static const unsigned int xt_jumpstack_multiplier = 2; /* Registration hooks for targets. */ -int -xt_register_target(struct xt_target *target) +int xt_register_target(struct xt_target *target) { u_int8_t af = target->family; - int ret; - ret = mutex_lock_interruptible(&xt[af].mutex); - if (ret != 0) - return ret; + mutex_lock(&xt[af].mutex); list_add(&target->list, &xt[af].target); mutex_unlock(&xt[af].mutex); - return ret; + return 0; } EXPORT_SYMBOL(xt_register_target); @@ -125,20 +121,14 @@ xt_unregister_targets(struct xt_target *target, unsigned int n) } EXPORT_SYMBOL(xt_unregister_targets); -int -xt_register_match(struct xt_match *match) +int xt_register_match(struct xt_match *match) { u_int8_t af = match->family; - int ret; - - ret = mutex_lock_interruptible(&xt[af].mutex); - if (ret != 0) - return ret; + mutex_lock(&xt[af].mutex); list_add(&match->list, &xt[af].match); mutex_unlock(&xt[af].mutex); - - return ret; + return 0; } EXPORT_SYMBOL(xt_register_match); @@ -194,9 +184,7 @@ struct xt_match *xt_find_match(u8 af, const char *name, u8 revision) struct xt_match *m; int err = -ENOENT; - if (mutex_lock_interruptible(&xt[af].mutex) != 0) - return ERR_PTR(-EINTR); - + mutex_lock(&xt[af].mutex); list_for_each_entry(m, &xt[af].match, list) { if (strcmp(m->name, name) == 0) { if (m->revision == revision) { @@ -239,9 +227,7 @@ struct xt_target *xt_find_target(u8 af, const char *name, u8 revision) struct xt_target *t; int err = -ENOENT; - if (mutex_lock_interruptible(&xt[af].mutex) != 0) - return ERR_PTR(-EINTR); - + mutex_lock(&xt[af].mutex); list_for_each_entry(t, &xt[af].target, list) { if (strcmp(t->name, name) == 0) { if (t->revision == revision) { @@ -323,10 +309,7 @@ int xt_find_revision(u8 af, const char *name, u8 revision, int target, { int have_rev, best = -1; - if (mutex_lock_interruptible(&xt[af].mutex) != 0) { - *err = -EINTR; - return 1; - } + mutex_lock(&xt[af].mutex); if (target == 1) have_rev = target_revfn(af, name, revision, &best); else @@ -711,28 +694,15 @@ void xt_free_table_info(struct xt_table_info *info) { int cpu; - for_each_possible_cpu(cpu) { - if (info->size <= PAGE_SIZE) - kfree(info->entries[cpu]); - else - vfree(info->entries[cpu]); - } + for_each_possible_cpu(cpu) + kvfree(info->entries[cpu]); if (info->jumpstack != NULL) { - if (sizeof(void *) * info->stacksize > PAGE_SIZE) { - for_each_possible_cpu(cpu) - vfree(info->jumpstack[cpu]); - } else { - for_each_possible_cpu(cpu) - kfree(info->jumpstack[cpu]); - } + for_each_possible_cpu(cpu) + kvfree(info->jumpstack[cpu]); + kvfree(info->jumpstack); } - if (sizeof(void **) * nr_cpu_ids > PAGE_SIZE) - vfree(info->jumpstack); - else - kfree(info->jumpstack); - free_percpu(info->stackptr); kfree(info); @@ -745,9 +715,7 @@ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, { struct xt_table *t; - if (mutex_lock_interruptible(&xt[af].mutex) != 0) - return ERR_PTR(-EINTR); - + mutex_lock(&xt[af].mutex); list_for_each_entry(t, &net->xt.tables[af], list) if (strcmp(t->name, name) == 0 && try_module_get(t->me)) return t; @@ -896,10 +864,7 @@ struct xt_table *xt_register_table(struct net *net, goto out; } - ret = mutex_lock_interruptible(&xt[table->af].mutex); - if (ret != 0) - goto out_free; - + mutex_lock(&xt[table->af].mutex); /* Don't autoload: we'd eat our tail... */ list_for_each_entry(t, &net->xt.tables[table->af], list) { if (strcmp(t->name, table->name) == 0) { @@ -924,9 +889,8 @@ struct xt_table *xt_register_table(struct net *net, mutex_unlock(&xt[table->af].mutex); return table; - unlock: +unlock: mutex_unlock(&xt[table->af].mutex); -out_free: kfree(table); out: return ERR_PTR(ret); diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c index 993de2ba89d..3ba31c194cc 100644 --- a/net/netfilter/xt_LED.c +++ b/net/netfilter/xt_LED.c @@ -50,11 +50,14 @@ struct xt_led_info_internal { struct timer_list timer; }; +#define XT_LED_BLINK_DELAY 50 /* ms */ + static unsigned int led_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_led_info *ledinfo = par->targinfo; struct xt_led_info_internal *ledinternal = ledinfo->internal_data; + unsigned long led_delay = XT_LED_BLINK_DELAY; /* * If "always blink" is enabled, and there's still some time until the @@ -62,9 +65,10 @@ led_tg(struct sk_buff *skb, const struct xt_action_param *par) */ if ((ledinfo->delay > 0) && ledinfo->always_blink && timer_pending(&ledinternal->timer)) - led_trigger_event(&ledinternal->netfilter_led_trigger, LED_OFF); - - led_trigger_event(&ledinternal->netfilter_led_trigger, LED_FULL); + led_trigger_blink_oneshot(&ledinternal->netfilter_led_trigger, + &led_delay, &led_delay, 1); + else + led_trigger_event(&ledinternal->netfilter_led_trigger, LED_FULL); /* If there's a positive delay, start/update the timer */ if (ledinfo->delay > 0) { @@ -133,9 +137,7 @@ static int led_tg_check(const struct xt_tgchk_param *par) err = led_trigger_register(&ledinternal->netfilter_led_trigger); if (err) { - pr_warning("led_trigger_register() failed\n"); - if (err == -EEXIST) - pr_warning("Trigger name is already in use.\n"); + pr_err("Trigger name is already in use.\n"); goto exit_alloc; } diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c index 5ab24843370..c13b79440ed 100644 --- a/net/netfilter/xt_LOG.c +++ b/net/netfilter/xt_LOG.c @@ -27,806 +27,6 @@ #include <linux/netfilter/xt_LOG.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <net/netfilter/nf_log.h> -#include <net/netfilter/xt_log.h> - -static struct nf_loginfo default_loginfo = { - .type = NF_LOG_TYPE_LOG, - .u = { - .log = { - .level = 5, - .logflags = NF_LOG_MASK, - }, - }, -}; - -static int dump_udp_header(struct sbuff *m, const struct sk_buff *skb, - u8 proto, int fragment, unsigned int offset) -{ - struct udphdr _udph; - const struct udphdr *uh; - - if (proto == IPPROTO_UDP) - /* Max length: 10 "PROTO=UDP " */ - sb_add(m, "PROTO=UDP "); - else /* Max length: 14 "PROTO=UDPLITE " */ - sb_add(m, "PROTO=UDPLITE "); - - if (fragment) - goto out; - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - uh = skb_header_pointer(skb, offset, sizeof(_udph), &_udph); - if (uh == NULL) { - sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - offset); - - return 1; - } - - /* Max length: 20 "SPT=65535 DPT=65535 " */ - sb_add(m, "SPT=%u DPT=%u LEN=%u ", ntohs(uh->source), ntohs(uh->dest), - ntohs(uh->len)); - -out: - return 0; -} - -static int dump_tcp_header(struct sbuff *m, const struct sk_buff *skb, - u8 proto, int fragment, unsigned int offset, - unsigned int logflags) -{ - struct tcphdr _tcph; - const struct tcphdr *th; - - /* Max length: 10 "PROTO=TCP " */ - sb_add(m, "PROTO=TCP "); - - if (fragment) - return 0; - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - th = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph); - if (th == NULL) { - sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - offset); - return 1; - } - - /* Max length: 20 "SPT=65535 DPT=65535 " */ - sb_add(m, "SPT=%u DPT=%u ", ntohs(th->source), ntohs(th->dest)); - /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */ - if (logflags & XT_LOG_TCPSEQ) - sb_add(m, "SEQ=%u ACK=%u ", ntohl(th->seq), ntohl(th->ack_seq)); - - /* Max length: 13 "WINDOW=65535 " */ - sb_add(m, "WINDOW=%u ", ntohs(th->window)); - /* Max length: 9 "RES=0x3C " */ - sb_add(m, "RES=0x%02x ", (u_int8_t)(ntohl(tcp_flag_word(th) & - TCP_RESERVED_BITS) >> 22)); - /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */ - if (th->cwr) - sb_add(m, "CWR "); - if (th->ece) - sb_add(m, "ECE "); - if (th->urg) - sb_add(m, "URG "); - if (th->ack) - sb_add(m, "ACK "); - if (th->psh) - sb_add(m, "PSH "); - if (th->rst) - sb_add(m, "RST "); - if (th->syn) - sb_add(m, "SYN "); - if (th->fin) - sb_add(m, "FIN "); - /* Max length: 11 "URGP=65535 " */ - sb_add(m, "URGP=%u ", ntohs(th->urg_ptr)); - - if ((logflags & XT_LOG_TCPOPT) && th->doff*4 > sizeof(struct tcphdr)) { - u_int8_t _opt[60 - sizeof(struct tcphdr)]; - const u_int8_t *op; - unsigned int i; - unsigned int optsize = th->doff*4 - sizeof(struct tcphdr); - - op = skb_header_pointer(skb, offset + sizeof(struct tcphdr), - optsize, _opt); - if (op == NULL) { - sb_add(m, "OPT (TRUNCATED)"); - return 1; - } - - /* Max length: 127 "OPT (" 15*4*2chars ") " */ - sb_add(m, "OPT ("); - for (i = 0; i < optsize; i++) - sb_add(m, "%02X", op[i]); - - sb_add(m, ") "); - } - - return 0; -} - -static void dump_sk_uid_gid(struct sbuff *m, struct sock *sk) -{ - if (!sk || sk->sk_state == TCP_TIME_WAIT) - return; - - read_lock_bh(&sk->sk_callback_lock); - if (sk->sk_socket && sk->sk_socket->file) { - const struct cred *cred = sk->sk_socket->file->f_cred; - sb_add(m, "UID=%u GID=%u ", - from_kuid_munged(&init_user_ns, cred->fsuid), - from_kgid_munged(&init_user_ns, cred->fsgid)); - } - read_unlock_bh(&sk->sk_callback_lock); -} - -/* One level of recursion won't kill us */ -static void dump_ipv4_packet(struct sbuff *m, - const struct nf_loginfo *info, - const struct sk_buff *skb, - unsigned int iphoff) -{ - struct iphdr _iph; - const struct iphdr *ih; - unsigned int logflags; - - if (info->type == NF_LOG_TYPE_LOG) - logflags = info->u.log.logflags; - else - logflags = NF_LOG_MASK; - - ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph); - if (ih == NULL) { - sb_add(m, "TRUNCATED"); - return; - } - - /* Important fields: - * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */ - /* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */ - sb_add(m, "SRC=%pI4 DST=%pI4 ", - &ih->saddr, &ih->daddr); - - /* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */ - sb_add(m, "LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ", - ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK, - ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id)); - - /* Max length: 6 "CE DF MF " */ - if (ntohs(ih->frag_off) & IP_CE) - sb_add(m, "CE "); - if (ntohs(ih->frag_off) & IP_DF) - sb_add(m, "DF "); - if (ntohs(ih->frag_off) & IP_MF) - sb_add(m, "MF "); - - /* Max length: 11 "FRAG:65535 " */ - if (ntohs(ih->frag_off) & IP_OFFSET) - sb_add(m, "FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET); - - if ((logflags & XT_LOG_IPOPT) && - ih->ihl * 4 > sizeof(struct iphdr)) { - const unsigned char *op; - unsigned char _opt[4 * 15 - sizeof(struct iphdr)]; - unsigned int i, optsize; - - optsize = ih->ihl * 4 - sizeof(struct iphdr); - op = skb_header_pointer(skb, iphoff+sizeof(_iph), - optsize, _opt); - if (op == NULL) { - sb_add(m, "TRUNCATED"); - return; - } - - /* Max length: 127 "OPT (" 15*4*2chars ") " */ - sb_add(m, "OPT ("); - for (i = 0; i < optsize; i++) - sb_add(m, "%02X", op[i]); - sb_add(m, ") "); - } - - switch (ih->protocol) { - case IPPROTO_TCP: - if (dump_tcp_header(m, skb, ih->protocol, - ntohs(ih->frag_off) & IP_OFFSET, - iphoff+ih->ihl*4, logflags)) - return; - break; - case IPPROTO_UDP: - case IPPROTO_UDPLITE: - if (dump_udp_header(m, skb, ih->protocol, - ntohs(ih->frag_off) & IP_OFFSET, - iphoff+ih->ihl*4)) - return; - break; - case IPPROTO_ICMP: { - struct icmphdr _icmph; - const struct icmphdr *ich; - static const size_t required_len[NR_ICMP_TYPES+1] - = { [ICMP_ECHOREPLY] = 4, - [ICMP_DEST_UNREACH] - = 8 + sizeof(struct iphdr), - [ICMP_SOURCE_QUENCH] - = 8 + sizeof(struct iphdr), - [ICMP_REDIRECT] - = 8 + sizeof(struct iphdr), - [ICMP_ECHO] = 4, - [ICMP_TIME_EXCEEDED] - = 8 + sizeof(struct iphdr), - [ICMP_PARAMETERPROB] - = 8 + sizeof(struct iphdr), - [ICMP_TIMESTAMP] = 20, - [ICMP_TIMESTAMPREPLY] = 20, - [ICMP_ADDRESS] = 12, - [ICMP_ADDRESSREPLY] = 12 }; - - /* Max length: 11 "PROTO=ICMP " */ - sb_add(m, "PROTO=ICMP "); - - if (ntohs(ih->frag_off) & IP_OFFSET) - break; - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - ich = skb_header_pointer(skb, iphoff + ih->ihl * 4, - sizeof(_icmph), &_icmph); - if (ich == NULL) { - sb_add(m, "INCOMPLETE [%u bytes] ", - skb->len - iphoff - ih->ihl*4); - break; - } - - /* Max length: 18 "TYPE=255 CODE=255 " */ - sb_add(m, "TYPE=%u CODE=%u ", ich->type, ich->code); - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - if (ich->type <= NR_ICMP_TYPES && - required_len[ich->type] && - skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) { - sb_add(m, "INCOMPLETE [%u bytes] ", - skb->len - iphoff - ih->ihl*4); - break; - } - - switch (ich->type) { - case ICMP_ECHOREPLY: - case ICMP_ECHO: - /* Max length: 19 "ID=65535 SEQ=65535 " */ - sb_add(m, "ID=%u SEQ=%u ", - ntohs(ich->un.echo.id), - ntohs(ich->un.echo.sequence)); - break; - - case ICMP_PARAMETERPROB: - /* Max length: 14 "PARAMETER=255 " */ - sb_add(m, "PARAMETER=%u ", - ntohl(ich->un.gateway) >> 24); - break; - case ICMP_REDIRECT: - /* Max length: 24 "GATEWAY=255.255.255.255 " */ - sb_add(m, "GATEWAY=%pI4 ", &ich->un.gateway); - /* Fall through */ - case ICMP_DEST_UNREACH: - case ICMP_SOURCE_QUENCH: - case ICMP_TIME_EXCEEDED: - /* Max length: 3+maxlen */ - if (!iphoff) { /* Only recurse once. */ - sb_add(m, "["); - dump_ipv4_packet(m, info, skb, - iphoff + ih->ihl*4+sizeof(_icmph)); - sb_add(m, "] "); - } - - /* Max length: 10 "MTU=65535 " */ - if (ich->type == ICMP_DEST_UNREACH && - ich->code == ICMP_FRAG_NEEDED) - sb_add(m, "MTU=%u ", ntohs(ich->un.frag.mtu)); - } - break; - } - /* Max Length */ - case IPPROTO_AH: { - struct ip_auth_hdr _ahdr; - const struct ip_auth_hdr *ah; - - if (ntohs(ih->frag_off) & IP_OFFSET) - break; - - /* Max length: 9 "PROTO=AH " */ - sb_add(m, "PROTO=AH "); - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - ah = skb_header_pointer(skb, iphoff+ih->ihl*4, - sizeof(_ahdr), &_ahdr); - if (ah == NULL) { - sb_add(m, "INCOMPLETE [%u bytes] ", - skb->len - iphoff - ih->ihl*4); - break; - } - - /* Length: 15 "SPI=0xF1234567 " */ - sb_add(m, "SPI=0x%x ", ntohl(ah->spi)); - break; - } - case IPPROTO_ESP: { - struct ip_esp_hdr _esph; - const struct ip_esp_hdr *eh; - - /* Max length: 10 "PROTO=ESP " */ - sb_add(m, "PROTO=ESP "); - - if (ntohs(ih->frag_off) & IP_OFFSET) - break; - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - eh = skb_header_pointer(skb, iphoff+ih->ihl*4, - sizeof(_esph), &_esph); - if (eh == NULL) { - sb_add(m, "INCOMPLETE [%u bytes] ", - skb->len - iphoff - ih->ihl*4); - break; - } - - /* Length: 15 "SPI=0xF1234567 " */ - sb_add(m, "SPI=0x%x ", ntohl(eh->spi)); - break; - } - /* Max length: 10 "PROTO 255 " */ - default: - sb_add(m, "PROTO=%u ", ih->protocol); - } - - /* Max length: 15 "UID=4294967295 " */ - if ((logflags & XT_LOG_UID) && !iphoff) - dump_sk_uid_gid(m, skb->sk); - - /* Max length: 16 "MARK=0xFFFFFFFF " */ - if (!iphoff && skb->mark) - sb_add(m, "MARK=0x%x ", skb->mark); - - /* Proto Max log string length */ - /* IP: 40+46+6+11+127 = 230 */ - /* TCP: 10+max(25,20+30+13+9+32+11+127) = 252 */ - /* UDP: 10+max(25,20) = 35 */ - /* UDPLITE: 14+max(25,20) = 39 */ - /* ICMP: 11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */ - /* ESP: 10+max(25)+15 = 50 */ - /* AH: 9+max(25)+15 = 49 */ - /* unknown: 10 */ - - /* (ICMP allows recursion one level deep) */ - /* maxlen = IP + ICMP + IP + max(TCP,UDP,ICMP,unknown) */ - /* maxlen = 230+ 91 + 230 + 252 = 803 */ -} - -static void dump_ipv4_mac_header(struct sbuff *m, - const struct nf_loginfo *info, - const struct sk_buff *skb) -{ - struct net_device *dev = skb->dev; - unsigned int logflags = 0; - - if (info->type == NF_LOG_TYPE_LOG) - logflags = info->u.log.logflags; - - if (!(logflags & XT_LOG_MACDECODE)) - goto fallback; - - switch (dev->type) { - case ARPHRD_ETHER: - sb_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ", - eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, - ntohs(eth_hdr(skb)->h_proto)); - return; - default: - break; - } - -fallback: - sb_add(m, "MAC="); - if (dev->hard_header_len && - skb->mac_header != skb->network_header) { - const unsigned char *p = skb_mac_header(skb); - unsigned int i; - - sb_add(m, "%02x", *p++); - for (i = 1; i < dev->hard_header_len; i++, p++) - sb_add(m, ":%02x", *p); - } - sb_add(m, " "); -} - -static void -log_packet_common(struct sbuff *m, - u_int8_t pf, - unsigned int hooknum, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct nf_loginfo *loginfo, - const char *prefix) -{ - sb_add(m, KERN_SOH "%c%sIN=%s OUT=%s ", - '0' + loginfo->u.log.level, prefix, - in ? in->name : "", - out ? out->name : ""); -#ifdef CONFIG_BRIDGE_NETFILTER - if (skb->nf_bridge) { - const struct net_device *physindev; - const struct net_device *physoutdev; - - physindev = skb->nf_bridge->physindev; - if (physindev && in != physindev) - sb_add(m, "PHYSIN=%s ", physindev->name); - physoutdev = skb->nf_bridge->physoutdev; - if (physoutdev && out != physoutdev) - sb_add(m, "PHYSOUT=%s ", physoutdev->name); - } -#endif -} - - -static void -ipt_log_packet(struct net *net, - u_int8_t pf, - unsigned int hooknum, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct nf_loginfo *loginfo, - const char *prefix) -{ - struct sbuff *m; - - /* FIXME: Disabled from containers until syslog ns is supported */ - if (!net_eq(net, &init_net)) - return; - - m = sb_open(); - - if (!loginfo) - loginfo = &default_loginfo; - - log_packet_common(m, pf, hooknum, skb, in, out, loginfo, prefix); - - if (in != NULL) - dump_ipv4_mac_header(m, loginfo, skb); - - dump_ipv4_packet(m, loginfo, skb, 0); - - sb_close(m); -} - -#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) -/* One level of recursion won't kill us */ -static void dump_ipv6_packet(struct sbuff *m, - const struct nf_loginfo *info, - const struct sk_buff *skb, unsigned int ip6hoff, - int recurse) -{ - u_int8_t currenthdr; - int fragment; - struct ipv6hdr _ip6h; - const struct ipv6hdr *ih; - unsigned int ptr; - unsigned int hdrlen = 0; - unsigned int logflags; - - if (info->type == NF_LOG_TYPE_LOG) - logflags = info->u.log.logflags; - else - logflags = NF_LOG_MASK; - - ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h); - if (ih == NULL) { - sb_add(m, "TRUNCATED"); - return; - } - - /* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */ - sb_add(m, "SRC=%pI6 DST=%pI6 ", &ih->saddr, &ih->daddr); - - /* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */ - sb_add(m, "LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ", - ntohs(ih->payload_len) + sizeof(struct ipv6hdr), - (ntohl(*(__be32 *)ih) & 0x0ff00000) >> 20, - ih->hop_limit, - (ntohl(*(__be32 *)ih) & 0x000fffff)); - - fragment = 0; - ptr = ip6hoff + sizeof(struct ipv6hdr); - currenthdr = ih->nexthdr; - while (currenthdr != NEXTHDR_NONE && ip6t_ext_hdr(currenthdr)) { - struct ipv6_opt_hdr _hdr; - const struct ipv6_opt_hdr *hp; - - hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); - if (hp == NULL) { - sb_add(m, "TRUNCATED"); - return; - } - - /* Max length: 48 "OPT (...) " */ - if (logflags & XT_LOG_IPOPT) - sb_add(m, "OPT ( "); - - switch (currenthdr) { - case IPPROTO_FRAGMENT: { - struct frag_hdr _fhdr; - const struct frag_hdr *fh; - - sb_add(m, "FRAG:"); - fh = skb_header_pointer(skb, ptr, sizeof(_fhdr), - &_fhdr); - if (fh == NULL) { - sb_add(m, "TRUNCATED "); - return; - } - - /* Max length: 6 "65535 " */ - sb_add(m, "%u ", ntohs(fh->frag_off) & 0xFFF8); - - /* Max length: 11 "INCOMPLETE " */ - if (fh->frag_off & htons(0x0001)) - sb_add(m, "INCOMPLETE "); - - sb_add(m, "ID:%08x ", ntohl(fh->identification)); - - if (ntohs(fh->frag_off) & 0xFFF8) - fragment = 1; - - hdrlen = 8; - - break; - } - case IPPROTO_DSTOPTS: - case IPPROTO_ROUTING: - case IPPROTO_HOPOPTS: - if (fragment) { - if (logflags & XT_LOG_IPOPT) - sb_add(m, ")"); - return; - } - hdrlen = ipv6_optlen(hp); - break; - /* Max Length */ - case IPPROTO_AH: - if (logflags & XT_LOG_IPOPT) { - struct ip_auth_hdr _ahdr; - const struct ip_auth_hdr *ah; - - /* Max length: 3 "AH " */ - sb_add(m, "AH "); - - if (fragment) { - sb_add(m, ")"); - return; - } - - ah = skb_header_pointer(skb, ptr, sizeof(_ahdr), - &_ahdr); - if (ah == NULL) { - /* - * Max length: 26 "INCOMPLETE [65535 - * bytes] )" - */ - sb_add(m, "INCOMPLETE [%u bytes] )", - skb->len - ptr); - return; - } - - /* Length: 15 "SPI=0xF1234567 */ - sb_add(m, "SPI=0x%x ", ntohl(ah->spi)); - - } - - hdrlen = (hp->hdrlen+2)<<2; - break; - case IPPROTO_ESP: - if (logflags & XT_LOG_IPOPT) { - struct ip_esp_hdr _esph; - const struct ip_esp_hdr *eh; - - /* Max length: 4 "ESP " */ - sb_add(m, "ESP "); - - if (fragment) { - sb_add(m, ")"); - return; - } - - /* - * Max length: 26 "INCOMPLETE [65535 bytes] )" - */ - eh = skb_header_pointer(skb, ptr, sizeof(_esph), - &_esph); - if (eh == NULL) { - sb_add(m, "INCOMPLETE [%u bytes] )", - skb->len - ptr); - return; - } - - /* Length: 16 "SPI=0xF1234567 )" */ - sb_add(m, "SPI=0x%x )", ntohl(eh->spi)); - - } - return; - default: - /* Max length: 20 "Unknown Ext Hdr 255" */ - sb_add(m, "Unknown Ext Hdr %u", currenthdr); - return; - } - if (logflags & XT_LOG_IPOPT) - sb_add(m, ") "); - - currenthdr = hp->nexthdr; - ptr += hdrlen; - } - - switch (currenthdr) { - case IPPROTO_TCP: - if (dump_tcp_header(m, skb, currenthdr, fragment, ptr, - logflags)) - return; - break; - case IPPROTO_UDP: - case IPPROTO_UDPLITE: - if (dump_udp_header(m, skb, currenthdr, fragment, ptr)) - return; - break; - case IPPROTO_ICMPV6: { - struct icmp6hdr _icmp6h; - const struct icmp6hdr *ic; - - /* Max length: 13 "PROTO=ICMPv6 " */ - sb_add(m, "PROTO=ICMPv6 "); - - if (fragment) - break; - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - ic = skb_header_pointer(skb, ptr, sizeof(_icmp6h), &_icmp6h); - if (ic == NULL) { - sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - ptr); - return; - } - - /* Max length: 18 "TYPE=255 CODE=255 " */ - sb_add(m, "TYPE=%u CODE=%u ", ic->icmp6_type, ic->icmp6_code); - - switch (ic->icmp6_type) { - case ICMPV6_ECHO_REQUEST: - case ICMPV6_ECHO_REPLY: - /* Max length: 19 "ID=65535 SEQ=65535 " */ - sb_add(m, "ID=%u SEQ=%u ", - ntohs(ic->icmp6_identifier), - ntohs(ic->icmp6_sequence)); - break; - case ICMPV6_MGM_QUERY: - case ICMPV6_MGM_REPORT: - case ICMPV6_MGM_REDUCTION: - break; - - case ICMPV6_PARAMPROB: - /* Max length: 17 "POINTER=ffffffff " */ - sb_add(m, "POINTER=%08x ", ntohl(ic->icmp6_pointer)); - /* Fall through */ - case ICMPV6_DEST_UNREACH: - case ICMPV6_PKT_TOOBIG: - case ICMPV6_TIME_EXCEED: - /* Max length: 3+maxlen */ - if (recurse) { - sb_add(m, "["); - dump_ipv6_packet(m, info, skb, - ptr + sizeof(_icmp6h), 0); - sb_add(m, "] "); - } - - /* Max length: 10 "MTU=65535 " */ - if (ic->icmp6_type == ICMPV6_PKT_TOOBIG) - sb_add(m, "MTU=%u ", ntohl(ic->icmp6_mtu)); - } - break; - } - /* Max length: 10 "PROTO=255 " */ - default: - sb_add(m, "PROTO=%u ", currenthdr); - } - - /* Max length: 15 "UID=4294967295 " */ - if ((logflags & XT_LOG_UID) && recurse) - dump_sk_uid_gid(m, skb->sk); - - /* Max length: 16 "MARK=0xFFFFFFFF " */ - if (recurse && skb->mark) - sb_add(m, "MARK=0x%x ", skb->mark); -} - -static void dump_ipv6_mac_header(struct sbuff *m, - const struct nf_loginfo *info, - const struct sk_buff *skb) -{ - struct net_device *dev = skb->dev; - unsigned int logflags = 0; - - if (info->type == NF_LOG_TYPE_LOG) - logflags = info->u.log.logflags; - - if (!(logflags & XT_LOG_MACDECODE)) - goto fallback; - - switch (dev->type) { - case ARPHRD_ETHER: - sb_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ", - eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, - ntohs(eth_hdr(skb)->h_proto)); - return; - default: - break; - } - -fallback: - sb_add(m, "MAC="); - if (dev->hard_header_len && - skb->mac_header != skb->network_header) { - const unsigned char *p = skb_mac_header(skb); - unsigned int len = dev->hard_header_len; - unsigned int i; - - if (dev->type == ARPHRD_SIT) { - p -= ETH_HLEN; - - if (p < skb->head) - p = NULL; - } - - if (p != NULL) { - sb_add(m, "%02x", *p++); - for (i = 1; i < len; i++) - sb_add(m, ":%02x", *p++); - } - sb_add(m, " "); - - if (dev->type == ARPHRD_SIT) { - const struct iphdr *iph = - (struct iphdr *)skb_mac_header(skb); - sb_add(m, "TUNNEL=%pI4->%pI4 ", &iph->saddr, - &iph->daddr); - } - } else - sb_add(m, " "); -} - -static void -ip6t_log_packet(struct net *net, - u_int8_t pf, - unsigned int hooknum, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct nf_loginfo *loginfo, - const char *prefix) -{ - struct sbuff *m; - - /* FIXME: Disabled from containers until syslog ns is supported */ - if (!net_eq(net, &init_net)) - return; - - m = sb_open(); - - if (!loginfo) - loginfo = &default_loginfo; - - log_packet_common(m, pf, hooknum, skb, in, out, loginfo, prefix); - - if (in != NULL) - dump_ipv6_mac_header(m, loginfo, skb); - - dump_ipv6_packet(m, loginfo, skb, skb_network_offset(skb), 1); - - sb_close(m); -} -#endif static unsigned int log_tg(struct sk_buff *skb, const struct xt_action_param *par) @@ -839,17 +39,8 @@ log_tg(struct sk_buff *skb, const struct xt_action_param *par) li.u.log.level = loginfo->level; li.u.log.logflags = loginfo->logflags; - if (par->family == NFPROTO_IPV4) - ipt_log_packet(net, NFPROTO_IPV4, par->hooknum, skb, par->in, - par->out, &li, loginfo->prefix); -#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) - else if (par->family == NFPROTO_IPV6) - ip6t_log_packet(net, NFPROTO_IPV6, par->hooknum, skb, par->in, - par->out, &li, loginfo->prefix); -#endif - else - WARN_ON_ONCE(1); - + nf_log_packet(net, par->family, par->hooknum, skb, par->in, par->out, + &li, "%s", loginfo->prefix); return XT_CONTINUE; } @@ -870,7 +61,12 @@ static int log_tg_check(const struct xt_tgchk_param *par) return -EINVAL; } - return 0; + return nf_logger_find_get(par->family, NF_LOG_TYPE_LOG); +} + +static void log_tg_destroy(const struct xt_tgdtor_param *par) +{ + nf_logger_put(par->family, NF_LOG_TYPE_LOG); } static struct xt_target log_tg_regs[] __read_mostly = { @@ -880,6 +76,7 @@ static struct xt_target log_tg_regs[] __read_mostly = { .target = log_tg, .targetsize = sizeof(struct xt_log_info), .checkentry = log_tg_check, + .destroy = log_tg_destroy, .me = THIS_MODULE, }, #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) @@ -889,78 +86,19 @@ static struct xt_target log_tg_regs[] __read_mostly = { .target = log_tg, .targetsize = sizeof(struct xt_log_info), .checkentry = log_tg_check, + .destroy = log_tg_destroy, .me = THIS_MODULE, }, #endif }; -static struct nf_logger ipt_log_logger __read_mostly = { - .name = "ipt_LOG", - .logfn = &ipt_log_packet, - .me = THIS_MODULE, -}; - -#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) -static struct nf_logger ip6t_log_logger __read_mostly = { - .name = "ip6t_LOG", - .logfn = &ip6t_log_packet, - .me = THIS_MODULE, -}; -#endif - -static int __net_init log_net_init(struct net *net) -{ - nf_log_set(net, NFPROTO_IPV4, &ipt_log_logger); -#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) - nf_log_set(net, NFPROTO_IPV6, &ip6t_log_logger); -#endif - return 0; -} - -static void __net_exit log_net_exit(struct net *net) -{ - nf_log_unset(net, &ipt_log_logger); -#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) - nf_log_unset(net, &ip6t_log_logger); -#endif -} - -static struct pernet_operations log_net_ops = { - .init = log_net_init, - .exit = log_net_exit, -}; - static int __init log_tg_init(void) { - int ret; - - ret = register_pernet_subsys(&log_net_ops); - if (ret < 0) - goto err_pernet; - - ret = xt_register_targets(log_tg_regs, ARRAY_SIZE(log_tg_regs)); - if (ret < 0) - goto err_target; - - nf_log_register(NFPROTO_IPV4, &ipt_log_logger); -#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) - nf_log_register(NFPROTO_IPV6, &ip6t_log_logger); -#endif - return 0; - -err_target: - unregister_pernet_subsys(&log_net_ops); -err_pernet: - return ret; + return xt_register_targets(log_tg_regs, ARRAY_SIZE(log_tg_regs)); } static void __exit log_tg_exit(void) { - unregister_pernet_subsys(&log_net_ops); - nf_log_unregister(&ipt_log_logger); -#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) - nf_log_unregister(&ip6t_log_logger); -#endif xt_unregister_targets(log_tg_regs, ARRAY_SIZE(log_tg_regs)); } diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c index bbffdbdaf60..dffee9d47ec 100644 --- a/net/netfilter/xt_bpf.c +++ b/net/netfilter/xt_bpf.c @@ -28,7 +28,7 @@ static int bpf_mt_check(const struct xt_mtchk_param *par) program.len = info->bpf_program_num_elem; program.filter = info->bpf_program; - if (sk_unattached_filter_create(&info->filter, &program)) { + if (bpf_prog_create(&info->filter, &program)) { pr_info("bpf: check failed: parse error\n"); return -EINVAL; } @@ -40,13 +40,13 @@ static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_bpf_info *info = par->matchinfo; - return SK_RUN_FILTER(info->filter, skb); + return BPF_PROG_RUN(info->filter, skb); } static void bpf_mt_destroy(const struct xt_mtdtor_param *par) { const struct xt_bpf_info *info = par->matchinfo; - sk_unattached_filter_destroy(info->filter); + bpf_prog_destroy(info->filter); } static struct xt_match bpf_mt_reg __read_mostly = { diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c index f4e83300532..7198d660b4d 100644 --- a/net/netfilter/xt_cgroup.c +++ b/net/netfilter/xt_cgroup.c @@ -31,7 +31,7 @@ static int cgroup_mt_check(const struct xt_mtchk_param *par) if (info->invert & ~1) return -EINVAL; - return info->id ? 0 : -EINVAL; + return 0; } static bool diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index a3910fc2122..47dc6836830 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -104,7 +104,7 @@ struct xt_hashlimit_htable { spinlock_t lock; /* lock for list_head */ u_int32_t rnd; /* random seed for hash */ unsigned int count; /* number entries in table */ - struct timer_list timer; /* timer for gc */ + struct delayed_work gc_work; /* seq_file stuff */ struct proc_dir_entry *pde; @@ -213,7 +213,7 @@ dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent) call_rcu_bh(&ent->rcu, dsthash_free_rcu); ht->count--; } -static void htable_gc(unsigned long htlong); +static void htable_gc(struct work_struct *work); static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo, u_int8_t family) @@ -273,9 +273,9 @@ static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo, } hinfo->net = net; - setup_timer(&hinfo->timer, htable_gc, (unsigned long)hinfo); - hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval); - add_timer(&hinfo->timer); + INIT_DEFERRABLE_WORK(&hinfo->gc_work, htable_gc); + queue_delayed_work(system_power_efficient_wq, &hinfo->gc_work, + msecs_to_jiffies(hinfo->cfg.gc_interval)); hlist_add_head(&hinfo->node, &hashlimit_net->htables); @@ -300,29 +300,30 @@ static void htable_selective_cleanup(struct xt_hashlimit_htable *ht, { unsigned int i; - /* lock hash table and iterate over it */ - spin_lock_bh(&ht->lock); for (i = 0; i < ht->cfg.size; i++) { struct dsthash_ent *dh; struct hlist_node *n; + + spin_lock_bh(&ht->lock); hlist_for_each_entry_safe(dh, n, &ht->hash[i], node) { if ((*select)(ht, dh)) dsthash_free(ht, dh); } + spin_unlock_bh(&ht->lock); + cond_resched(); } - spin_unlock_bh(&ht->lock); } -/* hash table garbage collector, run by timer */ -static void htable_gc(unsigned long htlong) +static void htable_gc(struct work_struct *work) { - struct xt_hashlimit_htable *ht = (struct xt_hashlimit_htable *)htlong; + struct xt_hashlimit_htable *ht; + + ht = container_of(work, struct xt_hashlimit_htable, gc_work.work); htable_selective_cleanup(ht, select_gc); - /* re-add the timer accordingly */ - ht->timer.expires = jiffies + msecs_to_jiffies(ht->cfg.gc_interval); - add_timer(&ht->timer); + queue_delayed_work(system_power_efficient_wq, + &ht->gc_work, msecs_to_jiffies(ht->cfg.gc_interval)); } static void htable_remove_proc_entry(struct xt_hashlimit_htable *hinfo) @@ -341,7 +342,7 @@ static void htable_remove_proc_entry(struct xt_hashlimit_htable *hinfo) static void htable_destroy(struct xt_hashlimit_htable *hinfo) { - del_timer_sync(&hinfo->timer); + cancel_delayed_work_sync(&hinfo->gc_work); htable_remove_proc_entry(hinfo); htable_selective_cleanup(hinfo, select_all); kfree(hinfo->name); |