summary refs log tree commit diff stats
path: root/net/netfilter
diff options
context:
space:
mode:
Diffstat (limited to 'net/netfilter')
-rw-r--r-- net/netfilter/Kconfig 17
-rw-r--r-- net/netfilter/Makefile 5
-rw-r--r-- net/netfilter/core.c 17
-rw-r--r-- net/netfilter/ipvs/ip_vs_core.c 2
-rw-r--r-- net/netfilter/ipvs/ip_vs_ctl.c 105
-rw-r--r-- net/netfilter/ipvs/ip_vs_sync.c 3
-rw-r--r-- net/netfilter/ipvs/ip_vs_xmit.c 22
-rw-r--r-- net/netfilter/nf_conntrack_core.c 68
-rw-r--r-- net/netfilter/nf_conntrack_ecache.c 96
-rw-r--r-- net/netfilter/nf_conntrack_netlink.c 3
-rw-r--r-- net/netfilter/nf_log.c 155
-rw-r--r-- net/netfilter/nf_log_common.c 187
-rw-r--r-- net/netfilter/nf_nat_core.c 2
-rw-r--r-- net/netfilter/nf_nat_proto_common.c 2
-rw-r--r-- net/netfilter/nf_nat_proto_dccp.c 2
-rw-r--r-- net/netfilter/nf_nat_proto_sctp.c 2
-rw-r--r-- net/netfilter/nf_nat_proto_tcp.c 2
-rw-r--r-- net/netfilter/nf_nat_proto_udp.c 2
-rw-r--r-- net/netfilter/nf_nat_proto_udplite.c 2
-rw-r--r-- net/netfilter/nf_sockopt.c 8
-rw-r--r-- net/netfilter/nf_tables_api.c 168
-rw-r--r-- net/netfilter/nfnetlink.c 64
-rw-r--r-- net/netfilter/nfnetlink_acct.c 12
-rw-r--r-- net/netfilter/nfnetlink_log.c 4
-rw-r--r-- net/netfilter/nft_hash.c 287
-rw-r--r-- net/netfilter/nft_log.c 98
-rw-r--r-- net/netfilter/nft_rbtree.c 2
-rw-r--r-- net/netfilter/x_tables.c 70
-rw-r--r-- net/netfilter/xt_LED.c 14
-rw-r--r-- net/netfilter/xt_LOG.c 884
-rw-r--r-- net/netfilter/xt_bpf.c 6
-rw-r--r-- net/netfilter/xt_cgroup.c 2
-rw-r--r-- net/netfilter/xt_hashlimit.c 31
33 files changed, 785 insertions, 1559 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index e9410d17619..6d77cce481d 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -46,6 +46,9 @@ config NF_CONNTRACK
To compile it as a module, choose M here. If unsure, say N.
+config NF_LOG_COMMON
+ tristate
+
if NF_CONNTRACK
config NF_CONNTRACK_MARK
@@ -496,7 +499,7 @@ config NFT_LIMIT
config NFT_NAT
depends on NF_TABLES
depends on NF_CONNTRACK
- depends on NF_NAT
+ select NF_NAT
tristate "Netfilter nf_tables nat module"
help
This option adds the "nat" expression that you can use to perform
@@ -744,6 +747,9 @@ config NETFILTER_XT_TARGET_LED
config NETFILTER_XT_TARGET_LOG
tristate "LOG target support"
+ select NF_LOG_COMMON
+ select NF_LOG_IPV4
+ select NF_LOG_IPV6 if IPV6
default m if NETFILTER_ADVANCED=n
help
This option adds a `LOG' target, which allows you to create rules in
@@ -760,6 +766,14 @@ config NETFILTER_XT_TARGET_MARK
(e.g. when running oldconfig). It selects
CONFIG_NETFILTER_XT_MARK (combined mark/MARK module).
+config NETFILTER_XT_NAT
+ tristate '"SNAT and DNAT" targets support'
+ depends on NF_NAT
+ ---help---
+ This option enables the SNAT and DNAT targets.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
config NETFILTER_XT_TARGET_NETMAP
tristate '"NETMAP" target support'
depends on NF_NAT
@@ -833,6 +847,7 @@ config NETFILTER_XT_TARGET_TPROXY
tristate '"TPROXY" target transparent proxying support'
depends on NETFILTER_XTABLES
depends on NETFILTER_ADVANCED
+ depends on (IPV6 || IPV6=n)
depends on IP_NF_MANGLE
select NF_DEFRAG_IPV4
select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index bffdad774da..fad5fdba34e 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -47,6 +47,9 @@ obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o
nf_nat-y := nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \
nf_nat_proto_udp.o nf_nat_proto_tcp.o nf_nat_helper.o
+# generic transport layer logging
+obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o
+
obj-$(CONFIG_NF_NAT) += nf_nat.o
# NAT protocols (nf_nat)
@@ -92,7 +95,7 @@ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o
obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o
obj-$(CONFIG_NETFILTER_XT_SET) += xt_set.o
-obj-$(CONFIG_NF_NAT) += xt_nat.o
+obj-$(CONFIG_NETFILTER_XT_NAT) += xt_nat.o
# targets
obj-$(CONFIG_NETFILTER_XT_TARGET_AUDIT) += xt_AUDIT.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 1fbab0cdd30..024a2e25c8a 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -35,11 +35,7 @@ EXPORT_SYMBOL_GPL(nf_ipv6_ops);
int nf_register_afinfo(const struct nf_afinfo *afinfo)
{
- int err;
-
- err = mutex_lock_interruptible(&afinfo_mutex);
- if (err < 0)
- return err;
+ mutex_lock(&afinfo_mutex);
RCU_INIT_POINTER(nf_afinfo[afinfo->family], afinfo);
mutex_unlock(&afinfo_mutex);
return 0;
@@ -58,7 +54,7 @@ EXPORT_SYMBOL_GPL(nf_unregister_afinfo);
struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly;
EXPORT_SYMBOL(nf_hooks);
-#if defined(CONFIG_JUMP_LABEL)
+#ifdef HAVE_JUMP_LABEL
struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
EXPORT_SYMBOL(nf_hooks_needed);
#endif
@@ -68,18 +64,15 @@ static DEFINE_MUTEX(nf_hook_mutex);
int nf_register_hook(struct nf_hook_ops *reg)
{
struct nf_hook_ops *elem;
- int err;
- err = mutex_lock_interruptible(&nf_hook_mutex);
- if (err < 0)
- return err;
+ mutex_lock(&nf_hook_mutex);
list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) {
if (reg->priority < elem->priority)
break;
}
list_add_rcu(&reg->list, elem->list.prev);
mutex_unlock(&nf_hook_mutex);
-#if defined(CONFIG_JUMP_LABEL)
+#ifdef HAVE_JUMP_LABEL
static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
#endif
return 0;
@@ -91,7 +84,7 @@ void nf_unregister_hook(struct nf_hook_ops *reg)
mutex_lock(&nf_hook_mutex);
list_del_rcu(&reg->list);
mutex_unlock(&nf_hook_mutex);
-#if defined(CONFIG_JUMP_LABEL)
+#ifdef HAVE_JUMP_LABEL
static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
#endif
synchronize_net();
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index e6836755c45..5c34e8d42e0 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1906,7 +1906,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
{
.hook = ip_vs_local_reply6,
.owner = THIS_MODULE,
- .pf = NFPROTO_IPV4,
+ .pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP6_PRI_NAT_DST + 1,
},
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 581a6584ed0..fd3f444a4f9 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1807,92 +1807,6 @@ static struct ctl_table vs_vars[] = {
.proc_handler = proc_dointvec,
},
#endif
-#if 0
- {
- .procname = "timeout_established",
- .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "timeout_synsent",
- .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "timeout_synrecv",
- .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "timeout_finwait",
- .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "timeout_timewait",
- .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "timeout_close",
- .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "timeout_closewait",
- .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "timeout_lastack",
- .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "timeout_listen",
- .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "timeout_synack",
- .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "timeout_udp",
- .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "timeout_icmp",
- .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
-#endif
{ }
};
@@ -2357,10 +2271,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
cmd == IP_VS_SO_SET_STOPDAEMON) {
struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
- if (mutex_lock_interruptible(&ipvs->sync_mutex)) {
- ret = -ERESTARTSYS;
- goto out_dec;
- }
+ mutex_lock(&ipvs->sync_mutex);
if (cmd == IP_VS_SO_SET_STARTDAEMON)
ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
dm->syncid);
@@ -2370,11 +2281,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
goto out_dec;
}
- if (mutex_lock_interruptible(&__ip_vs_mutex)) {
- ret = -ERESTARTSYS;
- goto out_dec;
- }
-
+ mutex_lock(&__ip_vs_mutex);
if (cmd == IP_VS_SO_SET_FLUSH) {
/* Flush the virtual service */
ret = ip_vs_flush(net, false);
@@ -2659,9 +2566,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
struct ip_vs_daemon_user d[2];
memset(&d, 0, sizeof(d));
- if (mutex_lock_interruptible(&ipvs->sync_mutex))
- return -ERESTARTSYS;
-
+ mutex_lock(&ipvs->sync_mutex);
if (ipvs->sync_state & IP_VS_STATE_MASTER) {
d[0].state = IP_VS_STATE_MASTER;
strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
@@ -2680,9 +2585,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
return ret;
}
- if (mutex_lock_interruptible(&__ip_vs_mutex))
- return -ERESTARTSYS;
-
+ mutex_lock(&__ip_vs_mutex);
switch (cmd) {
case IP_VS_SO_GET_VERSION:
{
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index db801263ee9..eadffb29dec 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -886,8 +886,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark);
rcu_read_unlock();
if (!cp) {
- if (param->pe_data)
- kfree(param->pe_data);
+ kfree(param->pe_data);
IP_VS_DBG(2, "BACKUP, add new conn. failed\n");
return;
}
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 73ba1cc7a88..56896a412bc 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -38,6 +38,7 @@
#include <net/route.h> /* for ip_route_output */
#include <net/ipv6.h>
#include <net/ip6_route.h>
+#include <net/ip_tunnels.h>
#include <net/addrconf.h>
#include <linux/icmpv6.h>
#include <linux/netfilter.h>
@@ -862,11 +863,15 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
old_iph = ip_hdr(skb);
}
- skb->transport_header = skb->network_header;
-
/* fix old IP header checksum */
ip_send_check(old_iph);
+ skb = iptunnel_handle_offloads(skb, false, SKB_GSO_IPIP);
+ if (IS_ERR(skb))
+ goto tx_error;
+
+ skb->transport_header = skb->network_header;
+
skb_push(skb, sizeof(struct iphdr));
skb_reset_network_header(skb);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
@@ -900,7 +905,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
return NF_STOLEN;
tx_error:
- kfree_skb(skb);
+ if (!IS_ERR(skb))
+ kfree_skb(skb);
rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
@@ -953,6 +959,11 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
old_iph = ipv6_hdr(skb);
}
+ /* GSO: we need to provide proper SKB_GSO_ value for IPv6 */
+ skb = iptunnel_handle_offloads(skb, false, 0); /* SKB_GSO_SIT/IPV6 */
+ if (IS_ERR(skb))
+ goto tx_error;
+
skb->transport_header = skb->network_header;
skb_push(skb, sizeof(struct ipv6hdr));
@@ -967,8 +978,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
iph->nexthdr = IPPROTO_IPV6;
iph->payload_len = old_iph->payload_len;
be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
- iph->priority = old_iph->priority;
memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
+ ipv6_change_dsfield(iph, 0, ipv6_get_dsfield(old_iph));
iph->daddr = cp->daddr.in6;
iph->saddr = saddr;
iph->hop_limit = old_iph->hop_limit;
@@ -988,7 +999,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
return NF_STOLEN;
tx_error:
- kfree_skb(skb);
+ if (!IS_ERR(skb))
+ kfree_skb(skb);
rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 1f4f954c4b4..de88c4ab514 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -352,40 +352,6 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct)
local_bh_enable();
}
-static void death_by_event(unsigned long ul_conntrack)
-{
- struct nf_conn *ct = (void *)ul_conntrack;
- struct net *net = nf_ct_net(ct);
- struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct);
-
- BUG_ON(ecache == NULL);
-
- if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) {
- /* bad luck, let's retry again */
- ecache->timeout.expires = jiffies +
- (prandom_u32() % net->ct.sysctl_events_retry_timeout);
- add_timer(&ecache->timeout);
- return;
- }
- /* we've got the event delivered, now it's dying */
- set_bit(IPS_DYING_BIT, &ct->status);
- nf_ct_put(ct);
-}
-
-static void nf_ct_dying_timeout(struct nf_conn *ct)
-{
- struct net *net = nf_ct_net(ct);
- struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct);
-
- BUG_ON(ecache == NULL);
-
- /* set a new timer to retry event delivery */
- setup_timer(&ecache->timeout, death_by_event, (unsigned long)ct);
- ecache->timeout.expires = jiffies +
- (prandom_u32() % net->ct.sysctl_events_retry_timeout);
- add_timer(&ecache->timeout);
-}
-
bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
{
struct nf_conn_tstamp *tstamp;
@@ -394,15 +360,20 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
if (tstamp && tstamp->stop == 0)
tstamp->stop = ktime_to_ns(ktime_get_real());
- if (!nf_ct_is_dying(ct) &&
- unlikely(nf_conntrack_event_report(IPCT_DESTROY, ct,
- portid, report) < 0)) {
+ if (nf_ct_is_dying(ct))
+ goto delete;
+
+ if (nf_conntrack_event_report(IPCT_DESTROY, ct,
+ portid, report) < 0) {
/* destroy event was not delivered */
nf_ct_delete_from_lists(ct);
- nf_ct_dying_timeout(ct);
+ nf_conntrack_ecache_delayed_work(nf_ct_net(ct));
return false;
}
+
+ nf_conntrack_ecache_work(nf_ct_net(ct));
set_bit(IPS_DYING_BIT, &ct->status);
+ delete:
nf_ct_delete_from_lists(ct);
nf_ct_put(ct);
return true;
@@ -1464,26 +1435,6 @@ void nf_conntrack_flush_report(struct net *net, u32 portid, int report)
}
EXPORT_SYMBOL_GPL(nf_conntrack_flush_report);
-static void nf_ct_release_dying_list(struct net *net)
-{
- struct nf_conntrack_tuple_hash *h;
- struct nf_conn *ct;
- struct hlist_nulls_node *n;
- int cpu;
-
- for_each_possible_cpu(cpu) {
- struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
-
- spin_lock_bh(&pcpu->lock);
- hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) {
- ct = nf_ct_tuplehash_to_ctrack(h);
- /* never fails to remove them, no listeners at this point */
- nf_ct_kill(ct);
- }
- spin_unlock_bh(&pcpu->lock);
- }
-}
-
static int untrack_refs(void)
{
int cnt = 0, cpu;
@@ -1548,7 +1499,6 @@ i_see_dead_people:
busy = 0;
list_for_each_entry(net, net_exit_list, exit_list) {
nf_ct_iterate_cleanup(net, kill_all, NULL, 0, 0);
- nf_ct_release_dying_list(net);
if (atomic_read(&net->ct.count) != 0)
busy = 1;
}
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 1df17614656..4e78c57b818 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -29,6 +29,90 @@
static DEFINE_MUTEX(nf_ct_ecache_mutex);
+#define ECACHE_RETRY_WAIT (HZ/10)
+
+enum retry_state {
+ STATE_CONGESTED,
+ STATE_RESTART,
+ STATE_DONE,
+};
+
+static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu)
+{
+ struct nf_conn *refs[16];
+ struct nf_conntrack_tuple_hash *h;
+ struct hlist_nulls_node *n;
+ unsigned int evicted = 0;
+ enum retry_state ret = STATE_DONE;
+
+ spin_lock(&pcpu->lock);
+
+ hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) {
+ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+
+ if (nf_ct_is_dying(ct))
+ continue;
+
+ if (nf_conntrack_event(IPCT_DESTROY, ct)) {
+ ret = STATE_CONGESTED;
+ break;
+ }
+
+ /* we've got the event delivered, now it's dying */
+ set_bit(IPS_DYING_BIT, &ct->status);
+ refs[evicted] = ct;
+
+ if (++evicted >= ARRAY_SIZE(refs)) {
+ ret = STATE_RESTART;
+ break;
+ }
+ }
+
+ spin_unlock(&pcpu->lock);
+
+ /* can't _put while holding lock */
+ while (evicted)
+ nf_ct_put(refs[--evicted]);
+
+ return ret;
+}
+
+static void ecache_work(struct work_struct *work)
+{
+ struct netns_ct *ctnet =
+ container_of(work, struct netns_ct, ecache_dwork.work);
+ int cpu, delay = -1;
+ struct ct_pcpu *pcpu;
+
+ local_bh_disable();
+
+ for_each_possible_cpu(cpu) {
+ enum retry_state ret;
+
+ pcpu = per_cpu_ptr(ctnet->pcpu_lists, cpu);
+
+ ret = ecache_work_evict_list(pcpu);
+
+ switch (ret) {
+ case STATE_CONGESTED:
+ delay = ECACHE_RETRY_WAIT;
+ goto out;
+ case STATE_RESTART:
+ delay = 0;
+ break;
+ case STATE_DONE:
+ break;
+ }
+ }
+
+ out:
+ local_bh_enable();
+
+ ctnet->ecache_dwork_pending = delay > 0;
+ if (delay >= 0)
+ schedule_delayed_work(&ctnet->ecache_dwork, delay);
+}
+
/* deliver cached events and clear cache entry - must be called with locally
* disabled softirqs */
void nf_ct_deliver_cached_events(struct nf_conn *ct)
@@ -157,7 +241,6 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
#define NF_CT_EVENTS_DEFAULT 1
static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT;
-static int nf_ct_events_retry_timeout __read_mostly = 15*HZ;
#ifdef CONFIG_SYSCTL
static struct ctl_table event_sysctl_table[] = {
@@ -168,13 +251,6 @@ static struct ctl_table event_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- {
- .procname = "nf_conntrack_events_retry_timeout",
- .data = &init_net.ct.sysctl_events_retry_timeout,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
{}
};
#endif /* CONFIG_SYSCTL */
@@ -196,7 +272,6 @@ static int nf_conntrack_event_init_sysctl(struct net *net)
goto out;
table[0].data = &net->ct.sysctl_events;
- table[1].data = &net->ct.sysctl_events_retry_timeout;
/* Don't export sysctls to unprivileged users */
if (net->user_ns != &init_user_ns)
@@ -238,12 +313,13 @@ static void nf_conntrack_event_fini_sysctl(struct net *net)
int nf_conntrack_ecache_pernet_init(struct net *net)
{
net->ct.sysctl_events = nf_ct_events;
- net->ct.sysctl_events_retry_timeout = nf_ct_events_retry_timeout;
+ INIT_DELAYED_WORK(&net->ct.ecache_dwork, ecache_work);
return nf_conntrack_event_init_sysctl(net);
}
void nf_conntrack_ecache_pernet_fini(struct net *net)
{
+ cancel_delayed_work_sync(&net->ct.ecache_dwork);
nf_conntrack_event_fini_sysctl(net);
}
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 300ed1eec72..355a5c4ef76 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -745,8 +745,7 @@ static int ctnetlink_done(struct netlink_callback *cb)
{
if (cb->args[1])
nf_ct_put((struct nf_conn *)cb->args[1]);
- if (cb->data)
- kfree(cb->data);
+ kfree(cb->data);
return 0;
}
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 85296d4eac0..daad6022c68 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -16,16 +16,22 @@
#define NF_LOG_PREFIXLEN 128
#define NFLOGGER_NAME_LEN 64
-static struct list_head nf_loggers_l[NFPROTO_NUMPROTO] __read_mostly;
+static struct nf_logger __rcu *loggers[NFPROTO_NUMPROTO][NF_LOG_TYPE_MAX] __read_mostly;
static DEFINE_MUTEX(nf_log_mutex);
static struct nf_logger *__find_logger(int pf, const char *str_logger)
{
- struct nf_logger *t;
+ struct nf_logger *log;
+ int i;
+
+ for (i = 0; i < NF_LOG_TYPE_MAX; i++) {
+ if (loggers[pf][i] == NULL)
+ continue;
- list_for_each_entry(t, &nf_loggers_l[pf], list[pf]) {
- if (!strnicmp(str_logger, t->name, strlen(t->name)))
- return t;
+ log = rcu_dereference_protected(loggers[pf][i],
+ lockdep_is_held(&nf_log_mutex));
+ if (!strnicmp(str_logger, log->name, strlen(log->name)))
+ return log;
}
return NULL;
@@ -73,17 +79,14 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger)
if (pf >= ARRAY_SIZE(init_net.nf.nf_loggers))
return -EINVAL;
- for (i = 0; i < ARRAY_SIZE(logger->list); i++)
- INIT_LIST_HEAD(&logger->list[i]);
-
mutex_lock(&nf_log_mutex);
if (pf == NFPROTO_UNSPEC) {
for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++)
- list_add_tail(&(logger->list[i]), &(nf_loggers_l[i]));
+ rcu_assign_pointer(loggers[i][logger->type], logger);
} else {
/* register at end of list to honor first register win */
- list_add_tail(&logger->list[pf], &nf_loggers_l[pf]);
+ rcu_assign_pointer(loggers[pf][logger->type], logger);
}
mutex_unlock(&nf_log_mutex);
@@ -98,7 +101,7 @@ void nf_log_unregister(struct nf_logger *logger)
mutex_lock(&nf_log_mutex);
for (i = 0; i < NFPROTO_NUMPROTO; i++)
- list_del(&logger->list[i]);
+ RCU_INIT_POINTER(loggers[i][logger->type], NULL);
mutex_unlock(&nf_log_mutex);
}
EXPORT_SYMBOL(nf_log_unregister);
@@ -129,6 +132,48 @@ void nf_log_unbind_pf(struct net *net, u_int8_t pf)
}
EXPORT_SYMBOL(nf_log_unbind_pf);
+void nf_logger_request_module(int pf, enum nf_log_type type)
+{
+ if (loggers[pf][type] == NULL)
+ request_module("nf-logger-%u-%u", pf, type);
+}
+EXPORT_SYMBOL_GPL(nf_logger_request_module);
+
+int nf_logger_find_get(int pf, enum nf_log_type type)
+{
+ struct nf_logger *logger;
+ int ret = -ENOENT;
+
+ logger = loggers[pf][type];
+ if (logger == NULL)
+ request_module("nf-logger-%u-%u", pf, type);
+
+ rcu_read_lock();
+ logger = rcu_dereference(loggers[pf][type]);
+ if (logger == NULL)
+ goto out;
+
+ if (logger && try_module_get(logger->me))
+ ret = 0;
+out:
+ rcu_read_unlock();
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_logger_find_get);
+
+void nf_logger_put(int pf, enum nf_log_type type)
+{
+ struct nf_logger *logger;
+
+ BUG_ON(loggers[pf][type] == NULL);
+
+ rcu_read_lock();
+ logger = rcu_dereference(loggers[pf][type]);
+ module_put(logger->me);
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(nf_logger_put);
+
void nf_log_packet(struct net *net,
u_int8_t pf,
unsigned int hooknum,
@@ -143,7 +188,11 @@ void nf_log_packet(struct net *net,
const struct nf_logger *logger;
rcu_read_lock();
- logger = rcu_dereference(net->nf.nf_loggers[pf]);
+ if (loginfo != NULL)
+ logger = rcu_dereference(loggers[pf][loginfo->type]);
+ else
+ logger = rcu_dereference(net->nf.nf_loggers[pf]);
+
if (logger) {
va_start(args, fmt);
vsnprintf(prefix, sizeof(prefix), fmt, args);
@@ -154,6 +203,63 @@ void nf_log_packet(struct net *net,
}
EXPORT_SYMBOL(nf_log_packet);
+#define S_SIZE (1024 - (sizeof(unsigned int) + 1))
+
+struct nf_log_buf {
+ unsigned int count;
+ char buf[S_SIZE + 1];
+};
+static struct nf_log_buf emergency, *emergency_ptr = &emergency;
+
+__printf(2, 3) int nf_log_buf_add(struct nf_log_buf *m, const char *f, ...)
+{
+ va_list args;
+ int len;
+
+ if (likely(m->count < S_SIZE)) {
+ va_start(args, f);
+ len = vsnprintf(m->buf + m->count, S_SIZE - m->count, f, args);
+ va_end(args);
+ if (likely(m->count + len < S_SIZE)) {
+ m->count += len;
+ return 0;
+ }
+ }
+ m->count = S_SIZE;
+ printk_once(KERN_ERR KBUILD_MODNAME " please increase S_SIZE\n");
+ return -1;
+}
+EXPORT_SYMBOL_GPL(nf_log_buf_add);
+
+struct nf_log_buf *nf_log_buf_open(void)
+{
+ struct nf_log_buf *m = kmalloc(sizeof(*m), GFP_ATOMIC);
+
+ if (unlikely(!m)) {
+ local_bh_disable();
+ do {
+ m = xchg(&emergency_ptr, NULL);
+ } while (!m);
+ }
+ m->count = 0;
+ return m;
+}
+EXPORT_SYMBOL_GPL(nf_log_buf_open);
+
+void nf_log_buf_close(struct nf_log_buf *m)
+{
+ m->buf[m->count] = 0;
+ printk("%s\n", m->buf);
+
+ if (likely(m != &emergency))
+ kfree(m);
+ else {
+ emergency_ptr = m;
+ local_bh_enable();
+ }
+}
+EXPORT_SYMBOL_GPL(nf_log_buf_close);
+
#ifdef CONFIG_PROC_FS
static void *seq_start(struct seq_file *seq, loff_t *pos)
{
@@ -188,8 +294,7 @@ static int seq_show(struct seq_file *s, void *v)
{
loff_t *pos = v;
const struct nf_logger *logger;
- struct nf_logger *t;
- int ret;
+ int i, ret;
struct net *net = seq_file_net(s);
logger = rcu_dereference_protected(net->nf.nf_loggers[*pos],
@@ -203,11 +308,16 @@ static int seq_show(struct seq_file *s, void *v)
if (ret < 0)
return ret;
- list_for_each_entry(t, &nf_loggers_l[*pos], list[*pos]) {
- ret = seq_printf(s, "%s", t->name);
+ for (i = 0; i < NF_LOG_TYPE_MAX; i++) {
+ if (loggers[*pos][i] == NULL)
+ continue;
+
+ logger = rcu_dereference_protected(loggers[*pos][i],
+ lockdep_is_held(&nf_log_mutex));
+ ret = seq_printf(s, "%s", logger->name);
if (ret < 0)
return ret;
- if (&t->list[*pos] != nf_loggers_l[*pos].prev) {
+ if (i == 0 && loggers[*pos][i + 1] != NULL) {
ret = seq_printf(s, ",");
if (ret < 0)
return ret;
@@ -389,14 +499,5 @@ static struct pernet_operations nf_log_net_ops = {
int __init netfilter_log_init(void)
{
- int i, ret;
-
- ret = register_pernet_subsys(&nf_log_net_ops);
- if (ret < 0)
- return ret;
-
- for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++)
- INIT_LIST_HEAD(&(nf_loggers_l[i]));
-
- return 0;
+ return register_pernet_subsys(&nf_log_net_ops);
}
diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c
new file mode 100644
index 00000000000..eeb8ef4ff1a
--- /dev/null
+++ b/net/netfilter/nf_log_common.c
@@ -0,0 +1,187 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/skbuff.h>
+#include <linux/if_arp.h>
+#include <linux/ip.h>
+#include <net/icmp.h>
+#include <net/udp.h>
+#include <net/tcp.h>
+#include <net/route.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/xt_LOG.h>
+#include <net/netfilter/nf_log.h>
+
+int nf_log_dump_udp_header(struct nf_log_buf *m, const struct sk_buff *skb,
+ u8 proto, int fragment, unsigned int offset)
+{
+ struct udphdr _udph;
+ const struct udphdr *uh;
+
+ if (proto == IPPROTO_UDP)
+ /* Max length: 10 "PROTO=UDP " */
+ nf_log_buf_add(m, "PROTO=UDP ");
+ else /* Max length: 14 "PROTO=UDPLITE " */
+ nf_log_buf_add(m, "PROTO=UDPLITE ");
+
+ if (fragment)
+ goto out;
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ uh = skb_header_pointer(skb, offset, sizeof(_udph), &_udph);
+ if (uh == NULL) {
+ nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", skb->len - offset);
+
+ return 1;
+ }
+
+ /* Max length: 30 "SPT=65535 DPT=65535 LEN=65535 " */
+ nf_log_buf_add(m, "SPT=%u DPT=%u LEN=%u ",
+ ntohs(uh->source), ntohs(uh->dest), ntohs(uh->len));
+
+out:
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nf_log_dump_udp_header);
+
+int nf_log_dump_tcp_header(struct nf_log_buf *m, const struct sk_buff *skb,
+ u8 proto, int fragment, unsigned int offset,
+ unsigned int logflags)
+{
+ struct tcphdr _tcph;
+ const struct tcphdr *th;
+
+ /* Max length: 10 "PROTO=TCP " */
+ nf_log_buf_add(m, "PROTO=TCP ");
+
+ if (fragment)
+ return 0;
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ th = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph);
+ if (th == NULL) {
+ nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", skb->len - offset);
+ return 1;
+ }
+
+ /* Max length: 20 "SPT=65535 DPT=65535 " */
+ nf_log_buf_add(m, "SPT=%u DPT=%u ",
+ ntohs(th->source), ntohs(th->dest));
+ /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
+ if (logflags & XT_LOG_TCPSEQ) {
+ nf_log_buf_add(m, "SEQ=%u ACK=%u ",
+ ntohl(th->seq), ntohl(th->ack_seq));
+ }
+
+ /* Max length: 13 "WINDOW=65535 " */
+ nf_log_buf_add(m, "WINDOW=%u ", ntohs(th->window));
+ /* Max length: 9 "RES=0x3C " */
+ nf_log_buf_add(m, "RES=0x%02x ", (u_int8_t)(ntohl(tcp_flag_word(th) &
+ TCP_RESERVED_BITS) >> 22));
+ /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */
+ if (th->cwr)
+ nf_log_buf_add(m, "CWR ");
+ if (th->ece)
+ nf_log_buf_add(m, "ECE ");
+ if (th->urg)
+ nf_log_buf_add(m, "URG ");
+ if (th->ack)
+ nf_log_buf_add(m, "ACK ");
+ if (th->psh)
+ nf_log_buf_add(m, "PSH ");
+ if (th->rst)
+ nf_log_buf_add(m, "RST ");
+ if (th->syn)
+ nf_log_buf_add(m, "SYN ");
+ if (th->fin)
+ nf_log_buf_add(m, "FIN ");
+ /* Max length: 11 "URGP=65535 " */
+ nf_log_buf_add(m, "URGP=%u ", ntohs(th->urg_ptr));
+
+ if ((logflags & XT_LOG_TCPOPT) && th->doff*4 > sizeof(struct tcphdr)) {
+ u_int8_t _opt[60 - sizeof(struct tcphdr)];
+ const u_int8_t *op;
+ unsigned int i;
+ unsigned int optsize = th->doff*4 - sizeof(struct tcphdr);
+
+ op = skb_header_pointer(skb, offset + sizeof(struct tcphdr),
+ optsize, _opt);
+ if (op == NULL) {
+ nf_log_buf_add(m, "OPT (TRUNCATED)");
+ return 1;
+ }
+
+ /* Max length: 127 "OPT (" 15*4*2chars ") " */
+ nf_log_buf_add(m, "OPT (");
+ for (i = 0; i < optsize; i++)
+ nf_log_buf_add(m, "%02X", op[i]);
+
+ nf_log_buf_add(m, ") ");
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nf_log_dump_tcp_header);
+
+void nf_log_dump_sk_uid_gid(struct nf_log_buf *m, struct sock *sk)
+{
+ if (!sk || sk->sk_state == TCP_TIME_WAIT)
+ return;
+
+ read_lock_bh(&sk->sk_callback_lock);
+ if (sk->sk_socket && sk->sk_socket->file) {
+ const struct cred *cred = sk->sk_socket->file->f_cred;
+ nf_log_buf_add(m, "UID=%u GID=%u ",
+ from_kuid_munged(&init_user_ns, cred->fsuid),
+ from_kgid_munged(&init_user_ns, cred->fsgid));
+ }
+ read_unlock_bh(&sk->sk_callback_lock);
+}
+EXPORT_SYMBOL_GPL(nf_log_dump_sk_uid_gid);
+
+void
+nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf,
+ unsigned int hooknum, const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const struct nf_loginfo *loginfo, const char *prefix)
+{
+ nf_log_buf_add(m, KERN_SOH "%c%sIN=%s OUT=%s ",
+ '0' + loginfo->u.log.level, prefix,
+ in ? in->name : "",
+ out ? out->name : "");
+#ifdef CONFIG_BRIDGE_NETFILTER
+ if (skb->nf_bridge) {
+ const struct net_device *physindev;
+ const struct net_device *physoutdev;
+
+ physindev = skb->nf_bridge->physindev;
+ if (physindev && in != physindev)
+ nf_log_buf_add(m, "PHYSIN=%s ", physindev->name);
+ physoutdev = skb->nf_bridge->physoutdev;
+ if (physoutdev && out != physoutdev)
+ nf_log_buf_add(m, "PHYSOUT=%s ", physoutdev->name);
+ }
+#endif
+}
+EXPORT_SYMBOL_GPL(nf_log_dump_packet_common);
+
+static int __init nf_log_common_init(void)
+{
+ return 0;
+}
+
+static void __exit nf_log_common_exit(void) {}
+
+module_init(nf_log_common_init);
+module_exit(nf_log_common_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index a49907b1dab..552f97cd9fd 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -710,7 +710,7 @@ static struct nf_ct_ext_type nat_extend __read_mostly = {
.flags = NF_CT_EXT_F_PREALLOC,
};
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
diff --git a/net/netfilter/nf_nat_proto_common.c b/net/netfilter/nf_nat_proto_common.c
index 83a72a235ca..fbce552a796 100644
--- a/net/netfilter/nf_nat_proto_common.c
+++ b/net/netfilter/nf_nat_proto_common.c
@@ -95,7 +95,7 @@ void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
}
EXPORT_SYMBOL_GPL(nf_nat_l4proto_unique_tuple);
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[],
struct nf_nat_range *range)
{
diff --git a/net/netfilter/nf_nat_proto_dccp.c b/net/netfilter/nf_nat_proto_dccp.c
index c8be2cdac0b..b8067b53ff3 100644
--- a/net/netfilter/nf_nat_proto_dccp.c
+++ b/net/netfilter/nf_nat_proto_dccp.c
@@ -78,7 +78,7 @@ static const struct nf_nat_l4proto nf_nat_l4proto_dccp = {
.manip_pkt = dccp_manip_pkt,
.in_range = nf_nat_l4proto_in_range,
.unique_tuple = dccp_unique_tuple,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
#endif
};
diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c
index 754536f2c67..cbc7ade1487 100644
--- a/net/netfilter/nf_nat_proto_sctp.c
+++ b/net/netfilter/nf_nat_proto_sctp.c
@@ -59,7 +59,7 @@ static const struct nf_nat_l4proto nf_nat_l4proto_sctp = {
.manip_pkt = sctp_manip_pkt,
.in_range = nf_nat_l4proto_in_range,
.unique_tuple = sctp_unique_tuple,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
#endif
};
diff --git a/net/netfilter/nf_nat_proto_tcp.c b/net/netfilter/nf_nat_proto_tcp.c
index 83ec8a6e4c3..37f5505f452 100644
--- a/net/netfilter/nf_nat_proto_tcp.c
+++ b/net/netfilter/nf_nat_proto_tcp.c
@@ -79,7 +79,7 @@ const struct nf_nat_l4proto nf_nat_l4proto_tcp = {
.manip_pkt = tcp_manip_pkt,
.in_range = nf_nat_l4proto_in_range,
.unique_tuple = tcp_unique_tuple,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
#endif
};
diff --git a/net/netfilter/nf_nat_proto_udp.c b/net/netfilter/nf_nat_proto_udp.c
index 7df613fb34a..b0ede2f0d8b 100644
--- a/net/netfilter/nf_nat_proto_udp.c
+++ b/net/netfilter/nf_nat_proto_udp.c
@@ -70,7 +70,7 @@ const struct nf_nat_l4proto nf_nat_l4proto_udp = {
.manip_pkt = udp_manip_pkt,
.in_range = nf_nat_l4proto_in_range,
.unique_tuple = udp_unique_tuple,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
#endif
};
diff --git a/net/netfilter/nf_nat_proto_udplite.c b/net/netfilter/nf_nat_proto_udplite.c
index 776a0d1317b..368f14e01e7 100644
--- a/net/netfilter/nf_nat_proto_udplite.c
+++ b/net/netfilter/nf_nat_proto_udplite.c
@@ -69,7 +69,7 @@ static const struct nf_nat_l4proto nf_nat_l4proto_udplite = {
.manip_pkt = udplite_manip_pkt,
.in_range = nf_nat_l4proto_in_range,
.unique_tuple = udplite_unique_tuple,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
#endif
};
diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c
index f042ae52155..c68c1e58b36 100644
--- a/net/netfilter/nf_sockopt.c
+++ b/net/netfilter/nf_sockopt.c
@@ -26,9 +26,7 @@ int nf_register_sockopt(struct nf_sockopt_ops *reg)
struct nf_sockopt_ops *ops;
int ret = 0;
- if (mutex_lock_interruptible(&nf_sockopt_mutex) != 0)
- return -EINTR;
-
+ mutex_lock(&nf_sockopt_mutex);
list_for_each_entry(ops, &nf_sockopts, list) {
if (ops->pf == reg->pf
&& (overlap(ops->set_optmin, ops->set_optmax,
@@ -65,9 +63,7 @@ static struct nf_sockopt_ops *nf_sockopt_find(struct sock *sk, u_int8_t pf,
{
struct nf_sockopt_ops *ops;
- if (mutex_lock_interruptible(&nf_sockopt_mutex) != 0)
- return ERR_PTR(-EINTR);
-
+ mutex_lock(&nf_sockopt_mutex);
list_for_each_entry(ops, &nf_sockopts, list) {
if (ops->pf == pf) {
if (!try_module_get(ops->owner))
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 8746ff9a835..deeb95fb702 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -899,6 +899,9 @@ static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr)
static void nft_chain_stats_replace(struct nft_base_chain *chain,
struct nft_stats __percpu *newstats)
{
+ if (newstats == NULL)
+ return;
+
if (chain->stats) {
struct nft_stats __percpu *oldstats =
nft_dereference(chain->stats);
@@ -2247,80 +2250,7 @@ err:
return err;
}
-static int nf_tables_dump_sets_table(struct nft_ctx *ctx, struct sk_buff *skb,
- struct netlink_callback *cb)
-{
- const struct nft_set *set;
- unsigned int idx = 0, s_idx = cb->args[0];
-
- if (cb->args[1])
- return skb->len;
-
- rcu_read_lock();
- cb->seq = ctx->net->nft.base_seq;
-
- list_for_each_entry_rcu(set, &ctx->table->sets, list) {
- if (idx < s_idx)
- goto cont;
- if (nf_tables_fill_set(skb, ctx, set, NFT_MSG_NEWSET,
- NLM_F_MULTI) < 0) {
- cb->args[0] = idx;
- goto done;
- }
- nl_dump_check_consistent(cb, nlmsg_hdr(skb));
-cont:
- idx++;
- }
- cb->args[1] = 1;
-done:
- rcu_read_unlock();
- return skb->len;
-}
-
-static int nf_tables_dump_sets_family(struct nft_ctx *ctx, struct sk_buff *skb,
- struct netlink_callback *cb)
-{
- const struct nft_set *set;
- unsigned int idx, s_idx = cb->args[0];
- struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2];
-
- if (cb->args[1])
- return skb->len;
-
- rcu_read_lock();
- cb->seq = ctx->net->nft.base_seq;
-
- list_for_each_entry_rcu(table, &ctx->afi->tables, list) {
- if (cur_table) {
- if (cur_table != table)
- continue;
-
- cur_table = NULL;
- }
- ctx->table = table;
- idx = 0;
- list_for_each_entry_rcu(set, &ctx->table->sets, list) {
- if (idx < s_idx)
- goto cont;
- if (nf_tables_fill_set(skb, ctx, set, NFT_MSG_NEWSET,
- NLM_F_MULTI) < 0) {
- cb->args[0] = idx;
- cb->args[2] = (unsigned long) table;
- goto done;
- }
- nl_dump_check_consistent(cb, nlmsg_hdr(skb));
-cont:
- idx++;
- }
- }
- cb->args[1] = 1;
-done:
- rcu_read_unlock();
- return skb->len;
-}
-
-static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb,
- struct netlink_callback *cb)
+static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
{
const struct nft_set *set;
unsigned int idx, s_idx = cb->args[0];
@@ -2328,6 +2258,7 @@ static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb,
struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2];
struct net *net = sock_net(skb->sk);
int cur_family = cb->args[3];
+ struct nft_ctx *ctx = cb->data, ctx_set;
if (cb->args[1])
return skb->len;
@@ -2336,28 +2267,34 @@ static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb,
cb->seq = net->nft.base_seq;
list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
+ if (ctx->afi && ctx->afi != afi)
+ continue;
+
if (cur_family) {
if (afi->family != cur_family)
continue;
cur_family = 0;
}
-
list_for_each_entry_rcu(table, &afi->tables, list) {
+ if (ctx->table && ctx->table != table)
+ continue;
+
if (cur_table) {
if (cur_table != table)
continue;
cur_table = NULL;
}
-
- ctx->table = table;
- ctx->afi = afi;
idx = 0;
- list_for_each_entry_rcu(set, &ctx->table->sets, list) {
+ list_for_each_entry_rcu(set, &table->sets, list) {
if (idx < s_idx)
goto cont;
- if (nf_tables_fill_set(skb, ctx, set,
+
+ ctx_set = *ctx;
+ ctx_set.table = table;
+ ctx_set.afi = afi;
+ if (nf_tables_fill_set(skb, &ctx_set, set,
NFT_MSG_NEWSET,
NLM_F_MULTI) < 0) {
cb->args[0] = idx;
@@ -2379,31 +2316,10 @@ done:
return skb->len;
}
-static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
+static int nf_tables_dump_sets_done(struct netlink_callback *cb)
{
- const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
- struct nlattr *nla[NFTA_SET_MAX + 1];
- struct nft_ctx ctx;
- int err, ret;
-
- err = nlmsg_parse(cb->nlh, sizeof(*nfmsg), nla, NFTA_SET_MAX,
- nft_set_policy);
- if (err < 0)
- return err;
-
- err = nft_ctx_init_from_setattr(&ctx, cb->skb, cb->nlh, (void *)nla);
- if (err < 0)
- return err;
-
- if (ctx.table == NULL) {
- if (ctx.afi == NULL)
- ret = nf_tables_dump_sets_all(&ctx, skb, cb);
- else
- ret = nf_tables_dump_sets_family(&ctx, skb, cb);
- } else
- ret = nf_tables_dump_sets_table(&ctx, skb, cb);
-
- return ret;
+ kfree(cb->data);
+ return 0;
}
#define NFT_SET_INACTIVE (1 << 15) /* Internal set flag */
@@ -2426,7 +2342,17 @@ static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb,
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.dump = nf_tables_dump_sets,
+ .done = nf_tables_dump_sets_done,
};
+ struct nft_ctx *ctx_dump;
+
+ ctx_dump = kmalloc(sizeof(*ctx_dump), GFP_KERNEL);
+ if (ctx_dump == NULL)
+ return -ENOMEM;
+
+ *ctx_dump = ctx;
+ c.data = ctx_dump;
+
return netlink_dump_start(nlsk, skb, nlh, &c);
}
@@ -3150,6 +3076,9 @@ static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb,
struct nft_ctx ctx;
int rem, err = 0;
+ if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL)
+ return -EINVAL;
+
err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, true);
if (err < 0)
return err;
@@ -3208,16 +3137,14 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
goto err2;
trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set);
- if (trans == NULL)
+ if (trans == NULL) {
+ err = -ENOMEM;
goto err2;
+ }
nft_trans_elem(trans) = elem;
list_add_tail(&trans->list, &ctx->net->nft.commit_list);
-
- nft_data_uninit(&elem.key, NFT_DATA_VALUE);
- if (set->flags & NFT_SET_MAP)
- nft_data_uninit(&elem.data, set->dtype);
-
+ return 0;
err2:
nft_data_uninit(&elem.key, desc.type);
err1:
@@ -3233,6 +3160,9 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb,
struct nft_ctx ctx;
int rem, err = 0;
+ if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL)
+ return -EINVAL;
+
err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, false);
if (err < 0)
return err;
@@ -3380,7 +3310,7 @@ static int nf_tables_commit(struct sk_buff *skb)
{
struct net *net = sock_net(skb->sk);
struct nft_trans *trans, *next;
- struct nft_set *set;
+ struct nft_trans_elem *te;
/* Bump generation counter, invalidate any dump in progress */
while (++net->nft.base_seq == 0);
@@ -3466,13 +3396,17 @@ static int nf_tables_commit(struct sk_buff *skb)
nft_trans_destroy(trans);
break;
case NFT_MSG_DELSETELEM:
- nf_tables_setelem_notify(&trans->ctx,
- nft_trans_elem_set(trans),
- &nft_trans_elem(trans),
+ te = (struct nft_trans_elem *)trans->data;
+ nf_tables_setelem_notify(&trans->ctx, te->set,
+ &te->elem,
NFT_MSG_DELSETELEM, 0);
- set = nft_trans_elem_set(trans);
- set->ops->get(set, &nft_trans_elem(trans));
- set->ops->remove(set, &nft_trans_elem(trans));
+ te->set->ops->get(te->set, &te->elem);
+ te->set->ops->remove(te->set, &te->elem);
+ nft_data_uninit(&te->elem.key, NFT_DATA_VALUE);
+ if (te->elem.flags & NFT_SET_MAP) {
+ nft_data_uninit(&te->elem.data,
+ te->set->dtype);
+ }
nft_trans_destroy(trans);
break;
}
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index c138b8fbe28..f37f0716a9f 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -222,6 +222,51 @@ replay:
}
}
+struct nfnl_err {
+ struct list_head head; /* link in the list of queued errors */
+ struct nlmsghdr *nlh; /* message header the error refers to */
+ int err; /* error code later handed to netlink_ack() */
+};
+/* Queue an (nlh, err) pair at the tail of @list; returns 0 or -ENOMEM. */
+static int nfnl_err_add(struct list_head *list, struct nlmsghdr *nlh, int err)
+{
+ struct nfnl_err *nfnl_err;
+
+ nfnl_err = kmalloc(sizeof(struct nfnl_err), GFP_KERNEL);
+ if (nfnl_err == NULL)
+ return -ENOMEM;
+
+ nfnl_err->nlh = nlh;
+ nfnl_err->err = err;
+ list_add_tail(&nfnl_err->head, list);
+
+ return 0;
+}
+/* Unlink one queued error from its list and free it. */
+static void nfnl_err_del(struct nfnl_err *nfnl_err)
+{
+ list_del(&nfnl_err->head);
+ kfree(nfnl_err);
+}
+/* Discard every error queued on @err_list without reporting any of them. */
+static void nfnl_err_reset(struct list_head *err_list)
+{
+ struct nfnl_err *nfnl_err, *next;
+
+ list_for_each_entry_safe(nfnl_err, next, err_list, head)
+ nfnl_err_del(nfnl_err);
+}
+/* netlink_ack() each queued error via @skb in list order, freeing entries. */
+static void nfnl_err_deliver(struct list_head *err_list, struct sk_buff *skb)
+{
+ struct nfnl_err *nfnl_err, *next;
+
+ list_for_each_entry_safe(nfnl_err, next, err_list, head) {
+ netlink_ack(skb, nfnl_err->nlh, nfnl_err->err);
+ nfnl_err_del(nfnl_err);
+ }
+}
+
static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
u_int16_t subsys_id)
{
@@ -230,6 +275,7 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
const struct nfnetlink_subsystem *ss;
const struct nfnl_callback *nc;
bool success = true, done = false;
+ static LIST_HEAD(err_list);
int err;
if (subsys_id >= NFNL_SUBSYS_COUNT)
@@ -287,6 +333,7 @@ replay:
type = nlh->nlmsg_type;
if (type == NFNL_MSG_BATCH_BEGIN) {
/* Malformed: Batch begin twice */
+ nfnl_err_reset(&err_list);
success = false;
goto done;
} else if (type == NFNL_MSG_BATCH_END) {
@@ -333,6 +380,7 @@ replay:
* original skb.
*/
if (err == -EAGAIN) {
+ nfnl_err_reset(&err_list);
ss->abort(skb);
nfnl_unlock(subsys_id);
kfree_skb(nskb);
@@ -341,11 +389,24 @@ replay:
}
ack:
if (nlh->nlmsg_flags & NLM_F_ACK || err) {
+ /* Errors are delivered once the full batch has been
+ * processed; this prevents the same error from being
+ * reported several times when replaying the batch.
+ */
+ if (nfnl_err_add(&err_list, nlh, err) < 0) {
+ /* We failed to enqueue an error, reset the
+ * list of errors and send OOM to userspace
+ * pointing to the batch header.
+ */
+ nfnl_err_reset(&err_list);
+ netlink_ack(skb, nlmsg_hdr(oskb), -ENOMEM);
+ success = false;
+ goto done;
+ }
/* We don't stop processing the batch on errors, thus,
* userspace gets all the errors that the batch
* triggers.
*/
- netlink_ack(skb, nlh, err);
if (err)
success = false;
}
@@ -361,6 +422,7 @@ done:
else
ss->abort(skb);
+ nfnl_err_deliver(&err_list, oskb);
nfnl_unlock(subsys_id);
kfree_skb(nskb);
}
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index 2baa125c2e8..3ea0eacbd97 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -41,6 +41,7 @@ struct nf_acct {
};
#define NFACCT_F_QUOTA (NFACCT_F_QUOTA_PKTS | NFACCT_F_QUOTA_BYTES)
+#define NFACCT_OVERQUOTA_BIT 2 /* NFACCT_F_OVERQUOTA */
static int
nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
@@ -77,7 +78,8 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
smp_mb__before_atomic();
/* reset overquota flag if quota is enabled. */
if ((matching->flags & NFACCT_F_QUOTA))
- clear_bit(NFACCT_F_OVERQUOTA, &matching->flags);
+ clear_bit(NFACCT_OVERQUOTA_BIT,
+ &matching->flags);
return 0;
}
return -EBUSY;
@@ -129,6 +131,7 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
struct nfgenmsg *nfmsg;
unsigned int flags = portid ? NLM_F_MULTI : 0;
u64 pkts, bytes;
+ u32 old_flags;
event |= NFNL_SUBSYS_ACCT << 8;
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
@@ -143,12 +146,13 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
if (nla_put_string(skb, NFACCT_NAME, acct->name))
goto nla_put_failure;
+ old_flags = acct->flags;
if (type == NFNL_MSG_ACCT_GET_CTRZERO) {
pkts = atomic64_xchg(&acct->pkts, 0);
bytes = atomic64_xchg(&acct->bytes, 0);
smp_mb__before_atomic();
if (acct->flags & NFACCT_F_QUOTA)
- clear_bit(NFACCT_F_OVERQUOTA, &acct->flags);
+ clear_bit(NFACCT_OVERQUOTA_BIT, &acct->flags);
} else {
pkts = atomic64_read(&acct->pkts);
bytes = atomic64_read(&acct->bytes);
@@ -160,7 +164,7 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
if (acct->flags & NFACCT_F_QUOTA) {
u64 *quota = (u64 *)acct->data;
- if (nla_put_be32(skb, NFACCT_FLAGS, htonl(acct->flags)) ||
+ if (nla_put_be32(skb, NFACCT_FLAGS, htonl(old_flags)) ||
nla_put_be64(skb, NFACCT_QUOTA, cpu_to_be64(*quota)))
goto nla_put_failure;
}
@@ -412,7 +416,7 @@ int nfnl_acct_overquota(const struct sk_buff *skb, struct nf_acct *nfacct)
ret = now > *quota;
if (now >= *quota &&
- !test_and_set_bit(NFACCT_F_OVERQUOTA, &nfacct->flags)) {
+ !test_and_set_bit(NFACCT_OVERQUOTA_BIT, &nfacct->flags)) {
nfnl_overquota_report(nfacct);
}
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index d292c8d286e..a11c5ff2f72 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -773,6 +773,7 @@ nfulnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb,
static struct nf_logger nfulnl_logger __read_mostly = {
.name = "nfnetlink_log",
+ .type = NF_LOG_TYPE_ULOG,
.logfn = &nfulnl_log_packet,
.me = THIS_MODULE,
};
@@ -1105,6 +1106,9 @@ MODULE_DESCRIPTION("netfilter userspace logging");
MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ULOG);
+MODULE_ALIAS_NF_LOGGER(AF_INET, 1);
+MODULE_ALIAS_NF_LOGGER(AF_INET6, 1);
+MODULE_ALIAS_NF_LOGGER(AF_BRIDGE, 1);
module_init(nfnetlink_log_init);
module_exit(nfnetlink_log_fini);
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 4080ed6a072..8892b7b6184 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -15,209 +15,40 @@
#include <linux/log2.h>
#include <linux/jhash.h>
#include <linux/netlink.h>
-#include <linux/vmalloc.h>
+#include <linux/rhashtable.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
-#define NFT_HASH_MIN_SIZE 4UL
-
-struct nft_hash {
- struct nft_hash_table __rcu *tbl;
-};
-
-struct nft_hash_table {
- unsigned int size;
- struct nft_hash_elem __rcu *buckets[];
-};
+/* We target a hash table size of 4, element hint is 75% of final size */
+#define NFT_HASH_ELEMENT_HINT 3
struct nft_hash_elem {
- struct nft_hash_elem __rcu *next;
+ struct rhash_head node;
struct nft_data key;
struct nft_data data[];
};
-#define nft_hash_for_each_entry(i, head) \
- for (i = nft_dereference(head); i != NULL; i = nft_dereference(i->next))
-#define nft_hash_for_each_entry_rcu(i, head) \
- for (i = rcu_dereference(head); i != NULL; i = rcu_dereference(i->next))
-
-static u32 nft_hash_rnd __read_mostly;
-static bool nft_hash_rnd_initted __read_mostly;
-
-static unsigned int nft_hash_data(const struct nft_data *data,
- unsigned int hsize, unsigned int len)
-{
- unsigned int h;
-
- h = jhash(data->data, len, nft_hash_rnd);
- return h & (hsize - 1);
-}
-
static bool nft_hash_lookup(const struct nft_set *set,
const struct nft_data *key,
struct nft_data *data)
{
- const struct nft_hash *priv = nft_set_priv(set);
- const struct nft_hash_table *tbl = rcu_dereference(priv->tbl);
+ const struct rhashtable *priv = nft_set_priv(set);
const struct nft_hash_elem *he;
- unsigned int h;
-
- h = nft_hash_data(key, tbl->size, set->klen);
- nft_hash_for_each_entry_rcu(he, tbl->buckets[h]) {
- if (nft_data_cmp(&he->key, key, set->klen))
- continue;
- if (set->flags & NFT_SET_MAP)
- nft_data_copy(data, he->data);
- return true;
- }
- return false;
-}
-
-static void nft_hash_tbl_free(const struct nft_hash_table *tbl)
-{
- kvfree(tbl);
-}
-
-static unsigned int nft_hash_tbl_size(unsigned int nelem)
-{
- return max(roundup_pow_of_two(nelem * 4 / 3), NFT_HASH_MIN_SIZE);
-}
-
-static struct nft_hash_table *nft_hash_tbl_alloc(unsigned int nbuckets)
-{
- struct nft_hash_table *tbl;
- size_t size;
-
- size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
- tbl = kzalloc(size, GFP_KERNEL | __GFP_REPEAT | __GFP_NOWARN);
- if (tbl == NULL)
- tbl = vzalloc(size);
- if (tbl == NULL)
- return NULL;
- tbl->size = nbuckets;
-
- return tbl;
-}
-
-static void nft_hash_chain_unzip(const struct nft_set *set,
- const struct nft_hash_table *ntbl,
- struct nft_hash_table *tbl, unsigned int n)
-{
- struct nft_hash_elem *he, *last, *next;
- unsigned int h;
-
- he = nft_dereference(tbl->buckets[n]);
- if (he == NULL)
- return;
- h = nft_hash_data(&he->key, ntbl->size, set->klen);
-
- /* Find last element of first chain hashing to bucket h */
- last = he;
- nft_hash_for_each_entry(he, he->next) {
- if (nft_hash_data(&he->key, ntbl->size, set->klen) != h)
- break;
- last = he;
- }
- /* Unlink first chain from the old table */
- RCU_INIT_POINTER(tbl->buckets[n], last->next);
+ he = rhashtable_lookup(priv, key);
+ if (he && set->flags & NFT_SET_MAP)
+ nft_data_copy(data, he->data);
- /* If end of chain reached, done */
- if (he == NULL)
- return;
-
- /* Find first element of second chain hashing to bucket h */
- next = NULL;
- nft_hash_for_each_entry(he, he->next) {
- if (nft_hash_data(&he->key, ntbl->size, set->klen) != h)
- continue;
- next = he;
- break;
- }
-
- /* Link the two chains */
- RCU_INIT_POINTER(last->next, next);
-}
-
-static int nft_hash_tbl_expand(const struct nft_set *set, struct nft_hash *priv)
-{
- struct nft_hash_table *tbl = nft_dereference(priv->tbl), *ntbl;
- struct nft_hash_elem *he;
- unsigned int i, h;
- bool complete;
-
- ntbl = nft_hash_tbl_alloc(tbl->size * 2);
- if (ntbl == NULL)
- return -ENOMEM;
-
- /* Link new table's buckets to first element in the old table
- * hashing to the new bucket.
- */
- for (i = 0; i < ntbl->size; i++) {
- h = i < tbl->size ? i : i - tbl->size;
- nft_hash_for_each_entry(he, tbl->buckets[h]) {
- if (nft_hash_data(&he->key, ntbl->size, set->klen) != i)
- continue;
- RCU_INIT_POINTER(ntbl->buckets[i], he);
- break;
- }
- }
-
- /* Publish new table */
- rcu_assign_pointer(priv->tbl, ntbl);
-
- /* Unzip interleaved hash chains */
- do {
- /* Wait for readers to use new table/unzipped chains */
- synchronize_rcu();
-
- complete = true;
- for (i = 0; i < tbl->size; i++) {
- nft_hash_chain_unzip(set, ntbl, tbl, i);
- if (tbl->buckets[i] != NULL)
- complete = false;
- }
- } while (!complete);
-
- nft_hash_tbl_free(tbl);
- return 0;
-}
-
-static int nft_hash_tbl_shrink(const struct nft_set *set, struct nft_hash *priv)
-{
- struct nft_hash_table *tbl = nft_dereference(priv->tbl), *ntbl;
- struct nft_hash_elem __rcu **pprev;
- unsigned int i;
-
- ntbl = nft_hash_tbl_alloc(tbl->size / 2);
- if (ntbl == NULL)
- return -ENOMEM;
-
- for (i = 0; i < ntbl->size; i++) {
- ntbl->buckets[i] = tbl->buckets[i];
-
- for (pprev = &ntbl->buckets[i]; *pprev != NULL;
- pprev = &nft_dereference(*pprev)->next)
- ;
- RCU_INIT_POINTER(*pprev, tbl->buckets[i + ntbl->size]);
- }
-
- /* Publish new table */
- rcu_assign_pointer(priv->tbl, ntbl);
- synchronize_rcu();
-
- nft_hash_tbl_free(tbl);
- return 0;
+ return !!he;
}
static int nft_hash_insert(const struct nft_set *set,
const struct nft_set_elem *elem)
{
- struct nft_hash *priv = nft_set_priv(set);
- struct nft_hash_table *tbl = nft_dereference(priv->tbl);
+ struct rhashtable *priv = nft_set_priv(set);
struct nft_hash_elem *he;
- unsigned int size, h;
+ unsigned int size;
if (elem->flags != 0)
return -EINVAL;
@@ -234,13 +65,7 @@ static int nft_hash_insert(const struct nft_set *set,
if (set->flags & NFT_SET_MAP)
nft_data_copy(he->data, &elem->data);
- h = nft_hash_data(&he->key, tbl->size, set->klen);
- RCU_INIT_POINTER(he->next, tbl->buckets[h]);
- rcu_assign_pointer(tbl->buckets[h], he);
-
- /* Expand table when exceeding 75% load */
- if (set->nelems + 1 > tbl->size / 4 * 3)
- nft_hash_tbl_expand(set, priv);
+ rhashtable_insert(priv, &he->node, GFP_KERNEL);
return 0;
}
@@ -257,36 +82,31 @@ static void nft_hash_elem_destroy(const struct nft_set *set,
static void nft_hash_remove(const struct nft_set *set,
const struct nft_set_elem *elem)
{
- struct nft_hash *priv = nft_set_priv(set);
- struct nft_hash_table *tbl = nft_dereference(priv->tbl);
- struct nft_hash_elem *he, __rcu **pprev;
+ struct rhashtable *priv = nft_set_priv(set);
+ struct rhash_head *he, __rcu **pprev;
pprev = elem->cookie;
- he = nft_dereference((*pprev));
+ he = rht_dereference((*pprev), priv);
+
+ rhashtable_remove_pprev(priv, he, pprev, GFP_KERNEL);
- RCU_INIT_POINTER(*pprev, he->next);
synchronize_rcu();
kfree(he);
-
- /* Shrink table beneath 30% load */
- if (set->nelems - 1 < tbl->size * 3 / 10 &&
- tbl->size > NFT_HASH_MIN_SIZE)
- nft_hash_tbl_shrink(set, priv);
}
static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem)
{
- const struct nft_hash *priv = nft_set_priv(set);
- const struct nft_hash_table *tbl = nft_dereference(priv->tbl);
- struct nft_hash_elem __rcu * const *pprev;
+ const struct rhashtable *priv = nft_set_priv(set);
+ const struct bucket_table *tbl = rht_dereference_rcu(priv->tbl, priv);
+ struct rhash_head __rcu * const *pprev;
struct nft_hash_elem *he;
- unsigned int h;
+ u32 h;
- h = nft_hash_data(&elem->key, tbl->size, set->klen);
+ h = rhashtable_hashfn(priv, &elem->key, set->klen);
pprev = &tbl->buckets[h];
- nft_hash_for_each_entry(he, tbl->buckets[h]) {
+ rht_for_each_entry_rcu(he, tbl->buckets[h], node) {
if (nft_data_cmp(&he->key, &elem->key, set->klen)) {
- pprev = &he->next;
+ pprev = &he->node.next;
continue;
}
@@ -302,14 +122,15 @@ static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem)
static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
struct nft_set_iter *iter)
{
- const struct nft_hash *priv = nft_set_priv(set);
- const struct nft_hash_table *tbl = nft_dereference(priv->tbl);
+ const struct rhashtable *priv = nft_set_priv(set);
+ const struct bucket_table *tbl;
const struct nft_hash_elem *he;
struct nft_set_elem elem;
unsigned int i;
+ tbl = rht_dereference_rcu(priv->tbl, priv);
for (i = 0; i < tbl->size; i++) {
- nft_hash_for_each_entry(he, tbl->buckets[i]) {
+ rht_for_each_entry_rcu(he, tbl->buckets[i], node) {
if (iter->count < iter->skip)
goto cont;
@@ -329,48 +150,48 @@ cont:
static unsigned int nft_hash_privsize(const struct nlattr * const nla[])
{
- return sizeof(struct nft_hash);
+ return sizeof(struct rhashtable);
+}
+/* Wraps lockdep_nfnl_is_held() for NFNL_SUBSYS_NFTABLES (.mutex_is_held). */
+static int lockdep_nfnl_lock_is_held(void)
+{
+ return lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES);
}
static int nft_hash_init(const struct nft_set *set,
const struct nft_set_desc *desc,
const struct nlattr * const tb[])
{
- struct nft_hash *priv = nft_set_priv(set);
- struct nft_hash_table *tbl;
- unsigned int size;
+ struct rhashtable *priv = nft_set_priv(set);
+ struct rhashtable_params params = {
+ .nelem_hint = desc->size ? : NFT_HASH_ELEMENT_HINT,
+ .head_offset = offsetof(struct nft_hash_elem, node),
+ .key_offset = offsetof(struct nft_hash_elem, key),
+ .key_len = set->klen,
+ .hashfn = jhash,
+ .grow_decision = rht_grow_above_75,
+ .shrink_decision = rht_shrink_below_30,
+ .mutex_is_held = lockdep_nfnl_lock_is_held,
+ };
- if (unlikely(!nft_hash_rnd_initted)) {
- get_random_bytes(&nft_hash_rnd, 4);
- nft_hash_rnd_initted = true;
- }
-
- size = NFT_HASH_MIN_SIZE;
- if (desc->size)
- size = nft_hash_tbl_size(desc->size);
-
- tbl = nft_hash_tbl_alloc(size);
- if (tbl == NULL)
- return -ENOMEM;
- RCU_INIT_POINTER(priv->tbl, tbl);
- return 0;
+ return rhashtable_init(priv, &params);
}
static void nft_hash_destroy(const struct nft_set *set)
{
- const struct nft_hash *priv = nft_set_priv(set);
- const struct nft_hash_table *tbl = nft_dereference(priv->tbl);
+ const struct rhashtable *priv = nft_set_priv(set);
+ const struct bucket_table *tbl = priv->tbl;
struct nft_hash_elem *he, *next;
unsigned int i;
for (i = 0; i < tbl->size; i++) {
- for (he = nft_dereference(tbl->buckets[i]); he != NULL;
- he = next) {
- next = nft_dereference(he->next);
+ for (he = rht_entry(tbl->buckets[i], struct nft_hash_elem, node);
+ he != NULL; he = next) {
+ next = rht_entry(he->node.next, struct nft_hash_elem, node);
nft_hash_elem_destroy(set, he);
}
}
- kfree(tbl);
+ rhashtable_destroy(priv);
}
static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
@@ -383,8 +204,8 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
esize += FIELD_SIZEOF(struct nft_hash_elem, data[0]);
if (desc->size) {
- est->size = sizeof(struct nft_hash) +
- nft_hash_tbl_size(desc->size) *
+ est->size = sizeof(struct rhashtable) +
+ roundup_pow_of_two(desc->size * 4 / 3) *
sizeof(struct nft_hash_elem *) +
desc->size * esize;
} else {
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index 10cfb156cdf..bde05f28cf1 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012-2014 Pablo Neira Ayuso <pablo@netfilter.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -41,6 +42,8 @@ static const struct nla_policy nft_log_policy[NFTA_LOG_MAX + 1] = {
[NFTA_LOG_PREFIX] = { .type = NLA_STRING },
[NFTA_LOG_SNAPLEN] = { .type = NLA_U32 },
[NFTA_LOG_QTHRESHOLD] = { .type = NLA_U16 },
+ [NFTA_LOG_LEVEL] = { .type = NLA_U32 },
+ [NFTA_LOG_FLAGS] = { .type = NLA_U32 },
};
static int nft_log_init(const struct nft_ctx *ctx,
@@ -50,6 +53,7 @@ static int nft_log_init(const struct nft_ctx *ctx,
struct nft_log *priv = nft_expr_priv(expr);
struct nf_loginfo *li = &priv->loginfo;
const struct nlattr *nla;
+ int ret;
nla = tb[NFTA_LOG_PREFIX];
if (nla != NULL) {
@@ -57,30 +61,74 @@ static int nft_log_init(const struct nft_ctx *ctx,
if (priv->prefix == NULL)
return -ENOMEM;
nla_strlcpy(priv->prefix, nla, nla_len(nla) + 1);
- } else
+ } else {
priv->prefix = (char *)nft_log_null_prefix;
+ }
- li->type = NF_LOG_TYPE_ULOG;
+ li->type = NF_LOG_TYPE_LOG;
+ if (tb[NFTA_LOG_LEVEL] != NULL &&
+ tb[NFTA_LOG_GROUP] != NULL)
+ return -EINVAL;
if (tb[NFTA_LOG_GROUP] != NULL)
+ li->type = NF_LOG_TYPE_ULOG;
+
+ switch (li->type) {
+ case NF_LOG_TYPE_LOG:
+ if (tb[NFTA_LOG_LEVEL] != NULL) {
+ li->u.log.level =
+ ntohl(nla_get_be32(tb[NFTA_LOG_LEVEL]));
+ } else {
+ li->u.log.level = 4;
+ }
+ if (tb[NFTA_LOG_FLAGS] != NULL) {
+ li->u.log.logflags =
+ ntohl(nla_get_be32(tb[NFTA_LOG_FLAGS]));
+ }
+ break;
+ case NF_LOG_TYPE_ULOG:
li->u.ulog.group = ntohs(nla_get_be16(tb[NFTA_LOG_GROUP]));
+ if (tb[NFTA_LOG_SNAPLEN] != NULL) {
+ li->u.ulog.copy_len =
+ ntohl(nla_get_be32(tb[NFTA_LOG_SNAPLEN]));
+ }
+ if (tb[NFTA_LOG_QTHRESHOLD] != NULL) {
+ li->u.ulog.qthreshold =
+ ntohs(nla_get_be16(tb[NFTA_LOG_QTHRESHOLD]));
+ }
+ break;
+ }
- if (tb[NFTA_LOG_SNAPLEN] != NULL)
- li->u.ulog.copy_len = ntohl(nla_get_be32(tb[NFTA_LOG_SNAPLEN]));
- if (tb[NFTA_LOG_QTHRESHOLD] != NULL) {
- li->u.ulog.qthreshold =
- ntohs(nla_get_be16(tb[NFTA_LOG_QTHRESHOLD]));
+ if (ctx->afi->family == NFPROTO_INET) {
+ ret = nf_logger_find_get(NFPROTO_IPV4, li->type);
+ if (ret < 0)
+ return ret;
+
+ ret = nf_logger_find_get(NFPROTO_IPV6, li->type);
+ if (ret < 0) {
+ nf_logger_put(NFPROTO_IPV4, li->type);
+ return ret;
+ }
+ return 0;
}
- return 0;
+ return nf_logger_find_get(ctx->afi->family, li->type);
}
static void nft_log_destroy(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
struct nft_log *priv = nft_expr_priv(expr);
+ struct nf_loginfo *li = &priv->loginfo;
if (priv->prefix != nft_log_null_prefix)
kfree(priv->prefix);
+
+ if (ctx->afi->family == NFPROTO_INET) {
+ nf_logger_put(NFPROTO_IPV4, li->type);
+ nf_logger_put(NFPROTO_IPV6, li->type);
+ } else {
+ nf_logger_put(ctx->afi->family, li->type);
+ }
}
static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr)
@@ -91,17 +139,33 @@ static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr)
if (priv->prefix != nft_log_null_prefix)
if (nla_put_string(skb, NFTA_LOG_PREFIX, priv->prefix))
goto nla_put_failure;
- if (li->u.ulog.group)
- if (nla_put_be16(skb, NFTA_LOG_GROUP, htons(li->u.ulog.group)))
- goto nla_put_failure;
- if (li->u.ulog.copy_len)
- if (nla_put_be32(skb, NFTA_LOG_SNAPLEN,
- htonl(li->u.ulog.copy_len)))
+ switch (li->type) {
+ case NF_LOG_TYPE_LOG:
+ if (nla_put_be32(skb, NFTA_LOG_LEVEL, htonl(li->u.log.level)))
goto nla_put_failure;
- if (li->u.ulog.qthreshold)
- if (nla_put_be16(skb, NFTA_LOG_QTHRESHOLD,
- htons(li->u.ulog.qthreshold)))
+
+ if (li->u.log.logflags) {
+ if (nla_put_be32(skb, NFTA_LOG_FLAGS,
+ htonl(li->u.log.logflags)))
+ goto nla_put_failure;
+ }
+ break;
+ case NF_LOG_TYPE_ULOG:
+ if (nla_put_be16(skb, NFTA_LOG_GROUP, htons(li->u.ulog.group)))
goto nla_put_failure;
+
+ if (li->u.ulog.copy_len) {
+ if (nla_put_be32(skb, NFTA_LOG_SNAPLEN,
+ htonl(li->u.ulog.copy_len)))
+ goto nla_put_failure;
+ }
+ if (li->u.ulog.qthreshold) {
+ if (nla_put_be16(skb, NFTA_LOG_QTHRESHOLD,
+ htons(li->u.ulog.qthreshold)))
+ goto nla_put_failure;
+ }
+ break;
+ }
return 0;
nla_put_failure:
diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c
index e1836ff8819..46214f24566 100644
--- a/net/netfilter/nft_rbtree.c
+++ b/net/netfilter/nft_rbtree.c
@@ -234,13 +234,11 @@ static void nft_rbtree_destroy(const struct nft_set *set)
struct nft_rbtree_elem *rbe;
struct rb_node *node;
- spin_lock_bh(&nft_rbtree_lock);
while ((node = priv->root.rb_node) != NULL) {
rb_erase(node, &priv->root);
rbe = rb_entry(node, struct nft_rbtree_elem, node);
nft_rbtree_elem_destroy(set, rbe);
}
- spin_unlock_bh(&nft_rbtree_lock);
}
static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 227aa11e840..272ae4d6fdf 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -71,18 +71,14 @@ static const char *const xt_prefix[NFPROTO_NUMPROTO] = {
static const unsigned int xt_jumpstack_multiplier = 2;
/* Registration hooks for targets. */
-int
-xt_register_target(struct xt_target *target)
+int xt_register_target(struct xt_target *target)
{
u_int8_t af = target->family;
- int ret;
- ret = mutex_lock_interruptible(&xt[af].mutex);
- if (ret != 0)
- return ret;
+ mutex_lock(&xt[af].mutex);
list_add(&target->list, &xt[af].target);
mutex_unlock(&xt[af].mutex);
- return ret;
+ return 0;
}
EXPORT_SYMBOL(xt_register_target);
@@ -125,20 +121,14 @@ xt_unregister_targets(struct xt_target *target, unsigned int n)
}
EXPORT_SYMBOL(xt_unregister_targets);
-int
-xt_register_match(struct xt_match *match)
+int xt_register_match(struct xt_match *match)
{
u_int8_t af = match->family;
- int ret;
-
- ret = mutex_lock_interruptible(&xt[af].mutex);
- if (ret != 0)
- return ret;
+ mutex_lock(&xt[af].mutex);
list_add(&match->list, &xt[af].match);
mutex_unlock(&xt[af].mutex);
-
- return ret;
+ return 0;
}
EXPORT_SYMBOL(xt_register_match);
@@ -194,9 +184,7 @@ struct xt_match *xt_find_match(u8 af, const char *name, u8 revision)
struct xt_match *m;
int err = -ENOENT;
- if (mutex_lock_interruptible(&xt[af].mutex) != 0)
- return ERR_PTR(-EINTR);
-
+ mutex_lock(&xt[af].mutex);
list_for_each_entry(m, &xt[af].match, list) {
if (strcmp(m->name, name) == 0) {
if (m->revision == revision) {
@@ -239,9 +227,7 @@ struct xt_target *xt_find_target(u8 af, const char *name, u8 revision)
struct xt_target *t;
int err = -ENOENT;
- if (mutex_lock_interruptible(&xt[af].mutex) != 0)
- return ERR_PTR(-EINTR);
-
+ mutex_lock(&xt[af].mutex);
list_for_each_entry(t, &xt[af].target, list) {
if (strcmp(t->name, name) == 0) {
if (t->revision == revision) {
@@ -323,10 +309,7 @@ int xt_find_revision(u8 af, const char *name, u8 revision, int target,
{
int have_rev, best = -1;
- if (mutex_lock_interruptible(&xt[af].mutex) != 0) {
- *err = -EINTR;
- return 1;
- }
+ mutex_lock(&xt[af].mutex);
if (target == 1)
have_rev = target_revfn(af, name, revision, &best);
else
@@ -711,28 +694,15 @@ void xt_free_table_info(struct xt_table_info *info)
{
int cpu;
- for_each_possible_cpu(cpu) {
- if (info->size <= PAGE_SIZE)
- kfree(info->entries[cpu]);
- else
- vfree(info->entries[cpu]);
- }
+ for_each_possible_cpu(cpu)
+ kvfree(info->entries[cpu]);
if (info->jumpstack != NULL) {
- if (sizeof(void *) * info->stacksize > PAGE_SIZE) {
- for_each_possible_cpu(cpu)
- vfree(info->jumpstack[cpu]);
- } else {
- for_each_possible_cpu(cpu)
- kfree(info->jumpstack[cpu]);
- }
+ for_each_possible_cpu(cpu)
+ kvfree(info->jumpstack[cpu]);
+ kvfree(info->jumpstack);
}
- if (sizeof(void **) * nr_cpu_ids > PAGE_SIZE)
- vfree(info->jumpstack);
- else
- kfree(info->jumpstack);
-
free_percpu(info->stackptr);
kfree(info);
@@ -745,9 +715,7 @@ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
{
struct xt_table *t;
- if (mutex_lock_interruptible(&xt[af].mutex) != 0)
- return ERR_PTR(-EINTR);
-
+ mutex_lock(&xt[af].mutex);
list_for_each_entry(t, &net->xt.tables[af], list)
if (strcmp(t->name, name) == 0 && try_module_get(t->me))
return t;
@@ -896,10 +864,7 @@ struct xt_table *xt_register_table(struct net *net,
goto out;
}
- ret = mutex_lock_interruptible(&xt[table->af].mutex);
- if (ret != 0)
- goto out_free;
-
+ mutex_lock(&xt[table->af].mutex);
/* Don't autoload: we'd eat our tail... */
list_for_each_entry(t, &net->xt.tables[table->af], list) {
if (strcmp(t->name, table->name) == 0) {
@@ -924,9 +889,8 @@ struct xt_table *xt_register_table(struct net *net,
mutex_unlock(&xt[table->af].mutex);
return table;
- unlock:
+unlock:
mutex_unlock(&xt[table->af].mutex);
-out_free:
kfree(table);
out:
return ERR_PTR(ret);
diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c
index 993de2ba89d..3ba31c194cc 100644
--- a/net/netfilter/xt_LED.c
+++ b/net/netfilter/xt_LED.c
@@ -50,11 +50,14 @@ struct xt_led_info_internal {
struct timer_list timer;
};
+#define XT_LED_BLINK_DELAY 50 /* ms */
+
static unsigned int
led_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_led_info *ledinfo = par->targinfo;
struct xt_led_info_internal *ledinternal = ledinfo->internal_data;
+ unsigned long led_delay = XT_LED_BLINK_DELAY;
/*
* If "always blink" is enabled, and there's still some time until the
@@ -62,9 +65,10 @@ led_tg(struct sk_buff *skb, const struct xt_action_param *par)
*/
if ((ledinfo->delay > 0) && ledinfo->always_blink &&
timer_pending(&ledinternal->timer))
- led_trigger_event(&ledinternal->netfilter_led_trigger, LED_OFF);
-
- led_trigger_event(&ledinternal->netfilter_led_trigger, LED_FULL);
+ led_trigger_blink_oneshot(&ledinternal->netfilter_led_trigger,
+ &led_delay, &led_delay, 1);
+ else
+ led_trigger_event(&ledinternal->netfilter_led_trigger, LED_FULL);
/* If there's a positive delay, start/update the timer */
if (ledinfo->delay > 0) {
@@ -133,9 +137,7 @@ static int led_tg_check(const struct xt_tgchk_param *par)
err = led_trigger_register(&ledinternal->netfilter_led_trigger);
if (err) {
- pr_warning("led_trigger_register() failed\n");
- if (err == -EEXIST)
- pr_warning("Trigger name is already in use.\n");
+ pr_err("Trigger name is already in use.\n");
goto exit_alloc;
}
diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c
index 5ab24843370..c13b79440ed 100644
--- a/net/netfilter/xt_LOG.c
+++ b/net/netfilter/xt_LOG.c
@@ -27,806 +27,6 @@
#include <linux/netfilter/xt_LOG.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <net/netfilter/nf_log.h>
-#include <net/netfilter/xt_log.h>
-
-static struct nf_loginfo default_loginfo = {
- .type = NF_LOG_TYPE_LOG,
- .u = {
- .log = {
- .level = 5,
- .logflags = NF_LOG_MASK,
- },
- },
-};
-
-static int dump_udp_header(struct sbuff *m, const struct sk_buff *skb,
- u8 proto, int fragment, unsigned int offset)
-{
- struct udphdr _udph;
- const struct udphdr *uh;
-
- if (proto == IPPROTO_UDP)
- /* Max length: 10 "PROTO=UDP " */
- sb_add(m, "PROTO=UDP ");
- else /* Max length: 14 "PROTO=UDPLITE " */
- sb_add(m, "PROTO=UDPLITE ");
-
- if (fragment)
- goto out;
-
- /* Max length: 25 "INCOMPLETE [65535 bytes] " */
- uh = skb_header_pointer(skb, offset, sizeof(_udph), &_udph);
- if (uh == NULL) {
- sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - offset);
-
- return 1;
- }
-
- /* Max length: 20 "SPT=65535 DPT=65535 " */
- sb_add(m, "SPT=%u DPT=%u LEN=%u ", ntohs(uh->source), ntohs(uh->dest),
- ntohs(uh->len));
-
-out:
- return 0;
-}
-
-static int dump_tcp_header(struct sbuff *m, const struct sk_buff *skb,
- u8 proto, int fragment, unsigned int offset,
- unsigned int logflags)
-{
- struct tcphdr _tcph;
- const struct tcphdr *th;
-
- /* Max length: 10 "PROTO=TCP " */
- sb_add(m, "PROTO=TCP ");
-
- if (fragment)
- return 0;
-
- /* Max length: 25 "INCOMPLETE [65535 bytes] " */
- th = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph);
- if (th == NULL) {
- sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - offset);
- return 1;
- }
-
- /* Max length: 20 "SPT=65535 DPT=65535 " */
- sb_add(m, "SPT=%u DPT=%u ", ntohs(th->source), ntohs(th->dest));
- /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
- if (logflags & XT_LOG_TCPSEQ)
- sb_add(m, "SEQ=%u ACK=%u ", ntohl(th->seq), ntohl(th->ack_seq));
-
- /* Max length: 13 "WINDOW=65535 " */
- sb_add(m, "WINDOW=%u ", ntohs(th->window));
- /* Max length: 9 "RES=0x3C " */
- sb_add(m, "RES=0x%02x ", (u_int8_t)(ntohl(tcp_flag_word(th) &
- TCP_RESERVED_BITS) >> 22));
- /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */
- if (th->cwr)
- sb_add(m, "CWR ");
- if (th->ece)
- sb_add(m, "ECE ");
- if (th->urg)
- sb_add(m, "URG ");
- if (th->ack)
- sb_add(m, "ACK ");
- if (th->psh)
- sb_add(m, "PSH ");
- if (th->rst)
- sb_add(m, "RST ");
- if (th->syn)
- sb_add(m, "SYN ");
- if (th->fin)
- sb_add(m, "FIN ");
- /* Max length: 11 "URGP=65535 " */
- sb_add(m, "URGP=%u ", ntohs(th->urg_ptr));
-
- if ((logflags & XT_LOG_TCPOPT) && th->doff*4 > sizeof(struct tcphdr)) {
- u_int8_t _opt[60 - sizeof(struct tcphdr)];
- const u_int8_t *op;
- unsigned int i;
- unsigned int optsize = th->doff*4 - sizeof(struct tcphdr);
-
- op = skb_header_pointer(skb, offset + sizeof(struct tcphdr),
- optsize, _opt);
- if (op == NULL) {
- sb_add(m, "OPT (TRUNCATED)");
- return 1;
- }
-
- /* Max length: 127 "OPT (" 15*4*2chars ") " */
- sb_add(m, "OPT (");
- for (i = 0; i < optsize; i++)
- sb_add(m, "%02X", op[i]);
-
- sb_add(m, ") ");
- }
-
- return 0;
-}
-
-static void dump_sk_uid_gid(struct sbuff *m, struct sock *sk)
-{
- if (!sk || sk->sk_state == TCP_TIME_WAIT)
- return;
-
- read_lock_bh(&sk->sk_callback_lock);
- if (sk->sk_socket && sk->sk_socket->file) {
- const struct cred *cred = sk->sk_socket->file->f_cred;
- sb_add(m, "UID=%u GID=%u ",
- from_kuid_munged(&init_user_ns, cred->fsuid),
- from_kgid_munged(&init_user_ns, cred->fsgid));
- }
- read_unlock_bh(&sk->sk_callback_lock);
-}
-
-/* One level of recursion won't kill us */
-static void dump_ipv4_packet(struct sbuff *m,
- const struct nf_loginfo *info,
- const struct sk_buff *skb,
- unsigned int iphoff)
-{
- struct iphdr _iph;
- const struct iphdr *ih;
- unsigned int logflags;
-
- if (info->type == NF_LOG_TYPE_LOG)
- logflags = info->u.log.logflags;
- else
- logflags = NF_LOG_MASK;
-
- ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph);
- if (ih == NULL) {
- sb_add(m, "TRUNCATED");
- return;
- }
-
- /* Important fields:
- * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */
- /* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */
- sb_add(m, "SRC=%pI4 DST=%pI4 ",
- &ih->saddr, &ih->daddr);
-
- /* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */
- sb_add(m, "LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ",
- ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK,
- ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id));
-
- /* Max length: 6 "CE DF MF " */
- if (ntohs(ih->frag_off) & IP_CE)
- sb_add(m, "CE ");
- if (ntohs(ih->frag_off) & IP_DF)
- sb_add(m, "DF ");
- if (ntohs(ih->frag_off) & IP_MF)
- sb_add(m, "MF ");
-
- /* Max length: 11 "FRAG:65535 " */
- if (ntohs(ih->frag_off) & IP_OFFSET)
- sb_add(m, "FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET);
-
- if ((logflags & XT_LOG_IPOPT) &&
- ih->ihl * 4 > sizeof(struct iphdr)) {
- const unsigned char *op;
- unsigned char _opt[4 * 15 - sizeof(struct iphdr)];
- unsigned int i, optsize;
-
- optsize = ih->ihl * 4 - sizeof(struct iphdr);
- op = skb_header_pointer(skb, iphoff+sizeof(_iph),
- optsize, _opt);
- if (op == NULL) {
- sb_add(m, "TRUNCATED");
- return;
- }
-
- /* Max length: 127 "OPT (" 15*4*2chars ") " */
- sb_add(m, "OPT (");
- for (i = 0; i < optsize; i++)
- sb_add(m, "%02X", op[i]);
- sb_add(m, ") ");
- }
-
- switch (ih->protocol) {
- case IPPROTO_TCP:
- if (dump_tcp_header(m, skb, ih->protocol,
- ntohs(ih->frag_off) & IP_OFFSET,
- iphoff+ih->ihl*4, logflags))
- return;
- break;
- case IPPROTO_UDP:
- case IPPROTO_UDPLITE:
- if (dump_udp_header(m, skb, ih->protocol,
- ntohs(ih->frag_off) & IP_OFFSET,
- iphoff+ih->ihl*4))
- return;
- break;
- case IPPROTO_ICMP: {
- struct icmphdr _icmph;
- const struct icmphdr *ich;
- static const size_t required_len[NR_ICMP_TYPES+1]
- = { [ICMP_ECHOREPLY] = 4,
- [ICMP_DEST_UNREACH]
- = 8 + sizeof(struct iphdr),
- [ICMP_SOURCE_QUENCH]
- = 8 + sizeof(struct iphdr),
- [ICMP_REDIRECT]
- = 8 + sizeof(struct iphdr),
- [ICMP_ECHO] = 4,
- [ICMP_TIME_EXCEEDED]
- = 8 + sizeof(struct iphdr),
- [ICMP_PARAMETERPROB]
- = 8 + sizeof(struct iphdr),
- [ICMP_TIMESTAMP] = 20,
- [ICMP_TIMESTAMPREPLY] = 20,
- [ICMP_ADDRESS] = 12,
- [ICMP_ADDRESSREPLY] = 12 };
-
- /* Max length: 11 "PROTO=ICMP " */
- sb_add(m, "PROTO=ICMP ");
-
- if (ntohs(ih->frag_off) & IP_OFFSET)
- break;
-
- /* Max length: 25 "INCOMPLETE [65535 bytes] " */
- ich = skb_header_pointer(skb, iphoff + ih->ihl * 4,
- sizeof(_icmph), &_icmph);
- if (ich == NULL) {
- sb_add(m, "INCOMPLETE [%u bytes] ",
- skb->len - iphoff - ih->ihl*4);
- break;
- }
-
- /* Max length: 18 "TYPE=255 CODE=255 " */
- sb_add(m, "TYPE=%u CODE=%u ", ich->type, ich->code);
-
- /* Max length: 25 "INCOMPLETE [65535 bytes] " */
- if (ich->type <= NR_ICMP_TYPES &&
- required_len[ich->type] &&
- skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) {
- sb_add(m, "INCOMPLETE [%u bytes] ",
- skb->len - iphoff - ih->ihl*4);
- break;
- }
-
- switch (ich->type) {
- case ICMP_ECHOREPLY:
- case ICMP_ECHO:
- /* Max length: 19 "ID=65535 SEQ=65535 " */
- sb_add(m, "ID=%u SEQ=%u ",
- ntohs(ich->un.echo.id),
- ntohs(ich->un.echo.sequence));
- break;
-
- case ICMP_PARAMETERPROB:
- /* Max length: 14 "PARAMETER=255 " */
- sb_add(m, "PARAMETER=%u ",
- ntohl(ich->un.gateway) >> 24);
- break;
- case ICMP_REDIRECT:
- /* Max length: 24 "GATEWAY=255.255.255.255 " */
- sb_add(m, "GATEWAY=%pI4 ", &ich->un.gateway);
- /* Fall through */
- case ICMP_DEST_UNREACH:
- case ICMP_SOURCE_QUENCH:
- case ICMP_TIME_EXCEEDED:
- /* Max length: 3+maxlen */
- if (!iphoff) { /* Only recurse once. */
- sb_add(m, "[");
- dump_ipv4_packet(m, info, skb,
- iphoff + ih->ihl*4+sizeof(_icmph));
- sb_add(m, "] ");
- }
-
- /* Max length: 10 "MTU=65535 " */
- if (ich->type == ICMP_DEST_UNREACH &&
- ich->code == ICMP_FRAG_NEEDED)
- sb_add(m, "MTU=%u ", ntohs(ich->un.frag.mtu));
- }
- break;
- }
- /* Max Length */
- case IPPROTO_AH: {
- struct ip_auth_hdr _ahdr;
- const struct ip_auth_hdr *ah;
-
- if (ntohs(ih->frag_off) & IP_OFFSET)
- break;
-
- /* Max length: 9 "PROTO=AH " */
- sb_add(m, "PROTO=AH ");
-
- /* Max length: 25 "INCOMPLETE [65535 bytes] " */
- ah = skb_header_pointer(skb, iphoff+ih->ihl*4,
- sizeof(_ahdr), &_ahdr);
- if (ah == NULL) {
- sb_add(m, "INCOMPLETE [%u bytes] ",
- skb->len - iphoff - ih->ihl*4);
- break;
- }
-
- /* Length: 15 "SPI=0xF1234567 " */
- sb_add(m, "SPI=0x%x ", ntohl(ah->spi));
- break;
- }
- case IPPROTO_ESP: {
- struct ip_esp_hdr _esph;
- const struct ip_esp_hdr *eh;
-
- /* Max length: 10 "PROTO=ESP " */
- sb_add(m, "PROTO=ESP ");
-
- if (ntohs(ih->frag_off) & IP_OFFSET)
- break;
-
- /* Max length: 25 "INCOMPLETE [65535 bytes] " */
- eh = skb_header_pointer(skb, iphoff+ih->ihl*4,
- sizeof(_esph), &_esph);
- if (eh == NULL) {
- sb_add(m, "INCOMPLETE [%u bytes] ",
- skb->len - iphoff - ih->ihl*4);
- break;
- }
-
- /* Length: 15 "SPI=0xF1234567 " */
- sb_add(m, "SPI=0x%x ", ntohl(eh->spi));
- break;
- }
- /* Max length: 10 "PROTO 255 " */
- default:
- sb_add(m, "PROTO=%u ", ih->protocol);
- }
-
- /* Max length: 15 "UID=4294967295 " */
- if ((logflags & XT_LOG_UID) && !iphoff)
- dump_sk_uid_gid(m, skb->sk);
-
- /* Max length: 16 "MARK=0xFFFFFFFF " */
- if (!iphoff && skb->mark)
- sb_add(m, "MARK=0x%x ", skb->mark);
-
- /* Proto Max log string length */
- /* IP: 40+46+6+11+127 = 230 */
- /* TCP: 10+max(25,20+30+13+9+32+11+127) = 252 */
- /* UDP: 10+max(25,20) = 35 */
- /* UDPLITE: 14+max(25,20) = 39 */
- /* ICMP: 11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */
- /* ESP: 10+max(25)+15 = 50 */
- /* AH: 9+max(25)+15 = 49 */
- /* unknown: 10 */
-
- /* (ICMP allows recursion one level deep) */
- /* maxlen = IP + ICMP + IP + max(TCP,UDP,ICMP,unknown) */
- /* maxlen = 230+ 91 + 230 + 252 = 803 */
-}
-
-static void dump_ipv4_mac_header(struct sbuff *m,
- const struct nf_loginfo *info,
- const struct sk_buff *skb)
-{
- struct net_device *dev = skb->dev;
- unsigned int logflags = 0;
-
- if (info->type == NF_LOG_TYPE_LOG)
- logflags = info->u.log.logflags;
-
- if (!(logflags & XT_LOG_MACDECODE))
- goto fallback;
-
- switch (dev->type) {
- case ARPHRD_ETHER:
- sb_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
- eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
- ntohs(eth_hdr(skb)->h_proto));
- return;
- default:
- break;
- }
-
-fallback:
- sb_add(m, "MAC=");
- if (dev->hard_header_len &&
- skb->mac_header != skb->network_header) {
- const unsigned char *p = skb_mac_header(skb);
- unsigned int i;
-
- sb_add(m, "%02x", *p++);
- for (i = 1; i < dev->hard_header_len; i++, p++)
- sb_add(m, ":%02x", *p);
- }
- sb_add(m, " ");
-}
-
-static void
-log_packet_common(struct sbuff *m,
- u_int8_t pf,
- unsigned int hooknum,
- const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const struct nf_loginfo *loginfo,
- const char *prefix)
-{
- sb_add(m, KERN_SOH "%c%sIN=%s OUT=%s ",
- '0' + loginfo->u.log.level, prefix,
- in ? in->name : "",
- out ? out->name : "");
-#ifdef CONFIG_BRIDGE_NETFILTER
- if (skb->nf_bridge) {
- const struct net_device *physindev;
- const struct net_device *physoutdev;
-
- physindev = skb->nf_bridge->physindev;
- if (physindev && in != physindev)
- sb_add(m, "PHYSIN=%s ", physindev->name);
- physoutdev = skb->nf_bridge->physoutdev;
- if (physoutdev && out != physoutdev)
- sb_add(m, "PHYSOUT=%s ", physoutdev->name);
- }
-#endif
-}
-
-
-static void
-ipt_log_packet(struct net *net,
- u_int8_t pf,
- unsigned int hooknum,
- const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const struct nf_loginfo *loginfo,
- const char *prefix)
-{
- struct sbuff *m;
-
- /* FIXME: Disabled from containers until syslog ns is supported */
- if (!net_eq(net, &init_net))
- return;
-
- m = sb_open();
-
- if (!loginfo)
- loginfo = &default_loginfo;
-
- log_packet_common(m, pf, hooknum, skb, in, out, loginfo, prefix);
-
- if (in != NULL)
- dump_ipv4_mac_header(m, loginfo, skb);
-
- dump_ipv4_packet(m, loginfo, skb, 0);
-
- sb_close(m);
-}
-
-#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
-/* One level of recursion won't kill us */
-static void dump_ipv6_packet(struct sbuff *m,
- const struct nf_loginfo *info,
- const struct sk_buff *skb, unsigned int ip6hoff,
- int recurse)
-{
- u_int8_t currenthdr;
- int fragment;
- struct ipv6hdr _ip6h;
- const struct ipv6hdr *ih;
- unsigned int ptr;
- unsigned int hdrlen = 0;
- unsigned int logflags;
-
- if (info->type == NF_LOG_TYPE_LOG)
- logflags = info->u.log.logflags;
- else
- logflags = NF_LOG_MASK;
-
- ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h);
- if (ih == NULL) {
- sb_add(m, "TRUNCATED");
- return;
- }
-
- /* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */
- sb_add(m, "SRC=%pI6 DST=%pI6 ", &ih->saddr, &ih->daddr);
-
- /* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */
- sb_add(m, "LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ",
- ntohs(ih->payload_len) + sizeof(struct ipv6hdr),
- (ntohl(*(__be32 *)ih) & 0x0ff00000) >> 20,
- ih->hop_limit,
- (ntohl(*(__be32 *)ih) & 0x000fffff));
-
- fragment = 0;
- ptr = ip6hoff + sizeof(struct ipv6hdr);
- currenthdr = ih->nexthdr;
- while (currenthdr != NEXTHDR_NONE && ip6t_ext_hdr(currenthdr)) {
- struct ipv6_opt_hdr _hdr;
- const struct ipv6_opt_hdr *hp;
-
- hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
- if (hp == NULL) {
- sb_add(m, "TRUNCATED");
- return;
- }
-
- /* Max length: 48 "OPT (...) " */
- if (logflags & XT_LOG_IPOPT)
- sb_add(m, "OPT ( ");
-
- switch (currenthdr) {
- case IPPROTO_FRAGMENT: {
- struct frag_hdr _fhdr;
- const struct frag_hdr *fh;
-
- sb_add(m, "FRAG:");
- fh = skb_header_pointer(skb, ptr, sizeof(_fhdr),
- &_fhdr);
- if (fh == NULL) {
- sb_add(m, "TRUNCATED ");
- return;
- }
-
- /* Max length: 6 "65535 " */
- sb_add(m, "%u ", ntohs(fh->frag_off) & 0xFFF8);
-
- /* Max length: 11 "INCOMPLETE " */
- if (fh->frag_off & htons(0x0001))
- sb_add(m, "INCOMPLETE ");
-
- sb_add(m, "ID:%08x ", ntohl(fh->identification));
-
- if (ntohs(fh->frag_off) & 0xFFF8)
- fragment = 1;
-
- hdrlen = 8;
-
- break;
- }
- case IPPROTO_DSTOPTS:
- case IPPROTO_ROUTING:
- case IPPROTO_HOPOPTS:
- if (fragment) {
- if (logflags & XT_LOG_IPOPT)
- sb_add(m, ")");
- return;
- }
- hdrlen = ipv6_optlen(hp);
- break;
- /* Max Length */
- case IPPROTO_AH:
- if (logflags & XT_LOG_IPOPT) {
- struct ip_auth_hdr _ahdr;
- const struct ip_auth_hdr *ah;
-
- /* Max length: 3 "AH " */
- sb_add(m, "AH ");
-
- if (fragment) {
- sb_add(m, ")");
- return;
- }
-
- ah = skb_header_pointer(skb, ptr, sizeof(_ahdr),
- &_ahdr);
- if (ah == NULL) {
- /*
- * Max length: 26 "INCOMPLETE [65535
- * bytes] )"
- */
- sb_add(m, "INCOMPLETE [%u bytes] )",
- skb->len - ptr);
- return;
- }
-
- /* Length: 15 "SPI=0xF1234567 */
- sb_add(m, "SPI=0x%x ", ntohl(ah->spi));
-
- }
-
- hdrlen = (hp->hdrlen+2)<<2;
- break;
- case IPPROTO_ESP:
- if (logflags & XT_LOG_IPOPT) {
- struct ip_esp_hdr _esph;
- const struct ip_esp_hdr *eh;
-
- /* Max length: 4 "ESP " */
- sb_add(m, "ESP ");
-
- if (fragment) {
- sb_add(m, ")");
- return;
- }
-
- /*
- * Max length: 26 "INCOMPLETE [65535 bytes] )"
- */
- eh = skb_header_pointer(skb, ptr, sizeof(_esph),
- &_esph);
- if (eh == NULL) {
- sb_add(m, "INCOMPLETE [%u bytes] )",
- skb->len - ptr);
- return;
- }
-
- /* Length: 16 "SPI=0xF1234567 )" */
- sb_add(m, "SPI=0x%x )", ntohl(eh->spi));
-
- }
- return;
- default:
- /* Max length: 20 "Unknown Ext Hdr 255" */
- sb_add(m, "Unknown Ext Hdr %u", currenthdr);
- return;
- }
- if (logflags & XT_LOG_IPOPT)
- sb_add(m, ") ");
-
- currenthdr = hp->nexthdr;
- ptr += hdrlen;
- }
-
- switch (currenthdr) {
- case IPPROTO_TCP:
- if (dump_tcp_header(m, skb, currenthdr, fragment, ptr,
- logflags))
- return;
- break;
- case IPPROTO_UDP:
- case IPPROTO_UDPLITE:
- if (dump_udp_header(m, skb, currenthdr, fragment, ptr))
- return;
- break;
- case IPPROTO_ICMPV6: {
- struct icmp6hdr _icmp6h;
- const struct icmp6hdr *ic;
-
- /* Max length: 13 "PROTO=ICMPv6 " */
- sb_add(m, "PROTO=ICMPv6 ");
-
- if (fragment)
- break;
-
- /* Max length: 25 "INCOMPLETE [65535 bytes] " */
- ic = skb_header_pointer(skb, ptr, sizeof(_icmp6h), &_icmp6h);
- if (ic == NULL) {
- sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - ptr);
- return;
- }
-
- /* Max length: 18 "TYPE=255 CODE=255 " */
- sb_add(m, "TYPE=%u CODE=%u ", ic->icmp6_type, ic->icmp6_code);
-
- switch (ic->icmp6_type) {
- case ICMPV6_ECHO_REQUEST:
- case ICMPV6_ECHO_REPLY:
- /* Max length: 19 "ID=65535 SEQ=65535 " */
- sb_add(m, "ID=%u SEQ=%u ",
- ntohs(ic->icmp6_identifier),
- ntohs(ic->icmp6_sequence));
- break;
- case ICMPV6_MGM_QUERY:
- case ICMPV6_MGM_REPORT:
- case ICMPV6_MGM_REDUCTION:
- break;
-
- case ICMPV6_PARAMPROB:
- /* Max length: 17 "POINTER=ffffffff " */
- sb_add(m, "POINTER=%08x ", ntohl(ic->icmp6_pointer));
- /* Fall through */
- case ICMPV6_DEST_UNREACH:
- case ICMPV6_PKT_TOOBIG:
- case ICMPV6_TIME_EXCEED:
- /* Max length: 3+maxlen */
- if (recurse) {
- sb_add(m, "[");
- dump_ipv6_packet(m, info, skb,
- ptr + sizeof(_icmp6h), 0);
- sb_add(m, "] ");
- }
-
- /* Max length: 10 "MTU=65535 " */
- if (ic->icmp6_type == ICMPV6_PKT_TOOBIG)
- sb_add(m, "MTU=%u ", ntohl(ic->icmp6_mtu));
- }
- break;
- }
- /* Max length: 10 "PROTO=255 " */
- default:
- sb_add(m, "PROTO=%u ", currenthdr);
- }
-
- /* Max length: 15 "UID=4294967295 " */
- if ((logflags & XT_LOG_UID) && recurse)
- dump_sk_uid_gid(m, skb->sk);
-
- /* Max length: 16 "MARK=0xFFFFFFFF " */
- if (recurse && skb->mark)
- sb_add(m, "MARK=0x%x ", skb->mark);
-}
-
-static void dump_ipv6_mac_header(struct sbuff *m,
- const struct nf_loginfo *info,
- const struct sk_buff *skb)
-{
- struct net_device *dev = skb->dev;
- unsigned int logflags = 0;
-
- if (info->type == NF_LOG_TYPE_LOG)
- logflags = info->u.log.logflags;
-
- if (!(logflags & XT_LOG_MACDECODE))
- goto fallback;
-
- switch (dev->type) {
- case ARPHRD_ETHER:
- sb_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
- eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
- ntohs(eth_hdr(skb)->h_proto));
- return;
- default:
- break;
- }
-
-fallback:
- sb_add(m, "MAC=");
- if (dev->hard_header_len &&
- skb->mac_header != skb->network_header) {
- const unsigned char *p = skb_mac_header(skb);
- unsigned int len = dev->hard_header_len;
- unsigned int i;
-
- if (dev->type == ARPHRD_SIT) {
- p -= ETH_HLEN;
-
- if (p < skb->head)
- p = NULL;
- }
-
- if (p != NULL) {
- sb_add(m, "%02x", *p++);
- for (i = 1; i < len; i++)
- sb_add(m, ":%02x", *p++);
- }
- sb_add(m, " ");
-
- if (dev->type == ARPHRD_SIT) {
- const struct iphdr *iph =
- (struct iphdr *)skb_mac_header(skb);
- sb_add(m, "TUNNEL=%pI4->%pI4 ", &iph->saddr,
- &iph->daddr);
- }
- } else
- sb_add(m, " ");
-}
-
-static void
-ip6t_log_packet(struct net *net,
- u_int8_t pf,
- unsigned int hooknum,
- const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const struct nf_loginfo *loginfo,
- const char *prefix)
-{
- struct sbuff *m;
-
- /* FIXME: Disabled from containers until syslog ns is supported */
- if (!net_eq(net, &init_net))
- return;
-
- m = sb_open();
-
- if (!loginfo)
- loginfo = &default_loginfo;
-
- log_packet_common(m, pf, hooknum, skb, in, out, loginfo, prefix);
-
- if (in != NULL)
- dump_ipv6_mac_header(m, loginfo, skb);
-
- dump_ipv6_packet(m, loginfo, skb, skb_network_offset(skb), 1);
-
- sb_close(m);
-}
-#endif
static unsigned int
log_tg(struct sk_buff *skb, const struct xt_action_param *par)
@@ -839,17 +39,8 @@ log_tg(struct sk_buff *skb, const struct xt_action_param *par)
li.u.log.level = loginfo->level;
li.u.log.logflags = loginfo->logflags;
- if (par->family == NFPROTO_IPV4)
- ipt_log_packet(net, NFPROTO_IPV4, par->hooknum, skb, par->in,
- par->out, &li, loginfo->prefix);
-#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
- else if (par->family == NFPROTO_IPV6)
- ip6t_log_packet(net, NFPROTO_IPV6, par->hooknum, skb, par->in,
- par->out, &li, loginfo->prefix);
-#endif
- else
- WARN_ON_ONCE(1);
-
+ nf_log_packet(net, par->family, par->hooknum, skb, par->in, par->out,
+ &li, "%s", loginfo->prefix);
return XT_CONTINUE;
}
@@ -870,7 +61,12 @@ static int log_tg_check(const struct xt_tgchk_param *par)
return -EINVAL;
}
- return 0;
+ return nf_logger_find_get(par->family, NF_LOG_TYPE_LOG);
+}
+
+static void log_tg_destroy(const struct xt_tgdtor_param *par)
+{
+ nf_logger_put(par->family, NF_LOG_TYPE_LOG);
}
static struct xt_target log_tg_regs[] __read_mostly = {
@@ -880,6 +76,7 @@ static struct xt_target log_tg_regs[] __read_mostly = {
.target = log_tg,
.targetsize = sizeof(struct xt_log_info),
.checkentry = log_tg_check,
+ .destroy = log_tg_destroy,
.me = THIS_MODULE,
},
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
@@ -889,78 +86,19 @@ static struct xt_target log_tg_regs[] __read_mostly = {
.target = log_tg,
.targetsize = sizeof(struct xt_log_info),
.checkentry = log_tg_check,
+ .destroy = log_tg_destroy,
.me = THIS_MODULE,
},
#endif
};
-static struct nf_logger ipt_log_logger __read_mostly = {
- .name = "ipt_LOG",
- .logfn = &ipt_log_packet,
- .me = THIS_MODULE,
-};
-
-#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
-static struct nf_logger ip6t_log_logger __read_mostly = {
- .name = "ip6t_LOG",
- .logfn = &ip6t_log_packet,
- .me = THIS_MODULE,
-};
-#endif
-
-static int __net_init log_net_init(struct net *net)
-{
- nf_log_set(net, NFPROTO_IPV4, &ipt_log_logger);
-#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
- nf_log_set(net, NFPROTO_IPV6, &ip6t_log_logger);
-#endif
- return 0;
-}
-
-static void __net_exit log_net_exit(struct net *net)
-{
- nf_log_unset(net, &ipt_log_logger);
-#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
- nf_log_unset(net, &ip6t_log_logger);
-#endif
-}
-
-static struct pernet_operations log_net_ops = {
- .init = log_net_init,
- .exit = log_net_exit,
-};
-
static int __init log_tg_init(void)
{
- int ret;
-
- ret = register_pernet_subsys(&log_net_ops);
- if (ret < 0)
- goto err_pernet;
-
- ret = xt_register_targets(log_tg_regs, ARRAY_SIZE(log_tg_regs));
- if (ret < 0)
- goto err_target;
-
- nf_log_register(NFPROTO_IPV4, &ipt_log_logger);
-#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
- nf_log_register(NFPROTO_IPV6, &ip6t_log_logger);
-#endif
- return 0;
-
-err_target:
- unregister_pernet_subsys(&log_net_ops);
-err_pernet:
- return ret;
+ return xt_register_targets(log_tg_regs, ARRAY_SIZE(log_tg_regs));
}
static void __exit log_tg_exit(void)
{
- unregister_pernet_subsys(&log_net_ops);
- nf_log_unregister(&ipt_log_logger);
-#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
- nf_log_unregister(&ip6t_log_logger);
-#endif
xt_unregister_targets(log_tg_regs, ARRAY_SIZE(log_tg_regs));
}
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
index bbffdbdaf60..dffee9d47ec 100644
--- a/net/netfilter/xt_bpf.c
+++ b/net/netfilter/xt_bpf.c
@@ -28,7 +28,7 @@ static int bpf_mt_check(const struct xt_mtchk_param *par)
program.len = info->bpf_program_num_elem;
program.filter = info->bpf_program;
- if (sk_unattached_filter_create(&info->filter, &program)) {
+ if (bpf_prog_create(&info->filter, &program)) {
pr_info("bpf: check failed: parse error\n");
return -EINVAL;
}
@@ -40,13 +40,13 @@ static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_bpf_info *info = par->matchinfo;
- return SK_RUN_FILTER(info->filter, skb);
+ return BPF_PROG_RUN(info->filter, skb);
}
static void bpf_mt_destroy(const struct xt_mtdtor_param *par)
{
const struct xt_bpf_info *info = par->matchinfo;
- sk_unattached_filter_destroy(info->filter);
+ bpf_prog_destroy(info->filter);
}
static struct xt_match bpf_mt_reg __read_mostly = {
diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c
index f4e83300532..7198d660b4d 100644
--- a/net/netfilter/xt_cgroup.c
+++ b/net/netfilter/xt_cgroup.c
@@ -31,7 +31,7 @@ static int cgroup_mt_check(const struct xt_mtchk_param *par)
if (info->invert & ~1)
return -EINVAL;
- return info->id ? 0 : -EINVAL;
+ return 0;
}
static bool
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index a3910fc2122..47dc6836830 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -104,7 +104,7 @@ struct xt_hashlimit_htable {
spinlock_t lock; /* lock for list_head */
u_int32_t rnd; /* random seed for hash */
unsigned int count; /* number entries in table */
- struct timer_list timer; /* timer for gc */
+ struct delayed_work gc_work;
/* seq_file stuff */
struct proc_dir_entry *pde;
@@ -213,7 +213,7 @@ dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent)
call_rcu_bh(&ent->rcu, dsthash_free_rcu);
ht->count--;
}
-static void htable_gc(unsigned long htlong);
+static void htable_gc(struct work_struct *work);
static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo,
u_int8_t family)
@@ -273,9 +273,9 @@ static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo,
}
hinfo->net = net;
- setup_timer(&hinfo->timer, htable_gc, (unsigned long)hinfo);
- hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval);
- add_timer(&hinfo->timer);
+ INIT_DEFERRABLE_WORK(&hinfo->gc_work, htable_gc);
+ queue_delayed_work(system_power_efficient_wq, &hinfo->gc_work,
+ msecs_to_jiffies(hinfo->cfg.gc_interval));
hlist_add_head(&hinfo->node, &hashlimit_net->htables);
@@ -300,29 +300,30 @@ static void htable_selective_cleanup(struct xt_hashlimit_htable *ht,
{
unsigned int i;
- /* lock hash table and iterate over it */
- spin_lock_bh(&ht->lock);
for (i = 0; i < ht->cfg.size; i++) {
struct dsthash_ent *dh;
struct hlist_node *n;
+
+ spin_lock_bh(&ht->lock);
hlist_for_each_entry_safe(dh, n, &ht->hash[i], node) {
if ((*select)(ht, dh))
dsthash_free(ht, dh);
}
+ spin_unlock_bh(&ht->lock);
+ cond_resched();
}
- spin_unlock_bh(&ht->lock);
}
-/* hash table garbage collector, run by timer */
-static void htable_gc(unsigned long htlong)
+static void htable_gc(struct work_struct *work)
{
- struct xt_hashlimit_htable *ht = (struct xt_hashlimit_htable *)htlong;
+ struct xt_hashlimit_htable *ht;
+
+ ht = container_of(work, struct xt_hashlimit_htable, gc_work.work);
htable_selective_cleanup(ht, select_gc);
- /* re-add the timer accordingly */
- ht->timer.expires = jiffies + msecs_to_jiffies(ht->cfg.gc_interval);
- add_timer(&ht->timer);
+ queue_delayed_work(system_power_efficient_wq,
+ &ht->gc_work, msecs_to_jiffies(ht->cfg.gc_interval));
}
static void htable_remove_proc_entry(struct xt_hashlimit_htable *hinfo)
@@ -341,7 +342,7 @@ static void htable_remove_proc_entry(struct xt_hashlimit_htable *hinfo)
static void htable_destroy(struct xt_hashlimit_htable *hinfo)
{
- del_timer_sync(&hinfo->timer);
+ cancel_delayed_work_sync(&hinfo->gc_work);
htable_remove_proc_entry(hinfo);
htable_selective_cleanup(hinfo, select_all);
kfree(hinfo->name);