summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorHarald Welte <laforge@netfilter.org>2005-08-09 19:28:03 -0700
committerDavid S. Miller <davem@sunset.davemloft.net>2005-08-29 15:31:24 -0700
commitac3247baf8ecadf168642e3898b0212c29c79715 (patch)
treef2b1c65f34c035491d921006efcf8b2e7a707785 /net
parentabc3bc58047efa72ee9c2e208cbeb73d261ad703 (diff)
[NETFILTER]: connection tracking event notifiers
This adds a notifier chain based event mechanism for ip_conntrack state changes. As opposed to the previous implementations in patch-o-matic, we do no longer need a field in the skb to achieve this. Thanks to the valuable input from Patrick McHardy and Rusty on the idea of a per_cpu implementation. Signed-off-by: Harald Welte <laforge@netfilter.org> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/ipv4/netfilter/Kconfig10
-rw-r--r--net/ipv4/netfilter/ip_conntrack_core.c122
-rw-r--r--net/ipv4/netfilter/ip_conntrack_ftp.c12
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_icmp.c1
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_sctp.c2
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_tcp.c4
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_udp.c3
-rw-r--r--net/ipv4/netfilter/ip_conntrack_standalone.c10
8 files changed, 154 insertions, 10 deletions
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 46d4cb1c06f..ff3393eba92 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -40,6 +40,16 @@ config IP_NF_CONNTRACK_MARK
of packets, but this mark value is kept in the conntrack session
instead of the individual packets.
+config IP_NF_CONNTRACK_EVENTS
+ bool "Connection tracking events"
+ depends on IP_NF_CONNTRACK
+ help
+ If this option is enabled, the connection tracking code will
+ provide a notifier chain that can be used by other kernel code
+ to get notified about changes in the connection tracking state.
+
+ IF unsure, say `N'.
+
config IP_NF_CT_PROTO_SCTP
tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)'
depends on IP_NF_CONNTRACK && EXPERIMENTAL
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 04c3414361d..caf89deae11 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -37,6 +37,7 @@
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/moduleparam.h>
+#include <linux/notifier.h>
/* ip_conntrack_lock protects the main hash table, protocol/helper/expected
registrations, conntrack timers*/
@@ -49,7 +50,7 @@
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
#include <linux/netfilter_ipv4/listhelp.h>
-#define IP_CONNTRACK_VERSION "2.1"
+#define IP_CONNTRACK_VERSION "2.2"
#if 0
#define DEBUGP printk
@@ -76,6 +77,81 @@ unsigned int ip_ct_log_invalid;
static LIST_HEAD(unconfirmed);
static int ip_conntrack_vmalloc;
+#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
+struct notifier_block *ip_conntrack_chain;
+struct notifier_block *ip_conntrack_expect_chain;
+
+DEFINE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache);
+
+static inline void __deliver_cached_events(struct ip_conntrack_ecache *ecache)
+{
+ if (is_confirmed(ecache->ct) && !is_dying(ecache->ct) && ecache->events)
+ notifier_call_chain(&ip_conntrack_chain, ecache->events,
+ ecache->ct);
+ ecache->events = 0;
+}
+
+void __ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ecache)
+{
+ __deliver_cached_events(ecache);
+}
+
+/* Deliver all cached events for a particular conntrack. This is called
+ * by code prior to async packet handling or freeing the skb */
+void
+ip_conntrack_deliver_cached_events_for(const struct ip_conntrack *ct)
+{
+ struct ip_conntrack_ecache *ecache =
+ &__get_cpu_var(ip_conntrack_ecache);
+
+ if (!ct)
+ return;
+
+ if (ecache->ct == ct) {
+ DEBUGP("ecache: delivering event for %p\n", ct);
+ __deliver_cached_events(ecache);
+ } else {
+ if (net_ratelimit())
+ printk(KERN_WARNING "ecache: want to deliver for %p, "
+ "but cache has %p\n", ct, ecache->ct);
+ }
+
+ /* signalize that events have already been delivered */
+ ecache->ct = NULL;
+}
+
+/* Deliver cached events for old pending events, if current conntrack != old */
+void ip_conntrack_event_cache_init(const struct sk_buff *skb)
+{
+ struct ip_conntrack *ct = (struct ip_conntrack *) skb->nfct;
+ struct ip_conntrack_ecache *ecache =
+ &__get_cpu_var(ip_conntrack_ecache);
+
+ /* take care of delivering potentially old events */
+ if (ecache->ct != ct) {
+ enum ip_conntrack_info ctinfo;
+ /* we have to check, since at startup the cache is NULL */
+ if (likely(ecache->ct)) {
+ DEBUGP("ecache: entered for different conntrack: "
+ "ecache->ct=%p, skb->nfct=%p. delivering "
+ "events\n", ecache->ct, ct);
+ __deliver_cached_events(ecache);
+ ip_conntrack_put(ecache->ct);
+ } else {
+ DEBUGP("ecache: entered for conntrack %p, "
+ "cache was clean before\n", ct);
+ }
+
+ /* initialize for this conntrack/packet */
+ ecache->ct = ip_conntrack_get(skb, &ctinfo);
+ /* ecache->events cleared by __deliver_cached_devents() */
+ } else {
+ DEBUGP("ecache: re-entered for conntrack %p.\n", ct);
+ }
+}
+
+#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */
+
DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
void
@@ -223,6 +299,8 @@ destroy_conntrack(struct nf_conntrack *nfct)
IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
IP_NF_ASSERT(!timer_pending(&ct->timeout));
+ set_bit(IPS_DYING_BIT, &ct->status);
+
/* To make sure we don't get any weird locking issues here:
* destroy_conntrack() MUST NOT be called with a write lock
* to ip_conntrack_lock!!! -HW */
@@ -261,6 +339,7 @@ static void death_by_timeout(unsigned long ul_conntrack)
{
struct ip_conntrack *ct = (void *)ul_conntrack;
+ ip_conntrack_event(IPCT_DESTROY, ct);
write_lock_bh(&ip_conntrack_lock);
/* Inside lock so preempt is disabled on module removal path.
* Otherwise we can get spurious warnings. */
@@ -374,6 +453,16 @@ __ip_conntrack_confirm(struct sk_buff **pskb)
set_bit(IPS_CONFIRMED_BIT, &ct->status);
CONNTRACK_STAT_INC(insert);
write_unlock_bh(&ip_conntrack_lock);
+ if (ct->helper)
+ ip_conntrack_event_cache(IPCT_HELPER, *pskb);
+#ifdef CONFIG_IP_NF_NAT_NEEDED
+ if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
+ test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
+ ip_conntrack_event_cache(IPCT_NATINFO, *pskb);
+#endif
+ ip_conntrack_event_cache(master_ct(ct) ?
+ IPCT_RELATED : IPCT_NEW, *pskb);
+
return NF_ACCEPT;
}
@@ -607,7 +696,7 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
struct ip_conntrack *ct;
enum ip_conntrack_info ctinfo;
struct ip_conntrack_protocol *proto;
- int set_reply;
+ int set_reply = 0;
int ret;
/* Previously seen (loopback or untracked)? Ignore. */
@@ -666,6 +755,8 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
IP_NF_ASSERT((*pskb)->nfct);
+ ip_conntrack_event_cache_init(*pskb);
+
ret = proto->packet(ct, *pskb, ctinfo);
if (ret < 0) {
/* Invalid: inverse of the return code tells
@@ -676,8 +767,8 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
return -ret;
}
- if (set_reply)
- set_bit(IPS_SEEN_REPLY_BIT, &ct->status);
+ if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
+ ip_conntrack_event_cache(IPCT_STATUS, *pskb);
return ret;
}
@@ -824,6 +915,7 @@ int ip_conntrack_expect_related(struct ip_conntrack_expect *expect)
evict_oldest_expect(expect->master);
ip_conntrack_expect_insert(expect);
+ ip_conntrack_expect_event(IPEXP_NEW, expect);
ret = 0;
out:
write_unlock_bh(&ip_conntrack_lock);
@@ -861,8 +953,10 @@ int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
static inline int unhelp(struct ip_conntrack_tuple_hash *i,
const struct ip_conntrack_helper *me)
{
- if (tuplehash_to_ctrack(i)->helper == me)
+ if (tuplehash_to_ctrack(i)->helper == me) {
+ ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i));
tuplehash_to_ctrack(i)->helper = NULL;
+ }
return 0;
}
@@ -924,6 +1018,7 @@ void ip_ct_refresh_acct(struct ip_conntrack *ct,
if (del_timer(&ct->timeout)) {
ct->timeout.expires = jiffies + extra_jiffies;
add_timer(&ct->timeout);
+ ip_conntrack_event_cache(IPCT_REFRESH, skb);
}
ct_add_counters(ct, ctinfo, skb);
write_unlock_bh(&ip_conntrack_lock);
@@ -1012,6 +1107,23 @@ ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data)
ip_conntrack_put(ct);
}
+
+#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
+ {
+ /* we need to deliver all cached events in order to drop
+ * the reference counts */
+ int cpu;
+ for_each_cpu(cpu) {
+ struct ip_conntrack_ecache *ecache =
+ &per_cpu(ip_conntrack_ecache, cpu);
+ if (ecache->ct) {
+ __ip_ct_deliver_cached_events(ecache);
+ ip_conntrack_put(ecache->ct);
+ ecache->ct = NULL;
+ }
+ }
+ }
+#endif
}
/* Fast function for those who don't want to parse /proc (and I don't
diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c
index 7a3b773be3f..9658896f899 100644
--- a/net/ipv4/netfilter/ip_conntrack_ftp.c
+++ b/net/ipv4/netfilter/ip_conntrack_ftp.c
@@ -262,7 +262,8 @@ static int find_nl_seq(u32 seq, const struct ip_ct_ftp_master *info, int dir)
}
/* We don't update if it's older than what we have. */
-static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir)
+static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir,
+ struct sk_buff *skb)
{
unsigned int i, oldest = NUM_SEQ_TO_REMEMBER;
@@ -276,10 +277,13 @@ static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir)
oldest = i;
}
- if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER)
+ if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) {
info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq;
- else if (oldest != NUM_SEQ_TO_REMEMBER)
+ ip_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb);
+ } else if (oldest != NUM_SEQ_TO_REMEMBER) {
info->seq_aft_nl[dir][oldest] = nl_seq;
+ ip_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb);
+ }
}
static int help(struct sk_buff **pskb,
@@ -439,7 +443,7 @@ out_update_nl:
/* Now if this ends in \n, update ftp info. Seq may have been
* adjusted by NAT code. */
if (ends_in_nl)
- update_nl_seq(seq, ct_ftp_info,dir);
+ update_nl_seq(seq, ct_ftp_info,dir, *pskb);
out:
spin_unlock_bh(&ip_ftp_lock);
return ret;
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
index 602c74db325..dca1f63d6f5 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
@@ -102,6 +102,7 @@ static int icmp_packet(struct ip_conntrack *ct,
ct->timeout.function((unsigned long)ct);
} else {
atomic_inc(&ct->proto.icmp.count);
+ ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
ip_ct_refresh_acct(ct, ctinfo, skb, ip_ct_icmp_timeout);
}
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
index 31d75390bf1..3d5f878a07d 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
@@ -404,6 +404,8 @@ static int sctp_packet(struct ip_conntrack *conntrack,
}
conntrack->proto.sctp.state = newconntrack;
+ if (oldsctpstate != newconntrack)
+ ip_conntrack_event_cache(IPCT_PROTOINFO, skb);
write_unlock_bh(&sctp_lock);
}
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index 809dfed766d..a569ad1ee4d 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -973,6 +973,10 @@ static int tcp_packet(struct ip_conntrack *conntrack,
? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state];
write_unlock_bh(&tcp_lock);
+ ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
+ if (new_state != old_state)
+ ip_conntrack_event_cache(IPCT_PROTOINFO, skb);
+
if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
/* If only reply is a RST, we can consider ourselves not to
have an established connection: this is a fairly common
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
index 8c1eaba098d..6066eaf4d82 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
@@ -73,7 +73,8 @@ static int udp_packet(struct ip_conntrack *conntrack,
ip_ct_refresh_acct(conntrack, ctinfo, skb,
ip_ct_udp_timeout_stream);
/* Also, more likely to be important, and not a probe */
- set_bit(IPS_ASSURED_BIT, &conntrack->status);
+ if (!test_and_set_bit(IPS_ASSURED_BIT, &conntrack->status))
+ ip_conntrack_event_cache(IPCT_STATUS, skb);
} else
ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_udp_timeout);
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index dccd4abab7a..f0880004115 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -402,6 +402,7 @@ static unsigned int ip_confirm(unsigned int hooknum,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
+ ip_conntrack_event_cache_init(*pskb);
/* We've seen it coming out the other side: confirm it */
return ip_conntrack_confirm(pskb);
}
@@ -419,6 +420,7 @@ static unsigned int ip_conntrack_help(unsigned int hooknum,
ct = ip_conntrack_get(*pskb, &ctinfo);
if (ct && ct->helper) {
unsigned int ret;
+ ip_conntrack_event_cache_init(*pskb);
ret = ct->helper->help(pskb, ct, ctinfo);
if (ret != NF_ACCEPT)
return ret;
@@ -889,6 +891,7 @@ static int init_or_cleanup(int init)
return ret;
cleanup:
+ synchronize_net();
#ifdef CONFIG_SYSCTL
unregister_sysctl_table(ip_ct_sysctl_header);
cleanup_localinops:
@@ -971,6 +974,13 @@ void need_ip_conntrack(void)
{
}
+#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
+EXPORT_SYMBOL_GPL(ip_conntrack_chain);
+EXPORT_SYMBOL_GPL(ip_conntrack_expect_chain);
+EXPORT_SYMBOL_GPL(ip_conntrack_register_notifier);
+EXPORT_SYMBOL_GPL(ip_conntrack_unregister_notifier);
+EXPORT_PER_CPU_SYMBOL_GPL(ip_conntrack_ecache);
+#endif
EXPORT_SYMBOL(ip_conntrack_protocol_register);
EXPORT_SYMBOL(ip_conntrack_protocol_unregister);
EXPORT_SYMBOL(ip_ct_get_tuple);