From 76108cea065cda58366d16a7eb6ca90d717a1396 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:00 +0200 Subject: netfilter: Use unsigned types for hooknum and pf vars and (try to) consistently use u_int8_t for the L3 family. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_core.h | 2 +- include/net/netfilter/nf_conntrack_expect.h | 2 +- include/net/netfilter/nf_conntrack_l4proto.h | 4 ++-- include/net/netfilter/nf_log.h | 8 ++++---- include/net/netfilter/nf_queue.h | 6 +++--- 5 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index a8177121093..05760d6a706 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -20,7 +20,7 @@ /* This header is used to share core functionality between the standalone connection tracking module, and the compatibility layer's use of connection tracking. */ -extern unsigned int nf_conntrack_in(int pf, +extern unsigned int nf_conntrack_in(u_int8_t pf, unsigned int hooknum, struct sk_buff *skb); diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index dfdf4b45947..4c4d894cb9b 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -86,7 +86,7 @@ void nf_ct_unexpect_related(struct nf_conntrack_expect *exp); /* Allocate space for an expectation: this is mandatory before calling nf_ct_expect_related. You will have to call put afterwards. */ struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me); -void nf_ct_expect_init(struct nf_conntrack_expect *, unsigned int, int, +void nf_ct_expect_init(struct nf_conntrack_expect *, unsigned int, u_int8_t, const union nf_inet_addr *, const union nf_inet_addr *, u_int8_t, const __be16 *, const __be16 *); diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 723df9d1cc3..d4376e97bae 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -39,7 +39,7 @@ struct nf_conntrack_l4proto const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - int pf, + u_int8_t pf, unsigned int hooknum); /* Called when a new connection for this protocol found; @@ -52,7 +52,7 @@ struct nf_conntrack_l4proto int (*error)(struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, - int pf, unsigned int hooknum); + u_int8_t pf, unsigned int hooknum); /* Print out the per-protocol part of the tuple. Return like seq_* */ int (*print_tuple)(struct seq_file *s, diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h index 8c6b5ae4553..7182c06974f 100644 --- a/include/net/netfilter/nf_log.h +++ b/include/net/netfilter/nf_log.h @@ -28,7 +28,7 @@ struct nf_loginfo { } u; }; -typedef void nf_logfn(unsigned int pf, +typedef void nf_logfn(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, @@ -43,12 +43,12 @@ struct nf_logger { }; /* Function to register/unregister log function. */ -int nf_log_register(int pf, const struct nf_logger *logger); +int nf_log_register(u_int8_t pf, const struct nf_logger *logger); void nf_log_unregister(const struct nf_logger *logger); -void nf_log_unregister_pf(int pf); +void nf_log_unregister_pf(u_int8_t pf); /* Calls the registered backend logging function */ -void nf_log_packet(int pf, +void nf_log_packet(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index d030044e923..252fd1010b7 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -8,7 +8,7 @@ struct nf_queue_entry { unsigned int id; struct nf_hook_ops *elem; - int pf; + u_int8_t pf; unsigned int hook; struct net_device *indev; struct net_device *outdev; @@ -24,9 +24,9 @@ struct nf_queue_handler { char *name; }; -extern int nf_register_queue_handler(int pf, +extern int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh); -extern int nf_unregister_queue_handler(int pf, +extern int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh); extern void nf_unregister_queue_handlers(const struct nf_queue_handler *qh); extern void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict); -- cgit v1.2.3-70-g09d2 From dfdb8d791877052bbb527d9688d94a064721d8f7 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:02 +0200 Subject: netfilter: netns nf_conntrack: add netns boilerplate One comment: #ifdefs around #include is necessary to overcome amazing compile breakages in NOTRACK-in-netns patch (see below). Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/net_namespace.h | 6 ++++++ include/net/netfilter/nf_conntrack_core.h | 4 ++-- include/net/netns/conntrack.h | 6 ++++++ net/netfilter/nf_conntrack_core.c | 4 ++-- net/netfilter/nf_conntrack_expect.c | 4 ++-- net/netfilter/nf_conntrack_standalone.c | 21 ++++++++++++++++++--- 6 files changed, 36 insertions(+), 9 deletions(-) create mode 100644 include/net/netns/conntrack.h (limited to 'include/net') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index a8eb43cf0c7..708009be88b 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -16,6 +16,9 @@ #include #include #include +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#include +#endif struct proc_dir_entry; struct net_device; @@ -67,6 +70,9 @@ struct net { #endif #ifdef CONFIG_NETFILTER struct netns_xt xt; +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) + struct netns_ct ct; +#endif #endif struct net_generic *gen; }; diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 05760d6a706..532aa200cbc 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -24,8 +24,8 @@ extern unsigned int nf_conntrack_in(u_int8_t pf, unsigned int hooknum, struct sk_buff *skb); -extern int nf_conntrack_init(void); -extern void nf_conntrack_cleanup(void); +extern int nf_conntrack_init(struct net *net); +extern void nf_conntrack_cleanup(struct net *net); extern int nf_conntrack_proto_init(void); extern void nf_conntrack_proto_fini(void); diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h new file mode 100644 index 00000000000..82d80b83477 --- /dev/null +++ b/include/net/netns/conntrack.h @@ -0,0 +1,6 @@ +#ifndef __NETNS_CONNTRACK_H +#define __NETNS_CONNTRACK_H + +struct netns_ct { +}; +#endif diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 6aaf64b5ded..ee79e932589 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1006,7 +1006,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_flush); /* Mishearing the voices in his head, our hero wonders how he's supposed to kill the mall. */ -void nf_conntrack_cleanup(void) +void nf_conntrack_cleanup(struct net *net) { rcu_assign_pointer(ip_ct_attach, NULL); @@ -1120,7 +1120,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize); module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, &nf_conntrack_htable_size, 0600); -int __init nf_conntrack_init(void) +int nf_conntrack_init(struct net *net) { int max_factor = 8; int ret; diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 990fa12f2ee..e6a79f2a7c5 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -537,7 +537,7 @@ static const struct file_operations exp_file_ops = { }; #endif /* CONFIG_PROC_FS */ -static int __init exp_proc_init(void) +static int exp_proc_init(void) { #ifdef CONFIG_PROC_FS struct proc_dir_entry *proc; @@ -558,7 +558,7 @@ static void exp_proc_remove(void) module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0600); -int __init nf_conntrack_expect_init(void) +int nf_conntrack_expect_init(void) { int err = -ENOMEM; diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 8509db14670..81dec17196d 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -440,11 +440,26 @@ static void nf_conntrack_standalone_fini_sysctl(void) } #endif /* CONFIG_SYSCTL */ +static int nf_conntrack_net_init(struct net *net) +{ + return nf_conntrack_init(net); +} + +static void nf_conntrack_net_exit(struct net *net) +{ + nf_conntrack_cleanup(net); +} + +static struct pernet_operations nf_conntrack_net_ops = { + .init = nf_conntrack_net_init, + .exit = nf_conntrack_net_exit, +}; + static int __init nf_conntrack_standalone_init(void) { int ret; - ret = nf_conntrack_init(); + ret = register_pernet_subsys(&nf_conntrack_net_ops); if (ret < 0) goto out; ret = nf_conntrack_standalone_init_proc(); @@ -458,7 +473,7 @@ static int __init nf_conntrack_standalone_init(void) out_sysctl: nf_conntrack_standalone_fini_proc(); out_proc: - nf_conntrack_cleanup(); + unregister_pernet_subsys(&nf_conntrack_net_ops); out: return ret; } @@ -467,7 +482,7 @@ static void __exit nf_conntrack_standalone_fini(void) { nf_conntrack_standalone_fini_sysctl(); nf_conntrack_standalone_fini_proc(); - nf_conntrack_cleanup(); + unregister_pernet_subsys(&nf_conntrack_net_ops); } module_init(nf_conntrack_standalone_init); -- cgit v1.2.3-70-g09d2 From 5a1fb391d881905e89623d78858d05b248cbc86a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:02 +0200 Subject: netfilter: netns nf_conntrack: add ->ct_net -- pointer from conntrack to netns Conntrack (struct nf_conn) gets pointer to netns: ->ct_net -- netns in which it was created. It comes from netdevice. ->ct_net is write-once field. Every conntrack in system has ->ct_net initialized, no exceptions. ->ct_net doesn't pin netns: conntracks are recycled after timeouts and pinning background traffic will prevent netns from even starting shutdown sequence. Right now every conntrack is created in init_net. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack.h | 18 ++++++++++++++++-- net/netfilter/nf_conntrack_core.c | 17 +++++++++++++---- net/netfilter/nf_conntrack_netlink.c | 2 +- 3 files changed, 30 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 0741ad592da..2b8d6efecf3 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -123,7 +123,9 @@ struct nf_conn /* Extensions */ struct nf_ct_ext *ext; - +#ifdef CONFIG_NET_NS + struct net *ct_net; +#endif struct rcu_head rcu; }; @@ -147,6 +149,17 @@ static inline u_int8_t nf_ct_protonum(const struct nf_conn *ct) /* get master conntrack via master expectation */ #define master_ct(conntr) (conntr->master) +extern struct net init_net; + +static inline struct net *nf_ct_net(const struct nf_conn *ct) +{ +#ifdef CONFIG_NET_NS + return ct->ct_net; +#else + return &init_net; +#endif +} + /* Alter reply tuple (maybe alter helper). */ extern void nf_conntrack_alter_reply(struct nf_conn *ct, @@ -251,7 +264,8 @@ extern void nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data); extern void nf_conntrack_free(struct nf_conn *ct); extern struct nf_conn * -nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, +nf_conntrack_alloc(struct net *net, + const struct nf_conntrack_tuple *orig, const struct nf_conntrack_tuple *repl, gfp_t gfp); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index ee79e932589..cefc338f6e5 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -464,7 +464,8 @@ static noinline int early_drop(unsigned int hash) return dropped; } -struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, +struct nf_conn *nf_conntrack_alloc(struct net *net, + const struct nf_conntrack_tuple *orig, const struct nf_conntrack_tuple *repl, gfp_t gfp) { @@ -503,6 +504,9 @@ struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl; /* Don't set timer yet: wait for confirmation */ setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct); +#ifdef CONFIG_NET_NS + ct->ct_net = net; +#endif INIT_RCU_HEAD(&ct->rcu); return ct; @@ -528,7 +532,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free); /* Allocate a new conntrack: we return -ENOMEM if classification failed due to stress. Otherwise it really is unclassifiable. */ static struct nf_conntrack_tuple_hash * -init_conntrack(const struct nf_conntrack_tuple *tuple, +init_conntrack(struct net *net, + const struct nf_conntrack_tuple *tuple, struct nf_conntrack_l3proto *l3proto, struct nf_conntrack_l4proto *l4proto, struct sk_buff *skb, @@ -544,7 +549,7 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, return NULL; } - ct = nf_conntrack_alloc(tuple, &repl_tuple, GFP_ATOMIC); + ct = nf_conntrack_alloc(net, tuple, &repl_tuple, GFP_ATOMIC); if (ct == NULL || IS_ERR(ct)) { pr_debug("Can't allocate conntrack.\n"); return (struct nf_conntrack_tuple_hash *)ct; @@ -631,7 +636,8 @@ resolve_normal_ct(struct sk_buff *skb, /* look for tuple match */ h = nf_conntrack_find_get(&tuple); if (!h) { - h = init_conntrack(&tuple, l3proto, l4proto, skb, dataoff); + h = init_conntrack(&init_net, &tuple, l3proto, l4proto, skb, + dataoff); if (!h) return NULL; if (IS_ERR(h)) @@ -1185,6 +1191,9 @@ int nf_conntrack_init(struct net *net) /* Set up fake conntrack: - to never be deleted, not in any hashes */ +#ifdef CONFIG_NET_NS + nf_conntrack_untracked.ct_net = &init_net; +#endif atomic_set(&nf_conntrack_untracked.ct_general.use, 1); /* - and look it like as a confirmed connection */ set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index a8752031adc..da3cdc8db70 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1125,7 +1125,7 @@ ctnetlink_create_conntrack(struct nlattr *cda[], struct nf_conn_help *help; struct nf_conntrack_helper *helper; - ct = nf_conntrack_alloc(otuple, rtuple, GFP_KERNEL); + ct = nf_conntrack_alloc(&init_net, otuple, rtuple, GFP_KERNEL); if (ct == NULL || IS_ERR(ct)) return -ENOMEM; -- cgit v1.2.3-70-g09d2 From 49ac8713b6d064adf7474080fdccebd7cce76be0 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:03 +0200 Subject: netfilter: netns nf_conntrack: per-netns conntrack count Sysctls and proc files are stubbed to init_net's one. This is temporary. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack.h | 1 - include/net/netns/conntrack.h | 3 +++ net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | 2 +- net/netfilter/nf_conntrack_core.c | 18 ++++++++---------- net/netfilter/nf_conntrack_standalone.c | 4 ++-- 6 files changed, 15 insertions(+), 15 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 2b8d6efecf3..5999c5313d0 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -288,7 +288,6 @@ static inline int nf_ct_is_untracked(const struct sk_buff *skb) extern int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp); extern unsigned int nf_conntrack_htable_size; extern int nf_conntrack_checksum; -extern atomic_t nf_conntrack_count; extern int nf_conntrack_max; DECLARE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat); diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 82d80b83477..edf84714d7c 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -1,6 +1,9 @@ #ifndef __NETNS_CONNTRACK_H #define __NETNS_CONNTRACK_H +#include + struct netns_ct { + atomic_t count; }; #endif diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 5a955c44036..31abee3e29f 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -254,7 +254,7 @@ static ctl_table ip_ct_sysctl_table[] = { { .ctl_name = NET_IPV4_NF_CONNTRACK_COUNT, .procname = "ip_conntrack_count", - .data = &nf_conntrack_count, + .data = &init_net.ct.count, .maxlen = sizeof(int), .mode = 0444, .proc_handler = &proc_dointvec, diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 3a020720e40..4556805027f 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -314,7 +314,7 @@ static void ct_cpu_seq_stop(struct seq_file *seq, void *v) static int ct_cpu_seq_show(struct seq_file *seq, void *v) { - unsigned int nr_conntracks = atomic_read(&nf_conntrack_count); + unsigned int nr_conntracks = atomic_read(&init_net.ct.count); const struct ip_conntrack_stat *st = v; if (v == SEQ_START_TOKEN) { diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index cefc338f6e5..8299b3490e7 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -44,10 +44,6 @@ DEFINE_SPINLOCK(nf_conntrack_lock); EXPORT_SYMBOL_GPL(nf_conntrack_lock); -/* nf_conntrack_standalone needs this */ -atomic_t nf_conntrack_count = ATOMIC_INIT(0); -EXPORT_SYMBOL_GPL(nf_conntrack_count); - unsigned int nf_conntrack_htable_size __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); @@ -477,13 +473,13 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, } /* We don't want any race condition at early drop stage */ - atomic_inc(&nf_conntrack_count); + atomic_inc(&net->ct.count); if (nf_conntrack_max && - unlikely(atomic_read(&nf_conntrack_count) > nf_conntrack_max)) { + unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) { unsigned int hash = hash_conntrack(orig); if (!early_drop(hash)) { - atomic_dec(&nf_conntrack_count); + atomic_dec(&net->ct.count); if (net_ratelimit()) printk(KERN_WARNING "nf_conntrack: table full, dropping" @@ -495,7 +491,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, ct = kmem_cache_zalloc(nf_conntrack_cachep, gfp); if (ct == NULL) { pr_debug("nf_conntrack_alloc: Can't alloc conntrack.\n"); - atomic_dec(&nf_conntrack_count); + atomic_dec(&net->ct.count); return ERR_PTR(-ENOMEM); } @@ -516,10 +512,11 @@ EXPORT_SYMBOL_GPL(nf_conntrack_alloc); static void nf_conntrack_free_rcu(struct rcu_head *head) { struct nf_conn *ct = container_of(head, struct nf_conn, rcu); + struct net *net = nf_ct_net(ct); nf_ct_ext_free(ct); kmem_cache_free(nf_conntrack_cachep, ct); - atomic_dec(&nf_conntrack_count); + atomic_dec(&net->ct.count); } void nf_conntrack_free(struct nf_conn *ct) @@ -1024,7 +1021,7 @@ void nf_conntrack_cleanup(struct net *net) nf_ct_event_cache_flush(); i_see_dead_people: nf_conntrack_flush(); - if (atomic_read(&nf_conntrack_count) != 0) { + if (atomic_read(&net->ct.count) != 0) { schedule(); goto i_see_dead_people; } @@ -1148,6 +1145,7 @@ int nf_conntrack_init(struct net *net) * entries. */ max_factor = 4; } + atomic_set(&net->ct.count, 0); nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, &nf_conntrack_vmalloc); if (!nf_conntrack_hash) { diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 81dec17196d..021b505907d 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -226,7 +226,7 @@ static void ct_cpu_seq_stop(struct seq_file *seq, void *v) static int ct_cpu_seq_show(struct seq_file *seq, void *v) { - unsigned int nr_conntracks = atomic_read(&nf_conntrack_count); + unsigned int nr_conntracks = atomic_read(&init_net.ct.count); const struct ip_conntrack_stat *st = v; if (v == SEQ_START_TOKEN) { @@ -338,7 +338,7 @@ static ctl_table nf_ct_sysctl_table[] = { { .ctl_name = NET_NF_CONNTRACK_COUNT, .procname = "nf_conntrack_count", - .data = &nf_conntrack_count, + .data = &init_net.ct.count, .maxlen = sizeof(int), .mode = 0444, .proc_handler = &proc_dointvec, -- cgit v1.2.3-70-g09d2 From 400dad39d1c33fe797e47326d87a3f54d0ac5181 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:03 +0200 Subject: netfilter: netns nf_conntrack: per-netns conntrack hash * make per-netns conntrack hash Other solution is to add ->ct_net pointer to tuplehashes and still has one hash, I tried that it's ugly and requires more code deep down in protocol modules et al. * propagate netns pointer to where needed, e. g. to conntrack iterators. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack.h | 6 +- include/net/netfilter/nf_conntrack_core.h | 3 +- include/net/netns/conntrack.h | 2 + net/ipv4/netfilter/ipt_MASQUERADE.c | 3 +- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +- .../netfilter/nf_conntrack_l3proto_ipv4_compat.c | 4 +- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 2 +- net/ipv4/netfilter/nf_nat_core.c | 2 +- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 2 +- net/netfilter/nf_conntrack_core.c | 74 +++++++++++----------- net/netfilter/nf_conntrack_helper.c | 2 +- net/netfilter/nf_conntrack_netlink.c | 16 ++--- net/netfilter/nf_conntrack_pptp.c | 2 +- net/netfilter/nf_conntrack_proto.c | 4 +- net/netfilter/nf_conntrack_standalone.c | 4 +- net/netfilter/xt_connlimit.c | 2 +- 16 files changed, 67 insertions(+), 63 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 5999c5313d0..f5447f14304 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -195,11 +195,11 @@ extern void nf_ct_free_hashtable(struct hlist_head *hash, int vmalloced, unsigned int size); extern struct nf_conntrack_tuple_hash * -__nf_conntrack_find(const struct nf_conntrack_tuple *tuple); +__nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple); extern void nf_conntrack_hash_insert(struct nf_conn *ct); -extern void nf_conntrack_flush(void); +extern void nf_conntrack_flush(struct net *net); extern bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff, u_int16_t l3num, @@ -261,7 +261,7 @@ extern struct nf_conn nf_conntrack_untracked; /* Iterate over all conntracks: if iter returns true, it's deleted. */ extern void -nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data); +nf_ct_iterate_cleanup(struct net *net, int (*iter)(struct nf_conn *i, void *data), void *data); extern void nf_conntrack_free(struct nf_conn *ct); extern struct nf_conn * nf_conntrack_alloc(struct net *net, diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 532aa200cbc..1c373564396 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -48,7 +48,7 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, /* Find a connection corresponding to a tuple. */ extern struct nf_conntrack_tuple_hash * -nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple); +nf_conntrack_find_get(struct net *net, const struct nf_conntrack_tuple *tuple); extern int __nf_conntrack_confirm(struct sk_buff *skb); @@ -71,7 +71,6 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_l3proto *l3proto, const struct nf_conntrack_l4proto *proto); -extern struct hlist_head *nf_conntrack_hash; extern spinlock_t nf_conntrack_lock ; extern struct hlist_head unconfirmed; diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index edf84714d7c..b767683f112 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -5,5 +5,7 @@ struct netns_ct { atomic_t count; + struct hlist_head *hash; + int hash_vmalloc; }; #endif diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 9a4822f8243..5e1c81791e5 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -129,7 +129,8 @@ static int masq_device_event(struct notifier_block *this, and forget them. */ NF_CT_ASSERT(dev->ifindex != 0); - nf_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex); + nf_ct_iterate_cleanup(&init_net, device_cmp, + (void *)(long)dev->ifindex); } return NOTIFY_DONE; diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 31abee3e29f..03dd108015c 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -323,7 +323,7 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) return -EINVAL; } - h = nf_conntrack_find_get(&tuple); + h = nf_conntrack_find_get(sock_net(sk), &tuple); if (h) { struct sockaddr_in sin; struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 4556805027f..8e0afdc2b13 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -32,7 +32,7 @@ static struct hlist_node *ct_get_first(struct seq_file *seq) for (st->bucket = 0; st->bucket < nf_conntrack_htable_size; st->bucket++) { - n = rcu_dereference(nf_conntrack_hash[st->bucket].first); + n = rcu_dereference(init_net.ct.hash[st->bucket].first); if (n) return n; } @@ -48,7 +48,7 @@ static struct hlist_node *ct_get_next(struct seq_file *seq, while (head == NULL) { if (++st->bucket >= nf_conntrack_htable_size) return NULL; - head = rcu_dereference(nf_conntrack_hash[st->bucket].first); + head = rcu_dereference(init_net.ct.hash[st->bucket].first); } return head; } diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index da8edcdaef3..daf346377b6 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -155,7 +155,7 @@ icmp_error_message(struct sk_buff *skb, *ctinfo = IP_CT_RELATED; - h = nf_conntrack_find_get(&innertuple); + h = nf_conntrack_find_get(&init_net, &innertuple); if (!h) { pr_debug("icmp_error_message: no match\n"); return -NF_ACCEPT; diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 6c6a3cba8d5..5d4a5b70da2 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -643,7 +643,7 @@ static int clean_nat(struct nf_conn *i, void *data) static void __exit nf_nat_cleanup(void) { - nf_ct_iterate_cleanup(&clean_nat, NULL); + nf_ct_iterate_cleanup(&init_net, &clean_nat, NULL); synchronize_rcu(); nf_ct_free_hashtable(bysource, nf_nat_vmalloced, nf_nat_htable_size); nf_ct_l3proto_put(l3proto); diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 5756f30ebc6..548cf4f15c0 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -156,7 +156,7 @@ icmpv6_error_message(struct sk_buff *skb, *ctinfo = IP_CT_RELATED; - h = nf_conntrack_find_get(&intuple); + h = nf_conntrack_find_get(&init_net, &intuple); if (!h) { pr_debug("icmpv6_error: no match\n"); return -NF_ACCEPT; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 8299b3490e7..da56b260552 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -50,15 +50,11 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); int nf_conntrack_max __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_max); -struct hlist_head *nf_conntrack_hash __read_mostly; -EXPORT_SYMBOL_GPL(nf_conntrack_hash); - struct nf_conn nf_conntrack_untracked __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_untracked); unsigned int nf_ct_log_invalid __read_mostly; HLIST_HEAD(unconfirmed); -static int nf_conntrack_vmalloc __read_mostly; static struct kmem_cache *nf_conntrack_cachep __read_mostly; DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat); @@ -242,7 +238,7 @@ static void death_by_timeout(unsigned long ul_conntrack) } struct nf_conntrack_tuple_hash * -__nf_conntrack_find(const struct nf_conntrack_tuple *tuple) +__nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_tuple_hash *h; struct hlist_node *n; @@ -252,7 +248,7 @@ __nf_conntrack_find(const struct nf_conntrack_tuple *tuple) * at least once for the stats anyway. */ local_bh_disable(); - hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash], hnode) { + hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnode) { if (nf_ct_tuple_equal(tuple, &h->tuple)) { NF_CT_STAT_INC(found); local_bh_enable(); @@ -268,13 +264,13 @@ EXPORT_SYMBOL_GPL(__nf_conntrack_find); /* Find a connection corresponding to a tuple. */ struct nf_conntrack_tuple_hash * -nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple) +nf_conntrack_find_get(struct net *net, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; rcu_read_lock(); - h = __nf_conntrack_find(tuple); + h = __nf_conntrack_find(net, tuple); if (h) { ct = nf_ct_tuplehash_to_ctrack(h); if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) @@ -290,10 +286,12 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct, unsigned int hash, unsigned int repl_hash) { + struct net *net = nf_ct_net(ct); + hlist_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode, - &nf_conntrack_hash[hash]); + &net->ct.hash[hash]); hlist_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnode, - &nf_conntrack_hash[repl_hash]); + &net->ct.hash[repl_hash]); } void nf_conntrack_hash_insert(struct nf_conn *ct) @@ -319,8 +317,10 @@ __nf_conntrack_confirm(struct sk_buff *skb) struct nf_conn_help *help; struct hlist_node *n; enum ip_conntrack_info ctinfo; + struct net *net; ct = nf_ct_get(skb, &ctinfo); + net = nf_ct_net(ct); /* ipt_REJECT uses nf_conntrack_attach to attach related ICMP/TCP RST packets in other direction. Actual packet @@ -347,11 +347,11 @@ __nf_conntrack_confirm(struct sk_buff *skb) /* See if there's one in the list already, including reverse: NAT could have grabbed it without realizing, since we're not in the hash. If there is, we lost race. */ - hlist_for_each_entry(h, n, &nf_conntrack_hash[hash], hnode) + hlist_for_each_entry(h, n, &net->ct.hash[hash], hnode) if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, &h->tuple)) goto out; - hlist_for_each_entry(h, n, &nf_conntrack_hash[repl_hash], hnode) + hlist_for_each_entry(h, n, &net->ct.hash[repl_hash], hnode) if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, &h->tuple)) goto out; @@ -394,6 +394,7 @@ int nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_conntrack) { + struct net *net = nf_ct_net(ignored_conntrack); struct nf_conntrack_tuple_hash *h; struct hlist_node *n; unsigned int hash = hash_conntrack(tuple); @@ -402,7 +403,7 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, * least once for the stats anyway. */ rcu_read_lock_bh(); - hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash], hnode) { + hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnode) { if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack && nf_ct_tuple_equal(tuple, &h->tuple)) { NF_CT_STAT_INC(found); @@ -421,7 +422,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken); /* There's a small race here where we may free a just-assured connection. Too bad: we're in trouble anyway. */ -static noinline int early_drop(unsigned int hash) +static noinline int early_drop(struct net *net, unsigned int hash) { /* Use oldest entry, which is roughly LRU */ struct nf_conntrack_tuple_hash *h; @@ -432,7 +433,7 @@ static noinline int early_drop(unsigned int hash) rcu_read_lock(); for (i = 0; i < nf_conntrack_htable_size; i++) { - hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash], + hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnode) { tmp = nf_ct_tuplehash_to_ctrack(h); if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) @@ -478,7 +479,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, if (nf_conntrack_max && unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) { unsigned int hash = hash_conntrack(orig); - if (!early_drop(hash)) { + if (!early_drop(net, hash)) { atomic_dec(&net->ct.count); if (net_ratelimit()) printk(KERN_WARNING @@ -631,7 +632,7 @@ resolve_normal_ct(struct sk_buff *skb, } /* look for tuple match */ - h = nf_conntrack_find_get(&tuple); + h = nf_conntrack_find_get(&init_net, &tuple); if (!h) { h = init_conntrack(&init_net, &tuple, l3proto, l4proto, skb, dataoff); @@ -941,7 +942,7 @@ static void nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb) /* Bring out ya dead! */ static struct nf_conn * -get_next_corpse(int (*iter)(struct nf_conn *i, void *data), +get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data), void *data, unsigned int *bucket) { struct nf_conntrack_tuple_hash *h; @@ -950,7 +951,7 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data), spin_lock_bh(&nf_conntrack_lock); for (; *bucket < nf_conntrack_htable_size; (*bucket)++) { - hlist_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnode) { + hlist_for_each_entry(h, n, &net->ct.hash[*bucket], hnode) { ct = nf_ct_tuplehash_to_ctrack(h); if (iter(ct, data)) goto found; @@ -969,13 +970,14 @@ found: return ct; } -void -nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data) +void nf_ct_iterate_cleanup(struct net *net, + int (*iter)(struct nf_conn *i, void *data), + void *data) { struct nf_conn *ct; unsigned int bucket = 0; - while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) { + while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) { /* Time to push up daises... */ if (del_timer(&ct->timeout)) death_by_timeout((unsigned long)ct); @@ -1001,9 +1003,9 @@ void nf_ct_free_hashtable(struct hlist_head *hash, int vmalloced, unsigned int s } EXPORT_SYMBOL_GPL(nf_ct_free_hashtable); -void nf_conntrack_flush(void) +void nf_conntrack_flush(struct net *net) { - nf_ct_iterate_cleanup(kill_all, NULL); + nf_ct_iterate_cleanup(net, kill_all, NULL); } EXPORT_SYMBOL_GPL(nf_conntrack_flush); @@ -1020,7 +1022,7 @@ void nf_conntrack_cleanup(struct net *net) nf_ct_event_cache_flush(); i_see_dead_people: - nf_conntrack_flush(); + nf_conntrack_flush(net); if (atomic_read(&net->ct.count) != 0) { schedule(); goto i_see_dead_people; @@ -1032,7 +1034,7 @@ void nf_conntrack_cleanup(struct net *net) rcu_assign_pointer(nf_ct_destroy, NULL); kmem_cache_destroy(nf_conntrack_cachep); - nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_vmalloc, + nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, nf_conntrack_htable_size); nf_conntrack_acct_fini(); @@ -1097,8 +1099,8 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) */ spin_lock_bh(&nf_conntrack_lock); for (i = 0; i < nf_conntrack_htable_size; i++) { - while (!hlist_empty(&nf_conntrack_hash[i])) { - h = hlist_entry(nf_conntrack_hash[i].first, + while (!hlist_empty(&init_net.ct.hash[i])) { + h = hlist_entry(init_net.ct.hash[i].first, struct nf_conntrack_tuple_hash, hnode); hlist_del_rcu(&h->hnode); bucket = __hash_conntrack(&h->tuple, hashsize, rnd); @@ -1106,12 +1108,12 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) } } old_size = nf_conntrack_htable_size; - old_vmalloced = nf_conntrack_vmalloc; - old_hash = nf_conntrack_hash; + old_vmalloced = init_net.ct.hash_vmalloc; + old_hash = init_net.ct.hash; nf_conntrack_htable_size = hashsize; - nf_conntrack_vmalloc = vmalloced; - nf_conntrack_hash = hash; + init_net.ct.hash_vmalloc = vmalloced; + init_net.ct.hash = hash; nf_conntrack_hash_rnd = rnd; spin_unlock_bh(&nf_conntrack_lock); @@ -1146,9 +1148,9 @@ int nf_conntrack_init(struct net *net) max_factor = 4; } atomic_set(&net->ct.count, 0); - nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, - &nf_conntrack_vmalloc); - if (!nf_conntrack_hash) { + net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, + &net->ct.hash_vmalloc); + if (!net->ct.hash) { printk(KERN_ERR "Unable to create nf_conntrack_hash\n"); goto err_out; } @@ -1207,7 +1209,7 @@ out_fini_proto: err_free_conntrack_slab: kmem_cache_destroy(nf_conntrack_cachep); err_free_hash: - nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_vmalloc, + nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, nf_conntrack_htable_size); err_out: return -ENOMEM; diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 8e0b4c8f62a..d91278dfdaf 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -159,7 +159,7 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) hlist_for_each_entry(h, n, &unconfirmed, hnode) unhelp(h, me); for (i = 0; i < nf_conntrack_htable_size; i++) { - hlist_for_each_entry(h, n, &nf_conntrack_hash[i], hnode) + hlist_for_each_entry(h, n, &init_net.ct.hash[i], hnode) unhelp(h, me); } spin_unlock_bh(&nf_conntrack_lock); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index da3cdc8db70..918a3358a12 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -549,7 +549,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) last = (struct nf_conn *)cb->args[1]; for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) { restart: - hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[cb->args[0]], + hlist_for_each_entry_rcu(h, n, &init_net.ct.hash[cb->args[0]], hnode) { if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) continue; @@ -794,14 +794,14 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3); else { /* Flush the whole table */ - nf_conntrack_flush(); + nf_conntrack_flush(&init_net); return 0; } if (err < 0) return err; - h = nf_conntrack_find_get(&tuple); + h = nf_conntrack_find_get(&init_net, &tuple); if (!h) return -ENOENT; @@ -847,7 +847,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - h = nf_conntrack_find_get(&tuple); + h = nf_conntrack_find_get(&init_net, &tuple); if (!h) return -ENOENT; @@ -1213,9 +1213,9 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, spin_lock_bh(&nf_conntrack_lock); if (cda[CTA_TUPLE_ORIG]) - h = __nf_conntrack_find(&otuple); + h = __nf_conntrack_find(&init_net, &otuple); else if (cda[CTA_TUPLE_REPLY]) - h = __nf_conntrack_find(&rtuple); + h = __nf_conntrack_find(&init_net, &rtuple); if (h == NULL) { struct nf_conntrack_tuple master; @@ -1230,7 +1230,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err < 0) goto out_unlock; - master_h = __nf_conntrack_find(&master); + master_h = __nf_conntrack_find(&init_net, &master); if (master_h == NULL) { err = -ENOENT; goto out_unlock; @@ -1670,7 +1670,7 @@ ctnetlink_create_expect(struct nlattr *cda[], u_int8_t u3) return err; /* Look for master conntrack of this expectation */ - h = nf_conntrack_find_get(&master_tuple); + h = nf_conntrack_find_get(&init_net, &master_tuple); if (!h) return -ENOENT; ct = nf_ct_tuplehash_to_ctrack(h); diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 97e54b0e43a..7caf45b59d2 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -143,7 +143,7 @@ static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t) pr_debug("trying to timeout ct or exp for tuple "); nf_ct_dump_tuple(t); - h = nf_conntrack_find_get(t); + h = nf_conntrack_find_get(&init_net, t); if (h) { sibling = nf_ct_tuplehash_to_ctrack(h); pr_debug("setting timeout of conntrack %p to 0\n", sibling); diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index a49fc932629..3a2f7ef997f 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -219,7 +219,7 @@ void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto) synchronize_rcu(); /* Remove all contrack entries for this protocol */ - nf_ct_iterate_cleanup(kill_l3proto, proto); + nf_ct_iterate_cleanup(&init_net, kill_l3proto, proto); } EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister); @@ -328,7 +328,7 @@ void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto) synchronize_rcu(); /* Remove all contrack entries for this protocol */ - nf_ct_iterate_cleanup(kill_l4proto, l4proto); + nf_ct_iterate_cleanup(&init_net, kill_l4proto, l4proto); } EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister); diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 021b505907d..5456e4b9424 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -51,7 +51,7 @@ static struct hlist_node *ct_get_first(struct seq_file *seq) for (st->bucket = 0; st->bucket < nf_conntrack_htable_size; st->bucket++) { - n = rcu_dereference(nf_conntrack_hash[st->bucket].first); + n = rcu_dereference(init_net.ct.hash[st->bucket].first); if (n) return n; } @@ -67,7 +67,7 @@ static struct hlist_node *ct_get_next(struct seq_file *seq, while (head == NULL) { if (++st->bucket >= nf_conntrack_htable_size) return NULL; - head = rcu_dereference(nf_conntrack_hash[st->bucket].first); + head = rcu_dereference(init_net.ct.hash[st->bucket].first); } return head; } diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index d2453d182d6..bd00830ff69 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -123,7 +123,7 @@ static int count_them(struct xt_connlimit_data *data, /* check the saved connections */ list_for_each_entry_safe(conn, tmp, hash, list) { - found = __nf_conntrack_find(&conn->tuple); + found = __nf_conntrack_find(&init_net, &conn->tuple); found_ct = NULL; if (found != NULL) -- cgit v1.2.3-70-g09d2 From 9b03f38d0487f3908696242286d934c9b38f9d2a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:03 +0200 Subject: netfilter: netns nf_conntrack: per-netns expectations Make per-netns a) expectation hash and b) expectations count. Expectations always belongs to netns to which it's master conntrack belong. This is natural and doesn't bloat expectation. Proc files and leaf users are stubbed to init_net, this is temporary. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_expect.h | 20 +++++--- include/net/netns/conntrack.h | 3 ++ .../netfilter/nf_conntrack_l3proto_ipv4_compat.c | 6 ++- net/ipv4/netfilter/nf_nat_pptp.c | 2 +- net/netfilter/nf_conntrack_core.c | 8 ++-- net/netfilter/nf_conntrack_expect.c | 55 +++++++++++----------- net/netfilter/nf_conntrack_h323_main.c | 2 +- net/netfilter/nf_conntrack_helper.c | 2 +- net/netfilter/nf_conntrack_netlink.c | 13 ++--- net/netfilter/nf_conntrack_pptp.c | 4 +- net/netfilter/nf_conntrack_sip.c | 2 +- 11 files changed, 66 insertions(+), 51 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 4c4d894cb9b..37a7fc1164b 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -6,7 +6,6 @@ #define _NF_CONNTRACK_EXPECT_H #include -extern struct hlist_head *nf_ct_expect_hash; extern unsigned int nf_ct_expect_hsize; extern unsigned int nf_ct_expect_max; @@ -56,6 +55,15 @@ struct nf_conntrack_expect struct rcu_head rcu; }; +static inline struct net *nf_ct_exp_net(struct nf_conntrack_expect *exp) +{ +#ifdef CONFIG_NET_NS + return exp->master->ct_net; /* by definition */ +#else + return &init_net; +#endif +} + struct nf_conntrack_expect_policy { unsigned int max_expected; @@ -67,17 +75,17 @@ struct nf_conntrack_expect_policy #define NF_CT_EXPECT_PERMANENT 0x1 #define NF_CT_EXPECT_INACTIVE 0x2 -int nf_conntrack_expect_init(void); -void nf_conntrack_expect_fini(void); +int nf_conntrack_expect_init(struct net *net); +void nf_conntrack_expect_fini(struct net *net); struct nf_conntrack_expect * -__nf_ct_expect_find(const struct nf_conntrack_tuple *tuple); +__nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple); struct nf_conntrack_expect * -nf_ct_expect_find_get(const struct nf_conntrack_tuple *tuple); +nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple); struct nf_conntrack_expect * -nf_ct_find_expectation(const struct nf_conntrack_tuple *tuple); +nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple); void nf_ct_unlink_expect(struct nf_conntrack_expect *exp); void nf_ct_remove_expectations(struct nf_conn *ct); diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index b767683f112..e453a33f3e9 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -5,7 +5,10 @@ struct netns_ct { atomic_t count; + unsigned int expect_count; struct hlist_head *hash; + struct hlist_head *expect_hash; int hash_vmalloc; + int expect_vmalloc; }; #endif diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 8e0afdc2b13..f8636a57e8c 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -177,11 +177,12 @@ struct ct_expect_iter_state { static struct hlist_node *ct_expect_get_first(struct seq_file *seq) { + struct net *net = &init_net; struct ct_expect_iter_state *st = seq->private; struct hlist_node *n; for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { - n = rcu_dereference(nf_ct_expect_hash[st->bucket].first); + n = rcu_dereference(net->ct.expect_hash[st->bucket].first); if (n) return n; } @@ -191,13 +192,14 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq) static struct hlist_node *ct_expect_get_next(struct seq_file *seq, struct hlist_node *head) { + struct net *net = &init_net; struct ct_expect_iter_state *st = seq->private; head = rcu_dereference(head->next); while (head == NULL) { if (++st->bucket >= nf_ct_expect_hsize) return NULL; - head = rcu_dereference(nf_ct_expect_hash[st->bucket].first); + head = rcu_dereference(net->ct.expect_hash[st->bucket].first); } return head; } diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index da3d91a5ef5..e4bdddc6034 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -73,7 +73,7 @@ static void pptp_nat_expected(struct nf_conn *ct, pr_debug("trying to unexpect other dir: "); nf_ct_dump_tuple_ip(&t); - other_exp = nf_ct_expect_find_get(&t); + other_exp = nf_ct_expect_find_get(&init_net, &t); if (other_exp) { nf_ct_unexpect_related(other_exp); nf_ct_expect_put(other_exp); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index da56b260552..c188edea249 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -562,7 +562,7 @@ init_conntrack(struct net *net, nf_ct_acct_ext_add(ct, GFP_ATOMIC); spin_lock_bh(&nf_conntrack_lock); - exp = nf_ct_find_expectation(tuple); + exp = nf_ct_find_expectation(net, tuple); if (exp) { pr_debug("conntrack: expectation arrives ct=%p exp=%p\n", ct, exp); @@ -1038,7 +1038,7 @@ void nf_conntrack_cleanup(struct net *net) nf_conntrack_htable_size); nf_conntrack_acct_fini(); - nf_conntrack_expect_fini(); + nf_conntrack_expect_fini(net); nf_conntrack_helper_fini(); nf_conntrack_proto_fini(); } @@ -1173,7 +1173,7 @@ int nf_conntrack_init(struct net *net) if (ret < 0) goto err_free_conntrack_slab; - ret = nf_conntrack_expect_init(); + ret = nf_conntrack_expect_init(net); if (ret < 0) goto out_fini_proto; @@ -1203,7 +1203,7 @@ int nf_conntrack_init(struct net *net) out_fini_helper: nf_conntrack_helper_fini(); out_fini_expect: - nf_conntrack_expect_fini(); + nf_conntrack_expect_fini(net); out_fini_proto: nf_conntrack_proto_fini(); err_free_conntrack_slab: diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index e6a79f2a7c5..5307316356e 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -28,17 +28,12 @@ #include #include -struct hlist_head *nf_ct_expect_hash __read_mostly; -EXPORT_SYMBOL_GPL(nf_ct_expect_hash); - unsigned int nf_ct_expect_hsize __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_expect_hsize); static unsigned int nf_ct_expect_hash_rnd __read_mostly; -static unsigned int nf_ct_expect_count; unsigned int nf_ct_expect_max __read_mostly; static int nf_ct_expect_hash_rnd_initted __read_mostly; -static int nf_ct_expect_vmalloc; static struct kmem_cache *nf_ct_expect_cachep __read_mostly; @@ -46,12 +41,13 @@ static struct kmem_cache *nf_ct_expect_cachep __read_mostly; void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) { struct nf_conn_help *master_help = nfct_help(exp->master); + struct net *net = nf_ct_exp_net(exp); NF_CT_ASSERT(master_help); NF_CT_ASSERT(!timer_pending(&exp->timeout)); hlist_del_rcu(&exp->hnode); - nf_ct_expect_count--; + net->ct.expect_count--; hlist_del(&exp->lnode); master_help->expecting[exp->class]--; @@ -87,17 +83,17 @@ static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple } struct nf_conntrack_expect * -__nf_ct_expect_find(const struct nf_conntrack_tuple *tuple) +__nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i; struct hlist_node *n; unsigned int h; - if (!nf_ct_expect_count) + if (!net->ct.expect_count) return NULL; h = nf_ct_expect_dst_hash(tuple); - hlist_for_each_entry_rcu(i, n, &nf_ct_expect_hash[h], hnode) { + hlist_for_each_entry_rcu(i, n, &net->ct.expect_hash[h], hnode) { if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) return i; } @@ -107,12 +103,12 @@ EXPORT_SYMBOL_GPL(__nf_ct_expect_find); /* Just find a expectation corresponding to a tuple. */ struct nf_conntrack_expect * -nf_ct_expect_find_get(const struct nf_conntrack_tuple *tuple) +nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i; rcu_read_lock(); - i = __nf_ct_expect_find(tuple); + i = __nf_ct_expect_find(net, tuple); if (i && !atomic_inc_not_zero(&i->use)) i = NULL; rcu_read_unlock(); @@ -124,17 +120,17 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_find_get); /* If an expectation for this connection is found, it gets delete from * global list then returned. */ struct nf_conntrack_expect * -nf_ct_find_expectation(const struct nf_conntrack_tuple *tuple) +nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i, *exp = NULL; struct hlist_node *n; unsigned int h; - if (!nf_ct_expect_count) + if (!net->ct.expect_count) return NULL; h = nf_ct_expect_dst_hash(tuple); - hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) { + hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) { if (!(i->flags & NF_CT_EXPECT_INACTIVE) && nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) { exp = i; @@ -311,6 +307,7 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_put); static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) { struct nf_conn_help *master_help = nfct_help(exp->master); + struct net *net = nf_ct_exp_net(exp); const struct nf_conntrack_expect_policy *p; unsigned int h = nf_ct_expect_dst_hash(&exp->tuple); @@ -319,8 +316,8 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) hlist_add_head(&exp->lnode, &master_help->expectations); master_help->expecting[exp->class]++; - hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]); - nf_ct_expect_count++; + hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]); + net->ct.expect_count++; setup_timer(&exp->timeout, nf_ct_expectation_timed_out, (unsigned long)exp); @@ -371,6 +368,7 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect) struct nf_conntrack_expect *i; struct nf_conn *master = expect->master; struct nf_conn_help *master_help = nfct_help(master); + struct net *net = nf_ct_exp_net(expect); struct hlist_node *n; unsigned int h; int ret; @@ -383,7 +381,7 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect) goto out; } h = nf_ct_expect_dst_hash(&expect->tuple); - hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) { + hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) { if (expect_matches(i, expect)) { /* Refresh timer: if it's dying, ignore.. */ if (refresh_timer(i)) { @@ -406,7 +404,7 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect) } } - if (nf_ct_expect_count >= nf_ct_expect_max) { + if (net->ct.expect_count >= nf_ct_expect_max) { if (net_ratelimit()) printk(KERN_WARNING "nf_conntrack: expectation table full\n"); @@ -430,11 +428,12 @@ struct ct_expect_iter_state { static struct hlist_node *ct_expect_get_first(struct seq_file *seq) { + struct net *net = &init_net; struct ct_expect_iter_state *st = seq->private; struct hlist_node *n; for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { - n = rcu_dereference(nf_ct_expect_hash[st->bucket].first); + n = rcu_dereference(net->ct.expect_hash[st->bucket].first); if (n) return n; } @@ -444,13 +443,14 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq) static struct hlist_node *ct_expect_get_next(struct seq_file *seq, struct hlist_node *head) { + struct net *net = &init_net; struct ct_expect_iter_state *st = seq->private; head = rcu_dereference(head->next); while (head == NULL) { if (++st->bucket >= nf_ct_expect_hsize) return NULL; - head = rcu_dereference(nf_ct_expect_hash[st->bucket].first); + head = rcu_dereference(net->ct.expect_hash[st->bucket].first); } return head; } @@ -558,7 +558,7 @@ static void exp_proc_remove(void) module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0600); -int nf_conntrack_expect_init(void) +int nf_conntrack_expect_init(struct net *net) { int err = -ENOMEM; @@ -569,9 +569,10 @@ int nf_conntrack_expect_init(void) } nf_ct_expect_max = nf_ct_expect_hsize * 4; - nf_ct_expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, - &nf_ct_expect_vmalloc); - if (nf_ct_expect_hash == NULL) + net->ct.expect_count = 0; + net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, + &net->ct.expect_vmalloc); + if (net->ct.expect_hash == NULL) goto err1; nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect", @@ -589,16 +590,16 @@ int nf_conntrack_expect_init(void) err3: kmem_cache_destroy(nf_ct_expect_cachep); err2: - nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc, + nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc, nf_ct_expect_hsize); err1: return err; } -void nf_conntrack_expect_fini(void) +void nf_conntrack_expect_fini(struct net *net) { exp_proc_remove(); kmem_cache_destroy(nf_ct_expect_cachep); - nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc, + nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc, nf_ct_expect_hsize); } diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 5dc0478108a..dfb826c973d 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -1219,7 +1219,7 @@ static struct nf_conntrack_expect *find_expect(struct nf_conn *ct, tuple.dst.u.tcp.port = port; tuple.dst.protonum = IPPROTO_TCP; - exp = __nf_ct_expect_find(&tuple); + exp = __nf_ct_expect_find(&init_net, &tuple); if (exp && exp->master == ct) return exp; return NULL; diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index d91278dfdaf..c793db810cd 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -145,7 +145,7 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) /* Get rid of expectations */ for (i = 0; i < nf_ct_expect_hsize; i++) { hlist_for_each_entry_safe(exp, n, next, - &nf_ct_expect_hash[i], hnode) { + &init_net.ct.expect_hash[i], hnode) { struct nf_conn_help *help = nfct_help(exp->master); if ((help->helper == me || exp->helper == me) && del_timer(&exp->timeout)) { diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 918a3358a12..cadfd15b44f 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1458,6 +1458,7 @@ static int ctnetlink_exp_done(struct netlink_callback *cb) static int ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = &init_net; struct nf_conntrack_expect *exp, *last; struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh); struct hlist_node *n; @@ -1467,7 +1468,7 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) last = (struct nf_conntrack_expect *)cb->args[1]; for (; cb->args[0] < nf_ct_expect_hsize; cb->args[0]++) { restart: - hlist_for_each_entry(exp, n, &nf_ct_expect_hash[cb->args[0]], + hlist_for_each_entry(exp, n, &net->ct.expect_hash[cb->args[0]], hnode) { if (l3proto && exp->tuple.src.l3num != l3proto) continue; @@ -1529,7 +1530,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - exp = nf_ct_expect_find_get(&tuple); + exp = nf_ct_expect_find_get(&init_net, &tuple); if (!exp) return -ENOENT; @@ -1583,7 +1584,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, return err; /* bump usage count to 2 */ - exp = nf_ct_expect_find_get(&tuple); + exp = nf_ct_expect_find_get(&init_net, &tuple); if (!exp) return -ENOENT; @@ -1613,7 +1614,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, } for (i = 0; i < nf_ct_expect_hsize; i++) { hlist_for_each_entry_safe(exp, n, next, - &nf_ct_expect_hash[i], + &init_net.ct.expect_hash[i], hnode) { m_help = nfct_help(exp->master); if (m_help->helper == h @@ -1629,7 +1630,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, spin_lock_bh(&nf_conntrack_lock); for (i = 0; i < nf_ct_expect_hsize; i++) { hlist_for_each_entry_safe(exp, n, next, - &nf_ct_expect_hash[i], + &init_net.ct.expect_hash[i], hnode) { if (del_timer(&exp->timeout)) { nf_ct_unlink_expect(exp); @@ -1724,7 +1725,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, return err; spin_lock_bh(&nf_conntrack_lock); - exp = __nf_ct_expect_find(&tuple); + exp = __nf_ct_expect_find(&init_net, &tuple); if (!exp) { spin_unlock_bh(&nf_conntrack_lock); diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 7caf45b59d2..5db7df5d19b 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -121,7 +121,7 @@ static void pptp_expectfn(struct nf_conn *ct, pr_debug("trying to unexpect other dir: "); nf_ct_dump_tuple(&inv_t); - exp_other = nf_ct_expect_find_get(&inv_t); + exp_other = nf_ct_expect_find_get(&init_net, &inv_t); if (exp_other) { /* delete other expectation. */ pr_debug("found\n"); @@ -154,7 +154,7 @@ static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t) nf_ct_put(sibling); return 1; } else { - exp = nf_ct_expect_find_get(t); + exp = nf_ct_expect_find_get(&init_net, t); if (exp) { pr_debug("unexpect_related of expect %p\n", exp); nf_ct_unexpect_related(exp); diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 1fa306be60f..a006080eb38 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -775,7 +775,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, rcu_read_lock(); do { - exp = __nf_ct_expect_find(&tuple); + exp = __nf_ct_expect_find(&init_net, &tuple); if (!exp || exp->master == ct || nfct_help(exp->master)->helper != nfct_help(ct)->helper || -- cgit v1.2.3-70-g09d2 From 63c9a26264be108b52de087724673f8664570e34 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:04 +0200 Subject: netfilter: netns nf_conntrack: per-netns unconfirmed list What is confirmed connection in one netns can very well be unconfirmed in another one. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_core.h | 1 - include/net/netns/conntrack.h | 2 ++ net/netfilter/nf_conntrack_core.c | 7 ++++--- net/netfilter/nf_conntrack_helper.c | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 1c373564396..b4b45c541da 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -72,6 +72,5 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_l4proto *proto); extern spinlock_t nf_conntrack_lock ; -extern struct hlist_head unconfirmed; #endif /* _NF_CONNTRACK_CORE_H */ diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index e453a33f3e9..6ddf58e142a 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -1,6 +1,7 @@ #ifndef __NETNS_CONNTRACK_H #define __NETNS_CONNTRACK_H +#include #include struct netns_ct { @@ -8,6 +9,7 @@ struct netns_ct { unsigned int expect_count; struct hlist_head *hash; struct hlist_head *expect_hash; + struct hlist_head unconfirmed; int hash_vmalloc; int expect_vmalloc; }; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index c188edea249..2a105db1330 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -54,7 +54,6 @@ struct nf_conn nf_conntrack_untracked __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_untracked); unsigned int nf_ct_log_invalid __read_mostly; -HLIST_HEAD(unconfirmed); static struct kmem_cache *nf_conntrack_cachep __read_mostly; DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat); @@ -596,7 +595,8 @@ init_conntrack(struct net *net, } /* Overload tuple linked list to put us in unconfirmed list. */ - hlist_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode, &unconfirmed); + hlist_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode, + &net->ct.unconfirmed); spin_unlock_bh(&nf_conntrack_lock); @@ -957,7 +957,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data), goto found; } } - hlist_for_each_entry(h, n, &unconfirmed, hnode) { + hlist_for_each_entry(h, n, &net->ct.unconfirmed, hnode) { ct = nf_ct_tuplehash_to_ctrack(h); if (iter(ct, data)) set_bit(IPS_DYING_BIT, &ct->status); @@ -1154,6 +1154,7 @@ int nf_conntrack_init(struct net *net) printk(KERN_ERR "Unable to create nf_conntrack_hash\n"); goto err_out; } + INIT_HLIST_HEAD(&net->ct.unconfirmed); nf_conntrack_max = max_factor * nf_conntrack_htable_size; diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index c793db810cd..920e778539a 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -156,7 +156,7 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) } /* Get rid of expecteds, set helpers to NULL. */ - hlist_for_each_entry(h, n, &unconfirmed, hnode) + hlist_for_each_entry(h, n, &init_net.ct.unconfirmed, hnode) unhelp(h, me); for (i = 0; i < nf_conntrack_htable_size; i++) { hlist_for_each_entry(h, n, &init_net.ct.hash[i], hnode) -- cgit v1.2.3-70-g09d2 From a702a65fc1376fc1f6757ec2a6960348af3f1876 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:04 +0200 Subject: netfilter: netns nf_conntrack: pass netns pointer to nf_conntrack_in() It's deducible from skb->dev or skb->dst->dev, but we know netns at the moment of call, so pass it down and use for finding and creating conntracks. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_core.h | 3 ++- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 4 ++-- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 24 ++++++++++++++++-------- net/netfilter/nf_conntrack_core.c | 15 ++++++++------- 4 files changed, 28 insertions(+), 18 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index b4b45c541da..e78afe7f28e 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -20,7 +20,8 @@ /* This header is used to share core functionality between the standalone connection tracking module, and the compatibility layer's use of connection tracking. */ -extern unsigned int nf_conntrack_in(u_int8_t pf, +extern unsigned int nf_conntrack_in(struct net *net, + u_int8_t pf, unsigned int hooknum, struct sk_buff *skb); diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 03dd108015c..2e4dd3fb002 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -172,7 +172,7 @@ static unsigned int ipv4_conntrack_in(unsigned int hooknum, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return nf_conntrack_in(PF_INET, hooknum, skb); + return nf_conntrack_in(dev_net(in), PF_INET, hooknum, skb); } static unsigned int ipv4_conntrack_local(unsigned int hooknum, @@ -188,7 +188,7 @@ static unsigned int ipv4_conntrack_local(unsigned int hooknum, printk("ipt_hook: happy cracking.\n"); return NF_ACCEPT; } - return nf_conntrack_in(PF_INET, hooknum, skb); + return nf_conntrack_in(dev_net(out), PF_INET, hooknum, skb); } /* Connection tracking may drop packets, but never alters them, so diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 85050c072ab..e91db16611d 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -211,11 +211,10 @@ static unsigned int ipv6_defrag(unsigned int hooknum, return NF_STOLEN; } -static unsigned int ipv6_conntrack_in(unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +static unsigned int __ipv6_conntrack_in(struct net *net, + unsigned int hooknum, + struct sk_buff *skb, + int (*okfn)(struct sk_buff *)) { struct sk_buff *reasm = skb->nfct_reasm; @@ -225,7 +224,7 @@ static unsigned int ipv6_conntrack_in(unsigned int hooknum, if (!reasm->nfct) { unsigned int ret; - ret = nf_conntrack_in(PF_INET6, hooknum, reasm); + ret = nf_conntrack_in(net, PF_INET6, hooknum, reasm); if (ret != NF_ACCEPT) return ret; } @@ -235,7 +234,16 @@ static unsigned int ipv6_conntrack_in(unsigned int hooknum, return NF_ACCEPT; } - return nf_conntrack_in(PF_INET6, hooknum, skb); + return nf_conntrack_in(net, PF_INET6, hooknum, skb); +} + +static unsigned int ipv6_conntrack_in(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return __ipv6_conntrack_in(dev_net(in), hooknum, skb, okfn); } static unsigned int ipv6_conntrack_local(unsigned int hooknum, @@ -250,7 +258,7 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum, printk("ipv6_conntrack_local: packet too short\n"); return NF_ACCEPT; } - return ipv6_conntrack_in(hooknum, skb, in, out, okfn); + return __ipv6_conntrack_in(dev_net(out), hooknum, skb, okfn); } static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 2a105db1330..5c96d9732c7 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -611,7 +611,8 @@ init_conntrack(struct net *net, /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */ static inline struct nf_conn * -resolve_normal_ct(struct sk_buff *skb, +resolve_normal_ct(struct net *net, + struct sk_buff *skb, unsigned int dataoff, u_int16_t l3num, u_int8_t protonum, @@ -632,10 +633,9 @@ resolve_normal_ct(struct sk_buff *skb, } /* look for tuple match */ - h = nf_conntrack_find_get(&init_net, &tuple); + h = nf_conntrack_find_get(net, &tuple); if (!h) { - h = init_conntrack(&init_net, &tuple, l3proto, l4proto, skb, - dataoff); + h = init_conntrack(net, &tuple, l3proto, l4proto, skb, dataoff); if (!h) return NULL; if (IS_ERR(h)) @@ -669,7 +669,8 @@ resolve_normal_ct(struct sk_buff *skb, } unsigned int -nf_conntrack_in(u_int8_t pf, unsigned int hooknum, struct sk_buff *skb) +nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, + struct sk_buff *skb) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; @@ -709,8 +710,8 @@ nf_conntrack_in(u_int8_t pf, unsigned int hooknum, struct sk_buff *skb) return -ret; } - ct = resolve_normal_ct(skb, dataoff, pf, protonum, l3proto, l4proto, - &set_reply, &ctinfo); + ct = resolve_normal_ct(net, skb, dataoff, pf, protonum, + l3proto, l4proto, &set_reply, &ctinfo); if (!ct) { /* Not valid part of a connection */ NF_CT_STAT_INC_ATOMIC(invalid); -- cgit v1.2.3-70-g09d2 From 74c51a1497033e6ff7b8096797daca233a4a30df Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:05 +0200 Subject: netfilter: netns nf_conntrack: pass netns pointer to L4 protocol's ->error hook Again, it's deducible from skb, but we're going to use it for nf_conntrack_checksum and statistics, so just pass it from upper layer. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_l4proto.h | 2 +- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 8 ++++---- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 9 +++++---- net/netfilter/nf_conntrack_core.c | 12 +++++++----- net/netfilter/nf_conntrack_proto_dccp.c | 6 +++--- net/netfilter/nf_conntrack_proto_tcp.c | 3 ++- net/netfilter/nf_conntrack_proto_udp.c | 2 +- net/netfilter/nf_conntrack_proto_udplite.c | 4 +++- 8 files changed, 26 insertions(+), 20 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index d4376e97bae..97723d33c95 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -50,7 +50,7 @@ struct nf_conntrack_l4proto /* Called when a conntrack entry is destroyed */ void (*destroy)(struct nf_conn *ct); - int (*error)(struct sk_buff *skb, unsigned int dataoff, + int (*error)(struct net *net, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum); diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index daf346377b6..8c7ed5bc959 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -123,7 +123,7 @@ static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb, /* Returns conntrack if it dealt with ICMP, and filled in skb fields */ static int -icmp_error_message(struct sk_buff *skb, +icmp_error_message(struct net *net, struct sk_buff *skb, enum ip_conntrack_info *ctinfo, unsigned int hooknum) { @@ -155,7 +155,7 @@ icmp_error_message(struct sk_buff *skb, *ctinfo = IP_CT_RELATED; - h = nf_conntrack_find_get(&init_net, &innertuple); + h = nf_conntrack_find_get(net, &innertuple); if (!h) { pr_debug("icmp_error_message: no match\n"); return -NF_ACCEPT; @@ -172,7 +172,7 @@ icmp_error_message(struct sk_buff *skb, /* Small and modified version of icmp_rcv */ static int -icmp_error(struct sk_buff *skb, unsigned int dataoff, +icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { const struct icmphdr *icmph; @@ -217,7 +217,7 @@ icmp_error(struct sk_buff *skb, unsigned int dataoff, && icmph->type != ICMP_REDIRECT) return NF_ACCEPT; - return icmp_error_message(skb, ctinfo, hooknum); + return icmp_error_message(net, skb, ctinfo, hooknum); } #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 548cf4f15c0..aabddfe2127 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -122,7 +122,8 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb, } static int -icmpv6_error_message(struct sk_buff *skb, +icmpv6_error_message(struct net *net, + struct sk_buff *skb, unsigned int icmp6off, enum ip_conntrack_info *ctinfo, unsigned int hooknum) @@ -156,7 +157,7 @@ icmpv6_error_message(struct sk_buff *skb, *ctinfo = IP_CT_RELATED; - h = nf_conntrack_find_get(&init_net, &intuple); + h = nf_conntrack_find_get(net, &intuple); if (!h) { pr_debug("icmpv6_error: no match\n"); return -NF_ACCEPT; @@ -172,7 +173,7 @@ icmpv6_error_message(struct sk_buff *skb, } static int -icmpv6_error(struct sk_buff *skb, unsigned int dataoff, +icmpv6_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { const struct icmp6hdr *icmp6h; @@ -197,7 +198,7 @@ icmpv6_error(struct sk_buff *skb, unsigned int dataoff, if (icmp6h->icmp6_type >= 128) return NF_ACCEPT; - return icmpv6_error_message(skb, dataoff, ctinfo, hooknum); + return icmpv6_error_message(net, skb, dataoff, ctinfo, hooknum); } #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 5c96d9732c7..251f020c7c1 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -703,11 +703,13 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, /* It may be an special packet, error, unclean... * inverse of the return code tells to the netfilter * core what to do with the packet. */ - if (l4proto->error != NULL && - (ret = l4proto->error(skb, dataoff, &ctinfo, pf, hooknum)) <= 0) { - NF_CT_STAT_INC_ATOMIC(error); - NF_CT_STAT_INC_ATOMIC(invalid); - return -ret; + if (l4proto->error != NULL) { + ret = l4proto->error(net, skb, dataoff, &ctinfo, pf, hooknum); + if (ret <= 0) { + NF_CT_STAT_INC_ATOMIC(error); + NF_CT_STAT_INC_ATOMIC(invalid); + return -ret; + } } ct = resolve_normal_ct(net, skb, dataoff, pf, protonum, diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index edc30358dc1..6ead8da3e9e 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -545,9 +545,9 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, return NF_ACCEPT; } -static int dccp_error(struct sk_buff *skb, unsigned int dataoff, - enum ip_conntrack_info *ctinfo, u_int8_t pf, - unsigned int hooknum) +static int dccp_error(struct net *net, struct sk_buff *skb, + unsigned int dataoff, enum ip_conntrack_info *ctinfo, + u_int8_t pf, unsigned int hooknum) { struct dccp_hdr _dh, *dh; unsigned int dccp_len = skb->len - dataoff; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 539a8202025..4e71de2405f 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -746,7 +746,8 @@ static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] = }; /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */ -static int tcp_error(struct sk_buff *skb, +static int tcp_error(struct net *net, + struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, u_int8_t pf, diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 2a965c4a0ea..8a245beb2c9 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -89,7 +89,7 @@ static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb, return true; } -static int udp_error(struct sk_buff *skb, unsigned int dataoff, +static int udp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 4fb6c8d83a8..981701919a7 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -89,7 +89,9 @@ static bool udplite_new(struct nf_conn *ct, const struct sk_buff *skb, return true; } -static int udplite_error(struct sk_buff *skb, unsigned int dataoff, +static int udplite_error(struct net *net, + struct sk_buff *skb, + unsigned int dataoff, enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) -- cgit v1.2.3-70-g09d2 From a71996fccce4b2086a26036aa3c915365ca36926 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:07 +0200 Subject: netfilter: netns nf_conntrack: pass conntrack to nf_conntrack_event_cache() not skb This is cleaner, we already know conntrack to which event is relevant. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_ecache.h | 4 +--- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 2 +- net/ipv4/netfilter/nf_nat_helper.c | 2 +- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 2 +- net/netfilter/nf_conntrack_core.c | 10 +++++----- net/netfilter/nf_conntrack_ftp.c | 9 +++++---- net/netfilter/nf_conntrack_proto_gre.c | 2 +- net/netfilter/nf_conntrack_proto_sctp.c | 4 ++-- net/netfilter/nf_conntrack_proto_tcp.c | 6 +++--- net/netfilter/nf_conntrack_proto_udp.c | 2 +- net/netfilter/nf_conntrack_proto_udplite.c | 2 +- net/netfilter/xt_CONNMARK.c | 8 ++++---- net/netfilter/xt_CONNSECMARK.c | 2 +- 13 files changed, 27 insertions(+), 28 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index f0b9078235c..c1b406cecf9 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -28,10 +28,8 @@ extern void __nf_ct_event_cache_init(struct nf_conn *ct); extern void nf_ct_event_cache_flush(void); static inline void -nf_conntrack_event_cache(enum ip_conntrack_events event, - const struct sk_buff *skb) +nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct) { - struct nf_conn *ct = (struct nf_conn *)skb->nfct; struct nf_conntrack_ecache *ecache; local_bh_disable(); diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 8c7ed5bc959..205ba399d4a 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -91,7 +91,7 @@ static int icmp_packet(struct nf_conn *ct, nf_ct_kill_acct(ct, ctinfo, skb); } else { atomic_inc(&ct->proto.icmp.count); - nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); + nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct); nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout); } diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 112dcfa1290..cf7a42bf982 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -193,7 +193,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb, nf_conntrack_tcp_update(skb, ip_hdrlen(skb), ct, CTINFO2DIR(ctinfo)); - nf_conntrack_event_cache(IPCT_NATSEQADJ, skb); + nf_conntrack_event_cache(IPCT_NATSEQADJ, ct); } return 1; } diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index aabddfe2127..df04de91e6e 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -93,7 +93,7 @@ static int icmpv6_packet(struct nf_conn *ct, nf_ct_kill_acct(ct, ctinfo, skb); } else { atomic_inc(&ct->proto.icmp.count); - nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); + nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct); nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmpv6_timeout); } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 251f020c7c1..01f59c57730 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -370,14 +370,14 @@ __nf_conntrack_confirm(struct sk_buff *skb) spin_unlock_bh(&nf_conntrack_lock); help = nfct_help(ct); if (help && help->helper) - nf_conntrack_event_cache(IPCT_HELPER, skb); + nf_conntrack_event_cache(IPCT_HELPER, ct); #ifdef CONFIG_NF_NAT_NEEDED if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_NATINFO, skb); + nf_conntrack_event_cache(IPCT_NATINFO, ct); #endif nf_conntrack_event_cache(master_ct(ct) ? - IPCT_RELATED : IPCT_NEW, skb); + IPCT_RELATED : IPCT_NEW, ct); return NF_ACCEPT; out: @@ -740,7 +740,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, } if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_STATUS, skb); + nf_conntrack_event_cache(IPCT_STATUS, ct); return ret; } @@ -853,7 +853,7 @@ acct: /* must be unlocked when calling event cache */ if (event) - nf_conntrack_event_cache(event, skb); + nf_conntrack_event_cache(event, ct); } EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct); diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index bb20672fe03..4f7107107e9 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -318,7 +318,8 @@ static int find_nl_seq(u32 seq, const struct nf_ct_ftp_master *info, int dir) } /* We don't update if it's older than what we have. */ -static void update_nl_seq(u32 nl_seq, struct nf_ct_ftp_master *info, int dir, +static void update_nl_seq(struct nf_conn *ct, u32 nl_seq, + struct nf_ct_ftp_master *info, int dir, struct sk_buff *skb) { unsigned int i, oldest = NUM_SEQ_TO_REMEMBER; @@ -336,11 +337,11 @@ static void update_nl_seq(u32 nl_seq, struct nf_ct_ftp_master *info, int dir, if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) { info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq; - nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb); + nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, ct); } else if (oldest != NUM_SEQ_TO_REMEMBER && after(nl_seq, info->seq_aft_nl[dir][oldest])) { info->seq_aft_nl[dir][oldest] = nl_seq; - nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb); + nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, ct); } } @@ -509,7 +510,7 @@ out_update_nl: /* Now if this ends in \n, update ftp info. Seq may have been * adjusted by NAT code. */ if (ends_in_nl) - update_nl_seq(seq, ct_ftp_info, dir, skb); + update_nl_seq(ct, seq, ct_ftp_info, dir, skb); out: spin_unlock_bh(&nf_ftp_lock); return ret; diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index c5a78220fa3..5b1273a01fe 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -229,7 +229,7 @@ static int gre_packet(struct nf_conn *ct, ct->proto.gre.stream_timeout); /* Also, more likely to be important, and not a probe. */ set_bit(IPS_ASSURED_BIT, &ct->status); - nf_conntrack_event_cache(IPCT_STATUS, skb); + nf_conntrack_event_cache(IPCT_STATUS, ct); } else nf_ct_refresh_acct(ct, ctinfo, skb, ct->proto.gre.timeout); diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index b5a90596d3f..ae8c2609e23 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -369,7 +369,7 @@ static int sctp_packet(struct nf_conn *ct, ct->proto.sctp.state = new_state; if (old_state != new_state) - nf_conntrack_event_cache(IPCT_PROTOINFO, skb); + nf_conntrack_event_cache(IPCT_PROTOINFO, ct); } write_unlock_bh(&sctp_lock); @@ -380,7 +380,7 @@ static int sctp_packet(struct nf_conn *ct, new_state == SCTP_CONNTRACK_ESTABLISHED) { pr_debug("Setting assured bit\n"); set_bit(IPS_ASSURED_BIT, &ct->status); - nf_conntrack_event_cache(IPCT_STATUS, skb); + nf_conntrack_event_cache(IPCT_STATUS, ct); } return NF_ACCEPT; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 4e71de2405f..b5d62d66e02 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -969,9 +969,9 @@ static int tcp_packet(struct nf_conn *ct, timeout = tcp_timeouts[new_state]; write_unlock_bh(&tcp_lock); - nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); + nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct); if (new_state != old_state) - nf_conntrack_event_cache(IPCT_PROTOINFO, skb); + nf_conntrack_event_cache(IPCT_PROTOINFO, ct); if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { /* If only reply is a RST, we can consider ourselves not to @@ -990,7 +990,7 @@ static int tcp_packet(struct nf_conn *ct, after SYN_RECV or a valid answer for a picked up connection. */ set_bit(IPS_ASSURED_BIT, &ct->status); - nf_conntrack_event_cache(IPCT_STATUS, skb); + nf_conntrack_event_cache(IPCT_STATUS, ct); } nf_ct_refresh_acct(ct, ctinfo, skb, timeout); diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 8a245beb2c9..e0ee89e179c 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -75,7 +75,7 @@ static int udp_packet(struct nf_conn *ct, nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udp_timeout_stream); /* Also, more likely to be important, and not a probe */ if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_STATUS, skb); + nf_conntrack_event_cache(IPCT_STATUS, ct); } else nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udp_timeout); diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 981701919a7..c5b77c8f86c 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -75,7 +75,7 @@ static int udplite_packet(struct nf_conn *ct, nf_ct_udplite_timeout_stream); /* Also, more likely to be important, and not a probe */ if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_STATUS, skb); + nf_conntrack_event_cache(IPCT_STATUS, ct); } else nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udplite_timeout); diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c index e72e5d01752..e1415c3f5c9 100644 --- a/net/netfilter/xt_CONNMARK.c +++ b/net/netfilter/xt_CONNMARK.c @@ -54,7 +54,7 @@ connmark_tg_v0(struct sk_buff *skb, const struct net_device *in, newmark = (ct->mark & ~markinfo->mask) | markinfo->mark; if (newmark != ct->mark) { ct->mark = newmark; - nf_conntrack_event_cache(IPCT_MARK, skb); + nf_conntrack_event_cache(IPCT_MARK, ct); } break; case XT_CONNMARK_SAVE: @@ -62,7 +62,7 @@ connmark_tg_v0(struct sk_buff *skb, const struct net_device *in, (skb->mark & markinfo->mask); if (ct->mark != newmark) { ct->mark = newmark; - nf_conntrack_event_cache(IPCT_MARK, skb); + nf_conntrack_event_cache(IPCT_MARK, ct); } break; case XT_CONNMARK_RESTORE: @@ -95,7 +95,7 @@ connmark_tg(struct sk_buff *skb, const struct net_device *in, newmark = (ct->mark & ~info->ctmask) ^ info->ctmark; if (ct->mark != newmark) { ct->mark = newmark; - nf_conntrack_event_cache(IPCT_MARK, skb); + nf_conntrack_event_cache(IPCT_MARK, ct); } break; case XT_CONNMARK_SAVE: @@ -103,7 +103,7 @@ connmark_tg(struct sk_buff *skb, const struct net_device *in, (skb->mark & info->nfmask); if (ct->mark != newmark) { ct->mark = newmark; - nf_conntrack_event_cache(IPCT_MARK, skb); + nf_conntrack_event_cache(IPCT_MARK, ct); } break; case XT_CONNMARK_RESTORE: diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index ae939e54dfa..5f221c3bd35 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -43,7 +43,7 @@ static void secmark_save(const struct sk_buff *skb) ct = nf_ct_get(skb, &ctinfo); if (ct && !ct->secmark) { ct->secmark = skb->secmark; - nf_conntrack_event_cache(IPCT_SECMARK, skb); + nf_conntrack_event_cache(IPCT_SECMARK, ct); } } } -- cgit v1.2.3-70-g09d2 From 6058fa6bb96a5b6145cba10c5171f09c2783ca69 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:07 +0200 Subject: netfilter: netns nf_conntrack: per-netns event cache Heh, last minute proof-reading of this patch made me think, that this is actually unneeded, simply because "ct" pointers will be different for different conntracks in different netns, just like they are different in one netns. Not so sure anymore. [Patrick: pointers will be different, flushing can only be done while inactive though and thus it needs to be per netns] Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_ecache.h | 22 ++++++++++++++++------ include/net/netns/conntrack.h | 5 +++++ net/netfilter/nf_conntrack_core.c | 12 +++++++++--- net/netfilter/nf_conntrack_ecache.c | 26 +++++++++++++++++++------- 4 files changed, 49 insertions(+), 16 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index c1b406cecf9..35f814c1e2c 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -8,6 +8,7 @@ #include #include +#include #include #ifdef CONFIG_NF_CONNTRACK_EVENTS @@ -15,9 +16,6 @@ struct nf_conntrack_ecache { struct nf_conn *ct; unsigned int events; }; -DECLARE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache); - -#define CONNTRACK_ECACHE(x) (__get_cpu_var(nf_conntrack_ecache).x) extern struct atomic_notifier_head nf_conntrack_chain; extern int nf_conntrack_register_notifier(struct notifier_block *nb); @@ -25,15 +23,16 @@ extern int nf_conntrack_unregister_notifier(struct notifier_block *nb); extern void nf_ct_deliver_cached_events(const struct nf_conn *ct); extern void __nf_ct_event_cache_init(struct nf_conn *ct); -extern void nf_ct_event_cache_flush(void); +extern void nf_ct_event_cache_flush(struct net *net); static inline void nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct) { + struct net *net = nf_ct_net(ct); struct nf_conntrack_ecache *ecache; local_bh_disable(); - ecache = &__get_cpu_var(nf_conntrack_ecache); + ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id()); if (ct != ecache->ct) __nf_ct_event_cache_init(ct); ecache->events |= event; @@ -58,6 +57,9 @@ nf_ct_expect_event(enum ip_conntrack_expect_events event, atomic_notifier_call_chain(&nf_ct_expect_chain, event, exp); } +extern int nf_conntrack_ecache_init(struct net *net); +extern void nf_conntrack_ecache_fini(struct net *net); + #else /* CONFIG_NF_CONNTRACK_EVENTS */ static inline void nf_conntrack_event_cache(enum ip_conntrack_events event, @@ -67,7 +69,15 @@ static inline void nf_conntrack_event(enum ip_conntrack_events event, static inline void nf_ct_deliver_cached_events(const struct nf_conn *ct) {} static inline void nf_ct_expect_event(enum ip_conntrack_expect_events event, struct nf_conntrack_expect *exp) {} -static inline void nf_ct_event_cache_flush(void) {} +static inline void nf_ct_event_cache_flush(struct net *net) {} + +static inline int nf_conntrack_ecache_init(struct net *net) +{ + return 0; + +static inline void nf_conntrack_ecache_fini(struct net *net) +{ +} #endif /* CONFIG_NF_CONNTRACK_EVENTS */ #endif /*_NF_CONNTRACK_ECACHE_H*/ diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 6ddf58e142a..9d5c1623c51 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -4,12 +4,17 @@ #include #include +struct nf_conntrack_ecache; + struct netns_ct { atomic_t count; unsigned int expect_count; struct hlist_head *hash; struct hlist_head *expect_hash; struct hlist_head unconfirmed; +#ifdef CONFIG_NF_CONNTRACK_EVENTS + struct nf_conntrack_ecache *ecache; +#endif int hash_vmalloc; int expect_vmalloc; }; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 01f59c57730..b55944e5e4e 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1023,7 +1023,8 @@ void nf_conntrack_cleanup(struct net *net) delete... */ synchronize_net(); - nf_ct_event_cache_flush(); + nf_ct_event_cache_flush(net); + nf_conntrack_ecache_fini(net); i_see_dead_people: nf_conntrack_flush(net); if (atomic_read(&net->ct.count) != 0) { @@ -1151,11 +1152,14 @@ int nf_conntrack_init(struct net *net) max_factor = 4; } atomic_set(&net->ct.count, 0); + ret = nf_conntrack_ecache_init(net); + if (ret < 0) + goto err_ecache; net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, &net->ct.hash_vmalloc); if (!net->ct.hash) { printk(KERN_ERR "Unable to create nf_conntrack_hash\n"); - goto err_out; + goto err_hash; } INIT_HLIST_HEAD(&net->ct.unconfirmed); @@ -1215,6 +1219,8 @@ err_free_conntrack_slab: err_free_hash: nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, nf_conntrack_htable_size); -err_out: +err_hash: + nf_conntrack_ecache_fini(net); +err_ecache: return -ENOMEM; } diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 83c41ac3505..a5f5e2e65d1 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -29,9 +29,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_chain); ATOMIC_NOTIFIER_HEAD(nf_ct_expect_chain); EXPORT_SYMBOL_GPL(nf_ct_expect_chain); -DEFINE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache); -EXPORT_PER_CPU_SYMBOL_GPL(nf_conntrack_ecache); - /* deliver cached events and clear cache entry - must be called with locally * disabled softirqs */ static inline void @@ -51,10 +48,11 @@ __nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache) * by code prior to async packet handling for freeing the skb */ void nf_ct_deliver_cached_events(const struct nf_conn *ct) { + struct net *net = nf_ct_net(ct); struct nf_conntrack_ecache *ecache; local_bh_disable(); - ecache = &__get_cpu_var(nf_conntrack_ecache); + ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id()); if (ecache->ct == ct) __nf_ct_deliver_cached_events(ecache); local_bh_enable(); @@ -64,10 +62,11 @@ EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events); /* Deliver cached events for old pending events, if current conntrack != old */ void __nf_ct_event_cache_init(struct nf_conn *ct) { + struct net *net = nf_ct_net(ct); struct nf_conntrack_ecache *ecache; /* take care of delivering potentially old events */ - ecache = &__get_cpu_var(nf_conntrack_ecache); + ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id()); BUG_ON(ecache->ct == ct); if (ecache->ct) __nf_ct_deliver_cached_events(ecache); @@ -79,18 +78,31 @@ EXPORT_SYMBOL_GPL(__nf_ct_event_cache_init); /* flush the event cache - touches other CPU's data and must not be called * while packets are still passing through the code */ -void nf_ct_event_cache_flush(void) +void nf_ct_event_cache_flush(struct net *net) { struct nf_conntrack_ecache *ecache; int cpu; for_each_possible_cpu(cpu) { - ecache = &per_cpu(nf_conntrack_ecache, cpu); + ecache = per_cpu_ptr(net->ct.ecache, cpu); if (ecache->ct) nf_ct_put(ecache->ct); } } +int nf_conntrack_ecache_init(struct net *net) +{ + net->ct.ecache = alloc_percpu(struct nf_conntrack_ecache); + if (!net->ct.ecache) + return -ENOMEM; + return 0; +} + +void nf_conntrack_ecache_fini(struct net *net) +{ + free_percpu(net->ct.ecache); +} + int nf_conntrack_register_notifier(struct notifier_block *nb) { return atomic_notifier_chain_register(&nf_conntrack_chain, nb); -- cgit v1.2.3-70-g09d2 From 0d55af8791bfb42e04cc456b348910582f230343 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:07 +0200 Subject: netfilter: netns nf_conntrack: per-netns statistics Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack.h | 8 ++-- include/net/netns/conntrack.h | 1 + .../netfilter/nf_conntrack_l3proto_ipv4_compat.c | 4 +- net/netfilter/nf_conntrack_core.c | 49 ++++++++++++---------- net/netfilter/nf_conntrack_expect.c | 4 +- net/netfilter/nf_conntrack_standalone.c | 4 +- 6 files changed, 38 insertions(+), 32 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index f5447f14304..c95561050f7 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -290,12 +290,12 @@ extern unsigned int nf_conntrack_htable_size; extern int nf_conntrack_checksum; extern int nf_conntrack_max; -DECLARE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat); -#define NF_CT_STAT_INC(count) (__get_cpu_var(nf_conntrack_stat).count++) -#define NF_CT_STAT_INC_ATOMIC(count) \ +#define NF_CT_STAT_INC(net, count) \ + (per_cpu_ptr((net)->ct.stat, raw_smp_processor_id())->count++) +#define NF_CT_STAT_INC_ATOMIC(net, count) \ do { \ local_bh_disable(); \ - __get_cpu_var(nf_conntrack_stat).count++; \ + per_cpu_ptr((net)->ct.stat, raw_smp_processor_id())->count++; \ local_bh_enable(); \ } while (0) diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 9d5c1623c51..fc0a46d64cc 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -12,6 +12,7 @@ struct netns_ct { struct hlist_head *hash; struct hlist_head *expect_hash; struct hlist_head unconfirmed; + struct ip_conntrack_stat *stat; #ifdef CONFIG_NF_CONNTRACK_EVENTS struct nf_conntrack_ecache *ecache; #endif diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index b2940836d10..fdc85b37078 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -294,7 +294,7 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) if (!cpu_possible(cpu)) continue; *pos = cpu+1; - return &per_cpu(nf_conntrack_stat, cpu); + return per_cpu_ptr(init_net.ct.stat, cpu); } return NULL; @@ -308,7 +308,7 @@ static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (!cpu_possible(cpu)) continue; *pos = cpu+1; - return &per_cpu(nf_conntrack_stat, cpu); + return per_cpu_ptr(init_net.ct.stat, cpu); } return NULL; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index b55944e5e4e..1e87fa0cd3a 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -56,9 +56,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_untracked); unsigned int nf_ct_log_invalid __read_mostly; static struct kmem_cache *nf_conntrack_cachep __read_mostly; -DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat); -EXPORT_PER_CPU_SYMBOL(nf_conntrack_stat); - static int nf_conntrack_hash_rnd_initted; static unsigned int nf_conntrack_hash_rnd; @@ -171,6 +168,7 @@ static void destroy_conntrack(struct nf_conntrack *nfct) { struct nf_conn *ct = (struct nf_conn *)nfct; + struct net *net = nf_ct_net(ct); struct nf_conntrack_l4proto *l4proto; pr_debug("destroy_conntrack(%p)\n", ct); @@ -203,7 +201,7 @@ destroy_conntrack(struct nf_conntrack *nfct) hlist_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode); } - NF_CT_STAT_INC(delete); + NF_CT_STAT_INC(net, delete); spin_unlock_bh(&nf_conntrack_lock); if (ct->master) @@ -216,6 +214,7 @@ destroy_conntrack(struct nf_conntrack *nfct) static void death_by_timeout(unsigned long ul_conntrack) { struct nf_conn *ct = (void *)ul_conntrack; + struct net *net = nf_ct_net(ct); struct nf_conn_help *help = nfct_help(ct); struct nf_conntrack_helper *helper; @@ -230,7 +229,7 @@ static void death_by_timeout(unsigned long ul_conntrack) spin_lock_bh(&nf_conntrack_lock); /* Inside lock so preempt is disabled on module removal path. * Otherwise we can get spurious warnings. */ - NF_CT_STAT_INC(delete_list); + NF_CT_STAT_INC(net, delete_list); clean_from_lists(ct); spin_unlock_bh(&nf_conntrack_lock); nf_ct_put(ct); @@ -249,11 +248,11 @@ __nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple) local_bh_disable(); hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnode) { if (nf_ct_tuple_equal(tuple, &h->tuple)) { - NF_CT_STAT_INC(found); + NF_CT_STAT_INC(net, found); local_bh_enable(); return h; } - NF_CT_STAT_INC(searched); + NF_CT_STAT_INC(net, searched); } local_bh_enable(); @@ -366,7 +365,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) add_timer(&ct->timeout); atomic_inc(&ct->ct_general.use); set_bit(IPS_CONFIRMED_BIT, &ct->status); - NF_CT_STAT_INC(insert); + NF_CT_STAT_INC(net, insert); spin_unlock_bh(&nf_conntrack_lock); help = nfct_help(ct); if (help && help->helper) @@ -381,7 +380,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) return NF_ACCEPT; out: - NF_CT_STAT_INC(insert_failed); + NF_CT_STAT_INC(net, insert_failed); spin_unlock_bh(&nf_conntrack_lock); return NF_DROP; } @@ -405,11 +404,11 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnode) { if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack && nf_ct_tuple_equal(tuple, &h->tuple)) { - NF_CT_STAT_INC(found); + NF_CT_STAT_INC(net, found); rcu_read_unlock_bh(); return 1; } - NF_CT_STAT_INC(searched); + NF_CT_STAT_INC(net, searched); } rcu_read_unlock_bh(); @@ -454,7 +453,7 @@ static noinline int early_drop(struct net *net, unsigned int hash) if (del_timer(&ct->timeout)) { death_by_timeout((unsigned long)ct); dropped = 1; - NF_CT_STAT_INC_ATOMIC(early_drop); + NF_CT_STAT_INC_ATOMIC(net, early_drop); } nf_ct_put(ct); return dropped; @@ -581,7 +580,7 @@ init_conntrack(struct net *net, ct->secmark = exp->master->secmark; #endif nf_conntrack_get(&ct->master->ct_general); - NF_CT_STAT_INC(expect_new); + NF_CT_STAT_INC(net, expect_new); } else { struct nf_conntrack_helper *helper; @@ -591,7 +590,7 @@ init_conntrack(struct net *net, if (help) rcu_assign_pointer(help->helper, helper); } - NF_CT_STAT_INC(new); + NF_CT_STAT_INC(net, new); } /* Overload tuple linked list to put us in unconfirmed list. */ @@ -683,7 +682,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, /* Previously seen (loopback or untracked)? Ignore. */ if (skb->nfct) { - NF_CT_STAT_INC_ATOMIC(ignore); + NF_CT_STAT_INC_ATOMIC(net, ignore); return NF_ACCEPT; } @@ -693,8 +692,8 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, &dataoff, &protonum); if (ret <= 0) { pr_debug("not prepared to track yet or error occured\n"); - NF_CT_STAT_INC_ATOMIC(error); - NF_CT_STAT_INC_ATOMIC(invalid); + NF_CT_STAT_INC_ATOMIC(net, error); + NF_CT_STAT_INC_ATOMIC(net, invalid); return -ret; } @@ -706,8 +705,8 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, if (l4proto->error != NULL) { ret = l4proto->error(net, skb, dataoff, &ctinfo, pf, hooknum); if (ret <= 0) { - NF_CT_STAT_INC_ATOMIC(error); - NF_CT_STAT_INC_ATOMIC(invalid); + NF_CT_STAT_INC_ATOMIC(net, error); + NF_CT_STAT_INC_ATOMIC(net, invalid); return -ret; } } @@ -716,13 +715,13 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, l3proto, l4proto, &set_reply, &ctinfo); if (!ct) { /* Not valid part of a connection */ - NF_CT_STAT_INC_ATOMIC(invalid); + NF_CT_STAT_INC_ATOMIC(net, invalid); return NF_ACCEPT; } if (IS_ERR(ct)) { /* Too stressed to deal. */ - NF_CT_STAT_INC_ATOMIC(drop); + NF_CT_STAT_INC_ATOMIC(net, drop); return NF_DROP; } @@ -735,7 +734,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, pr_debug("nf_conntrack_in: Can't track with proto module\n"); nf_conntrack_put(skb->nfct); skb->nfct = NULL; - NF_CT_STAT_INC_ATOMIC(invalid); + NF_CT_STAT_INC_ATOMIC(net, invalid); return -ret; } @@ -1043,6 +1042,7 @@ void nf_conntrack_cleanup(struct net *net) nf_conntrack_acct_fini(); nf_conntrack_expect_fini(net); + free_percpu(net->ct.stat); nf_conntrack_helper_fini(); nf_conntrack_proto_fini(); } @@ -1152,6 +1152,9 @@ int nf_conntrack_init(struct net *net) max_factor = 4; } atomic_set(&net->ct.count, 0); + net->ct.stat = alloc_percpu(struct ip_conntrack_stat); + if (!net->ct.stat) + goto err_stat; ret = nf_conntrack_ecache_init(net); if (ret < 0) goto err_ecache; @@ -1222,5 +1225,7 @@ err_free_hash: err_hash: nf_conntrack_ecache_fini(net); err_ecache: + free_percpu(net->ct.stat); +err_stat: return -ENOMEM; } diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 6a09200049e..b7f75117161 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -53,7 +53,7 @@ void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) master_help->expecting[exp->class]--; nf_ct_expect_put(exp); - NF_CT_STAT_INC(expect_delete); + NF_CT_STAT_INC(net, expect_delete); } EXPORT_SYMBOL_GPL(nf_ct_unlink_expect); @@ -326,7 +326,7 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) add_timer(&exp->timeout); atomic_inc(&exp->use); - NF_CT_STAT_INC(expect_create); + NF_CT_STAT_INC(net, expect_create); } /* Race with expectations being used means we could have none to find; OK. */ diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 02eaf872277..a4fdbbf363a 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -203,7 +203,7 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) if (!cpu_possible(cpu)) continue; *pos = cpu + 1; - return &per_cpu(nf_conntrack_stat, cpu); + return per_cpu_ptr(init_net.ct.stat, cpu); } return NULL; @@ -217,7 +217,7 @@ static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (!cpu_possible(cpu)) continue; *pos = cpu + 1; - return &per_cpu(nf_conntrack_stat, cpu); + return per_cpu_ptr(init_net.ct.stat, cpu); } return NULL; -- cgit v1.2.3-70-g09d2 From 802507071b72ed5025747126099cbc6d1542f596 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:08 +0200 Subject: netfilter: netns nf_conntrack: per-netns net.netfilter.nf_conntrack_count sysctl Note, sysctl table is always duplicated, this is simpler and less special-cased. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netns/conntrack.h | 4 ++ net/netfilter/nf_conntrack_standalone.c | 73 ++++++++++++++++++--------------- 2 files changed, 45 insertions(+), 32 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index fc0a46d64cc..2b50758df6a 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -4,6 +4,7 @@ #include #include +struct ctl_table_header; struct nf_conntrack_ecache; struct netns_ct { @@ -15,6 +16,9 @@ struct netns_ct { struct ip_conntrack_stat *stat; #ifdef CONFIG_NF_CONNTRACK_EVENTS struct nf_conntrack_ecache *ecache; +#endif +#ifdef CONFIG_SYSCTL + struct ctl_table_header *sysctl_header; #endif int hash_vmalloc; int expect_vmalloc; diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 169760ddc16..64b4f95b367 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -330,7 +330,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_checksum); static int log_invalid_proto_min = 0; static int log_invalid_proto_max = 255; -static struct ctl_table_header *nf_ct_sysctl_header; static struct ctl_table_header *nf_ct_netfilter_header; static ctl_table nf_ct_sysctl_table[] = { @@ -409,40 +408,58 @@ static struct ctl_path nf_ct_path[] = { EXPORT_SYMBOL_GPL(nf_ct_log_invalid); -static int nf_conntrack_standalone_init_sysctl(void) +static int nf_conntrack_standalone_init_sysctl(struct net *net) { - nf_ct_netfilter_header = - register_sysctl_paths(nf_ct_path, nf_ct_netfilter_table); - if (!nf_ct_netfilter_header) - goto out; - - nf_ct_sysctl_header = - register_sysctl_paths(nf_net_netfilter_sysctl_path, - nf_ct_sysctl_table); - if (!nf_ct_sysctl_header) + struct ctl_table *table; + + if (net_eq(net, &init_net)) { + nf_ct_netfilter_header = + register_sysctl_paths(nf_ct_path, nf_ct_netfilter_table); + if (!nf_ct_netfilter_header) + goto out; + } + + table = kmemdup(nf_ct_sysctl_table, sizeof(nf_ct_sysctl_table), + GFP_KERNEL); + if (!table) + goto out_kmemdup; + + table[1].data = &net->ct.count; + + net->ct.sysctl_header = register_net_sysctl_table(net, + nf_net_netfilter_sysctl_path, table); + if (!net->ct.sysctl_header) goto out_unregister_netfilter; return 0; out_unregister_netfilter: - unregister_sysctl_table(nf_ct_netfilter_header); + kfree(table); +out_kmemdup: + if (net_eq(net, &init_net)) + unregister_sysctl_table(nf_ct_netfilter_header); out: printk("nf_conntrack: can't register to sysctl.\n"); return -ENOMEM; } -static void nf_conntrack_standalone_fini_sysctl(void) +static void nf_conntrack_standalone_fini_sysctl(struct net *net) { - unregister_sysctl_table(nf_ct_netfilter_header); - unregister_sysctl_table(nf_ct_sysctl_header); + struct ctl_table *table; + + if (net_eq(net, &init_net)) + unregister_sysctl_table(nf_ct_netfilter_header); + table = net->ct.sysctl_header->ctl_table_arg; + unregister_net_sysctl_table(net->ct.sysctl_header); + kfree(table); } #else -static int nf_conntrack_standalone_init_sysctl(void) +static int nf_conntrack_standalone_init_sysctl(struct net *net) { return 0; } -static void nf_conntrack_standalone_fini_sysctl(void) +static void nf_conntrack_standalone_fini_sysctl(struct net *net) { } #endif /* CONFIG_SYSCTL */ @@ -457,8 +474,13 @@ static int nf_conntrack_net_init(struct net *net) ret = nf_conntrack_standalone_init_proc(net); if (ret < 0) goto out_proc; + ret = nf_conntrack_standalone_init_sysctl(net); + if (ret < 0) + goto out_sysctl; return 0; +out_sysctl: + nf_conntrack_standalone_fini_proc(net); out_proc: nf_conntrack_cleanup(net); out_init: @@ -467,6 +489,7 @@ out_init: static void nf_conntrack_net_exit(struct net *net) { + nf_conntrack_standalone_fini_sysctl(net); nf_conntrack_standalone_fini_proc(net); nf_conntrack_cleanup(net); } @@ -478,25 +501,11 @@ static struct pernet_operations nf_conntrack_net_ops = { static int __init nf_conntrack_standalone_init(void) { - int ret; - - ret = register_pernet_subsys(&nf_conntrack_net_ops); - if (ret < 0) - goto out; - ret = nf_conntrack_standalone_init_sysctl(); - if (ret < 0) - goto out_sysctl; - return 0; - -out_sysctl: - unregister_pernet_subsys(&nf_conntrack_net_ops); -out: - return ret; + return register_pernet_subsys(&nf_conntrack_net_ops); } static void __exit nf_conntrack_standalone_fini(void) { - nf_conntrack_standalone_fini_sysctl(); unregister_pernet_subsys(&nf_conntrack_net_ops); } -- cgit v1.2.3-70-g09d2 From c04d05529a6e0bf97183a2caf76a0c7f07f5b78c Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:08 +0200 Subject: netfilter: netns nf_conntrack: per-netns net.netfilter.nf_conntrack_checksum sysctl Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack.h | 1 - include/net/netns/conntrack.h | 1 + net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 2 +- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 2 +- net/netfilter/nf_conntrack_proto_dccp.c | 2 +- net/netfilter/nf_conntrack_proto_tcp.c | 2 +- net/netfilter/nf_conntrack_proto_udp.c | 2 +- net/netfilter/nf_conntrack_proto_udplite.c | 2 +- net/netfilter/nf_conntrack_standalone.c | 7 +++---- 10 files changed, 11 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index c95561050f7..b76a8685b5b 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -287,7 +287,6 @@ static inline int nf_ct_is_untracked(const struct sk_buff *skb) extern int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp); extern unsigned int nf_conntrack_htable_size; -extern int nf_conntrack_checksum; extern int nf_conntrack_max; #define NF_CT_STAT_INC(net, count) \ diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 2b50758df6a..38b6dae4d3d 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -17,6 +17,7 @@ struct netns_ct { #ifdef CONFIG_NF_CONNTRACK_EVENTS struct nf_conntrack_ecache *ecache; #endif + int sysctl_checksum; #ifdef CONFIG_SYSCTL struct ctl_table_header *sysctl_header; #endif diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 2e4dd3fb002..75871b1dd8a 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -270,7 +270,7 @@ static ctl_table ip_ct_sysctl_table[] = { { .ctl_name = NET_IPV4_NF_CONNTRACK_CHECKSUM, .procname = "ip_conntrack_checksum", - .data = &nf_conntrack_checksum, + .data = &init_net.ct.sysctl_checksum, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec, diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 205ba399d4a..ace66cbf921 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -188,7 +188,7 @@ icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, } /* See ip_conntrack_proto_tcp.c */ - if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING && + if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_ip_checksum(skb, hooknum, dataoff, 0)) { if (LOG_INVALID(IPPROTO_ICMP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index df04de91e6e..fa12e57749a 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -187,7 +187,7 @@ icmpv6_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, return -NF_ACCEPT; } - if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING && + if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) { nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, "nf_ct_icmpv6: ICMPv6 checksum failed\n"); diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 6ead8da3e9e..769680e68b5 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -575,7 +575,7 @@ static int dccp_error(struct net *net, struct sk_buff *skb, } } - if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING && + if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_DCCP, pf)) { msg = "nf_ct_dccp: bad checksum "; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index b5d62d66e02..131c9be4470 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -780,7 +780,7 @@ static int tcp_error(struct net *net, * because the checksum is assumed to be correct. */ /* FIXME: Source route IP option packets --RR */ - if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING && + if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) { if (LOG_INVALID(IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index e0ee89e179c..3d3fffe3f8b 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -123,7 +123,7 @@ static int udp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, * We skip checking packets on the outgoing path * because the checksum is assumed to be correct. * FIXME: Source route IP option packets --RR */ - if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING && + if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) { if (LOG_INVALID(IPPROTO_UDP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index c5b77c8f86c..3d1697c4f91 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -129,7 +129,7 @@ static int udplite_error(struct net *net, } /* Checksum invalid? Ignore. */ - if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING && + if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP, pf)) { if (LOG_INVALID(IPPROTO_UDPLITE)) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 64b4f95b367..5cd06637977 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -322,9 +322,6 @@ static void nf_conntrack_standalone_fini_proc(struct net *net) /* Sysctl support */ -int nf_conntrack_checksum __read_mostly = 1; -EXPORT_SYMBOL_GPL(nf_conntrack_checksum); - #ifdef CONFIG_SYSCTL /* Log invalid packets of a given protocol */ static int log_invalid_proto_min = 0; @@ -360,7 +357,7 @@ static ctl_table nf_ct_sysctl_table[] = { { .ctl_name = NET_NF_CONNTRACK_CHECKSUM, .procname = "nf_conntrack_checksum", - .data = &nf_conntrack_checksum, + .data = &init_net.ct.sysctl_checksum, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec, @@ -425,6 +422,7 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) goto out_kmemdup; table[1].data = &net->ct.count; + table[3].data = &net->ct.sysctl_checksum; net->ct.sysctl_header = register_net_sysctl_table(net, nf_net_netfilter_sysctl_path, table); @@ -474,6 +472,7 @@ static int nf_conntrack_net_init(struct net *net) ret = nf_conntrack_standalone_init_proc(net); if (ret < 0) goto out_proc; + net->ct.sysctl_checksum = 1; ret = nf_conntrack_standalone_init_sysctl(net); if (ret < 0) goto out_sysctl; -- cgit v1.2.3-70-g09d2 From c2a2c7e0cc39e7f9336cd67e8307a110bdba82f3 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:08 +0200 Subject: netfilter: netns nf_conntrack: per-netns net.netfilter.nf_conntrack_log_invalid sysctl Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_l4proto.h | 15 +++++++-------- include/net/netns/conntrack.h | 1 + net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 6 +++--- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 2 +- net/netfilter/nf_conntrack_core.c | 1 - net/netfilter/nf_conntrack_proto_dccp.c | 10 ++++++---- net/netfilter/nf_conntrack_proto_tcp.c | 18 ++++++++++-------- net/netfilter/nf_conntrack_proto_udp.c | 6 +++--- net/netfilter/nf_conntrack_proto_udplite.c | 8 ++++---- net/netfilter/nf_conntrack_standalone.c | 6 +++--- 11 files changed, 39 insertions(+), 36 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 97723d33c95..7f2f43c7728 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -117,20 +117,19 @@ extern int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[], struct nf_conntrack_tuple *t); extern const struct nla_policy nf_ct_port_nla_policy[]; -/* Log invalid packets */ -extern unsigned int nf_ct_log_invalid; - #ifdef CONFIG_SYSCTL #ifdef DEBUG_INVALID_PACKETS -#define LOG_INVALID(proto) \ - (nf_ct_log_invalid == (proto) || nf_ct_log_invalid == IPPROTO_RAW) +#define LOG_INVALID(net, proto) \ + ((net)->ct.sysctl_log_invalid == (proto) || \ + (net)->ct.sysctl_log_invalid == IPPROTO_RAW) #else -#define LOG_INVALID(proto) \ - ((nf_ct_log_invalid == (proto) || nf_ct_log_invalid == IPPROTO_RAW) \ +#define LOG_INVALID(net, proto) \ + (((net)->ct.sysctl_log_invalid == (proto) || \ + (net)->ct.sysctl_log_invalid == IPPROTO_RAW) \ && net_ratelimit()) #endif #else -#define LOG_INVALID(proto) 0 +#define LOG_INVALID(net, proto) 0 #endif /* CONFIG_SYSCTL */ #endif /*_NF_CONNTRACK_PROTOCOL_H*/ diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 38b6dae4d3d..503e37551b1 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -18,6 +18,7 @@ struct netns_ct { struct nf_conntrack_ecache *ecache; #endif int sysctl_checksum; + unsigned int sysctl_log_invalid; /* Log invalid packets */ #ifdef CONFIG_SYSCTL struct ctl_table_header *sysctl_header; #endif diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 75871b1dd8a..af69acc1d0f 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -278,7 +278,7 @@ static ctl_table ip_ct_sysctl_table[] = { { .ctl_name = NET_IPV4_NF_CONNTRACK_LOG_INVALID, .procname = "ip_conntrack_log_invalid", - .data = &nf_ct_log_invalid, + .data = &init_net.ct.sysctl_log_invalid, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec_minmax, diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index ace66cbf921..4e887922022 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -181,7 +181,7 @@ icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, /* Not enough header? */ icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih); if (icmph == NULL) { - if (LOG_INVALID(IPPROTO_ICMP)) + if (LOG_INVALID(net, IPPROTO_ICMP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "nf_ct_icmp: short packet "); return -NF_ACCEPT; @@ -190,7 +190,7 @@ icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, /* See ip_conntrack_proto_tcp.c */ if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_ip_checksum(skb, hooknum, dataoff, 0)) { - if (LOG_INVALID(IPPROTO_ICMP)) + if (LOG_INVALID(net, IPPROTO_ICMP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "nf_ct_icmp: bad HW ICMP checksum "); return -NF_ACCEPT; @@ -203,7 +203,7 @@ icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, * discarded. */ if (icmph->type > NR_ICMP_TYPES) { - if (LOG_INVALID(IPPROTO_ICMP)) + if (LOG_INVALID(net, IPPROTO_ICMP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "nf_ct_icmp: invalid ICMP type "); return -NF_ACCEPT; diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index fa12e57749a..05726177903 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -181,7 +181,7 @@ icmpv6_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih); if (icmp6h == NULL) { - if (LOG_INVALID(IPPROTO_ICMPV6)) + if (LOG_INVALID(net, IPPROTO_ICMPV6)) nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, "nf_ct_icmpv6: short packet "); return -NF_ACCEPT; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 1e87fa0cd3a..ade0bb3ab2e 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -53,7 +53,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_max); struct nf_conn nf_conntrack_untracked __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_untracked); -unsigned int nf_ct_log_invalid __read_mostly; static struct kmem_cache *nf_conntrack_cachep __read_mostly; static int nf_conntrack_hash_rnd_initted; diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 769680e68b5..8fcf1762fab 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -418,6 +418,7 @@ static bool dccp_invert_tuple(struct nf_conntrack_tuple *inv, static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff) { + struct net *net = nf_ct_net(ct); struct dccp_hdr _dh, *dh; const char *msg; u_int8_t state; @@ -445,7 +446,7 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, return true; out_invalid: - if (LOG_INVALID(IPPROTO_DCCP)) + if (LOG_INVALID(net, IPPROTO_DCCP)) nf_log_packet(nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, msg); return false; } @@ -463,6 +464,7 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, u_int8_t pf, unsigned int hooknum) { + struct net *net = nf_ct_net(ct); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); struct dccp_hdr _dh, *dh; u_int8_t type, old_state, new_state; @@ -524,13 +526,13 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, ct->proto.dccp.last_pkt = type; write_unlock_bh(&dccp_lock); - if (LOG_INVALID(IPPROTO_DCCP)) + if (LOG_INVALID(net, IPPROTO_DCCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_dccp: invalid packet ignored "); return NF_ACCEPT; case CT_DCCP_INVALID: write_unlock_bh(&dccp_lock); - if (LOG_INVALID(IPPROTO_DCCP)) + if (LOG_INVALID(net, IPPROTO_DCCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_dccp: invalid state transition "); return -NF_ACCEPT; @@ -590,7 +592,7 @@ static int dccp_error(struct net *net, struct sk_buff *skb, return NF_ACCEPT; out_invalid: - if (LOG_INVALID(IPPROTO_DCCP)) + if (LOG_INVALID(net, IPPROTO_DCCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, msg); return -NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 131c9be4470..f947ec41e39 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -488,6 +488,7 @@ static bool tcp_in_window(const struct nf_conn *ct, const struct tcphdr *tcph, u_int8_t pf) { + struct net *net = nf_ct_net(ct); struct ip_ct_tcp_state *sender = &state->seen[dir]; struct ip_ct_tcp_state *receiver = &state->seen[!dir]; const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple; @@ -668,7 +669,7 @@ static bool tcp_in_window(const struct nf_conn *ct, if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL || nf_ct_tcp_be_liberal) res = true; - if (!res && LOG_INVALID(IPPROTO_TCP)) + if (!res && LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: %s ", before(seq, sender->td_maxend + 1) ? @@ -761,7 +762,7 @@ static int tcp_error(struct net *net, /* Smaller that minimal TCP header? */ th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); if (th == NULL) { - if (LOG_INVALID(IPPROTO_TCP)) + if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: short packet "); return -NF_ACCEPT; @@ -769,7 +770,7 @@ static int tcp_error(struct net *net, /* Not whole TCP header or malformed packet */ if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) { - if (LOG_INVALID(IPPROTO_TCP)) + if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: truncated/malformed packet "); return -NF_ACCEPT; @@ -782,7 +783,7 @@ static int tcp_error(struct net *net, /* FIXME: Source route IP option packets --RR */ if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) { - if (LOG_INVALID(IPPROTO_TCP)) + if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: bad TCP checksum "); return -NF_ACCEPT; @@ -791,7 +792,7 @@ static int tcp_error(struct net *net, /* Check TCP flags. */ tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR|TH_PUSH)); if (!tcp_valid_flags[tcpflags]) { - if (LOG_INVALID(IPPROTO_TCP)) + if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid TCP flag combination "); return -NF_ACCEPT; @@ -808,6 +809,7 @@ static int tcp_packet(struct nf_conn *ct, u_int8_t pf, unsigned int hooknum) { + struct net *net = nf_ct_net(ct); struct nf_conntrack_tuple *tuple; enum tcp_conntrack new_state, old_state; enum ip_conntrack_dir dir; @@ -886,7 +888,7 @@ static int tcp_packet(struct nf_conn *ct, * thus initiate a clean new session. */ write_unlock_bh(&tcp_lock); - if (LOG_INVALID(IPPROTO_TCP)) + if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: killing out of sync session "); nf_ct_kill(ct); @@ -899,7 +901,7 @@ static int tcp_packet(struct nf_conn *ct, segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th); write_unlock_bh(&tcp_lock); - if (LOG_INVALID(IPPROTO_TCP)) + if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid packet ignored "); return NF_ACCEPT; @@ -908,7 +910,7 @@ static int tcp_packet(struct nf_conn *ct, pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", dir, get_conntrack_index(th), old_state); write_unlock_bh(&tcp_lock); - if (LOG_INVALID(IPPROTO_TCP)) + if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid state "); return -NF_ACCEPT; diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 3d3fffe3f8b..7c2ca48698b 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -101,7 +101,7 @@ static int udp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, /* Header is too small? */ hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); if (hdr == NULL) { - if (LOG_INVALID(IPPROTO_UDP)) + if (LOG_INVALID(net, IPPROTO_UDP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udp: short packet "); return -NF_ACCEPT; @@ -109,7 +109,7 @@ static int udp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, /* Truncated/malformed packets */ if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) { - if (LOG_INVALID(IPPROTO_UDP)) + if (LOG_INVALID(net, IPPROTO_UDP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udp: truncated/malformed packet "); return -NF_ACCEPT; @@ -125,7 +125,7 @@ static int udp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, * FIXME: Source route IP option packets --RR */ if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) { - if (LOG_INVALID(IPPROTO_UDP)) + if (LOG_INVALID(net, IPPROTO_UDP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udp: bad UDP checksum "); return -NF_ACCEPT; diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 3d1697c4f91..d22d839e4f9 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -104,7 +104,7 @@ static int udplite_error(struct net *net, /* Header is too small? */ hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); if (hdr == NULL) { - if (LOG_INVALID(IPPROTO_UDPLITE)) + if (LOG_INVALID(net, IPPROTO_UDPLITE)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udplite: short packet "); return -NF_ACCEPT; @@ -114,7 +114,7 @@ static int udplite_error(struct net *net, if (cscov == 0) cscov = udplen; else if (cscov < sizeof(*hdr) || cscov > udplen) { - if (LOG_INVALID(IPPROTO_UDPLITE)) + if (LOG_INVALID(net, IPPROTO_UDPLITE)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udplite: invalid checksum coverage "); return -NF_ACCEPT; @@ -122,7 +122,7 @@ static int udplite_error(struct net *net, /* UDPLITE mandates checksums */ if (!hdr->check) { - if (LOG_INVALID(IPPROTO_UDPLITE)) + if (LOG_INVALID(net, IPPROTO_UDPLITE)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udplite: checksum missing "); return -NF_ACCEPT; @@ -132,7 +132,7 @@ static int udplite_error(struct net *net, if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP, pf)) { - if (LOG_INVALID(IPPROTO_UDPLITE)) + if (LOG_INVALID(net, IPPROTO_UDPLITE)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udplite: bad UDPLite checksum "); return -NF_ACCEPT; diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 5cd06637977..98106d4e89f 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -365,7 +365,7 @@ static ctl_table nf_ct_sysctl_table[] = { { .ctl_name = NET_NF_CONNTRACK_LOG_INVALID, .procname = "nf_conntrack_log_invalid", - .data = &nf_ct_log_invalid, + .data = &init_net.ct.sysctl_log_invalid, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec_minmax, @@ -403,8 +403,6 @@ static struct ctl_path nf_ct_path[] = { { } }; -EXPORT_SYMBOL_GPL(nf_ct_log_invalid); - static int nf_conntrack_standalone_init_sysctl(struct net *net) { struct ctl_table *table; @@ -423,6 +421,7 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) table[1].data = &net->ct.count; table[3].data = &net->ct.sysctl_checksum; + table[4].data = &net->ct.sysctl_log_invalid; net->ct.sysctl_header = register_net_sysctl_table(net, nf_net_netfilter_sysctl_path, table); @@ -473,6 +472,7 @@ static int nf_conntrack_net_init(struct net *net) if (ret < 0) goto out_proc; net->ct.sysctl_checksum = 1; + net->ct.sysctl_log_invalid = 0; ret = nf_conntrack_standalone_init_sysctl(net); if (ret < 0) goto out_sysctl; -- cgit v1.2.3-70-g09d2 From d716a4dfbbdf0d4731d596a96e5f4b0d892ac168 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:09 +0200 Subject: netfilter: netns nf_conntrack: per-netns conntrack accounting Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_acct.h | 10 +-- include/net/netns/conntrack.h | 2 + net/netfilter/nf_conntrack_acct.c | 100 +++++++++++++++++++++--------- net/netfilter/nf_conntrack_core.c | 4 +- 4 files changed, 81 insertions(+), 35 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h index 5d5ae55d54c..03e218f0be4 100644 --- a/include/net/netfilter/nf_conntrack_acct.h +++ b/include/net/netfilter/nf_conntrack_acct.h @@ -8,6 +8,7 @@ #ifndef _NF_CONNTRACK_ACCT_H #define _NF_CONNTRACK_ACCT_H +#include #include #include #include @@ -18,8 +19,6 @@ struct nf_conn_counter { u_int64_t bytes; }; -extern int nf_ct_acct; - static inline struct nf_conn_counter *nf_conn_acct_find(const struct nf_conn *ct) { @@ -29,9 +28,10 @@ struct nf_conn_counter *nf_conn_acct_find(const struct nf_conn *ct) static inline struct nf_conn_counter *nf_ct_acct_ext_add(struct nf_conn *ct, gfp_t gfp) { + struct net *net = nf_ct_net(ct); struct nf_conn_counter *acct; - if (!nf_ct_acct) + if (!net->ct.sysctl_acct) return NULL; acct = nf_ct_ext_add(ct, NF_CT_EXT_ACCT, gfp); @@ -45,7 +45,7 @@ struct nf_conn_counter *nf_ct_acct_ext_add(struct nf_conn *ct, gfp_t gfp) extern unsigned int seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir); -extern int nf_conntrack_acct_init(void); -extern void nf_conntrack_acct_fini(void); +extern int nf_conntrack_acct_init(struct net *net); +extern void nf_conntrack_acct_fini(struct net *net); #endif /* _NF_CONNTRACK_ACCT_H */ diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 503e37551b1..f4498a62881 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -17,10 +17,12 @@ struct netns_ct { #ifdef CONFIG_NF_CONNTRACK_EVENTS struct nf_conntrack_ecache *ecache; #endif + int sysctl_acct; int sysctl_checksum; unsigned int sysctl_log_invalid; /* Log invalid packets */ #ifdef CONFIG_SYSCTL struct ctl_table_header *sysctl_header; + struct ctl_table_header *acct_sysctl_header; #endif int hash_vmalloc; int expect_vmalloc; diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c index 59bd8b903a1..03591d37b9c 100644 --- a/net/netfilter/nf_conntrack_acct.c +++ b/net/netfilter/nf_conntrack_acct.c @@ -22,19 +22,17 @@ #define NF_CT_ACCT_DEFAULT 0 #endif -int nf_ct_acct __read_mostly = NF_CT_ACCT_DEFAULT; -EXPORT_SYMBOL_GPL(nf_ct_acct); +static int nf_ct_acct __read_mostly = NF_CT_ACCT_DEFAULT; module_param_named(acct, nf_ct_acct, bool, 0644); MODULE_PARM_DESC(acct, "Enable connection tracking flow accounting."); #ifdef CONFIG_SYSCTL -static struct ctl_table_header *acct_sysctl_header; static struct ctl_table acct_sysctl_table[] = { { .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_acct", - .data = &nf_ct_acct, + .data = &init_net.ct.sysctl_acct, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec, @@ -64,41 +62,87 @@ static struct nf_ct_ext_type acct_extend __read_mostly = { .id = NF_CT_EXT_ACCT, }; -int nf_conntrack_acct_init(void) +#ifdef CONFIG_SYSCTL +static int nf_conntrack_acct_init_sysctl(struct net *net) { - int ret; + struct ctl_table *table; -#ifdef CONFIG_NF_CT_ACCT - printk(KERN_WARNING "CONFIG_NF_CT_ACCT is deprecated and will be removed soon. Plase use\n"); - printk(KERN_WARNING "nf_conntrack.acct=1 kernel paramater, acct=1 nf_conntrack module option or\n"); - printk(KERN_WARNING "sysctl net.netfilter.nf_conntrack_acct=1 to enable it.\n"); -#endif + table = kmemdup(acct_sysctl_table, sizeof(acct_sysctl_table), + GFP_KERNEL); + if (!table) + goto out; + + table[0].data = &net->ct.sysctl_acct; - ret = nf_ct_extend_register(&acct_extend); - if (ret < 0) { - printk(KERN_ERR "nf_conntrack_acct: Unable to register extension\n"); - return ret; + net->ct.acct_sysctl_header = register_net_sysctl_table(net, + nf_net_netfilter_sysctl_path, table); + if (!net->ct.acct_sysctl_header) { + printk(KERN_ERR "nf_conntrack_acct: can't register to sysctl.\n"); + goto out_register; } + return 0; -#ifdef CONFIG_SYSCTL - acct_sysctl_header = register_sysctl_paths(nf_net_netfilter_sysctl_path, - acct_sysctl_table); +out_register: + kfree(table); +out: + return -ENOMEM; +} - if (!acct_sysctl_header) { - nf_ct_extend_unregister(&acct_extend); +static void nf_conntrack_acct_fini_sysctl(struct net *net) +{ + struct ctl_table *table; - printk(KERN_ERR "nf_conntrack_acct: can't register to sysctl.\n"); - return -ENOMEM; - } + table = net->ct.acct_sysctl_header->ctl_table_arg; + unregister_net_sysctl_table(net->ct.acct_sysctl_header); + kfree(table); +} +#else +static int nf_conntrack_acct_init_sysctl(struct net *net) +{ + return 0; +} + +static void nf_conntrack_acct_fini_sysctl(struct net *net) +{ +} +#endif + +int nf_conntrack_acct_init(struct net *net) +{ + int ret; + + net->ct.sysctl_acct = nf_ct_acct; + + if (net_eq(net, &init_net)) { +#ifdef CONFIG_NF_CT_ACCT + printk(KERN_WARNING "CONFIG_NF_CT_ACCT is deprecated and will be removed soon. Plase use\n"); + printk(KERN_WARNING "nf_conntrack.acct=1 kernel paramater, acct=1 nf_conntrack module option or\n"); + printk(KERN_WARNING "sysctl net.netfilter.nf_conntrack_acct=1 to enable it.\n"); #endif + ret = nf_ct_extend_register(&acct_extend); + if (ret < 0) { + printk(KERN_ERR "nf_conntrack_acct: Unable to register extension\n"); + goto out_extend_register; + } + } + + ret = nf_conntrack_acct_init_sysctl(net); + if (ret < 0) + goto out_sysctl; + return 0; + +out_sysctl: + if (net_eq(net, &init_net)) + nf_ct_extend_unregister(&acct_extend); +out_extend_register: + return ret; } -void nf_conntrack_acct_fini(void) +void nf_conntrack_acct_fini(struct net *net) { -#ifdef CONFIG_SYSCTL - unregister_sysctl_table(acct_sysctl_header); -#endif - nf_ct_extend_unregister(&acct_extend); + nf_conntrack_acct_fini_sysctl(net); + if (net_eq(net, &init_net)) + nf_ct_extend_unregister(&acct_extend); } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index ade0bb3ab2e..bb26460d897 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1039,7 +1039,7 @@ void nf_conntrack_cleanup(struct net *net) nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, nf_conntrack_htable_size); - nf_conntrack_acct_fini(); + nf_conntrack_acct_fini(net); nf_conntrack_expect_fini(net); free_percpu(net->ct.stat); nf_conntrack_helper_fini(); @@ -1191,7 +1191,7 @@ int nf_conntrack_init(struct net *net) if (ret < 0) goto out_fini_expect; - ret = nf_conntrack_acct_init(); + ret = nf_conntrack_acct_init(net); if (ret < 0) goto out_fini_helper; -- cgit v1.2.3-70-g09d2 From e099a173573ce1ba171092aee7bb3c72ea686e59 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:10 +0200 Subject: netfilter: netns nat: per-netns NAT table Same story as with iptable_filter, iptables_raw tables. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netns/ipv4.h | 1 + net/ipv4/netfilter/nf_nat_rule.c | 40 +++++++++++++++++++++++++++++----------- 2 files changed, 30 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index a6ed83853dc..b286b840493 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -38,6 +38,7 @@ struct netns_ipv4 { struct xt_table *iptable_raw; struct xt_table *arptable_filter; struct xt_table *iptable_security; + struct xt_table *nat_table; #endif int sysctl_icmp_echo_ignore_all; diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index e8b4d0d4439..0a02a8caf3b 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -33,7 +33,7 @@ static struct struct ipt_replace repl; struct ipt_standard entries[3]; struct ipt_error term; -} nat_initial_table __initdata = { +} nat_initial_table __net_initdata = { .repl = { .name = "nat", .valid_hooks = NAT_VALID_HOOKS, @@ -58,14 +58,13 @@ static struct .term = IPT_ERROR_INIT, /* ERROR */ }; -static struct xt_table __nat_table = { +static struct xt_table nat_table = { .name = "nat", .valid_hooks = NAT_VALID_HOOKS, .lock = __RW_LOCK_UNLOCKED(__nat_table.lock), .me = THIS_MODULE, .af = AF_INET, }; -static struct xt_table *nat_table; /* Source NAT */ static unsigned int ipt_snat_target(struct sk_buff *skb, @@ -194,9 +193,10 @@ int nf_nat_rule_find(struct sk_buff *skb, const struct net_device *out, struct nf_conn *ct) { + struct net *net = nf_ct_net(ct); int ret; - ret = ipt_do_table(skb, hooknum, in, out, nat_table); + ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table); if (ret == NF_ACCEPT) { if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) @@ -226,14 +226,32 @@ static struct xt_target ipt_dnat_reg __read_mostly = { .family = AF_INET, }; +static int __net_init nf_nat_rule_net_init(struct net *net) +{ + net->ipv4.nat_table = ipt_register_table(net, &nat_table, + &nat_initial_table.repl); + if (IS_ERR(net->ipv4.nat_table)) + return PTR_ERR(net->ipv4.nat_table); + return 0; +} + +static void __net_exit nf_nat_rule_net_exit(struct net *net) +{ + ipt_unregister_table(net->ipv4.nat_table); +} + +static struct pernet_operations nf_nat_rule_net_ops = { + .init = nf_nat_rule_net_init, + .exit = nf_nat_rule_net_exit, +}; + int __init nf_nat_rule_init(void) { int ret; - nat_table = ipt_register_table(&init_net, &__nat_table, - &nat_initial_table.repl); - if (IS_ERR(nat_table)) - return PTR_ERR(nat_table); + ret = register_pernet_subsys(&nf_nat_rule_net_ops); + if (ret != 0) + goto out; ret = xt_register_target(&ipt_snat_reg); if (ret != 0) goto unregister_table; @@ -247,8 +265,8 @@ int __init nf_nat_rule_init(void) unregister_snat: xt_unregister_target(&ipt_snat_reg); unregister_table: - ipt_unregister_table(nat_table); - + unregister_pernet_subsys(&nf_nat_rule_net_ops); + out: return ret; } @@ -256,5 +274,5 @@ void nf_nat_rule_cleanup(void) { xt_unregister_target(&ipt_dnat_reg); xt_unregister_target(&ipt_snat_reg); - ipt_unregister_table(nat_table); + unregister_pernet_subsys(&nf_nat_rule_net_ops); } -- cgit v1.2.3-70-g09d2 From 0c4c9288ada0e6642d511ef872f10a4781a896ff Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:11 +0200 Subject: netfilter: netns nat: per-netns bysource hash Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netns/ipv4.h | 2 ++ net/ipv4/netfilter/nf_nat_core.c | 72 +++++++++++++++++++++++++--------------- 2 files changed, 47 insertions(+), 27 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index b286b840493..ece1c926b5d 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -39,6 +39,8 @@ struct netns_ipv4 { struct xt_table *arptable_filter; struct xt_table *iptable_security; struct xt_table *nat_table; + struct hlist_head *nat_bysource; + int nat_vmalloced; #endif int sysctl_icmp_echo_ignore_all; diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 5d4a5b70da2..2ac9eaf1a8c 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -37,9 +37,6 @@ static struct nf_conntrack_l3proto *l3proto __read_mostly; /* Calculated at init based on memory size */ static unsigned int nf_nat_htable_size __read_mostly; -static int nf_nat_vmalloced; - -static struct hlist_head *bysource __read_mostly; #define MAX_IP_NAT_PROTO 256 static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO] @@ -145,7 +142,8 @@ same_src(const struct nf_conn *ct, /* Only called for SRC manip */ static int -find_appropriate_src(const struct nf_conntrack_tuple *tuple, +find_appropriate_src(struct net *net, + const struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *result, const struct nf_nat_range *range) { @@ -155,7 +153,7 @@ find_appropriate_src(const struct nf_conntrack_tuple *tuple, const struct hlist_node *n; rcu_read_lock(); - hlist_for_each_entry_rcu(nat, n, &bysource[h], bysource) { + hlist_for_each_entry_rcu(nat, n, &net->ipv4.nat_bysource[h], bysource) { ct = nat->ct; if (same_src(ct, tuple)) { /* Copy source part from reply tuple. */ @@ -231,6 +229,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, struct nf_conn *ct, enum nf_nat_manip_type maniptype) { + struct net *net = nf_ct_net(ct); const struct nf_nat_protocol *proto; /* 1) If this srcip/proto/src-proto-part is currently mapped, @@ -242,7 +241,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, manips not an issue. */ if (maniptype == IP_NAT_MANIP_SRC && !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) { - if (find_appropriate_src(orig_tuple, tuple, range)) { + if (find_appropriate_src(net, orig_tuple, tuple, range)) { pr_debug("get_unique_tuple: Found current src map\n"); if (!nf_nat_used_tuple(tuple, ct)) return; @@ -283,6 +282,7 @@ nf_nat_setup_info(struct nf_conn *ct, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype) { + struct net *net = nf_ct_net(ct); struct nf_conntrack_tuple curr_tuple, new_tuple; struct nf_conn_nat *nat; int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK); @@ -334,7 +334,8 @@ nf_nat_setup_info(struct nf_conn *ct, /* nf_conntrack_alter_reply might re-allocate exntension aera */ nat = nfct_nat(ct); nat->ct = ct; - hlist_add_head_rcu(&nat->bysource, &bysource[srchash]); + hlist_add_head_rcu(&nat->bysource, + &net->ipv4.nat_bysource[srchash]); spin_unlock_bh(&nf_nat_lock); } @@ -583,6 +584,40 @@ static struct nf_ct_ext_type nat_extend __read_mostly = { .flags = NF_CT_EXT_F_PREALLOC, }; +static int __net_init nf_nat_net_init(struct net *net) +{ + net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, + &net->ipv4.nat_vmalloced); + if (!net->ipv4.nat_bysource) + return -ENOMEM; + return 0; +} + +/* Clear NAT section of all conntracks, in case we're loaded again. */ +static int clean_nat(struct nf_conn *i, void *data) +{ + struct nf_conn_nat *nat = nfct_nat(i); + + if (!nat) + return 0; + memset(nat, 0, sizeof(*nat)); + i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST); + return 0; +} + +static void __net_exit nf_nat_net_exit(struct net *net) +{ + nf_ct_iterate_cleanup(net, &clean_nat, NULL); + synchronize_rcu(); + nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced, + nf_nat_htable_size); +} + +static struct pernet_operations nf_nat_net_ops = { + .init = nf_nat_net_init, + .exit = nf_nat_net_exit, +}; + static int __init nf_nat_init(void) { size_t i; @@ -599,12 +634,9 @@ static int __init nf_nat_init(void) /* Leave them the same for the moment. */ nf_nat_htable_size = nf_conntrack_htable_size; - bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, - &nf_nat_vmalloced); - if (!bysource) { - ret = -ENOMEM; + ret = register_pernet_subsys(&nf_nat_net_ops); + if (ret < 0) goto cleanup_extend; - } /* Sew in builtin protocols. */ spin_lock_bh(&nf_nat_lock); @@ -629,23 +661,9 @@ static int __init nf_nat_init(void) return ret; } -/* Clear NAT section of all conntracks, in case we're loaded again. */ -static int clean_nat(struct nf_conn *i, void *data) -{ - struct nf_conn_nat *nat = nfct_nat(i); - - if (!nat) - return 0; - memset(nat, 0, sizeof(*nat)); - i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST); - return 0; -} - static void __exit nf_nat_cleanup(void) { - nf_ct_iterate_cleanup(&init_net, &clean_nat, NULL); - synchronize_rcu(); - nf_ct_free_hashtable(bysource, nf_nat_vmalloced, nf_nat_htable_size); + unregister_pernet_subsys(&nf_nat_net_ops); nf_ct_l3proto_put(l3proto); nf_ct_extend_unregister(&nat_extend); rcu_assign_pointer(nf_nat_seq_adjust_hook, NULL); -- cgit v1.2.3-70-g09d2 From 73e4022f78acdbe420e8c24a7afbd90f4c8f5077 Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Wed, 8 Oct 2008 11:35:12 +0200 Subject: netfilter: split netfilter IPv4 defragmentation into a separate module Netfilter connection tracking requires all IPv4 packets to be defragmented. Both the socket match and the TPROXY target depend on this functionality, so this patch separates the Netfilter IPv4 defrag hooks into a separate module. Signed-off-by: KOVACS Krisztian Signed-off-by: Patrick McHardy --- include/net/netfilter/ipv4/nf_defrag_ipv4.h | 6 ++ net/ipv4/netfilter/Kconfig | 5 ++ net/ipv4/netfilter/Makefile | 3 + net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 56 +-------------- net/ipv4/netfilter/nf_defrag_ipv4.c | 96 ++++++++++++++++++++++++++ 5 files changed, 113 insertions(+), 53 deletions(-) create mode 100644 include/net/netfilter/ipv4/nf_defrag_ipv4.h create mode 100644 net/ipv4/netfilter/nf_defrag_ipv4.c (limited to 'include/net') diff --git a/include/net/netfilter/ipv4/nf_defrag_ipv4.h b/include/net/netfilter/ipv4/nf_defrag_ipv4.h new file mode 100644 index 00000000000..6b00ea38546 --- /dev/null +++ b/include/net/netfilter/ipv4/nf_defrag_ipv4.h @@ -0,0 +1,6 @@ +#ifndef _NF_DEFRAG_IPV4_H +#define _NF_DEFRAG_IPV4_H + +extern void nf_defrag_ipv4_enable(void); + +#endif /* _NF_DEFRAG_IPV4_H */ diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 4e842d56642..07757ac8d5d 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -5,10 +5,15 @@ menu "IP: Netfilter Configuration" depends on INET && NETFILTER +config NF_DEFRAG_IPV4 + tristate + default n + config NF_CONNTRACK_IPV4 tristate "IPv4 connection tracking support (required for NAT)" depends on NF_CONNTRACK default m if NETFILTER_ADVANCED=n + select NF_DEFRAG_IPV4 ---help--- Connection tracking keeps a record of what packets have passed through your machine, in order to figure out how they are related diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 1107edbe478..5f9b650d90f 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -18,6 +18,9 @@ obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o obj-$(CONFIG_NF_NAT) += nf_nat.o +# defrag +obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o + # NAT helpers (nf_conntrack) obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index af69acc1d0f..4a7c3527539 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -1,3 +1,4 @@ + /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team * @@ -24,6 +25,7 @@ #include #include #include +#include int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb, struct nf_conn *ct, @@ -63,23 +65,6 @@ static int ipv4_print_tuple(struct seq_file *s, NIPQUAD(tuple->dst.u3.ip)); } -/* Returns new sk_buff, or NULL */ -static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) -{ - int err; - - skb_orphan(skb); - - local_bh_disable(); - err = ip_defrag(skb, user); - local_bh_enable(); - - if (!err) - ip_send_check(ip_hdr(skb)); - - return err; -} - static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, unsigned int *dataoff, u_int8_t *protonum) { @@ -144,28 +129,6 @@ out: return nf_conntrack_confirm(skb); } -static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - /* Previously seen (loopback)? Ignore. Do this before - fragment check. */ - if (skb->nfct) - return NF_ACCEPT; - - /* Gather fragments. */ - if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { - if (nf_ct_ipv4_gather_frags(skb, - hooknum == NF_INET_PRE_ROUTING ? - IP_DEFRAG_CONNTRACK_IN : - IP_DEFRAG_CONNTRACK_OUT)) - return NF_STOLEN; - } - return NF_ACCEPT; -} - static unsigned int ipv4_conntrack_in(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, @@ -194,13 +157,6 @@ static unsigned int ipv4_conntrack_local(unsigned int hooknum, /* Connection tracking may drop packets, but never alters them, so make it the first hook. */ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { - { - .hook = ipv4_conntrack_defrag, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_INET_PRE_ROUTING, - .priority = NF_IP_PRI_CONNTRACK_DEFRAG, - }, { .hook = ipv4_conntrack_in, .owner = THIS_MODULE, @@ -208,13 +164,6 @@ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP_PRI_CONNTRACK, }, - { - .hook = ipv4_conntrack_defrag, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP_PRI_CONNTRACK_DEFRAG, - }, { .hook = ipv4_conntrack_local, .owner = THIS_MODULE, @@ -422,6 +371,7 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) int ret = 0; need_conntrack(); + nf_defrag_ipv4_enable(); ret = nf_register_sockopt(&so_getorigdst); if (ret < 0) { diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c new file mode 100644 index 00000000000..aa2c50a180f --- /dev/null +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -0,0 +1,96 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/* Returns new sk_buff, or NULL */ +static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) +{ + int err; + + skb_orphan(skb); + + local_bh_disable(); + err = ip_defrag(skb, user); + local_bh_enable(); + + if (!err) + ip_send_check(ip_hdr(skb)); + + return err; +} + +static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) + /* Previously seen (loopback)? Ignore. Do this before + fragment check. */ + if (skb->nfct) + return NF_ACCEPT; +#endif + + /* Gather fragments. */ + if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { + if (nf_ct_ipv4_gather_frags(skb, + hooknum == NF_INET_PRE_ROUTING ? + IP_DEFRAG_CONNTRACK_IN : + IP_DEFRAG_CONNTRACK_OUT)) + return NF_STOLEN; + } + return NF_ACCEPT; +} + +static struct nf_hook_ops ipv4_defrag_ops[] = { + { + .hook = ipv4_conntrack_defrag, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_INET_PRE_ROUTING, + .priority = NF_IP_PRI_CONNTRACK_DEFRAG, + }, + { + .hook = ipv4_conntrack_defrag, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP_PRI_CONNTRACK_DEFRAG, + }, +}; + +static int __init nf_defrag_init(void) +{ + return nf_register_hooks(ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops)); +} + +static void __exit nf_defrag_fini(void) +{ + nf_unregister_hooks(ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops)); +} + +void nf_defrag_ipv4_enable(void) +{ +} +EXPORT_SYMBOL_GPL(nf_defrag_ipv4_enable); + +module_init(nf_defrag_init); +module_exit(nf_defrag_fini); + +MODULE_LICENSE("GPL"); -- cgit v1.2.3-70-g09d2 From 9ad2d745a23853927a19789b034d9eb2e62d78ee Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Wed, 8 Oct 2008 11:35:12 +0200 Subject: netfilter: iptables tproxy core The iptables tproxy core is a module that contains the common routines used by various tproxy related modules (TPROXY target and socket match) Signed-off-by: KOVACS Krisztian Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_tproxy_core.h | 32 ++++++++++++ net/netfilter/Kconfig | 15 ++++++ net/netfilter/Makefile | 3 ++ net/netfilter/nf_tproxy_core.c | 96 ++++++++++++++++++++++++++++++++++ 4 files changed, 146 insertions(+) create mode 100644 include/net/netfilter/nf_tproxy_core.h create mode 100644 net/netfilter/nf_tproxy_core.c (limited to 'include/net') diff --git a/include/net/netfilter/nf_tproxy_core.h b/include/net/netfilter/nf_tproxy_core.h new file mode 100644 index 00000000000..208b46f4d6d --- /dev/null +++ b/include/net/netfilter/nf_tproxy_core.h @@ -0,0 +1,32 @@ +#ifndef _NF_TPROXY_CORE_H +#define _NF_TPROXY_CORE_H + +#include +#include +#include +#include +#include +#include + +/* look up and get a reference to a matching socket */ +extern struct sock * +nf_tproxy_get_sock_v4(struct net *net, const u8 protocol, + const __be32 saddr, const __be32 daddr, + const __be16 sport, const __be16 dport, + const struct net_device *in, bool listening); + +static inline void +nf_tproxy_put_sock(struct sock *sk) +{ + /* TIME_WAIT inet sockets have to be handled differently */ + if ((sk->sk_protocol == IPPROTO_TCP) && (sk->sk_state == TCP_TIME_WAIT)) + inet_twsk_put(inet_twsk(sk)); + else + sock_put(sk); +} + +/* assign a socket to the skb -- consumes sk */ +int +nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk); + +#endif diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 4a464857f21..ed1dcfb61e1 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -287,6 +287,21 @@ config NF_CT_NETLINK help This option enables support for a netlink-based userspace interface +# transparent proxy support +config NETFILTER_TPROXY + tristate "Transparent proxying support (EXPERIMENTAL)" + depends on EXPERIMENTAL + depends on IP_NF_MANGLE + depends on NETFILTER_ADVANCED + help + This option enables transparent proxying support, that is, + support for handling non-locally bound IPv4 TCP and UDP sockets. + For it to work you will have to configure certain iptables rules + and use policy routing. For more information on how to set it up + see Documentation/networking/tproxy.txt. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XTABLES tristate "Netfilter Xtables support (required for ip_tables)" default m if NETFILTER_ADVANCED=n diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index f101cf61e6f..fc8bbb48d38 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -34,6 +34,9 @@ obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o +# transparent proxy support +obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o + # generic X tables obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o diff --git a/net/netfilter/nf_tproxy_core.c b/net/netfilter/nf_tproxy_core.c new file mode 100644 index 00000000000..fe34f4bf74c --- /dev/null +++ b/net/netfilter/nf_tproxy_core.c @@ -0,0 +1,96 @@ +/* + * Transparent proxy support for Linux/iptables + * + * Copyright (c) 2006-2007 BalaBit IT Ltd. + * Author: Balazs Scheidler, Krisztian Kovacs + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include + +#include +#include +#include +#include +#include + +struct sock * +nf_tproxy_get_sock_v4(struct net *net, const u8 protocol, + const __be32 saddr, const __be32 daddr, + const __be16 sport, const __be16 dport, + const struct net_device *in, bool listening_only) +{ + struct sock *sk; + + /* look up socket */ + switch (protocol) { + case IPPROTO_TCP: + if (listening_only) + sk = __inet_lookup_listener(net, &tcp_hashinfo, + daddr, ntohs(dport), + in->ifindex); + else + sk = __inet_lookup(net, &tcp_hashinfo, + saddr, sport, daddr, dport, + in->ifindex); + break; + case IPPROTO_UDP: + sk = udp4_lib_lookup(net, saddr, sport, daddr, dport, + in->ifindex); + break; + default: + WARN_ON(1); + sk = NULL; + } + + pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, listener only: %d, sock %p\n", + protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), listening_only, sk); + + return sk; +} +EXPORT_SYMBOL_GPL(nf_tproxy_get_sock_v4); + +static void +nf_tproxy_destructor(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + + skb->sk = NULL; + skb->destructor = NULL; + + if (sk) + nf_tproxy_put_sock(sk); +} + +/* consumes sk */ +int +nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk) +{ + if (inet_sk(sk)->transparent) { + skb->sk = sk; + skb->destructor = nf_tproxy_destructor; + return 1; + } else + nf_tproxy_put_sock(sk); + + return 0; +} +EXPORT_SYMBOL_GPL(nf_tproxy_assign_sock); + +static int __init nf_tproxy_init(void) +{ + pr_info("NF_TPROXY: Transparent proxy support initialized, version 4.1.0\n"); + pr_info("NF_TPROXY: Copyright (c) 2006-2007 BalaBit IT Ltd.\n"); + return 0; +} + +module_init(nf_tproxy_init); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Krisztian Kovacs"); +MODULE_DESCRIPTION("Transparent proxy support core routines"); -- cgit v1.2.3-70-g09d2