diff options
author | Patrick McHardy <kaber@trash.net> | 2013-08-27 08:50:12 +0200 |
---|---|---|
committer | Pablo Neira Ayuso <pablo@netfilter.org> | 2013-08-28 00:26:48 +0200 |
commit | 41d73ec053d2424599c4ed8452b889374d523ade (patch) | |
tree | 404e0418e7f4c06cd37065eee97f67f6123df160 /net/netfilter | |
parent | 706f5151e349a3d8ab85237d0d6c553930376e9f (diff) |
netfilter: nf_conntrack: make sequence number adjustments usuable without NAT
Split out sequence number adjustments from NAT and move them to the conntrack
core to make them usable for SYN proxying. The sequence number adjustment
information is moved to a seperate extend. The extend is added to new
conntracks when a NAT mapping is set up for a connection using a helper.
As a side effect, this saves 24 bytes per connection with NAT in the common
case that a connection does not have a helper assigned.
Signed-off-by: Patrick McHardy <kaber@trash.net>
Tested-by: Martin Topholm <mph@one.com>
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Diffstat (limited to 'net/netfilter')
-rw-r--r-- | net/netfilter/Makefile | 2 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_core.c | 16 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_netlink.c | 115 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_proto_tcp.c | 18 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_seqadj.c | 218 | ||||
-rw-r--r-- | net/netfilter/nf_nat_core.c | 16 | ||||
-rw-r--r-- | net/netfilter/nf_nat_helper.c | 228 | ||||
-rw-r--r-- | net/netfilter/nf_nat_sip.c | 3 | ||||
-rw-r--r-- | net/netfilter/nfnetlink_queue_ct.c | 8 |
9 files changed, 296 insertions, 328 deletions
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index ebfa7dc747c..89a9c1658f5 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -1,6 +1,6 @@ netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o -nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o +nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack_seqadj.o nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index da6f1787a10..00a7a94d413 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -39,6 +39,7 @@ #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_expect.h> #include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_seqadj.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_extend.h> #include <net/netfilter/nf_conntrack_acct.h> @@ -1326,6 +1327,7 @@ void nf_conntrack_cleanup_end(void) nf_ct_extend_unregister(&nf_ct_zone_extend); #endif nf_conntrack_proto_fini(); + nf_conntrack_seqadj_fini(); nf_conntrack_labels_fini(); nf_conntrack_helper_fini(); nf_conntrack_timeout_fini(); @@ -1531,6 +1533,10 @@ int nf_conntrack_init_start(void) if (ret < 0) goto err_labels; + ret = nf_conntrack_seqadj_init(); + if (ret < 0) + goto err_seqadj; + #ifdef CONFIG_NF_CONNTRACK_ZONES ret = nf_ct_extend_register(&nf_ct_zone_extend); if (ret < 0) @@ -1555,6 +1561,8 @@ err_proto: nf_ct_extend_unregister(&nf_ct_zone_extend); err_extend: #endif + nf_conntrack_seqadj_fini(); +err_seqadj: nf_conntrack_labels_fini(); err_labels: nf_conntrack_helper_fini(); @@ -1577,9 +1585,6 @@ void nf_conntrack_init_end(void) /* For use by REJECT target */ RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach); RCU_INIT_POINTER(nf_ct_destroy, destroy_conntrack); - - /* Howto get NAT offsets */ - RCU_INIT_POINTER(nf_ct_nat_offset, NULL); } /* @@ -1666,8 +1671,3 @@ err_slabname: err_stat: return ret; } - -s32 (*nf_ct_nat_offset)(const struct nf_conn *ct, - enum ip_conntrack_dir dir, - u32 seq); -EXPORT_SYMBOL_GPL(nf_ct_nat_offset); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index fa61fea6323..7c55745ecec 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -37,6 +37,7 @@ #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_expect.h> #include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_seqadj.h> #include <net/netfilter/nf_conntrack_l3proto.h> #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_tuple.h> @@ -381,9 +382,8 @@ nla_put_failure: return -1; } -#ifdef CONFIG_NF_NAT_NEEDED static int -dump_nat_seq_adj(struct sk_buff *skb, const struct nf_nat_seq *natseq, int type) +dump_ct_seq_adj(struct sk_buff *skb, const struct nf_ct_seqadj *seq, int type) { struct nlattr *nest_parms; @@ -391,12 +391,12 @@ dump_nat_seq_adj(struct sk_buff *skb, const struct nf_nat_seq *natseq, int type) if (!nest_parms) goto nla_put_failure; - if (nla_put_be32(skb, CTA_NAT_SEQ_CORRECTION_POS, - htonl(natseq->correction_pos)) || - nla_put_be32(skb, CTA_NAT_SEQ_OFFSET_BEFORE, - htonl(natseq->offset_before)) || - nla_put_be32(skb, CTA_NAT_SEQ_OFFSET_AFTER, - htonl(natseq->offset_after))) + if (nla_put_be32(skb, CTA_SEQADJ_CORRECTION_POS, + htonl(seq->correction_pos)) || + nla_put_be32(skb, CTA_SEQADJ_OFFSET_BEFORE, + htonl(seq->offset_before)) || + nla_put_be32(skb, CTA_SEQADJ_OFFSET_AFTER, + htonl(seq->offset_after))) goto nla_put_failure; nla_nest_end(skb, nest_parms); @@ -408,27 +408,24 @@ nla_put_failure: } static inline int -ctnetlink_dump_nat_seq_adj(struct sk_buff *skb, const struct nf_conn *ct) +ctnetlink_dump_ct_seq_adj(struct sk_buff *skb, const struct nf_conn *ct) { - struct nf_nat_seq *natseq; - struct nf_conn_nat *nat = nfct_nat(ct); + struct nf_conn_seqadj *seqadj = nfct_seqadj(ct); + struct nf_ct_seqadj *seq; - if (!(ct->status & IPS_SEQ_ADJUST) || !nat) + if (!(ct->status & IPS_SEQ_ADJUST) || !seqadj) return 0; - natseq = &nat->seq[IP_CT_DIR_ORIGINAL]; - if (dump_nat_seq_adj(skb, natseq, CTA_NAT_SEQ_ADJ_ORIG) == -1) + seq = &seqadj->seq[IP_CT_DIR_ORIGINAL]; + if (dump_ct_seq_adj(skb, seq, CTA_SEQ_ADJ_ORIG) == -1) return -1; - natseq = &nat->seq[IP_CT_DIR_REPLY]; - if (dump_nat_seq_adj(skb, natseq, CTA_NAT_SEQ_ADJ_REPLY) == -1) + seq = &seqadj->seq[IP_CT_DIR_REPLY]; + if (dump_ct_seq_adj(skb, seq, CTA_SEQ_ADJ_REPLY) == -1) return -1; return 0; } -#else -#define ctnetlink_dump_nat_seq_adj(a, b) (0) -#endif static inline int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct) @@ -502,7 +499,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, ctnetlink_dump_id(skb, ct) < 0 || ctnetlink_dump_use(skb, ct) < 0 || ctnetlink_dump_master(skb, ct) < 0 || - ctnetlink_dump_nat_seq_adj(skb, ct) < 0) + ctnetlink_dump_ct_seq_adj(skb, ct) < 0) goto nla_put_failure; nlmsg_end(skb, nlh); @@ -707,8 +704,8 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) ctnetlink_dump_master(skb, ct) < 0) goto nla_put_failure; - if (events & (1 << IPCT_NATSEQADJ) && - ctnetlink_dump_nat_seq_adj(skb, ct) < 0) + if (events & (1 << IPCT_SEQADJ) && + ctnetlink_dump_ct_seq_adj(skb, ct) < 0) goto nla_put_failure; } @@ -1439,66 +1436,65 @@ ctnetlink_change_protoinfo(struct nf_conn *ct, const struct nlattr * const cda[] return err; } -#ifdef CONFIG_NF_NAT_NEEDED -static const struct nla_policy nat_seq_policy[CTA_NAT_SEQ_MAX+1] = { - [CTA_NAT_SEQ_CORRECTION_POS] = { .type = NLA_U32 }, - [CTA_NAT_SEQ_OFFSET_BEFORE] = { .type = NLA_U32 }, - [CTA_NAT_SEQ_OFFSET_AFTER] = { .type = NLA_U32 }, +static const struct nla_policy seqadj_policy[CTA_SEQADJ_MAX+1] = { + [CTA_SEQADJ_CORRECTION_POS] = { .type = NLA_U32 }, + [CTA_SEQADJ_OFFSET_BEFORE] = { .type = NLA_U32 }, + [CTA_SEQADJ_OFFSET_AFTER] = { .type = NLA_U32 }, }; static inline int -change_nat_seq_adj(struct nf_nat_seq *natseq, const struct nlattr * const attr) +change_seq_adj(struct nf_ct_seqadj *seq, const struct nlattr * const attr) { int err; - struct nlattr *cda[CTA_NAT_SEQ_MAX+1]; + struct nlattr *cda[CTA_SEQADJ_MAX+1]; - err = nla_parse_nested(cda, CTA_NAT_SEQ_MAX, attr, nat_seq_policy); + err = nla_parse_nested(cda, CTA_SEQADJ_MAX, attr, seqadj_policy); if (err < 0) return err; - if (!cda[CTA_NAT_SEQ_CORRECTION_POS]) + if (!cda[CTA_SEQADJ_CORRECTION_POS]) return -EINVAL; - natseq->correction_pos = - ntohl(nla_get_be32(cda[CTA_NAT_SEQ_CORRECTION_POS])); + seq->correction_pos = + ntohl(nla_get_be32(cda[CTA_SEQADJ_CORRECTION_POS])); - if (!cda[CTA_NAT_SEQ_OFFSET_BEFORE]) + if (!cda[CTA_SEQADJ_OFFSET_BEFORE]) return -EINVAL; - natseq->offset_before = - ntohl(nla_get_be32(cda[CTA_NAT_SEQ_OFFSET_BEFORE])); + seq->offset_before = + ntohl(nla_get_be32(cda[CTA_SEQADJ_OFFSET_BEFORE])); - if (!cda[CTA_NAT_SEQ_OFFSET_AFTER]) + if (!cda[CTA_SEQADJ_OFFSET_AFTER]) return -EINVAL; - natseq->offset_after = - ntohl(nla_get_be32(cda[CTA_NAT_SEQ_OFFSET_AFTER])); + seq->offset_after = + ntohl(nla_get_be32(cda[CTA_SEQADJ_OFFSET_AFTER])); return 0; } static int -ctnetlink_change_nat_seq_adj(struct nf_conn *ct, - const struct nlattr * const cda[]) +ctnetlink_change_seq_adj(struct nf_conn *ct, + const struct nlattr * const cda[]) { + struct nf_conn_seqadj *seqadj = nfct_seqadj(ct); int ret = 0; - struct nf_conn_nat *nat = nfct_nat(ct); - if (!nat) + if (!seqadj) return 0; - if (cda[CTA_NAT_SEQ_ADJ_ORIG]) { - ret = change_nat_seq_adj(&nat->seq[IP_CT_DIR_ORIGINAL], - cda[CTA_NAT_SEQ_ADJ_ORIG]); + if (cda[CTA_SEQ_ADJ_ORIG]) { + ret = change_seq_adj(&seqadj->seq[IP_CT_DIR_ORIGINAL], + cda[CTA_SEQ_ADJ_ORIG]); if (ret < 0) return ret; ct->status |= IPS_SEQ_ADJUST; } - if (cda[CTA_NAT_SEQ_ADJ_REPLY]) { - ret = change_nat_seq_adj(&nat->seq[IP_CT_DIR_REPLY], - cda[CTA_NAT_SEQ_ADJ_REPLY]); + if (cda[CTA_SEQ_ADJ_REPLY]) { + ret = change_seq_adj(&seqadj->seq[IP_CT_DIR_REPLY], + cda[CTA_SEQ_ADJ_REPLY]); if (ret < 0) return ret; @@ -1507,7 +1503,6 @@ ctnetlink_change_nat_seq_adj(struct nf_conn *ct, return 0; } -#endif static int ctnetlink_attach_labels(struct nf_conn *ct, const struct nlattr * const cda[]) @@ -1573,13 +1568,12 @@ ctnetlink_change_conntrack(struct nf_conn *ct, ct->mark = ntohl(nla_get_be32(cda[CTA_MARK])); #endif -#ifdef CONFIG_NF_NAT_NEEDED - if (cda[CTA_NAT_SEQ_ADJ_ORIG] || cda[CTA_NAT_SEQ_ADJ_REPLY]) { - err = ctnetlink_change_nat_seq_adj(ct, cda); + if (cda[CTA_SEQ_ADJ_ORIG] || cda[CTA_SEQ_ADJ_REPLY]) { + err = ctnetlink_change_seq_adj(ct, cda); if (err < 0) return err; } -#endif + if (cda[CTA_LABELS]) { err = ctnetlink_attach_labels(ct, cda); if (err < 0) @@ -1684,13 +1678,11 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, goto err2; } -#ifdef CONFIG_NF_NAT_NEEDED - if (cda[CTA_NAT_SEQ_ADJ_ORIG] || cda[CTA_NAT_SEQ_ADJ_REPLY]) { - err = ctnetlink_change_nat_seq_adj(ct, cda); + if (cda[CTA_SEQ_ADJ_ORIG] || cda[CTA_SEQ_ADJ_REPLY]) { + err = ctnetlink_change_seq_adj(ct, cda); if (err < 0) goto err2; } -#endif memset(&ct->proto, 0, sizeof(ct->proto)); if (cda[CTA_PROTOINFO]) { @@ -1804,7 +1796,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, (1 << IPCT_ASSURED) | (1 << IPCT_HELPER) | (1 << IPCT_PROTOINFO) | - (1 << IPCT_NATSEQADJ) | + (1 << IPCT_SEQADJ) | (1 << IPCT_MARK) | events, ct, NETLINK_CB(skb).portid, nlmsg_report(nlh)); @@ -1827,7 +1819,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, (1 << IPCT_HELPER) | (1 << IPCT_LABEL) | (1 << IPCT_PROTOINFO) | - (1 << IPCT_NATSEQADJ) | + (1 << IPCT_SEQADJ) | (1 << IPCT_MARK), ct, NETLINK_CB(skb).portid, nlmsg_report(nlh)); @@ -2082,7 +2074,7 @@ ctnetlink_nfqueue_build(struct sk_buff *skb, struct nf_conn *ct) goto nla_put_failure; if ((ct->status & IPS_SEQ_ADJUST) && - ctnetlink_dump_nat_seq_adj(skb, ct) < 0) + ctnetlink_dump_ct_seq_adj(skb, ct) < 0) goto nla_put_failure; #ifdef CONFIG_NF_CONNTRACK_MARK @@ -2211,6 +2203,7 @@ static struct nfq_ct_hook ctnetlink_nfqueue_hook = { .build = ctnetlink_nfqueue_build, .parse = ctnetlink_nfqueue_parse, .attach_expect = ctnetlink_nfqueue_attach_expect, + .seq_adjust = nf_ct_tcp_seqadj_set, }; #endif /* CONFIG_NETFILTER_NETLINK_QUEUE_CT */ diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index d224e001f14..984a8d1a335 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -27,6 +27,7 @@ #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_ecache.h> +#include <net/netfilter/nf_conntrack_seqadj.h> #include <net/netfilter/nf_log.h> #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> @@ -495,21 +496,6 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, } } -#ifdef CONFIG_NF_NAT_NEEDED -static inline s32 nat_offset(const struct nf_conn *ct, - enum ip_conntrack_dir dir, - u32 seq) -{ - typeof(nf_ct_nat_offset) get_offset = rcu_dereference(nf_ct_nat_offset); - - return get_offset != NULL ? get_offset(ct, dir, seq) : 0; -} -#define NAT_OFFSET(ct, dir, seq) \ - (nat_offset(ct, dir, seq)) -#else -#define NAT_OFFSET(ct, dir, seq) 0 -#endif - static bool tcp_in_window(const struct nf_conn *ct, struct ip_ct_tcp *state, enum ip_conntrack_dir dir, @@ -540,7 +526,7 @@ static bool tcp_in_window(const struct nf_conn *ct, tcp_sack(skb, dataoff, tcph, &sack); /* Take into account NAT sequence number mangling */ - receiver_offset = NAT_OFFSET(ct, !dir, ack - 1); + receiver_offset = nf_ct_seq_offset(ct, !dir, ack - 1); ack -= receiver_offset; sack -= receiver_offset; diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c new file mode 100644 index 00000000000..483eb9ce321 --- /dev/null +++ b/net/netfilter/nf_conntrack_seqadj.c @@ -0,0 +1,218 @@ +#include <linux/types.h> +#include <linux/netfilter.h> +#include <net/tcp.h> + +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_extend.h> +#include <net/netfilter/nf_conntrack_seqadj.h> + +int nf_ct_seqadj_set(struct nf_conn *ct, enum ip_conntrack_info ctinfo, + __be32 seq, s32 off) +{ + struct nf_conn_seqadj *seqadj = nfct_seqadj(ct); + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + struct nf_ct_seqadj *this_way; + + if (off == 0) + return 0; + + set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); + + spin_lock_bh(&ct->lock); + this_way = &seqadj->seq[dir]; + if (this_way->offset_before == this_way->offset_after || + before(this_way->correction_pos, seq)) { + this_way->correction_pos = seq; + this_way->offset_before = this_way->offset_after; + this_way->offset_after += off; + } + spin_unlock_bh(&ct->lock); + return 0; +} +EXPORT_SYMBOL_GPL(nf_ct_seqadj_set); + +void nf_ct_tcp_seqadj_set(struct sk_buff *skb, + struct nf_conn *ct, enum ip_conntrack_info ctinfo, + s32 off) +{ + const struct tcphdr *th; + + if (nf_ct_protonum(ct) != IPPROTO_TCP) + return; + + th = (struct tcphdr *)(skb_network_header(skb) + ip_hdrlen(skb)); + nf_ct_seqadj_set(ct, ctinfo, th->seq, off); +} +EXPORT_SYMBOL_GPL(nf_ct_tcp_seqadj_set); + +/* Adjust one found SACK option including checksum correction */ +static void nf_ct_sack_block_adjust(struct sk_buff *skb, + struct tcphdr *tcph, + unsigned int sackoff, + unsigned int sackend, + struct nf_ct_seqadj *seq) +{ + while (sackoff < sackend) { + struct tcp_sack_block_wire *sack; + __be32 new_start_seq, new_end_seq; + + sack = (void *)skb->data + sackoff; + if (after(ntohl(sack->start_seq) - seq->offset_before, + seq->correction_pos)) + new_start_seq = htonl(ntohl(sack->start_seq) - + seq->offset_after); + else + new_start_seq = htonl(ntohl(sack->start_seq) - + seq->offset_before); + + if (after(ntohl(sack->end_seq) - seq->offset_before, + seq->correction_pos)) + new_end_seq = htonl(ntohl(sack->end_seq) - + seq->offset_after); + else + new_end_seq = htonl(ntohl(sack->end_seq) - + seq->offset_before); + + pr_debug("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n", + ntohl(sack->start_seq), new_start_seq, + ntohl(sack->end_seq), new_end_seq); + + inet_proto_csum_replace4(&tcph->check, skb, + sack->start_seq, new_start_seq, 0); + inet_proto_csum_replace4(&tcph->check, skb, + sack->end_seq, new_end_seq, 0); + sack->start_seq = new_start_seq; + sack->end_seq = new_end_seq; + sackoff += sizeof(*sack); + } +} + +/* TCP SACK sequence number adjustment */ +static unsigned int nf_ct_sack_adjust(struct sk_buff *skb, + unsigned int protoff, + struct tcphdr *tcph, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ + unsigned int dir, optoff, optend; + struct nf_conn_seqadj *seqadj = nfct_seqadj(ct); + + optoff = protoff + sizeof(struct tcphdr); + optend = protoff + tcph->doff * 4; + + if (!skb_make_writable(skb, optend)) + return 0; + + dir = CTINFO2DIR(ctinfo); + + while (optoff < optend) { + /* Usually: option, length. */ + unsigned char *op = skb->data + optoff; + + switch (op[0]) { + case TCPOPT_EOL: + return 1; + case TCPOPT_NOP: + optoff++; + continue; + default: + /* no partial options */ + if (optoff + 1 == optend || + optoff + op[1] > optend || + op[1] < 2) + return 0; + if (op[0] == TCPOPT_SACK && + op[1] >= 2+TCPOLEN_SACK_PERBLOCK && + ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0) + nf_ct_sack_block_adjust(skb, tcph, optoff + 2, + optoff+op[1], + &seqadj->seq[!dir]); + optoff += op[1]; + } + } + return 1; +} + +/* TCP sequence number adjustment. Returns 1 on success, 0 on failure */ +int nf_ct_seq_adjust(struct sk_buff *skb, + struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int protoff) +{ + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + struct tcphdr *tcph; + __be32 newseq, newack; + s32 seqoff, ackoff; + struct nf_conn_seqadj *seqadj = nfct_seqadj(ct); + struct nf_ct_seqadj *this_way, *other_way; + int res; + + this_way = &seqadj->seq[dir]; + other_way = &seqadj->seq[!dir]; + + if (!skb_make_writable(skb, protoff + sizeof(*tcph))) + return 0; + + tcph = (void *)skb->data + protoff; + spin_lock_bh(&ct->lock); + if (after(ntohl(tcph->seq), this_way->correction_pos)) + seqoff = this_way->offset_after; + else + seqoff = this_way->offset_before; + + if (after(ntohl(tcph->ack_seq) - other_way->offset_before, + other_way->correction_pos)) + ackoff = other_way->offset_after; + else + ackoff = other_way->offset_before; + + newseq = htonl(ntohl(tcph->seq) + seqoff); + newack = htonl(ntohl(tcph->ack_seq) - ackoff); + + inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0); + inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0); + + pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n", + ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq), + ntohl(newack)); + + tcph->seq = newseq; + tcph->ack_seq = newack; + + res = nf_ct_sack_adjust(skb, protoff, tcph, ct, ctinfo); + spin_unlock_bh(&ct->lock); + + return res; +} +EXPORT_SYMBOL_GPL(nf_ct_seq_adjust); + +s32 nf_ct_seq_offset(const struct nf_conn *ct, + enum ip_conntrack_dir dir, + u32 seq) +{ + struct nf_conn_seqadj *seqadj = nfct_seqadj(ct); + struct nf_ct_seqadj *this_way; + + if (!seqadj) + return 0; + + this_way = &seqadj->seq[dir]; + return after(seq, this_way->correction_pos) ? + this_way->offset_after : this_way->offset_before; +} +EXPORT_SYMBOL_GPL(nf_ct_seq_offset); + +static struct nf_ct_ext_type nf_ct_seqadj_extend __read_mostly = { + .len = sizeof(struct nf_conn_seqadj), + .align = __alignof__(struct nf_conn_seqadj), + .id = NF_CT_EXT_SEQADJ, +}; + +int nf_conntrack_seqadj_init(void) +{ + return nf_ct_extend_register(&nf_ct_seqadj_extend); +} + +void nf_conntrack_seqadj_fini(void) +{ + nf_ct_extend_unregister(&nf_ct_seqadj_extend); +} diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 6ff808375b5..6f0f4f7f68a 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -25,6 +25,7 @@ #include <net/netfilter/nf_nat_core.h> #include <net/netfilter/nf_nat_helper.h> #include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_seqadj.h> #include <net/netfilter/nf_conntrack_l3proto.h> #include <net/netfilter/nf_conntrack_zones.h> #include <linux/netfilter/nf_nat.h> @@ -402,6 +403,9 @@ nf_nat_setup_info(struct nf_conn *ct, ct->status |= IPS_SRC_NAT; else ct->status |= IPS_DST_NAT; + + if (nfct_help(ct)) + nfct_seqadj_ext_add(ct); } if (maniptype == NF_NAT_MANIP_SRC) { @@ -764,10 +768,6 @@ static struct nf_ct_helper_expectfn follow_master_nat = { .expectfn = nf_nat_follow_master, }; -static struct nfq_ct_nat_hook nfq_ct_nat = { - .seq_adjust = nf_nat_tcp_seq_adjust, -}; - static int __init nf_nat_init(void) { int ret; @@ -787,14 +787,9 @@ static int __init nf_nat_init(void) /* Initialize fake conntrack so that NAT will skip it */ nf_ct_untracked_status_or(IPS_NAT_DONE_MASK); - BUG_ON(nf_nat_seq_adjust_hook != NULL); - RCU_INIT_POINTER(nf_nat_seq_adjust_hook, nf_nat_seq_adjust); BUG_ON(nfnetlink_parse_nat_setup_hook != NULL); RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, nfnetlink_parse_nat_setup); - BUG_ON(nf_ct_nat_offset != NULL); - RCU_INIT_POINTER(nf_ct_nat_offset, nf_nat_get_offset); - RCU_INIT_POINTER(nfq_ct_nat_hook, &nfq_ct_nat); #ifdef CONFIG_XFRM BUG_ON(nf_nat_decode_session_hook != NULL); RCU_INIT_POINTER(nf_nat_decode_session_hook, __nf_nat_decode_session); @@ -813,10 +808,7 @@ static void __exit nf_nat_cleanup(void) unregister_pernet_subsys(&nf_nat_net_ops); nf_ct_extend_unregister(&nat_extend); nf_ct_helper_expectfn_unregister(&follow_master_nat); - RCU_INIT_POINTER(nf_nat_seq_adjust_hook, NULL); RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, NULL); - RCU_INIT_POINTER(nf_ct_nat_offset, NULL); - RCU_INIT_POINTER(nfq_ct_nat_hook, NULL); #ifdef CONFIG_XFRM RCU_INIT_POINTER(nf_nat_decode_session_hook, NULL); #endif diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c index 46b9baa845a..2840abb5bb9 100644 --- a/net/netfilter/nf_nat_helper.c +++ b/net/netfilter/nf_nat_helper.c @@ -20,67 +20,13 @@ #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_expect.h> +#include <net/netfilter/nf_conntrack_seqadj.h> #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_l3proto.h> #include <net/netfilter/nf_nat_l4proto.h> #include <net/netfilter/nf_nat_core.h> #include <net/netfilter/nf_nat_helper.h> -#define DUMP_OFFSET(x) \ - pr_debug("offset_before=%d, offset_after=%d, correction_pos=%u\n", \ - x->offset_before, x->offset_after, x->correction_pos); - -/* Setup TCP sequence correction given this change at this sequence */ -static inline void -adjust_tcp_sequence(u32 seq, - int sizediff, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo) -{ - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - struct nf_conn_nat *nat = nfct_nat(ct); - struct nf_nat_seq *this_way = &nat->seq[dir]; - - pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n", - seq, sizediff); - - pr_debug("adjust_tcp_sequence: Seq_offset before: "); - DUMP_OFFSET(this_way); - - spin_lock_bh(&ct->lock); - - /* SYN adjust. If it's uninitialized, or this is after last - * correction, record it: we don't handle more than one - * adjustment in the window, but do deal with common case of a - * retransmit */ - if (this_way->offset_before == this_way->offset_after || - before(this_way->correction_pos, seq)) { - this_way->correction_pos = seq; - this_way->offset_before = this_way->offset_after; - this_way->offset_after += sizediff; - } - spin_unlock_bh(&ct->lock); - - pr_debug("adjust_tcp_sequence: Seq_offset after: "); - DUMP_OFFSET(this_way); -} - -/* Get the offset value, for conntrack. Caller must have the conntrack locked */ -s32 nf_nat_get_offset(const struct nf_conn *ct, - enum ip_conntrack_dir dir, - u32 seq) -{ - struct nf_conn_nat *nat = nfct_nat(ct); - struct nf_nat_seq *this_way; - - if (!nat) - return 0; - - this_way = &nat->seq[dir]; - return after(seq, this_way->correction_pos) - ? this_way->offset_after : this_way->offset_before; -} - /* Frobs data inside this packet, which is linear. */ static void mangle_contents(struct sk_buff *skb, unsigned int dataoff, @@ -135,30 +81,6 @@ static int enlarge_skb(struct sk_buff *skb, unsigned int extra) return 1; } -void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo, - __be32 seq, s32 off) -{ - if (!off) - return; - set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); - adjust_tcp_sequence(ntohl(seq), off, ct, ctinfo); - nf_conntrack_event_cache(IPCT_NATSEQADJ, ct); -} -EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust); - -void nf_nat_tcp_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, - u32 ctinfo, int off) -{ - const struct tcphdr *th; - - if (nf_ct_protonum(ct) != IPPROTO_TCP) - return; - - th = (struct tcphdr *)(skb_network_header(skb)+ ip_hdrlen(skb)); - nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off); -} -EXPORT_SYMBOL_GPL(nf_nat_tcp_seq_adjust); - /* Generic function for mangling variable-length address changes inside * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX * command in FTP). @@ -203,8 +125,8 @@ int __nf_nat_mangle_tcp_packet(struct sk_buff *skb, datalen, oldlen); if (adjust && rep_len != match_len) - nf_nat_set_seq_adjust(ct, ctinfo, tcph->seq, - (int)rep_len - (int)match_len); + nf_ct_seqadj_set(ct, ctinfo, tcph->seq, + (int)rep_len - (int)match_len); return 1; } @@ -264,150 +186,6 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb, } EXPORT_SYMBOL(nf_nat_mangle_udp_packet); -/* Adjust one found SACK option including checksum correction */ -static void -sack_adjust(struct sk_buff *skb, - struct tcphdr *tcph, - unsigned int sackoff, - unsigned int sackend, - struct nf_nat_seq *natseq) -{ - while (sackoff < sackend) { - struct tcp_sack_block_wire *sack; - __be32 new_start_seq, new_end_seq; - - sack = (void *)skb->data + sackoff; - if (after(ntohl(sack->start_seq) - natseq->offset_before, - natseq->correction_pos)) - new_start_seq = htonl(ntohl(sack->start_seq) - - natseq->offset_after); - else - new_start_seq = htonl(ntohl(sack->start_seq) - - natseq->offset_before); - - if (after(ntohl(sack->end_seq) - natseq->offset_before, - natseq->correction_pos)) - new_end_seq = htonl(ntohl(sack->end_seq) - - natseq->offset_after); - else - new_end_seq = htonl(ntohl(sack->end_seq) - - natseq->offset_before); - - pr_debug("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n", - ntohl(sack->start_seq), new_start_seq, - ntohl(sack->end_seq), new_end_seq); - - inet_proto_csum_replace4(&tcph->check, skb, - sack->start_seq, new_start_seq, 0); - inet_proto_csum_replace4(&tcph->check, skb, - sack->end_seq, new_end_seq, 0); - sack->start_seq = new_start_seq; - sack->end_seq = new_end_seq; - sackoff += sizeof(*sack); - } -} - -/* TCP SACK sequence number adjustment */ -static inline unsigned int -nf_nat_sack_adjust(struct sk_buff *skb, - unsigned int protoff, - struct tcphdr *tcph, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo) -{ - unsigned int dir, optoff, optend; - struct nf_conn_nat *nat = nfct_nat(ct); - - optoff = protoff + sizeof(struct tcphdr); - optend = protoff + tcph->doff * 4; - - if (!skb_make_writable(skb, optend)) - return 0; - - dir = CTINFO2DIR(ctinfo); - - while (optoff < optend) { - /* Usually: option, length. */ - unsigned char *op = skb->data + optoff; - - switch (op[0]) { - case TCPOPT_EOL: - return 1; - case TCPOPT_NOP: - optoff++; - continue; - default: - /* no partial options */ - if (optoff + 1 == optend || - optoff + op[1] > optend || - op[1] < 2) - return 0; - if (op[0] == TCPOPT_SACK && - op[1] >= 2+TCPOLEN_SACK_PERBLOCK && - ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0) - sack_adjust(skb, tcph, optoff+2, - optoff+op[1], &nat->seq[!dir]); - optoff += op[1]; - } - } - return 1; -} - -/* TCP sequence number adjustment. Returns 1 on success, 0 on failure */ -int -nf_nat_seq_adjust(struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff) -{ - struct tcphdr *tcph; - int dir; - __be32 newseq, newack; - s32 seqoff, ackoff; - struct nf_conn_nat *nat = nfct_nat(ct); - struct nf_nat_seq *this_way, *other_way; - int res; - - dir = CTINFO2DIR(ctinfo); - - this_way = &nat->seq[dir]; - other_way = &nat->seq[!dir]; - - if (!skb_make_writable(skb, protoff + sizeof(*tcph))) - return 0; - - tcph = (void *)skb->data + protoff; - spin_lock_bh(&ct->lock); - if (after(ntohl(tcph->seq), this_way->correction_pos)) - seqoff = this_way->offset_after; - else - seqoff = this_way->offset_before; - - if (after(ntohl(tcph->ack_seq) - other_way->offset_before, - other_way->correction_pos)) - ackoff = other_way->offset_after; - else - ackoff = other_way->offset_before; - - newseq = htonl(ntohl(tcph->seq) + seqoff); - newack = htonl(ntohl(tcph->ack_seq) - ackoff); - - inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0); - inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0); - - pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n", - ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq), - ntohl(newack)); - - tcph->seq = newseq; - tcph->ack_seq = newack; - - res = nf_nat_sack_adjust(skb, protoff, tcph, ct, ctinfo); - spin_unlock_bh(&ct->lock); - - return res; -} - /* Setup NAT on this expected conntrack so it follows master. */ /* If we fail to get a free NAT slot, we'll get dropped on confirm */ void nf_nat_follow_master(struct nf_conn *ct, diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c index dac11f73868..f9790405b7f 100644 --- a/net/netfilter/nf_nat_sip.c +++ b/net/netfilter/nf_nat_sip.c @@ -20,6 +20,7 @@ #include <net/netfilter/nf_nat_helper.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_expect.h> +#include <net/netfilter/nf_conntrack_seqadj.h> #include <linux/netfilter/nf_conntrack_sip.h> MODULE_LICENSE("GPL"); @@ -308,7 +309,7 @@ static void nf_nat_sip_seq_adjust(struct sk_buff *skb, unsigned int protoff, return; th = (struct tcphdr *)(skb->data + protoff); - nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off); + nf_ct_seqadj_set(ct, ctinfo, th->seq, off); } /* Handles expected signalling connections and media streams */ diff --git a/net/netfilter/nfnetlink_queue_ct.c b/net/netfilter/nfnetlink_queue_ct.c index be893039966..96cac50e0d1 100644 --- a/net/netfilter/nfnetlink_queue_ct.c +++ b/net/netfilter/nfnetlink_queue_ct.c @@ -87,14 +87,14 @@ nla_put_failure: void nfqnl_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, int diff) { - struct nfq_ct_nat_hook *nfq_nat_ct; + struct nfq_ct_hook *nfq_ct; - nfq_nat_ct = rcu_dereference(nfq_ct_nat_hook); - if (nfq_nat_ct == NULL) + nfq_ct = rcu_dereference(nfq_ct_hook); + if (nfq_ct == NULL) return; if ((ct->status & IPS_NAT_MASK) && diff) - nfq_nat_ct->seq_adjust(skb, ct, ctinfo, diff); + nfq_ct->seq_adjust(skb, ct, ctinfo, diff); } int nfqnl_attach_expect(struct nf_conn *ct, const struct nlattr *attr, |