summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: David S. Miller <davem@davemloft.net>, 2008-09-09 19:51:04 -0700
committer: David S. Miller <davem@davemloft.net>, 2008-09-09 19:51:04 -0700
commit: dacc62dbf56e872ad96edde0393b9deb56d80cd5 (patch)
tree: 3d1b3e25aba9c5324bb0f6289033f502fa6ccb8c
parent: 47abf28d5b36521558a848a346064a3a3c82bd9e (diff)
parent: c051a0a2c9e283c1123ed3ce65e66e41d2ce5e24 (diff)
Merge branch 'lvs-next-2.6' of git://git.kernel.org/pub/scm/linux/kernel/git/horms/lvs-2.6
-rw-r--r-- include/net/ip_vs.h 308
-rw-r--r-- net/ipv4/ipvs/Kconfig 11
-rw-r--r-- net/ipv4/ipvs/ip_vs_conn.c 249
-rw-r--r-- net/ipv4/ipvs/ip_vs_core.c 806
-rw-r--r-- net/ipv4/ipvs/ip_vs_ctl.c 523
-rw-r--r-- net/ipv4/ipvs/ip_vs_dh.c 5
-rw-r--r-- net/ipv4/ipvs/ip_vs_est.c 40
-rw-r--r-- net/ipv4/ipvs/ip_vs_ftp.c 61
-rw-r--r-- net/ipv4/ipvs/ip_vs_lblc.c 7
-rw-r--r-- net/ipv4/ipvs/ip_vs_lblcr.c 11
-rw-r--r-- net/ipv4/ipvs/ip_vs_lc.c 11
-rw-r--r-- net/ipv4/ipvs/ip_vs_nq.c 15
-rw-r--r-- net/ipv4/ipvs/ip_vs_proto.c 65
-rw-r--r-- net/ipv4/ipvs/ip_vs_proto_ah_esp.c 100
-rw-r--r-- net/ipv4/ipvs/ip_vs_proto_tcp.c 253
-rw-r--r-- net/ipv4/ipvs/ip_vs_proto_udp.c 226
-rw-r--r-- net/ipv4/ipvs/ip_vs_rr.c 13
-rw-r--r-- net/ipv4/ipvs/ip_vs_sed.c 15
-rw-r--r-- net/ipv4/ipvs/ip_vs_sh.c 5
-rw-r--r-- net/ipv4/ipvs/ip_vs_sync.c 40
-rw-r--r-- net/ipv4/ipvs/ip_vs_wlc.c 15
-rw-r--r-- net/ipv4/ipvs/ip_vs_wrr.c 15
-rw-r--r-- net/ipv4/ipvs/ip_vs_xmit.c 471
23 files changed, 2469 insertions, 796 deletions
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index a25ad243031..33e2ac6ceb3 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -21,11 +21,103 @@
#include <linux/timer.h>
#include <net/checksum.h>
+#include <linux/netfilter.h> /* for union nf_inet_addr */
+#include <linux/ipv6.h> /* for struct ipv6hdr */
+#include <net/ipv6.h> /* for ipv6_addr_copy */
+
+struct ip_vs_iphdr {
+ int len;
+ __u8 protocol;
+ union nf_inet_addr saddr;
+ union nf_inet_addr daddr;
+};
+
+static inline void
+ip_vs_fill_iphdr(int af, const void *nh, struct ip_vs_iphdr *iphdr)
+{
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6) {
+ const struct ipv6hdr *iph = nh;
+ iphdr->len = sizeof(struct ipv6hdr);
+ iphdr->protocol = iph->nexthdr;
+ ipv6_addr_copy(&iphdr->saddr.in6, &iph->saddr);
+ ipv6_addr_copy(&iphdr->daddr.in6, &iph->daddr);
+ } else
+#endif
+ {
+ const struct iphdr *iph = nh;
+ iphdr->len = iph->ihl * 4;
+ iphdr->protocol = iph->protocol;
+ iphdr->saddr.ip = iph->saddr;
+ iphdr->daddr.ip = iph->daddr;
+ }
+}
+
+static inline void ip_vs_addr_copy(int af, union nf_inet_addr *dst,
+ const union nf_inet_addr *src)
+{
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ ipv6_addr_copy(&dst->in6, &src->in6);
+ else
+#endif
+ dst->ip = src->ip;
+}
+
+static inline int ip_vs_addr_equal(int af, const union nf_inet_addr *a,
+ const union nf_inet_addr *b)
+{
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ return ipv6_addr_equal(&a->in6, &b->in6);
+#endif
+ return a->ip == b->ip;
+}
#ifdef CONFIG_IP_VS_DEBUG
#include <linux/net.h>
extern int ip_vs_get_debug_level(void);
+
+static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len,
+ const union nf_inet_addr *addr,
+ int *idx)
+{
+ int len;
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ len = snprintf(&buf[*idx], buf_len - *idx, "[" NIP6_FMT "]",
+ NIP6(addr->in6)) + 1;
+ else
+#endif
+ len = snprintf(&buf[*idx], buf_len - *idx, NIPQUAD_FMT,
+ NIPQUAD(addr->ip)) + 1;
+
+ *idx += len;
+ BUG_ON(*idx > buf_len + 1);
+ return &buf[*idx - len];
+}
+
+#define IP_VS_DBG_BUF(level, msg...) \
+ do { \
+ char ip_vs_dbg_buf[160]; \
+ int ip_vs_dbg_idx = 0; \
+ if (level <= ip_vs_get_debug_level()) \
+ printk(KERN_DEBUG "IPVS: " msg); \
+ } while (0)
+#define IP_VS_ERR_BUF(msg...) \
+ do { \
+ char ip_vs_dbg_buf[160]; \
+ int ip_vs_dbg_idx = 0; \
+ printk(KERN_ERR "IPVS: " msg); \
+ } while (0)
+
+/* Only use from within the IP_VS_DBG_BUF() or IP_VS_ERR_BUF() macros */
+#define IP_VS_DBG_ADDR(af, addr) \
+ ip_vs_dbg_addr(af, ip_vs_dbg_buf, \
+ sizeof(ip_vs_dbg_buf), addr, \
+ &ip_vs_dbg_idx)
+
#define IP_VS_DBG(level, msg...) \
do { \
if (level <= ip_vs_get_debug_level()) \
@@ -48,6 +140,8 @@ extern int ip_vs_get_debug_level(void);
pp->debug_packet(pp, skb, ofs, msg); \
} while (0)
#else /* NO DEBUGGING at ALL */
+#define IP_VS_DBG_BUF(level, msg...) do {} while (0)
+#define IP_VS_ERR_BUF(msg...) do {} while (0)
#define IP_VS_DBG(level, msg...) do {} while (0)
#define IP_VS_DBG_RL(msg...) do {} while (0)
#define IP_VS_DBG_PKT(level, pp, skb, ofs, msg) do {} while (0)
@@ -160,27 +254,10 @@ struct ip_vs_estimator {
struct ip_vs_stats
{
- __u32 conns; /* connections scheduled */
- __u32 inpkts; /* incoming packets */
- __u32 outpkts; /* outgoing packets */
- __u64 inbytes; /* incoming bytes */
- __u64 outbytes; /* outgoing bytes */
-
- __u32 cps; /* current connection rate */
- __u32 inpps; /* current in packet rate */
- __u32 outpps; /* current out packet rate */
- __u32 inbps; /* current in byte rate */
- __u32 outbps; /* current out byte rate */
-
- /*
- * Don't add anything before the lock, because we use memcpy() to copy
- * the members before the lock to struct ip_vs_stats_user in
- * ip_vs_ctl.c.
- */
+ struct ip_vs_stats_user ustats; /* statistics */
+ struct ip_vs_estimator est; /* estimator */
spinlock_t lock; /* spin lock */
-
- struct ip_vs_estimator est; /* estimator */
};
struct dst_entry;
@@ -202,21 +279,23 @@ struct ip_vs_protocol {
void (*exit)(struct ip_vs_protocol *pp);
- int (*conn_schedule)(struct sk_buff *skb,
+ int (*conn_schedule)(int af, struct sk_buff *skb,
struct ip_vs_protocol *pp,
int *verdict, struct ip_vs_conn **cpp);
struct ip_vs_conn *
- (*conn_in_get)(const struct sk_buff *skb,
+ (*conn_in_get)(int af,
+ const struct sk_buff *skb,
struct ip_vs_protocol *pp,
- const struct iphdr *iph,
+ const struct ip_vs_iphdr *iph,
unsigned int proto_off,
int inverse);
struct ip_vs_conn *
- (*conn_out_get)(const struct sk_buff *skb,
+ (*conn_out_get)(int af,
+ const struct sk_buff *skb,
struct ip_vs_protocol *pp,
- const struct iphdr *iph,
+ const struct ip_vs_iphdr *iph,
unsigned int proto_off,
int inverse);
@@ -226,7 +305,8 @@ struct ip_vs_protocol {
int (*dnat_handler)(struct sk_buff *skb,
struct ip_vs_protocol *pp, struct ip_vs_conn *cp);
- int (*csum_check)(struct sk_buff *skb, struct ip_vs_protocol *pp);
+ int (*csum_check)(int af, struct sk_buff *skb,
+ struct ip_vs_protocol *pp);
const char *(*state_name)(int state);
@@ -259,9 +339,10 @@ struct ip_vs_conn {
struct list_head c_list; /* hashed list heads */
/* Protocol, addresses and port numbers */
- __be32 caddr; /* client address */
- __be32 vaddr; /* virtual address */
- __be32 daddr; /* destination address */
+ u16 af; /* address family */
+ union nf_inet_addr caddr; /* client address */
+ union nf_inet_addr vaddr; /* virtual address */
+ union nf_inet_addr daddr; /* destination address */
__be16 cport;
__be16 vport;
__be16 dport;
@@ -305,6 +386,45 @@ struct ip_vs_conn {
/*
+ * Extended internal versions of struct ip_vs_service_user and
+ * ip_vs_dest_user for IPv6 support.
+ *
+ * We need these to conveniently pass around service and destination
+ * options, but unfortunately, we also need to keep the old definitions to
+ * maintain userspace backwards compatibility for the setsockopt interface.
+ */
+struct ip_vs_service_user_kern {
+ /* virtual service addresses */
+ u16 af;
+ u16 protocol;
+ union nf_inet_addr addr; /* virtual ip address */
+ u16 port;
+ u32 fwmark; /* firewall mark of service */
+
+ /* virtual service options */
+ char *sched_name;
+ unsigned flags; /* virtual service flags */
+ unsigned timeout; /* persistent timeout in sec */
+ u32 netmask; /* persistent netmask */
+};
+
+
+struct ip_vs_dest_user_kern {
+ /* destination server address */
+ union nf_inet_addr addr;
+ u16 port;
+
+ /* real server options */
+ unsigned conn_flags; /* connection flags */
+ int weight; /* destination weight */
+
+ /* thresholds for active connections */
+ u32 u_threshold; /* upper threshold */
+ u32 l_threshold; /* lower threshold */
+};
+
+
+/*
* The information about the virtual service offered to the net
* and the forwarding entries
*/
@@ -314,8 +434,9 @@ struct ip_vs_service {
atomic_t refcnt; /* reference counter */
atomic_t usecnt; /* use counter */
+ u16 af; /* address family */
__u16 protocol; /* which protocol (TCP/UDP) */
- __be32 addr; /* IP address for virtual service */
+ union nf_inet_addr addr; /* IP address for virtual service */
__be16 port; /* port number for the service */
__u32 fwmark; /* firewall mark of the service */
unsigned flags; /* service status flags */
@@ -342,7 +463,8 @@ struct ip_vs_dest {
struct list_head n_list; /* for the dests in the service */
struct list_head d_list; /* for table with all the dests */
- __be32 addr; /* IP address of the server */
+ u16 af; /* address family */
+ union nf_inet_addr addr; /* IP address of the server */
__be16 port; /* port number of the server */
volatile unsigned flags; /* dest status flags */
atomic_t conn_flags; /* flags to copy to conn */
@@ -366,7 +488,7 @@ struct ip_vs_dest {
/* for virtual service */
struct ip_vs_service *svc; /* service it belongs to */
__u16 protocol; /* which protocol (TCP/UDP) */
- __be32 vaddr; /* virtual IP address */
+ union nf_inet_addr vaddr; /* virtual IP address */
__be16 vport; /* virtual port number */
__u32 vfwmark; /* firewall mark of service */
};
@@ -380,6 +502,9 @@ struct ip_vs_scheduler {
char *name; /* scheduler name */
atomic_t refcnt; /* reference counter */
struct module *module; /* THIS_MODULE/NULL */
+#ifdef CONFIG_IP_VS_IPV6
+ int supports_ipv6; /* scheduler has IPv6 support */
+#endif
/* scheduler initializing service */
int (*init_service)(struct ip_vs_service *svc);
@@ -479,16 +604,8 @@ extern void ip_vs_init_hash_table(struct list_head *table, int rows);
#ifndef CONFIG_IP_VS_TAB_BITS
#define CONFIG_IP_VS_TAB_BITS 12
#endif
-/* make sure that IP_VS_CONN_TAB_BITS is located in [8, 20] */
-#if CONFIG_IP_VS_TAB_BITS < 8
-#define IP_VS_CONN_TAB_BITS 8
-#endif
-#if CONFIG_IP_VS_TAB_BITS > 20
-#define IP_VS_CONN_TAB_BITS 20
-#endif
-#if 8 <= CONFIG_IP_VS_TAB_BITS && CONFIG_IP_VS_TAB_BITS <= 20
+
#define IP_VS_CONN_TAB_BITS CONFIG_IP_VS_TAB_BITS
-#endif
#define IP_VS_CONN_TAB_SIZE (1 << IP_VS_CONN_TAB_BITS)
#define IP_VS_CONN_TAB_MASK (IP_VS_CONN_TAB_SIZE - 1)
@@ -500,11 +617,16 @@ enum {
};
extern struct ip_vs_conn *ip_vs_conn_in_get
-(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port);
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port);
+
extern struct ip_vs_conn *ip_vs_ct_in_get
-(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port);
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port);
+
extern struct ip_vs_conn *ip_vs_conn_out_get
-(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port);
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port);
/* put back the conn without restarting its timer */
static inline void __ip_vs_conn_put(struct ip_vs_conn *cp)
@@ -515,8 +637,9 @@ extern void ip_vs_conn_put(struct ip_vs_conn *cp);
extern void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport);
extern struct ip_vs_conn *
-ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport,
- __be32 daddr, __be16 dport, unsigned flags,
+ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport,
+ const union nf_inet_addr *vaddr, __be16 vport,
+ const union nf_inet_addr *daddr, __be16 dport, unsigned flags,
struct ip_vs_dest *dest);
extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp);
@@ -532,24 +655,32 @@ static inline void ip_vs_control_del(struct ip_vs_conn *cp)
{
struct ip_vs_conn *ctl_cp = cp->control;
if (!ctl_cp) {
- IP_VS_ERR("request control DEL for uncontrolled: "
- "%d.%d.%d.%d:%d to %d.%d.%d.%d:%d\n",
- NIPQUAD(cp->caddr),ntohs(cp->cport),
- NIPQUAD(cp->vaddr),ntohs(cp->vport));
+ IP_VS_ERR_BUF("request control DEL for uncontrolled: "
+ "%s:%d to %s:%d\n",
+ IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+ ntohs(cp->cport),
+ IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
+ ntohs(cp->vport));
+
return;
}
- IP_VS_DBG(7, "DELeting control for: "
- "cp.dst=%d.%d.%d.%d:%d ctl_cp.dst=%d.%d.%d.%d:%d\n",
- NIPQUAD(cp->caddr),ntohs(cp->cport),
- NIPQUAD(ctl_cp->caddr),ntohs(ctl_cp->cport));
+ IP_VS_DBG_BUF(7, "DELeting control for: "
+ "cp.dst=%s:%d ctl_cp.dst=%s:%d\n",
+ IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+ ntohs(cp->cport),
+ IP_VS_DBG_ADDR(cp->af, &ctl_cp->caddr),
+ ntohs(ctl_cp->cport));
cp->control = NULL;
if (atomic_read(&ctl_cp->n_control) == 0) {
- IP_VS_ERR("BUG control DEL with n=0 : "
- "%d.%d.%d.%d:%d to %d.%d.%d.%d:%d\n",
- NIPQUAD(cp->caddr),ntohs(cp->cport),
- NIPQUAD(cp->vaddr),ntohs(cp->vport));
+ IP_VS_ERR_BUF("BUG control DEL with n=0 : "
+ "%s:%d to %s:%d\n",
+ IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+ ntohs(cp->cport),
+ IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
+ ntohs(cp->vport));
+
return;
}
atomic_dec(&ctl_cp->n_control);
@@ -559,17 +690,22 @@ static inline void
ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp)
{
if (cp->control) {
- IP_VS_ERR("request control ADD for already controlled: "
- "%d.%d.%d.%d:%d to %d.%d.%d.%d:%d\n",
- NIPQUAD(cp->caddr),ntohs(cp->cport),
- NIPQUAD(cp->vaddr),ntohs(cp->vport));
+ IP_VS_ERR_BUF("request control ADD for already controlled: "
+ "%s:%d to %s:%d\n",
+ IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+ ntohs(cp->cport),
+ IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
+ ntohs(cp->vport));
+
ip_vs_control_del(cp);
}
- IP_VS_DBG(7, "ADDing control for: "
- "cp.dst=%d.%d.%d.%d:%d ctl_cp.dst=%d.%d.%d.%d:%d\n",
- NIPQUAD(cp->caddr),ntohs(cp->cport),
- NIPQUAD(ctl_cp->caddr),ntohs(ctl_cp->cport));
+ IP_VS_DBG_BUF(7, "ADDing control for: "
+ "cp.dst=%s:%d ctl_cp.dst=%s:%d\n",
+ IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+ ntohs(cp->cport),
+ IP_VS_DBG_ADDR(cp->af, &ctl_cp->caddr),
+ ntohs(ctl_cp->cport));
cp->control = ctl_cp;
atomic_inc(&ctl_cp->n_control);
@@ -647,7 +783,8 @@ extern struct ip_vs_stats ip_vs_stats;
extern const struct ctl_path net_vs_ctl_path[];
extern struct ip_vs_service *
-ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport);
+ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
+ const union nf_inet_addr *vaddr, __be16 vport);
static inline void ip_vs_service_put(struct ip_vs_service *svc)
{
@@ -655,14 +792,16 @@ static inline void ip_vs_service_put(struct ip_vs_service *svc)
}
extern struct ip_vs_dest *
-ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport);
+ip_vs_lookup_real_service(int af, __u16 protocol,
+ const union nf_inet_addr *daddr, __be16 dport);
+
extern int ip_vs_use_count_inc(void);
extern void ip_vs_use_count_dec(void);
extern int ip_vs_control_init(void);
extern void ip_vs_control_cleanup(void);
extern struct ip_vs_dest *
-ip_vs_find_dest(__be32 daddr, __be16 dport,
- __be32 vaddr, __be16 vport, __u16 protocol);
+ip_vs_find_dest(int af, const union nf_inet_addr *daddr, __be16 dport,
+ const union nf_inet_addr *vaddr, __be16 vport, __u16 protocol);
extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
@@ -706,6 +845,19 @@ extern int ip_vs_icmp_xmit
(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, int offset);
extern void ip_vs_dst_reset(struct ip_vs_dest *dest);
+#ifdef CONFIG_IP_VS_IPV6
+extern int ip_vs_bypass_xmit_v6
+(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
+extern int ip_vs_nat_xmit_v6
+(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
+extern int ip_vs_tunnel_xmit_v6
+(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
+extern int ip_vs_dr_xmit_v6
+(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
+extern int ip_vs_icmp_xmit_v6
+(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp,
+ int offset);
+#endif
/*
* This is a simple mechanism to ignore packets when
@@ -750,7 +902,12 @@ static inline char ip_vs_fwd_tag(struct ip_vs_conn *cp)
}
extern void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
- struct ip_vs_conn *cp, int dir);
+ struct ip_vs_conn *cp, int dir);
+
+#ifdef CONFIG_IP_VS_IPV6
+extern void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
+ struct ip_vs_conn *cp, int dir);
+#endif
extern __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset);
@@ -761,6 +918,17 @@ static inline __wsum ip_vs_check_diff4(__be32 old, __be32 new, __wsum oldsum)
return csum_partial((char *) diff, sizeof(diff), oldsum);
}
+#ifdef CONFIG_IP_VS_IPV6
+static inline __wsum ip_vs_check_diff16(const __be32 *old, const __be32 *new,
+ __wsum oldsum)
+{
+ __be32 diff[8] = { ~old[3], ~old[2], ~old[1], ~old[0],
+ new[3], new[2], new[1], new[0] };
+
+ return csum_partial((char *) diff, sizeof(diff), oldsum);
+}
+#endif
+
static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum)
{
__be16 diff[2] = { ~old, new };
diff --git a/net/ipv4/ipvs/Kconfig b/net/ipv4/ipvs/Kconfig
index 2e48a7e2722..de6004de80b 100644
--- a/net/ipv4/ipvs/Kconfig
+++ b/net/ipv4/ipvs/Kconfig
@@ -24,6 +24,14 @@ menuconfig IP_VS
if IP_VS
+config IP_VS_IPV6
+ bool "IPv6 support for IPVS (DANGEROUS)"
+ depends on EXPERIMENTAL && (IPV6 = y || IP_VS = IPV6)
+ ---help---
+ Add IPv6 support to IPVS. This is incomplete and might be dangerous.
+
+ Say N if unsure.
+
config IP_VS_DEBUG
bool "IP virtual server debugging"
---help---
@@ -33,7 +41,8 @@ config IP_VS_DEBUG
config IP_VS_TAB_BITS
int "IPVS connection table size (the Nth power of 2)"
- default "12"
+ range 8 20
+ default 12
---help---
The IPVS connection hash table uses the chaining scheme to handle
hash collisions. Using a big IPVS connection hash table will greatly
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index 44a6872dc24..9a24332fbed 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -114,9 +114,18 @@ static inline void ct_write_unlock_bh(unsigned key)
/*
* Returns hash value for IPVS connection entry
*/
-static unsigned int ip_vs_conn_hashkey(unsigned proto, __be32 addr, __be16 port)
+static unsigned int ip_vs_conn_hashkey(int af, unsigned proto,
+ const union nf_inet_addr *addr,
+ __be16 port)
{
- return jhash_3words((__force u32)addr, (__force u32)port, proto, ip_vs_conn_rnd)
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ return jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),
+ (__force u32)port, proto, ip_vs_conn_rnd)
+ & IP_VS_CONN_TAB_MASK;
+#endif
+ return jhash_3words((__force u32)addr->ip, (__force u32)port, proto,
+ ip_vs_conn_rnd)
& IP_VS_CONN_TAB_MASK;
}
@@ -131,7 +140,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
int ret;
/* Hash by protocol, client address and port */
- hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr, cp->cport);
+ hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport);
ct_write_lock(hash);
@@ -162,7 +171,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
int ret;
/* unhash it and decrease its reference counter */
- hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr, cp->cport);
+ hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport);
ct_write_lock(hash);
@@ -187,20 +196,23 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
* d_addr, d_port: pkt dest address (load balancer)
*/
static inline struct ip_vs_conn *__ip_vs_conn_in_get
-(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port)
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port)
{
unsigned hash;
struct ip_vs_conn *cp;
- hash = ip_vs_conn_hashkey(protocol, s_addr, s_port);
+ hash = ip_vs_conn_hashkey(af, protocol, s_addr, s_port);
ct_read_lock(hash);
list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
- if (s_addr==cp->caddr && s_port==cp->cport &&
- d_port==cp->vport && d_addr==cp->vaddr &&
+ if (cp->af == af &&
+ ip_vs_addr_equal(af, s_addr, &cp->caddr) &&
+ ip_vs_addr_equal(af, d_addr, &cp->vaddr) &&
+ s_port == cp->cport && d_port == cp->vport &&
((!s_port) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
- protocol==cp->protocol) {
+ protocol == cp->protocol) {
/* HIT */
atomic_inc(&cp->refcnt);
ct_read_unlock(hash);
@@ -214,39 +226,44 @@ static inline struct ip_vs_conn *__ip_vs_conn_in_get
}
struct ip_vs_conn *ip_vs_conn_in_get
-(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port)
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port)
{
struct ip_vs_conn *cp;
- cp = __ip_vs_conn_in_get(protocol, s_addr, s_port, d_addr, d_port);
+ cp = __ip_vs_conn_in_get(af, protocol, s_addr, s_port, d_addr, d_port);
if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt))
- cp = __ip_vs_conn_in_get(protocol, s_addr, 0, d_addr, d_port);
+ cp = __ip_vs_conn_in_get(af, protocol, s_addr, 0, d_addr,
+ d_port);
- IP_VS_DBG(9, "lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
- ip_vs_proto_name(protocol),
- NIPQUAD(s_addr), ntohs(s_port),
- NIPQUAD(d_addr), ntohs(d_port),
- cp?"hit":"not hit");
+ IP_VS_DBG_BUF(9, "lookup/in %s %s:%d->%s:%d %s\n",
+ ip_vs_proto_name(protocol),
+ IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port),
+ IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port),
+ cp ? "hit" : "not hit");
return cp;
}
/* Get reference to connection template */
struct ip_vs_conn *ip_vs_ct_in_get
-(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port)
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port)
{
unsigned hash;
struct ip_vs_conn *cp;
- hash = ip_vs_conn_hashkey(protocol, s_addr, s_port);
+ hash = ip_vs_conn_hashkey(af, protocol, s_addr, s_port);
ct_read_lock(hash);
list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
- if (s_addr==cp->caddr && s_port==cp->cport &&
- d_port==cp->vport && d_addr==cp->vaddr &&
+ if (cp->af == af &&
+ ip_vs_addr_equal(af, s_addr, &cp->caddr) &&
+ ip_vs_addr_equal(af, d_addr, &cp->vaddr) &&
+ s_port == cp->cport && d_port == cp->vport &&
cp->flags & IP_VS_CONN_F_TEMPLATE &&
- protocol==cp->protocol) {
+ protocol == cp->protocol) {
/* HIT */
atomic_inc(&cp->refcnt);
goto out;
@@ -257,11 +274,11 @@ struct ip_vs_conn *ip_vs_ct_in_get
out:
ct_read_unlock(hash);
- IP_VS_DBG(9, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
- ip_vs_proto_name(protocol),
- NIPQUAD(s_addr), ntohs(s_port),
- NIPQUAD(d_addr), ntohs(d_port),
- cp?"hit":"not hit");
+ IP_VS_DBG_BUF(9, "template lookup/in %s %s:%d->%s:%d %s\n",
+ ip_vs_proto_name(protocol),
+ IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port),
+ IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port),
+ cp ? "hit" : "not hit");
return cp;
}
@@ -273,7 +290,8 @@ struct ip_vs_conn *ip_vs_ct_in_get
* d_addr, d_port: pkt dest address (foreign host)
*/
struct ip_vs_conn *ip_vs_conn_out_get
-(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port)
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port)
{
unsigned hash;
struct ip_vs_conn *cp, *ret=NULL;
@@ -281,13 +299,15 @@ struct ip_vs_conn *ip_vs_conn_out_get
/*
* Check for "full" addressed entries
*/
- hash = ip_vs_conn_hashkey(protocol, d_addr, d_port);
+ hash = ip_vs_conn_hashkey(af, protocol, d_addr, d_port);
ct_read_lock(hash);
list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
- if (d_addr == cp->caddr && d_port == cp->cport &&
- s_port == cp->dport && s_addr == cp->daddr &&
+ if (cp->af == af &&
+ ip_vs_addr_equal(af, d_addr, &cp->caddr) &&
+ ip_vs_addr_equal(af, s_addr, &cp->daddr) &&
+ d_port == cp->cport && s_port == cp->dport &&
protocol == cp->protocol) {
/* HIT */
atomic_inc(&cp->refcnt);
@@ -298,11 +318,11 @@ struct ip_vs_conn *ip_vs_conn_out_get
ct_read_unlock(hash);
- IP_VS_DBG(9, "lookup/out %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
- ip_vs_proto_name(protocol),
- NIPQUAD(s_addr), ntohs(s_port),
- NIPQUAD(d_addr), ntohs(d_port),
- ret?"hit":"not hit");
+ IP_VS_DBG_BUF(9, "lookup/out %s %s:%d->%s:%d %s\n",
+ ip_vs_proto_name(protocol),
+ IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port),
+ IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port),
+ ret ? "hit" : "not hit");
return ret;
}
@@ -369,6 +389,33 @@ static inline void ip_vs_bind_xmit(struct ip_vs_conn *cp)
}
}
+#ifdef CONFIG_IP_VS_IPV6
+static inline void ip_vs_bind_xmit_v6(struct ip_vs_conn *cp)
+{
+ switch (IP_VS_FWD_METHOD(cp)) {
+ case IP_VS_CONN_F_MASQ:
+ cp->packet_xmit = ip_vs_nat_xmit_v6;
+ break;
+
+ case IP_VS_CONN_F_TUNNEL:
+ cp->packet_xmit = ip_vs_tunnel_xmit_v6;
+ break;
+
+ case IP_VS_CONN_F_DROUTE:
+ cp->packet_xmit = ip_vs_dr_xmit_v6;
+ break;
+
+ case IP_VS_CONN_F_LOCALNODE:
+ cp->packet_xmit = ip_vs_null_xmit;
+ break;
+
+ case IP_VS_CONN_F_BYPASS:
+ cp->packet_xmit = ip_vs_bypass_xmit_v6;
+ break;
+ }
+}
+#endif
+
static inline int ip_vs_dest_totalconns(struct ip_vs_dest *dest)
{
@@ -402,16 +449,16 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
cp->flags |= atomic_read(&dest->conn_flags);
cp->dest = dest;
- IP_VS_DBG(7, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
- "d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
- "dest->refcnt:%d\n",
- ip_vs_proto_name(cp->protocol),
- NIPQUAD(cp->caddr), ntohs(cp->cport),
- NIPQUAD(cp->vaddr), ntohs(cp->vport),
- NIPQUAD(cp->daddr), ntohs(cp->dport),
- ip_vs_fwd_tag(cp), cp->state,
- cp->flags, atomic_read(&cp->refcnt),
- atomic_read(&dest->refcnt));
+ IP_VS_DBG_BUF(7, "Bind-dest %s c:%s:%d v:%s:%d "
+ "d:%s:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
+ "dest->refcnt:%d\n",
+ ip_vs_proto_name(cp->protocol),
+ IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
+ IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
+ IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
+ ip_vs_fwd_tag(cp), cp->state,
+ cp->flags, atomic_read(&cp->refcnt),
+ atomic_read(&dest->refcnt));
/* Update the connection counters */
if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) {
@@ -444,8 +491,9 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
struct ip_vs_dest *dest;
if ((cp) && (!cp->dest)) {
- dest = ip_vs_find_dest(cp->daddr, cp->dport,
- cp->vaddr, cp->vport, cp->protocol);
+ dest = ip_vs_find_dest(cp->af, &cp->daddr, cp->dport,
+ &cp->vaddr, cp->vport,
+ cp->protocol);
ip_vs_bind_dest(cp, dest);
return dest;
} else
@@ -464,16 +512,16 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
if (!dest)
return;
- IP_VS_DBG(7, "Unbind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
- "d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
- "dest->refcnt:%d\n",
- ip_vs_proto_name(cp->protocol),
- NIPQUAD(cp->caddr), ntohs(cp->cport),
- NIPQUAD(cp->vaddr), ntohs(cp->vport),
- NIPQUAD(cp->daddr), ntohs(cp->dport),
- ip_vs_fwd_tag(cp), cp->state,
- cp->flags, atomic_read(&cp->refcnt),
- atomic_read(&dest->refcnt));
+ IP_VS_DBG_BUF(7, "Unbind-dest %s c:%s:%d v:%s:%d "
+ "d:%s:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
+ "dest->refcnt:%d\n",
+ ip_vs_proto_name(cp->protocol),
+ IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
+ IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
+ IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
+ ip_vs_fwd_tag(cp), cp->state,
+ cp->flags, atomic_read(&cp->refcnt),
+ atomic_read(&dest->refcnt));
/* Update the connection counters */
if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) {
@@ -526,13 +574,16 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
!(dest->flags & IP_VS_DEST_F_AVAILABLE) ||
(sysctl_ip_vs_expire_quiescent_template &&
(atomic_read(&dest->weight) == 0))) {
- IP_VS_DBG(9, "check_template: dest not available for "
- "protocol %s s:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
- "-> d:%u.%u.%u.%u:%d\n",
- ip_vs_proto_name(ct->protocol),
- NIPQUAD(ct->caddr), ntohs(ct->cport),
- NIPQUAD(ct->vaddr), ntohs(ct->vport),
- NIPQUAD(ct->daddr), ntohs(ct->dport));
+ IP_VS_DBG_BUF(9, "check_template: dest not available for "
+ "protocol %s s:%s:%d v:%s:%d "
+ "-> d:%s:%d\n",
+ ip_vs_proto_name(ct->protocol),
+ IP_VS_DBG_ADDR(ct->af, &ct->caddr),
+ ntohs(ct->cport),
+ IP_VS_DBG_ADDR(ct->af, &ct->vaddr),
+ ntohs(ct->vport),
+ IP_VS_DBG_ADDR(ct->af, &ct->daddr),
+ ntohs(ct->dport));
/*
* Invalidate the connection template
@@ -625,8 +676,9 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
* Create a new connection entry and hash it into the ip_vs_conn_tab
*/
struct ip_vs_conn *
-ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport,
- __be32 daddr, __be16 dport, unsigned flags,
+ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport,
+ const union nf_inet_addr *vaddr, __be16 vport,
+ const union nf_inet_addr *daddr, __be16 dport, unsigned flags,
struct ip_vs_dest *dest)
{
struct ip_vs_conn *cp;
@@ -640,12 +692,13 @@ ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport
INIT_LIST_HEAD(&cp->c_list);
setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
+ cp->af = af;
cp->protocol = proto;
- cp->caddr = caddr;
+ ip_vs_addr_copy(af, &cp->caddr, caddr);
cp->cport = cport;
- cp->vaddr = vaddr;
+ ip_vs_addr_copy(af, &cp->vaddr, vaddr);
cp->vport = vport;
- cp->daddr = daddr;
+ ip_vs_addr_copy(af, &cp->daddr, daddr);
cp->dport = dport;
cp->flags = flags;
spin_lock_init(&cp->lock);
@@ -672,7 +725,12 @@ ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport
cp->timeout = 3*HZ;
/* Bind its packet transmitter */
- ip_vs_bind_xmit(cp);
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ ip_vs_bind_xmit_v6(cp);
+ else
+#endif
+ ip_vs_bind_xmit(cp);
if (unlikely(pp && atomic_read(&pp->appcnt)))
ip_vs_bind_app(cp, pp);
@@ -760,12 +818,26 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
else {
const struct ip_vs_conn *cp = v;
- seq_printf(seq,
- "%-3s %08X %04X %08X %04X %08X %04X %-11s %7lu\n",
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->af == AF_INET6)
+ seq_printf(seq,
+ "%-3s " NIP6_FMT " %04X " NIP6_FMT
+ " %04X " NIP6_FMT " %04X %-11s %7lu\n",
+ ip_vs_proto_name(cp->protocol),
+ NIP6(cp->caddr.in6), ntohs(cp->cport),
+ NIP6(cp->vaddr.in6), ntohs(cp->vport),
+ NIP6(cp->daddr.in6), ntohs(cp->dport),
+ ip_vs_state_name(cp->protocol, cp->state),
+ (cp->timer.expires-jiffies)/HZ);
+ else
+#endif
+ seq_printf(seq,
+ "%-3s %08X %04X %08X %04X"
+ " %08X %04X %-11s %7lu\n",
ip_vs_proto_name(cp->protocol),
- ntohl(cp->caddr), ntohs(cp->cport),
- ntohl(cp->vaddr), ntohs(cp->vport),
- ntohl(cp->daddr), ntohs(cp->dport),
+ ntohl(cp->caddr.ip), ntohs(cp->cport),
+ ntohl(cp->vaddr.ip), ntohs(cp->vport),
+ ntohl(cp->daddr.ip), ntohs(cp->dport),
ip_vs_state_name(cp->protocol, cp->state),
(cp->timer.expires-jiffies)/HZ);
}
@@ -809,12 +881,27 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
else {
const struct ip_vs_conn *cp = v;
- seq_printf(seq,
- "%-3s %08X %04X %08X %04X %08X %04X %-11s %-6s %7lu\n",
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->af == AF_INET6)
+ seq_printf(seq,
+ "%-3s " NIP6_FMT " %04X " NIP6_FMT
+ " %04X " NIP6_FMT " %04X %-11s %-6s %7lu\n",
+ ip_vs_proto_name(cp->protocol),
+ NIP6(cp->caddr.in6), ntohs(cp->cport),
+ NIP6(cp->vaddr.in6), ntohs(cp->vport),
+ NIP6(cp->daddr.in6), ntohs(cp->dport),
+ ip_vs_state_name(cp->protocol, cp->state),
+ ip_vs_origin_name(cp->flags),
+ (cp->timer.expires-jiffies)/HZ);
+ else
+#endif
+ seq_printf(seq,
+ "%-3s %08X %04X %08X %04X "
+ "%08X %04X %-11s %-6s %7lu\n",
ip_vs_proto_name(cp->protocol),
- ntohl(cp->caddr), ntohs(cp->cport),
- ntohl(cp->vaddr), ntohs(cp->vport),
- ntohl(cp->daddr), ntohs(cp->dport),
+ ntohl(cp->caddr.ip), ntohs(cp->cport),
+ ntohl(cp->vaddr.ip), ntohs(cp->vport),
+ ntohl(cp->daddr.ip), ntohs(cp->dport),
ip_vs_state_name(cp->protocol, cp->state),
ip_vs_origin_name(cp->flags),
(cp->timer.expires-jiffies)/HZ);
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 9fbf0a6d739..80a4fcf33a5 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -39,6 +39,11 @@
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
+#ifdef CONFIG_IP_VS_IPV6
+#include <net/ipv6.h>
+#include <net/ip6_checksum.h>
+#include <linux/netfilter_ipv6.h>
+#endif
+
#include <net/ip_vs.h>
@@ -60,6 +65,7 @@ EXPORT_SYMBOL(ip_vs_get_debug_level);
/* ID used in ICMP lookups */
#define icmp_id(icmph) (((icmph)->un).echo.id)
+/* ICMPv6 counterpart of icmp_id(): the echo identifier field of an
+ * icmp6hdr. The argument is parenthesized so that any pointer expression
+ * can be passed safely. */
+#define icmpv6_id(icmph)	((icmph)->icmp6_dataun.u_echo.identifier)
const char *ip_vs_proto_name(unsigned proto)
{
@@ -74,6 +80,10 @@ const char *ip_vs_proto_name(unsigned proto)
return "TCP";
case IPPROTO_ICMP:
return "ICMP";
+#ifdef CONFIG_IP_VS_IPV6
+ case IPPROTO_ICMPV6:
+ return "ICMPv6";
+#endif
default:
sprintf(buf, "IP_%d", proto);
return buf;
@@ -92,18 +102,18 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
struct ip_vs_dest *dest = cp->dest;
if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
spin_lock(&dest->stats.lock);
- dest->stats.inpkts++;
- dest->stats.inbytes += skb->len;
+ dest->stats.ustats.inpkts++;
+ dest->stats.ustats.inbytes += skb->len;
spin_unlock(&dest->stats.lock);
spin_lock(&dest->svc->stats.lock);
- dest->svc->stats.inpkts++;
- dest->svc->stats.inbytes += skb->len;
+ dest->svc->stats.ustats.inpkts++;
+ dest->svc->stats.ustats.inbytes += skb->len;
spin_unlock(&dest->svc->stats.lock);
spin_lock(&ip_vs_stats.lock);
- ip_vs_stats.inpkts++;
- ip_vs_stats.inbytes += skb->len;
+ ip_vs_stats.ustats.inpkts++;
+ ip_vs_stats.ustats.inbytes += skb->len;
spin_unlock(&ip_vs_stats.lock);
}
}
@@ -115,18 +125,18 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
struct ip_vs_dest *dest = cp->dest;
if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
spin_lock(&dest->stats.lock);
- dest->stats.outpkts++;
- dest->stats.outbytes += skb->len;
+ dest->stats.ustats.outpkts++;
+ dest->stats.ustats.outbytes += skb->len;
spin_unlock(&dest->stats.lock);
spin_lock(&dest->svc->stats.lock);
- dest->svc->stats.outpkts++;
- dest->svc->stats.outbytes += skb->len;
+ dest->svc->stats.ustats.outpkts++;
+ dest->svc->stats.ustats.outbytes += skb->len;
spin_unlock(&dest->svc->stats.lock);
spin_lock(&ip_vs_stats.lock);
- ip_vs_stats.outpkts++;
- ip_vs_stats.outbytes += skb->len;
+ ip_vs_stats.ustats.outpkts++;
+ ip_vs_stats.ustats.outbytes += skb->len;
spin_unlock(&ip_vs_stats.lock);
}
}
@@ -136,15 +146,15 @@ static inline void
ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
{
spin_lock(&cp->dest->stats.lock);
- cp->dest->stats.conns++;
+ cp->dest->stats.ustats.conns++;
spin_unlock(&cp->dest->stats.lock);
spin_lock(&svc->stats.lock);
- svc->stats.conns++;
+ svc->stats.ustats.conns++;
spin_unlock(&svc->stats.lock);
spin_lock(&ip_vs_stats.lock);
- ip_vs_stats.conns++;
+ ip_vs_stats.ustats.conns++;
spin_unlock(&ip_vs_stats.lock);
}
@@ -173,20 +183,28 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
__be16 ports[2])
{
struct ip_vs_conn *cp = NULL;
- struct iphdr *iph = ip_hdr(skb);
+ struct ip_vs_iphdr iph;
struct ip_vs_dest *dest;
struct ip_vs_conn *ct;
- __be16 dport; /* destination port to forward */
- __be32 snet; /* source network of the client, after masking */
+ __be16 dport; /* destination port to forward */
+ union nf_inet_addr snet; /* source network of the client,
+ after masking */
+
+ ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
/* Mask saddr with the netmask to adjust template granularity */
- snet = iph->saddr & svc->netmask;
+#ifdef CONFIG_IP_VS_IPV6
+ if (svc->af == AF_INET6)
+ ipv6_addr_prefix(&snet.in6, &iph.saddr.in6, svc->netmask);
+ else
+#endif
+ snet.ip = iph.saddr.ip & svc->netmask;
- IP_VS_DBG(6, "p-schedule: src %u.%u.%u.%u:%u dest %u.%u.%u.%u:%u "
- "mnet %u.%u.%u.%u\n",
- NIPQUAD(iph->saddr), ntohs(ports[0]),
- NIPQUAD(iph->daddr), ntohs(ports[1]),
- NIPQUAD(snet));
+ IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u "
+ "mnet %s\n",
+ IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(ports[0]),
+ IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(ports[1]),
+ IP_VS_DBG_ADDR(svc->af, &snet));
/*
* As far as we know, FTP is a very complicated network protocol, and
@@ -204,11 +222,11 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
if (ports[1] == svc->port) {
/* Check if a template already exists */
if (svc->port != FTPPORT)
- ct = ip_vs_ct_in_get(iph->protocol, snet, 0,
- iph->daddr, ports[1]);
+ ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0,
+ &iph.daddr, ports[1]);
else
- ct = ip_vs_ct_in_get(iph->protocol, snet, 0,
- iph->daddr, 0);
+ ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0,
+ &iph.daddr, 0);
if (!ct || !ip_vs_check_template(ct)) {
/*
@@ -228,18 +246,18 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
* for ftp service.
*/
if (svc->port != FTPPORT)
- ct = ip_vs_conn_new(iph->protocol,
- snet, 0,
- iph->daddr,
+ ct = ip_vs_conn_new(svc->af, iph.protocol,
+ &snet, 0,
+ &iph.daddr,
ports[1],
- dest->addr, dest->port,
+ &dest->addr, dest->port,
IP_VS_CONN_F_TEMPLATE,
dest);
else
- ct = ip_vs_conn_new(iph->protocol,
- snet, 0,
- iph->daddr, 0,
- dest->addr, 0,
+ ct = ip_vs_conn_new(svc->af, iph.protocol,
+ &snet, 0,
+ &iph.daddr, 0,
+ &dest->addr, 0,
IP_VS_CONN_F_TEMPLATE,
dest);
if (ct == NULL)
@@ -258,12 +276,16 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
* fwmark template: <IPPROTO_IP,caddr,0,fwmark,0,daddr,0>
* port zero template: <protocol,caddr,0,vaddr,0,daddr,0>
*/
- if (svc->fwmark)
- ct = ip_vs_ct_in_get(IPPROTO_IP, snet, 0,
- htonl(svc->fwmark), 0);
- else
- ct = ip_vs_ct_in_get(iph->protocol, snet, 0,
- iph->daddr, 0);
+ if (svc->fwmark) {
+ union nf_inet_addr fwmark = {
+ .all = { 0, 0, 0, htonl(svc->fwmark) }
+ };
+
+ ct = ip_vs_ct_in_get(svc->af, IPPROTO_IP, &snet, 0,
+ &fwmark, 0);
+ } else
+ ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0,
+ &iph.daddr, 0);
if (!ct || !ip_vs_check_template(ct)) {
/*
@@ -282,18 +304,22 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
/*
* Create a template according to the service
*/
- if (svc->fwmark)
- ct = ip_vs_conn_new(IPPROTO_IP,
- snet, 0,
- htonl(svc->fwmark), 0,
- dest->addr, 0,
+ if (svc->fwmark) {
+ union nf_inet_addr fwmark = {
+ .all = { 0, 0, 0, htonl(svc->fwmark) }
+ };
+
+ ct = ip_vs_conn_new(svc->af, IPPROTO_IP,
+ &snet, 0,
+ &fwmark, 0,
+ &dest->addr, 0,
IP_VS_CONN_F_TEMPLATE,
dest);
- else
- ct = ip_vs_conn_new(iph->protocol,
- snet, 0,
- iph->daddr, 0,
- dest->addr, 0,
+ } else
+ ct = ip_vs_conn_new(svc->af, iph.protocol,
+ &snet, 0,
+ &iph.daddr, 0,
+ &dest->addr, 0,
IP_VS_CONN_F_TEMPLATE,
dest);
if (ct == NULL)
@@ -310,10 +336,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
/*
* Create a new connection according to the template
*/
- cp = ip_vs_conn_new(iph->protocol,
- iph->saddr, ports[0],
- iph->daddr, ports[1],
- dest->addr, dport,
+ cp = ip_vs_conn_new(svc->af, iph.protocol,
+ &iph.saddr, ports[0],
+ &iph.daddr, ports[1],
+ &dest->addr, dport,
0,
dest);
if (cp == NULL) {
@@ -342,12 +368,12 @@ struct ip_vs_conn *
ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{
struct ip_vs_conn *cp = NULL;
- struct iphdr *iph = ip_hdr(skb);
+ struct ip_vs_iphdr iph;
struct ip_vs_dest *dest;
__be16 _ports[2], *pptr;
- pptr = skb_header_pointer(skb, iph->ihl*4,
- sizeof(_ports), _ports);
+ ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
+ pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
if (pptr == NULL)
return NULL;
@@ -377,22 +403,22 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
/*
* Create a connection entry.
*/
- cp = ip_vs_conn_new(iph->protocol,
- iph->saddr, pptr[0],
- iph->daddr, pptr[1],
- dest->addr, dest->port?dest->port:pptr[1],
+ cp = ip_vs_conn_new(svc->af, iph.protocol,
+ &iph.saddr, pptr[0],
+ &iph.daddr, pptr[1],
+ &dest->addr, dest->port ? dest->port : pptr[1],
0,
dest);
if (cp == NULL)
return NULL;
- IP_VS_DBG(6, "Schedule fwd:%c c:%u.%u.%u.%u:%u v:%u.%u.%u.%u:%u "
- "d:%u.%u.%u.%u:%u conn->flags:%X conn->refcnt:%d\n",
- ip_vs_fwd_tag(cp),
- NIPQUAD(cp->caddr), ntohs(cp->cport),
- NIPQUAD(cp->vaddr), ntohs(cp->vport),
- NIPQUAD(cp->daddr), ntohs(cp->dport),
- cp->flags, atomic_read(&cp->refcnt));
+ IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u "
+ "d:%s:%u conn->flags:%X conn->refcnt:%d\n",
+ ip_vs_fwd_tag(cp),
+ IP_VS_DBG_ADDR(svc->af, &cp->caddr), ntohs(cp->cport),
+ IP_VS_DBG_ADDR(svc->af, &cp->vaddr), ntohs(cp->vport),
+ IP_VS_DBG_ADDR(svc->af, &cp->daddr), ntohs(cp->dport),
+ cp->flags, atomic_read(&cp->refcnt));
ip_vs_conn_stats(cp, svc);
return cp;
@@ -408,20 +434,27 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
struct ip_vs_protocol *pp)
{
__be16 _ports[2], *pptr;
- struct iphdr *iph = ip_hdr(skb);
+ struct ip_vs_iphdr iph;
+ int unicast;
+ ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
- pptr = skb_header_pointer(skb, iph->ihl*4,
- sizeof(_ports), _ports);
+ pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
if (pptr == NULL) {
ip_vs_service_put(svc);
return NF_DROP;
}
+#ifdef CONFIG_IP_VS_IPV6
+ if (svc->af == AF_INET6)
+ unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST;
+ else
+#endif
+ unicast = (inet_addr_type(&init_net, iph.daddr.ip) == RTN_UNICAST);
+
/* if it is fwmark-based service, the cache_bypass sysctl is up
- and the destination is RTN_UNICAST (and not local), then create
+ and the destination is a non-local unicast, then create
a cache_bypass connection entry */
- if (sysctl_ip_vs_cache_bypass && svc->fwmark
- && (inet_addr_type(&init_net, iph->daddr) == RTN_UNICAST)) {
+ if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) {
int ret, cs;
struct ip_vs_conn *cp;
@@ -429,9 +462,9 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
/* create a new connection entry */
IP_VS_DBG(6, "ip_vs_leave: create a cache_bypass entry\n");
- cp = ip_vs_conn_new(iph->protocol,
- iph->saddr, pptr[0],
- iph->daddr, pptr[1],
+ cp = ip_vs_conn_new(svc->af, iph.protocol,
+ &iph.saddr, pptr[0],
+ &iph.daddr, pptr[1],
0, 0,
IP_VS_CONN_F_BYPASS,
NULL);
@@ -473,7 +506,14 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
* created, the TCP RST packet cannot be sent, instead that
* ICMP_PORT_UNREACH is sent here no matter it is TCP/UDP. --WZ
*/
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+#ifdef CONFIG_IP_VS_IPV6
+ if (svc->af == AF_INET6)
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0,
+ skb->dev);
+ else
+#endif
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+
return NF_DROP;
}
@@ -512,6 +552,14 @@ static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
return err;
}
+#ifdef CONFIG_IP_VS_IPV6
+static inline int ip_vs_gather_frags_v6(struct sk_buff *skb, u_int32_t user)
+{
+ /* TODO IPv6: Find out what to do here for IPv6.
+  * Until then this is a stub: neither argument is used, nothing is
+  * reassembled, and the constant 0 return means that callers testing
+  * the result never take their "packet was consumed" branch. */
+ return 0;
+}
+#endif
+
/*
* Packet has been made sufficiently writable in caller
* - inout: 1=in->out, 0=out->in
@@ -526,14 +574,14 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
struct iphdr *ciph = (struct iphdr *)(icmph + 1);
if (inout) {
- iph->saddr = cp->vaddr;
+ iph->saddr = cp->vaddr.ip;
ip_send_check(iph);
- ciph->daddr = cp->vaddr;
+ ciph->daddr = cp->vaddr.ip;
ip_send_check(ciph);
} else {
- iph->daddr = cp->daddr;
+ iph->daddr = cp->daddr.ip;
ip_send_check(iph);
- ciph->saddr = cp->daddr;
+ ciph->saddr = cp->daddr.ip;
ip_send_check(ciph);
}
@@ -560,21 +608,112 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
"Forwarding altered incoming ICMP");
}
+#ifdef CONFIG_IP_VS_IPV6
+/*
+ * Rewrite an ICMPv6 error message (outer IPv6 header, embedded IPv6
+ * header and, for TCP/UDP, the embedded port) for a NAT connection.
+ *
+ * - inout != 0: outer source and embedded destination become cp->vaddr,
+ *   embedded destination port becomes cp->vport.
+ * - inout == 0: outer destination and embedded source become cp->daddr,
+ *   embedded source port becomes cp->dport.
+ *
+ * The ICMPv6 header is taken to start right after a plain struct ipv6hdr
+ * (no extension headers). The headers are modified in place, so the
+ * caller has to make that part of the skb writable first.
+ */
+void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
+		       struct ip_vs_conn *cp, int inout)
+{
+	struct ipv6hdr *iph = ipv6_hdr(skb);
+	unsigned int icmp_offset = sizeof(struct ipv6hdr);
+	unsigned int icmp_len = skb->len - icmp_offset;
+	struct icmp6hdr *icmph = (struct icmp6hdr *)(skb_network_header(skb) +
+						      icmp_offset);
+	struct ipv6hdr *ciph = (struct ipv6hdr *)(icmph + 1);
+
+	if (inout) {
+		iph->saddr = cp->vaddr.in6;
+		ciph->daddr = cp->vaddr.in6;
+	} else {
+		iph->daddr = cp->daddr.in6;
+		ciph->saddr = cp->daddr.in6;
+	}
+
+	/* the TCP/UDP port */
+	if (IPPROTO_TCP == ciph->nexthdr || IPPROTO_UDP == ciph->nexthdr) {
+		__be16 *ports = (void *)ciph + sizeof(struct ipv6hdr);
+
+		if (inout)
+			ports[1] = cp->vport;
+		else
+			ports[0] = cp->dport;
+	}
+
+	/* And finally the ICMPv6 checksum. Unlike ICMPv4 it covers an IPv6
+	 * pseudo-header, so it is computed over the already rewritten outer
+	 * addresses and actually stored in the header; the previous code
+	 * discarded the computed value and left the field at zero. */
+	icmph->icmp6_cksum = 0;
+	icmph->icmp6_cksum = csum_ipv6_magic(&iph->saddr, &iph->daddr,
+					     icmp_len, IPPROTO_ICMPV6,
+					     skb_checksum(skb, icmp_offset,
+							  icmp_len, 0));
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+	if (inout)
+		IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph,
+			      "Forwarding altered outgoing ICMPv6");
+	else
+		IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph,
+			      "Forwarding altered incoming ICMPv6");
+}
+#endif
+
+/*
+ * Forward an ICMP/ICMPv6 error that belongs to connection @cp back
+ * towards the client (NAT and local-client case).
+ *
+ * @snet is only used for the debug message on a checksum failure,
+ * @protocol is the protocol of the embedded packet, @offset is the
+ * position behind the embedded network header and @ihl is the offset at
+ * which checksum verification starts.
+ *
+ * Returns NF_ACCEPT once the packet has been rewritten, NF_DROP when the
+ * checksum is bad or the skb cannot be made writable. The reference on
+ * @cp is released with __ip_vs_conn_put() in every case.
+ */
+static int handle_response_icmp(int af, struct sk_buff *skb,
+				union nf_inet_addr *snet,
+				__u8 protocol, struct ip_vs_conn *cp,
+				struct ip_vs_protocol *pp,
+				unsigned int offset, unsigned int ihl)
+{
+	unsigned int rc = NF_DROP;
+
+	if (IP_VS_FWD_METHOD(cp) != 0) {
+		IP_VS_ERR("shouldn't reach here, because the box is on the "
+			  "half connection in the tun/dr module.\n");
+	}
+
+	if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
+		/* Checksum verification failed: report and drop */
+		IP_VS_DBG_BUF(1, "Forward ICMP: failed checksum from %s!\n",
+			      IP_VS_DBG_ADDR(af, snet));
+	} else {
+		/* The two port fields of an embedded TCP/UDP header are
+		 * rewritten as well, so they must be writable too. */
+		switch (protocol) {
+		case IPPROTO_TCP:
+		case IPPROTO_UDP:
+			offset += 2 * sizeof(__u16);
+			break;
+		default:
+			break;
+		}
+
+		if (skb_make_writable(skb, offset)) {
+#ifdef CONFIG_IP_VS_IPV6
+			if (af == AF_INET6)
+				ip_vs_nat_icmp_v6(skb, pp, cp, 1);
+			else
+#endif
+				ip_vs_nat_icmp(skb, pp, cp, 1);
+
+			/* account the packet, then mark it as handled */
+			ip_vs_out_stats(cp, skb);
+
+			skb->ipvs_property = 1;
+			rc = NF_ACCEPT;
+		}
+	}
+
+	__ip_vs_conn_put(cp);
+
+	return rc;
+}
+
/*
* Handle ICMP messages in the inside-to-outside direction (outgoing).
- * Find any that might be relevant, check against existing connections,
- * forward to the right destination host if relevant.
+ * Find any that might be relevant, check against existing connections.
* Currently handles error types - unreachable, quench, ttl exceeded.
- * (Only used in VS/NAT)
*/
static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
{
struct iphdr *iph;
struct icmphdr _icmph, *ic;
struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */
+ struct ip_vs_iphdr ciph;
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp;
- unsigned int offset, ihl, verdict;
+ unsigned int offset, ihl;
+ union nf_inet_addr snet;
*related = 1;
@@ -627,102 +766,231 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
offset += cih->ihl * 4;
+ ip_vs_fill_iphdr(AF_INET, cih, &ciph);
/* The embedded headers contain source and dest in reverse order */
- cp = pp->conn_out_get(skb, pp, cih, offset, 1);
+ cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
if (!cp)
return NF_ACCEPT;
- verdict = NF_DROP;
+ snet.ip = iph->saddr;
+ return handle_response_icmp(AF_INET, skb, &snet, cih->protocol, cp,
+ pp, offset, ihl);
+}
- if (IP_VS_FWD_METHOD(cp) != 0) {
- IP_VS_ERR("shouldn't reach here, because the box is on the "
- "half connection in the tun/dr module.\n");
+#ifdef CONFIG_IP_VS_IPV6
+static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related)
+{
+ struct ipv6hdr *iph;
+ struct icmp6hdr _icmph, *ic;
+ struct ipv6hdr _ciph, *cih; /* The ip header contained
+ within the ICMP */
+ struct ip_vs_iphdr ciph;
+ struct ip_vs_conn *cp;
+ struct ip_vs_protocol *pp;
+ unsigned int offset;
+ union nf_inet_addr snet;
+
+ *related = 1;
+
+ /* reassemble IP fragments */
+ if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
+ if (ip_vs_gather_frags_v6(skb, IP_DEFRAG_VS_OUT))
+ return NF_STOLEN;
}
- /* Ensure the checksum is correct */
- if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
- /* Failed checksum! */
- IP_VS_DBG(1, "Forward ICMP: failed checksum from %d.%d.%d.%d!\n",
- NIPQUAD(iph->saddr));
- goto out;
+ iph = ipv6_hdr(skb);
+ offset = sizeof(struct ipv6hdr);
+ ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
+ if (ic == NULL)
+ return NF_DROP;
+
+ IP_VS_DBG(12, "Outgoing ICMPv6 (%d,%d) " NIP6_FMT "->" NIP6_FMT "\n",
+ ic->icmp6_type, ntohs(icmpv6_id(ic)),
+ NIP6(iph->saddr), NIP6(iph->daddr));
+
+ /*
+ * Work through seeing if this is for us.
+ * These checks are supposed to be in an order that means easy
+ * things are checked first to speed up processing.... however
+ * this means that some packets will manage to get a long way
+ * down this stack and then be rejected, but that's life.
+ */
+ if ((ic->icmp6_type != ICMPV6_DEST_UNREACH) &&
+ (ic->icmp6_type != ICMPV6_PKT_TOOBIG) &&
+ (ic->icmp6_type != ICMPV6_TIME_EXCEED)) {
+ *related = 0;
+ return NF_ACCEPT;
}
- if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
- offset += 2 * sizeof(__u16);
- if (!skb_make_writable(skb, offset))
- goto out;
+ /* Now find the contained IP header */
+ offset += sizeof(_icmph);
+ cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
+ if (cih == NULL)
+ return NF_ACCEPT; /* The packet looks wrong, ignore */
- ip_vs_nat_icmp(skb, pp, cp, 1);
+ pp = ip_vs_proto_get(cih->nexthdr);
+ if (!pp)
+ return NF_ACCEPT;
- /* do the statistics and put it back */
- ip_vs_out_stats(cp, skb);
+ /* Is the embedded protocol header present? */
+ /* TODO: we don't support fragmentation at the moment anyways */
+ if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag))
+ return NF_ACCEPT;
- skb->ipvs_property = 1;
- verdict = NF_ACCEPT;
+ IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMPv6 for");
- out:
- __ip_vs_conn_put(cp);
+ offset += sizeof(struct ipv6hdr);
- return verdict;
+ ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
+ /* The embedded headers contain source and dest in reverse order */
+ cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);
+ if (!cp)
+ return NF_ACCEPT;
+
+ ipv6_addr_copy(&snet.in6, &iph->saddr);
+ return handle_response_icmp(AF_INET6, skb, &snet, cih->nexthdr, cp,
+ pp, offset, sizeof(struct ipv6hdr));
}
+#endif
-static inline int is_tcp_reset(const struct sk_buff *skb)
+static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len)
{
struct tcphdr _tcph, *th;
- th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
+ th = skb_header_pointer(skb, nh_len, sizeof(_tcph), &_tcph);
if (th == NULL)
return 0;
return th->rst;
}
+/* Handle response packets: rewrite addresses and send away...
+ * Used for NAT and local client.
+ *
+ * The first ihl bytes of skb are made writable, the protocol's optional
+ * snat_handler is run, and the source address in the IPv4/IPv6 header is
+ * replaced by the connection's virtual address (cp->vaddr) before the
+ * packet is re-routed.
+ *
+ * Returns NF_ACCEPT on success, with skb->ipvs_property set.
+ * On any failure the skb is freed here and NF_STOLEN is returned.
+ * In both cases the reference on cp is released with ip_vs_conn_put().
+ */
+static unsigned int
+handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+ struct ip_vs_conn *cp, int ihl)
+{
+ IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet");
+
+ if (!skb_make_writable(skb, ihl))
+ goto drop;
+
+ /* mangle the packet; the handler is optional, and a zero return
+  * from it is treated as failure */
+ if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))
+ goto drop;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ ipv6_hdr(skb)->saddr = cp->vaddr.in6;
+ else
+#endif
+ {
+ ip_hdr(skb)->saddr = cp->vaddr.ip;
+ ip_send_check(ip_hdr(skb));
+ }
+
+ /* For policy routing, packets originating from this
+ * machine itself may be routed differently to packets
+ * passing through. We want this packet to be routed as
+ * if it came from this machine itself. So re-compute
+ * the routing information.
+ */
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6) {
+ if (ip6_route_me_harder(skb) != 0)
+ goto drop;
+ } else
+#endif
+ if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
+ goto drop;
+
+ IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
+
+ ip_vs_out_stats(cp, skb);
+ ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
+ ip_vs_conn_put(cp);
+
+ skb->ipvs_property = 1;
+
+ LeaveFunction(11);
+ return NF_ACCEPT;
+
+drop:
+ ip_vs_conn_put(cp);
+ kfree_skb(skb);
+ return NF_STOLEN;
+}
+
/*
* It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT.
- * Check if outgoing packet belongs to the established ip_vs_conn,
- * rewrite addresses of the packet and send it on its way...
+ * Check if outgoing packet belongs to the established ip_vs_conn.
*/
static unsigned int
ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- struct iphdr *iph;
+ struct ip_vs_iphdr iph;
struct ip_vs_protocol *pp;
struct ip_vs_conn *cp;
- int ihl;
+ int af;
EnterFunction(11);
+ af = (skb->protocol == __constant_htons(ETH_P_IP)) ? AF_INET : AF_INET6;
+
if (skb->ipvs_property)
return NF_ACCEPT;
- iph = ip_hdr(skb);
- if (unlikely(iph->protocol == IPPROTO_ICMP)) {
- int related, verdict = ip_vs_out_icmp(skb, &related);
+ ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6) {
+ if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
+ int related, verdict = ip_vs_out_icmp_v6(skb, &related);
- if (related)
- return verdict;
- iph = ip_hdr(skb);
- }
+ if (related)
+ return verdict;
+ ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+ }
+ } else
+#endif
+ if (unlikely(iph.protocol == IPPROTO_ICMP)) {
+ int related, verdict = ip_vs_out_icmp(skb, &related);
- pp = ip_vs_proto_get(iph->protocol);
+ if (related)
+ return verdict;
+ ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+ }
+
+ pp = ip_vs_proto_get(iph.protocol);
if (unlikely(!pp))
return NF_ACCEPT;
/* reassemble IP fragments */
- if (unlikely(iph->frag_off & htons(IP_MF|IP_OFFSET) &&
- !pp->dont_defrag)) {
- if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT))
- return NF_STOLEN;
- iph = ip_hdr(skb);
- }
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6) {
+ if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
+ int related, verdict = ip_vs_out_icmp_v6(skb, &related);
+
+ if (related)
+ return verdict;
- ihl = iph->ihl << 2;
+ ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+ }
+ } else
+#endif
+ if (unlikely(ip_hdr(skb)->frag_off & htons(IP_MF|IP_OFFSET) &&
+ !pp->dont_defrag)) {
+ if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT))
+ return NF_STOLEN;
+
+ ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+ }
/*
* Check if the packet belongs to an existing entry
*/
- cp = pp->conn_out_get(skb, pp, iph, ihl, 0);
+ cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
if (unlikely(!cp)) {
if (sysctl_ip_vs_nat_icmp_send &&
@@ -730,21 +998,31 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
pp->protocol == IPPROTO_UDP)) {
__be16 _ports[2], *pptr;
- pptr = skb_header_pointer(skb, ihl,
+ pptr = skb_header_pointer(skb, iph.len,
sizeof(_ports), _ports);
if (pptr == NULL)
return NF_ACCEPT; /* Not for me */
- if (ip_vs_lookup_real_service(iph->protocol,
- iph->saddr, pptr[0])) {
+ if (ip_vs_lookup_real_service(af, iph.protocol,
+ &iph.saddr,
+ pptr[0])) {
/*
* Notify the real server: there is no
* existing entry if it is not RST
* packet or not TCP packet.
*/
- if (iph->protocol != IPPROTO_TCP
- || !is_tcp_reset(skb)) {
- icmp_send(skb,ICMP_DEST_UNREACH,
- ICMP_PORT_UNREACH, 0);
+ if (iph.protocol != IPPROTO_TCP
+ || !is_tcp_reset(skb, iph.len)) {
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ icmpv6_send(skb,
+ ICMPV6_DEST_UNREACH,
+ ICMPV6_PORT_UNREACH,
+ 0, skb->dev);
+ else
+#endif
+ icmp_send(skb,
+ ICMP_DEST_UNREACH,
+ ICMP_PORT_UNREACH, 0);
return NF_DROP;
}
}
@@ -754,41 +1032,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
return NF_ACCEPT;
}
- IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet");
-
- if (!skb_make_writable(skb, ihl))
- goto drop;
-
- /* mangle the packet */
- if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))
- goto drop;
- ip_hdr(skb)->saddr = cp->vaddr;
- ip_send_check(ip_hdr(skb));
-
- /* For policy routing, packets originating from this
- * machine itself may be routed differently to packets
- * passing through. We want this packet to be routed as
- * if it came from this machine itself. So re-compute
- * the routing information.
- */
- if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
- goto drop;
-
- IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
-
- ip_vs_out_stats(cp, skb);
- ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
- ip_vs_conn_put(cp);
-
- skb->ipvs_property = 1;
-
- LeaveFunction(11);
- return NF_ACCEPT;
-
- drop:
- ip_vs_conn_put(cp);
- kfree_skb(skb);
- return NF_STOLEN;
+ return handle_response(af, skb, pp, cp, iph.len);
}
@@ -804,9 +1048,11 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
struct iphdr *iph;
struct icmphdr _icmph, *ic;
struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */
+ struct ip_vs_iphdr ciph;
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp;
unsigned int offset, ihl, verdict;
+ union nf_inet_addr snet;
*related = 1;
@@ -860,10 +1106,20 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
offset += cih->ihl * 4;
+ ip_vs_fill_iphdr(AF_INET, cih, &ciph);
/* The embedded headers contain source and dest in reverse order */
- cp = pp->conn_in_get(skb, pp, cih, offset, 1);
- if (!cp)
+ cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1);
+ if (!cp) {
+ /* The packet could also belong to a local client */
+ cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
+ if (cp) {
+ snet.ip = iph->saddr;
+ return handle_response_icmp(AF_INET, skb, &snet,
+ cih->protocol, cp, pp,
+ offset, ihl);
+ }
return NF_ACCEPT;
+ }
verdict = NF_DROP;
@@ -888,6 +1144,105 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
return verdict;
}
+#ifdef CONFIG_IP_VS_IPV6
+/*
+ * Handle ICMPv6 messages in the outside-to-inside direction (incoming).
+ *
+ * *related is set to 1 on entry and reset to 0 only when the message is
+ * not one of the handled error types (destination unreachable, packet
+ * too big, time exceeded), in which case NF_ACCEPT is returned.
+ *
+ * Return value:
+ *  - NF_STOLEN if ip_vs_gather_frags_v6() reports a non-zero result;
+ *  - NF_DROP   if the ICMPv6 header cannot be read;
+ *  - NF_ACCEPT if the embedded IPv6 header cannot be read, its next
+ *              header has no registered IPVS protocol, or no connection
+ *              matches;
+ *  - the verdict of handle_response_icmp() if only an outgoing-direction
+ *    connection matches (local client case);
+ *  - otherwise the verdict of ip_vs_icmp_xmit_v6().
+ */
+static int
+ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
+{
+	struct ipv6hdr *iph;
+	struct icmp6hdr _icmph, *ic;
+	struct ipv6hdr _ciph, *cih;	/* The ip header contained
+					   within the ICMP */
+	struct ip_vs_iphdr ciph;
+	struct ip_vs_conn *cp;
+	struct ip_vs_protocol *pp;
+	unsigned int offset, verdict;
+	union nf_inet_addr snet;
+
+	*related = 1;
+
+	/* reassemble IP fragments */
+	if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
+		if (ip_vs_gather_frags_v6(skb, hooknum == NF_INET_LOCAL_IN ?
+					       IP_DEFRAG_VS_IN :
+					       IP_DEFRAG_VS_FWD))
+			return NF_STOLEN;
+	}
+
+	iph = ipv6_hdr(skb);
+	offset = sizeof(struct ipv6hdr);
+	ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
+	if (ic == NULL)
+		return NF_DROP;
+
+	IP_VS_DBG(12, "Incoming ICMPv6 (%d,%d) " NIP6_FMT "->" NIP6_FMT "\n",
+		  ic->icmp6_type, ntohs(icmpv6_id(ic)),
+		  NIP6(iph->saddr), NIP6(iph->daddr));
+
+	/*
+	 * Work through seeing if this is for us.
+	 * These checks are supposed to be in an order that means easy
+	 * things are checked first to speed up processing.... however
+	 * this means that some packets will manage to get a long way
+	 * down this stack and then be rejected, but that's life.
+	 */
+	if ((ic->icmp6_type != ICMPV6_DEST_UNREACH) &&
+	    (ic->icmp6_type != ICMPV6_PKT_TOOBIG) &&
+	    (ic->icmp6_type != ICMPV6_TIME_EXCEED)) {
+		*related = 0;
+		return NF_ACCEPT;
+	}
+
+	/* Now find the contained IP header */
+	offset += sizeof(_icmph);
+	cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
+	if (cih == NULL)
+		return NF_ACCEPT; /* The packet looks wrong, ignore */
+
+	pp = ip_vs_proto_get(cih->nexthdr);
+	if (!pp)
+		return NF_ACCEPT;
+
+	/* Is the embedded protocol header present? */
+	/* TODO: we don't support fragmentation at the moment anyway */
+	if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag))
+		return NF_ACCEPT;
+
+	IP_VS_DBG_PKT(11, pp, skb, offset, "Checking incoming ICMPv6 for");
+
+	offset += sizeof(struct ipv6hdr);
+
+	ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
+	/* The embedded headers contain source and dest in reverse order */
+	cp = pp->conn_in_get(AF_INET6, skb, pp, &ciph, offset, 1);
+	if (!cp) {
+		/* The packet could also belong to a local client */
+		cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);
+		if (cp) {
+			ipv6_addr_copy(&snet.in6, &iph->saddr);
+			return handle_response_icmp(AF_INET6, skb, &snet,
+						    cih->nexthdr,
+						    cp, pp, offset,
+						    sizeof(struct ipv6hdr));
+		}
+		return NF_ACCEPT;
+	}
+
+	/* do the statistics and put it back */
+	ip_vs_in_stats(cp, skb);
+	if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr)
+		offset += 2 * sizeof(__u16);
+	/* No NF_DROP preset is needed: the verdict always comes from the
+	 * transmitter below. */
+	verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset);
+	/* do not touch skb anymore */
+
+	__ip_vs_conn_put(cp);
+
+	return verdict;
+}
+#endif
+
+
/*
* Check if it's for virtual services, look it up,
* and send it on its way...
@@ -897,50 +1252,54 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- struct iphdr *iph;
+ struct ip_vs_iphdr iph;
struct ip_vs_protocol *pp;
struct ip_vs_conn *cp;
- int ret, restart;
- int ihl;
+ int ret, restart, af;
+
+ af = (skb->protocol == __constant_htons(ETH_P_IP)) ? AF_INET : AF_INET6;
+
+ ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
/*
- * Big tappo: only PACKET_HOST (neither loopback nor mcasts)
- * ... don't know why 1st test DOES NOT include 2nd (?)
+ * Big tappo: only PACKET_HOST, including loopback for local client
+ * Don't handle local packets on IPv6 for now
*/
- if (unlikely(skb->pkt_type != PACKET_HOST
- || skb->dev->flags & IFF_LOOPBACK || skb->sk)) {
- IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n",
- skb->pkt_type,
- ip_hdr(skb)->protocol,
- NIPQUAD(ip_hdr(skb)->daddr));
+ if (unlikely(skb->pkt_type != PACKET_HOST)) {
+ IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s ignored\n",
+ skb->pkt_type,
+ iph.protocol,
+ IP_VS_DBG_ADDR(af, &iph.daddr));
return NF_ACCEPT;
}
- iph = ip_hdr(skb);
- if (unlikely(iph->protocol == IPPROTO_ICMP)) {
+ if (unlikely(iph.protocol == IPPROTO_ICMP)) {
int related, verdict = ip_vs_in_icmp(skb, &related, hooknum);
if (related)
return verdict;
- iph = ip_hdr(skb);
+ ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
}
/* Protocol supported? */
- pp = ip_vs_proto_get(iph->protocol);
+ pp = ip_vs_proto_get(iph.protocol);
if (unlikely(!pp))
return NF_ACCEPT;
- ihl = iph->ihl << 2;
-
/*
* Check if the packet belongs to an existing connection entry
*/
- cp = pp->conn_in_get(skb, pp, iph, ihl, 0);
+ cp = pp->conn_in_get(af, skb, pp, &iph, iph.len, 0);
if (unlikely(!cp)) {
int v;
- if (!pp->conn_schedule(skb, pp, &v, &cp))
+ /* For local client packets, it could be a response */
+ cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
+ if (cp)
+ return handle_response(af, skb, pp, cp, iph.len);
+
+ if (!pp->conn_schedule(af, skb, pp, &v, &cp))
return v;
}
@@ -984,7 +1343,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
* encorage the standby servers to update the connections timeout
*/
atomic_inc(&cp->in_pkts);
- if ((ip_vs_sync_state & IP_VS_STATE_MASTER) &&
+ if (af == AF_INET &&
+ (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
(((cp->protocol != IPPROTO_TCP ||
cp->state == IP_VS_TCP_S_ESTABLISHED) &&
(atomic_read(&cp->in_pkts) % sysctl_ip_vs_sync_threshold[1]
@@ -1023,6 +1383,21 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb,
return ip_vs_in_icmp(skb, &r, hooknum);
}
+#ifdef CONFIG_IP_VS_IPV6
+/*
+ * Netfilter hook function for IPv6: packets whose IPv6 next header is
+ * ICMPv6 are handed to ip_vs_in_icmp_v6() and its verdict is returned;
+ * everything else is accepted untouched.
+ */
+static unsigned int
+ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb,
+		      const struct net_device *in, const struct net_device *out,
+		      int (*okfn)(struct sk_buff *))
+{
+	int related;	/* filled in by the callee, not used here */
+
+	if (ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6)
+		return ip_vs_in_icmp_v6(skb, &related, hooknum);
+
+	return NF_ACCEPT;
+}
+#endif
+
static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
/* After packet filtering, forward packet through VS/DR, VS/TUN,
@@ -1060,6 +1435,43 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP_PRI_NAT_SRC-1,
},
+#ifdef CONFIG_IP_VS_IPV6
+ /* After packet filtering, forward packet through VS/DR, VS/TUN,
+ * or VS/NAT(change destination), so that filtering rules can be
+ * applied to IPVS. */
+ {
+ .hook = ip_vs_in,
+ .owner = THIS_MODULE,
+ .pf = PF_INET6,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = 100,
+ },
+ /* After packet filtering, change source only for VS/NAT */
+ {
+ .hook = ip_vs_out,
+ .owner = THIS_MODULE,
+ .pf = PF_INET6,
+ .hooknum = NF_INET_FORWARD,
+ .priority = 100,
+ },
+ /* After packet filtering (but before ip_vs_out_icmp_v6), catch ICMPv6
+ * destined for ::/0, which is for incoming IPVS connections */
+ {
+ .hook = ip_vs_forward_icmp_v6,
+ .owner = THIS_MODULE,
+ .pf = PF_INET6,
+ .hooknum = NF_INET_FORWARD,
+ .priority = 99,
+ },
+ /* Before the netfilter connection tracking, exit from POST_ROUTING */
+ {
+ .hook = ip_vs_post_routing,
+ .owner = THIS_MODULE,
+ .pf = PF_INET6,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP6_PRI_NAT_SRC-1,
+ },
+#endif
};
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index ede101eeec1..993a83fb0d5 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -35,6 +35,10 @@
#include <net/net_namespace.h>
#include <net/ip.h>
+#ifdef CONFIG_IP_VS_IPV6
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#endif
#include <net/route.h>
#include <net/sock.h>
#include <net/genetlink.h>
@@ -91,6 +95,26 @@ int ip_vs_get_debug_level(void)
}
#endif
+#ifdef CONFIG_IP_VS_IPV6
+/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
+static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
+{
+ struct rt6_info *rt;
+ struct flowi fl = {
+ .oif = 0,
+ .nl_u = {
+ .ip6_u = {
+ .daddr = *addr,
+ .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
+ };
+
+ rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
+ if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
+ return 1;
+
+ return 0;
+}
+#endif
/*
* update_defense_level is called from keventd and from sysctl,
* so it needs to protect itself from softirqs
@@ -282,11 +306,19 @@ static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
* Returns hash value for virtual service
*/
static __inline__ unsigned
-ip_vs_svc_hashkey(unsigned proto, __be32 addr, __be16 port)
+ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
+ __be16 port)
{
register unsigned porth = ntohs(port);
+ __be32 addr_fold = addr->ip;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ addr_fold = addr->ip6[0]^addr->ip6[1]^
+ addr->ip6[2]^addr->ip6[3];
+#endif
- return (proto^ntohl(addr)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
+ return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
& IP_VS_SVC_TAB_MASK;
}
@@ -317,7 +349,8 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
/*
* Hash it by <protocol,addr,port> in ip_vs_svc_table
*/
- hash = ip_vs_svc_hashkey(svc->protocol, svc->addr, svc->port);
+ hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
+ svc->port);
list_add(&svc->s_list, &ip_vs_svc_table[hash]);
} else {
/*
@@ -363,17 +396,19 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
/*
* Get service by {proto,addr,port} in the service table.
*/
-static __inline__ struct ip_vs_service *
-__ip_vs_service_get(__u16 protocol, __be32 vaddr, __be16 vport)
+static inline struct ip_vs_service *
+__ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
+ __be16 vport)
{
unsigned hash;
struct ip_vs_service *svc;
/* Check for "full" addressed entries */
- hash = ip_vs_svc_hashkey(protocol, vaddr, vport);
+ hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
- if ((svc->addr == vaddr)
+ if ((svc->af == af)
+ && ip_vs_addr_equal(af, &svc->addr, vaddr)
&& (svc->port == vport)
&& (svc->protocol == protocol)) {
/* HIT */
@@ -389,7 +424,8 @@ __ip_vs_service_get(__u16 protocol, __be32 vaddr, __be16 vport)
/*
* Get service by {fwmark} in the service table.
*/
-static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark)
+static inline struct ip_vs_service *
+__ip_vs_svc_fwm_get(int af, __u32 fwmark)
{
unsigned hash;
struct ip_vs_service *svc;
@@ -398,7 +434,7 @@ static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark)
hash = ip_vs_svc_fwm_hashkey(fwmark);
list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
- if (svc->fwmark == fwmark) {
+ if (svc->fwmark == fwmark && svc->af == af) {
/* HIT */
atomic_inc(&svc->usecnt);
return svc;
@@ -409,7 +445,8 @@ static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark)
}
struct ip_vs_service *
-ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport)
+ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
+ const union nf_inet_addr *vaddr, __be16 vport)
{
struct ip_vs_service *svc;
@@ -418,14 +455,14 @@ ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport)
/*
* Check the table hashed by fwmark first
*/
- if (fwmark && (svc = __ip_vs_svc_fwm_get(fwmark)))
+ if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark)))
goto out;
/*
* Check the table hashed by <protocol,addr,port>
* for "full" addressed entries
*/
- svc = __ip_vs_service_get(protocol, vaddr, vport);
+ svc = __ip_vs_service_get(af, protocol, vaddr, vport);
if (svc == NULL
&& protocol == IPPROTO_TCP
@@ -435,7 +472,7 @@ ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport)
* Check if ftp service entry exists, the packet
* might belong to FTP data connections.
*/
- svc = __ip_vs_service_get(protocol, vaddr, FTPPORT);
+ svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT);
}
if (svc == NULL
@@ -443,16 +480,16 @@ ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport)
/*
* Check if the catch-all port (port zero) exists
*/
- svc = __ip_vs_service_get(protocol, vaddr, 0);
+ svc = __ip_vs_service_get(af, protocol, vaddr, 0);
}
out:
read_unlock(&__ip_vs_svc_lock);
- IP_VS_DBG(9, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
- fwmark, ip_vs_proto_name(protocol),
- NIPQUAD(vaddr), ntohs(vport),
- svc?"hit":"not hit");
+ IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
+ fwmark, ip_vs_proto_name(protocol),
+ IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
+ svc ? "hit" : "not hit");
return svc;
}
@@ -479,11 +516,20 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest)
/*
* Returns hash value for real service
*/
-static __inline__ unsigned ip_vs_rs_hashkey(__be32 addr, __be16 port)
+static inline unsigned ip_vs_rs_hashkey(int af,
+ const union nf_inet_addr *addr,
+ __be16 port)
{
register unsigned porth = ntohs(port);
+ __be32 addr_fold = addr->ip;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ addr_fold = addr->ip6[0]^addr->ip6[1]^
+ addr->ip6[2]^addr->ip6[3];
+#endif
- return (ntohl(addr)^(porth>>IP_VS_RTAB_BITS)^porth)
+ return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
& IP_VS_RTAB_MASK;
}
@@ -503,7 +549,8 @@ static int ip_vs_rs_hash(struct ip_vs_dest *dest)
* Hash by proto,addr,port,
* which are the parameters of the real service.
*/
- hash = ip_vs_rs_hashkey(dest->addr, dest->port);
+ hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
+
list_add(&dest->d_list, &ip_vs_rtable[hash]);
return 1;
@@ -530,7 +577,9 @@ static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
* Lookup real service by <proto,addr,port> in the real service table.
*/
struct ip_vs_dest *
-ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport)
+ip_vs_lookup_real_service(int af, __u16 protocol,
+ const union nf_inet_addr *daddr,
+ __be16 dport)
{
unsigned hash;
struct ip_vs_dest *dest;
@@ -539,11 +588,12 @@ ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport)
* Check for "full" addressed entries
* Return the first found entry
*/
- hash = ip_vs_rs_hashkey(daddr, dport);
+ hash = ip_vs_rs_hashkey(af, daddr, dport);
read_lock(&__ip_vs_rs_lock);
list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
- if ((dest->addr == daddr)
+ if ((dest->af == af)
+ && ip_vs_addr_equal(af, &dest->addr, daddr)
&& (dest->port == dport)
&& ((dest->protocol == protocol) ||
dest->vfwmark)) {
@@ -561,7 +611,8 @@ ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport)
* Lookup destination by {addr,port} in the given service
*/
static struct ip_vs_dest *
-ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
+ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
+ __be16 dport)
{
struct ip_vs_dest *dest;
@@ -569,7 +620,9 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
* Find the destination for the given service
*/
list_for_each_entry(dest, &svc->destinations, n_list) {
- if ((dest->addr == daddr) && (dest->port == dport)) {
+ if ((dest->af == svc->af)
+ && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
+ && (dest->port == dport)) {
/* HIT */
return dest;
}
@@ -588,13 +641,15 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
* ip_vs_lookup_real_service() looked promissing, but
* seems not working as expected.
*/
-struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport,
- __be32 vaddr, __be16 vport, __u16 protocol)
+struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
+ __be16 dport,
+ const union nf_inet_addr *vaddr,
+ __be16 vport, __u16 protocol)
{
struct ip_vs_dest *dest;
struct ip_vs_service *svc;
- svc = ip_vs_service_get(0, protocol, vaddr, vport);
+ svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
if (!svc)
return NULL;
dest = ip_vs_lookup_dest(svc, daddr, dport);
@@ -615,7 +670,8 @@ struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport,
* scheduling.
*/
static struct ip_vs_dest *
-ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
+ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
+ __be16 dport)
{
struct ip_vs_dest *dest, *nxt;
@@ -623,17 +679,19 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
* Find the destination in trash
*/
list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
- IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, "
- "dest->refcnt=%d\n",
- dest->vfwmark,
- NIPQUAD(dest->addr), ntohs(dest->port),
- atomic_read(&dest->refcnt));
- if (dest->addr == daddr &&
+ IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
+ "dest->refcnt=%d\n",
+ dest->vfwmark,
+ IP_VS_DBG_ADDR(svc->af, &dest->addr),
+ ntohs(dest->port),
+ atomic_read(&dest->refcnt));
+ if (dest->af == svc->af &&
+ ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
dest->port == dport &&
dest->vfwmark == svc->fwmark &&
dest->protocol == svc->protocol &&
(svc->fwmark ||
- (dest->vaddr == svc->addr &&
+ (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
dest->vport == svc->port))) {
/* HIT */
return dest;
@@ -643,10 +701,11 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
* Try to purge the destination from trash if not referenced
*/
if (atomic_read(&dest->refcnt) == 1) {
- IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u "
- "from trash\n",
- dest->vfwmark,
- NIPQUAD(dest->addr), ntohs(dest->port));
+ IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
+ "from trash\n",
+ dest->vfwmark,
+ IP_VS_DBG_ADDR(svc->af, &dest->addr),
+ ntohs(dest->port));
list_del(&dest->n_list);
ip_vs_dst_reset(dest);
__ip_vs_unbind_svc(dest);
@@ -685,18 +744,7 @@ ip_vs_zero_stats(struct ip_vs_stats *stats)
{
spin_lock_bh(&stats->lock);
- stats->conns = 0;
- stats->inpkts = 0;
- stats->outpkts = 0;
- stats->inbytes = 0;
- stats->outbytes = 0;
-
- stats->cps = 0;
- stats->inpps = 0;
- stats->outpps = 0;
- stats->inbps = 0;
- stats->outbps = 0;
-
+ memset(&stats->ustats, 0, sizeof(stats->ustats));
ip_vs_zero_estimator(stats);
spin_unlock_bh(&stats->lock);
@@ -707,7 +755,7 @@ ip_vs_zero_stats(struct ip_vs_stats *stats)
*/
static void
__ip_vs_update_dest(struct ip_vs_service *svc,
- struct ip_vs_dest *dest, struct ip_vs_dest_user *udest)
+ struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest)
{
int conn_flags;
@@ -716,10 +764,18 @@ __ip_vs_update_dest(struct ip_vs_service *svc,
conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
/* check if local node and update the flags */
- if (inet_addr_type(&init_net, udest->addr) == RTN_LOCAL) {
- conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
- | IP_VS_CONN_F_LOCALNODE;
- }
+#ifdef CONFIG_IP_VS_IPV6
+ if (svc->af == AF_INET6) {
+ if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) {
+ conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
+ | IP_VS_CONN_F_LOCALNODE;
+ }
+ } else
+#endif
+ if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
+ conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
+ | IP_VS_CONN_F_LOCALNODE;
+ }
/* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
@@ -760,7 +816,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc,
* Create a destination for the given service
*/
static int
-ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest,
+ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
struct ip_vs_dest **dest_p)
{
struct ip_vs_dest *dest;
@@ -768,9 +824,20 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest,
EnterFunction(2);
- atype = inet_addr_type(&init_net, udest->addr);
- if (atype != RTN_LOCAL && atype != RTN_UNICAST)
- return -EINVAL;
+#ifdef CONFIG_IP_VS_IPV6
+ if (svc->af == AF_INET6) {
+ atype = ipv6_addr_type(&udest->addr.in6);
+ if ((!(atype & IPV6_ADDR_UNICAST) ||
+ atype & IPV6_ADDR_LINKLOCAL) &&
+ !__ip_vs_addr_is_local_v6(&udest->addr.in6))
+ return -EINVAL;
+ } else
+#endif
+ {
+ atype = inet_addr_type(&init_net, udest->addr.ip);
+ if (atype != RTN_LOCAL && atype != RTN_UNICAST)
+ return -EINVAL;
+ }
dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
if (dest == NULL) {
@@ -778,11 +845,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest,
return -ENOMEM;
}
+ dest->af = svc->af;
dest->protocol = svc->protocol;
dest->vaddr = svc->addr;
dest->vport = svc->port;
dest->vfwmark = svc->fwmark;
- dest->addr = udest->addr;
+ ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
dest->port = udest->port;
atomic_set(&dest->activeconns, 0);
@@ -807,10 +875,10 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest,
* Add a destination into an existing service
*/
static int
-ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
+ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
{
struct ip_vs_dest *dest;
- __be32 daddr = udest->addr;
+ union nf_inet_addr daddr;
__be16 dport = udest->port;
int ret;
@@ -827,10 +895,13 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
return -ERANGE;
}
+ ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
+
/*
* Check if the dest already exists in the list
*/
- dest = ip_vs_lookup_dest(svc, daddr, dport);
+ dest = ip_vs_lookup_dest(svc, &daddr, dport);
+
if (dest != NULL) {
IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
return -EEXIST;
@@ -840,15 +911,17 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
* Check if the dest already exists in the trash and
* is from the same service
*/
- dest = ip_vs_trash_get_dest(svc, daddr, dport);
+ dest = ip_vs_trash_get_dest(svc, &daddr, dport);
+
if (dest != NULL) {
- IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
- "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
- NIPQUAD(daddr), ntohs(dport),
- atomic_read(&dest->refcnt),
- dest->vfwmark,
- NIPQUAD(dest->vaddr),
- ntohs(dest->vport));
+ IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
+ "dest->refcnt=%d, service %u/%s:%u\n",
+ IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
+ atomic_read(&dest->refcnt),
+ dest->vfwmark,
+ IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
+ ntohs(dest->vport));
+
__ip_vs_update_dest(svc, dest, udest);
/*
@@ -915,10 +988,10 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
* Edit a destination in the given service
*/
static int
-ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
+ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
{
struct ip_vs_dest *dest;
- __be32 daddr = udest->addr;
+ union nf_inet_addr daddr;
__be16 dport = udest->port;
EnterFunction(2);
@@ -934,10 +1007,13 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
return -ERANGE;
}
+ ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
+
/*
* Lookup the destination list
*/
- dest = ip_vs_lookup_dest(svc, daddr, dport);
+ dest = ip_vs_lookup_dest(svc, &daddr, dport);
+
if (dest == NULL) {
IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
return -ENOENT;
@@ -991,10 +1067,11 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest)
atomic_dec(&dest->svc->refcnt);
kfree(dest);
} else {
- IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, "
- "dest->refcnt=%d\n",
- NIPQUAD(dest->addr), ntohs(dest->port),
- atomic_read(&dest->refcnt));
+ IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
+ "dest->refcnt=%d\n",
+ IP_VS_DBG_ADDR(dest->af, &dest->addr),
+ ntohs(dest->port),
+ atomic_read(&dest->refcnt));
list_add(&dest->n_list, &ip_vs_dest_trash);
atomic_inc(&dest->refcnt);
}
@@ -1028,15 +1105,15 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
* Delete a destination server in the given service
*/
static int
-ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_dest_user *udest)
+ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
{
struct ip_vs_dest *dest;
- __be32 daddr = udest->addr;
__be16 dport = udest->port;
EnterFunction(2);
- dest = ip_vs_lookup_dest(svc, daddr, dport);
+ dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
+
if (dest == NULL) {
IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
return -ENOENT;
@@ -1071,7 +1148,8 @@ ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_dest_user *udest)
* Add a service into the service hash table
*/
static int
-ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p)
+ip_vs_add_service(struct ip_vs_service_user_kern *u,
+ struct ip_vs_service **svc_p)
{
int ret = 0;
struct ip_vs_scheduler *sched = NULL;
@@ -1089,6 +1167,19 @@ ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p)
goto out_mod_dec;
}
+#ifdef CONFIG_IP_VS_IPV6
+ if (u->af == AF_INET6) {
+ if (!sched->supports_ipv6) {
+ ret = -EAFNOSUPPORT;
+ goto out_err;
+ }
+ if ((u->netmask < 1) || (u->netmask > 128)) {
+ ret = -EINVAL;
+ goto out_err;
+ }
+ }
+#endif
+
svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
if (svc == NULL) {
IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
@@ -1100,8 +1191,9 @@ ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p)
atomic_set(&svc->usecnt, 1);
atomic_set(&svc->refcnt, 0);
+ svc->af = u->af;
svc->protocol = u->protocol;
- svc->addr = u->addr;
+ ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
svc->port = u->port;
svc->fwmark = u->fwmark;
svc->flags = u->flags;
@@ -1125,7 +1217,10 @@ ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p)
atomic_inc(&ip_vs_nullsvc_counter);
ip_vs_new_estimator(&svc->stats);
- ip_vs_num_services++;
+
+ /* Count only IPv4 services for old get/setsockopt interface */
+ if (svc->af == AF_INET)
+ ip_vs_num_services++;
/* Hash the service into the service table */
write_lock_bh(&__ip_vs_svc_lock);
@@ -1160,7 +1255,7 @@ ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p)
* Edit a service and bind it with a new scheduler
*/
static int
-ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u)
+ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
{
struct ip_vs_scheduler *sched, *old_sched;
int ret = 0;
@@ -1176,6 +1271,19 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u)
}
old_sched = sched;
+#ifdef CONFIG_IP_VS_IPV6
+ if (u->af == AF_INET6) {
+ if (!sched->supports_ipv6) {
+ ret = -EAFNOSUPPORT;
+ goto out;
+ }
+ if ((u->netmask < 1) || (u->netmask > 128)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+#endif
+
write_lock_bh(&__ip_vs_svc_lock);
/*
@@ -1240,7 +1348,10 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
struct ip_vs_dest *dest, *nxt;
struct ip_vs_scheduler *old_sched;
- ip_vs_num_services--;
+ /* Count only IPv4 services for old get/setsockopt interface */
+ if (svc->af == AF_INET)
+ ip_vs_num_services--;
+
ip_vs_kill_estimator(&svc->stats);
/* Unbind scheduler */
@@ -1748,15 +1859,25 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
const struct ip_vs_iter *iter = seq->private;
const struct ip_vs_dest *dest;
- if (iter->table == ip_vs_svc_table)
- seq_printf(seq, "%s %08X:%04X %s ",
- ip_vs_proto_name(svc->protocol),
- ntohl(svc->addr),
- ntohs(svc->port),
- svc->scheduler->name);
- else
+ if (iter->table == ip_vs_svc_table) {
+#ifdef CONFIG_IP_VS_IPV6
+ if (svc->af == AF_INET6)
+ seq_printf(seq, "%s [" NIP6_FMT "]:%04X %s ",
+ ip_vs_proto_name(svc->protocol),
+ NIP6(svc->addr.in6),
+ ntohs(svc->port),
+ svc->scheduler->name);
+ else
+#endif
+ seq_printf(seq, "%s %08X:%04X %s ",
+ ip_vs_proto_name(svc->protocol),
+ ntohl(svc->addr.ip),
+ ntohs(svc->port),
+ svc->scheduler->name);
+ } else {
seq_printf(seq, "FWM %08X %s ",
svc->fwmark, svc->scheduler->name);
+ }
if (svc->flags & IP_VS_SVC_F_PERSISTENT)
seq_printf(seq, "persistent %d %08X\n",
@@ -1766,13 +1887,29 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
seq_putc(seq, '\n');
list_for_each_entry(dest, &svc->destinations, n_list) {
- seq_printf(seq,
- " -> %08X:%04X %-7s %-6d %-10d %-10d\n",
- ntohl(dest->addr), ntohs(dest->port),
- ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
- atomic_read(&dest->weight),
- atomic_read(&dest->activeconns),
- atomic_read(&dest->inactconns));
+#ifdef CONFIG_IP_VS_IPV6
+ if (dest->af == AF_INET6)
+ seq_printf(seq,
+ " -> [" NIP6_FMT "]:%04X"
+ " %-7s %-6d %-10d %-10d\n",
+ NIP6(dest->addr.in6),
+ ntohs(dest->port),
+ ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
+ atomic_read(&dest->weight),
+ atomic_read(&dest->activeconns),
+ atomic_read(&dest->inactconns));
+ else
+#endif
+ seq_printf(seq,
+ " -> %08X:%04X "
+ "%-7s %-6d %-10d %-10d\n",
+ ntohl(dest->addr.ip),
+ ntohs(dest->port),
+ ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
+ atomic_read(&dest->weight),
+ atomic_read(&dest->activeconns),
+ atomic_read(&dest->inactconns));
+
}
}
return 0;
@@ -1816,20 +1953,20 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v)
" Conns Packets Packets Bytes Bytes\n");
spin_lock_bh(&ip_vs_stats.lock);
- seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns,
- ip_vs_stats.inpkts, ip_vs_stats.outpkts,
- (unsigned long long) ip_vs_stats.inbytes,
- (unsigned long long) ip_vs_stats.outbytes);
+ seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
+ ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
+ (unsigned long long) ip_vs_stats.ustats.inbytes,
+ (unsigned long long) ip_vs_stats.ustats.outbytes);
/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
seq_puts(seq,
" Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
seq_printf(seq,"%8X %8X %8X %16X %16X\n",
- ip_vs_stats.cps,
- ip_vs_stats.inpps,
- ip_vs_stats.outpps,
- ip_vs_stats.inbps,
- ip_vs_stats.outbps);
+ ip_vs_stats.ustats.cps,
+ ip_vs_stats.ustats.inpps,
+ ip_vs_stats.ustats.outpps,
+ ip_vs_stats.ustats.inbps,
+ ip_vs_stats.ustats.outbps);
spin_unlock_bh(&ip_vs_stats.lock);
return 0;
@@ -1904,14 +2041,44 @@ static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
[SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
};
+static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
+ struct ip_vs_service_user *usvc_compat)
+{
+ usvc->af = AF_INET;
+ usvc->protocol = usvc_compat->protocol;
+ usvc->addr.ip = usvc_compat->addr;
+ usvc->port = usvc_compat->port;
+ usvc->fwmark = usvc_compat->fwmark;
+
+ /* Deep copy of sched_name is not needed here */
+ usvc->sched_name = usvc_compat->sched_name;
+
+ usvc->flags = usvc_compat->flags;
+ usvc->timeout = usvc_compat->timeout;
+ usvc->netmask = usvc_compat->netmask;
+}
+
+static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
+ struct ip_vs_dest_user *udest_compat)
+{
+ udest->addr.ip = udest_compat->addr;
+ udest->port = udest_compat->port;
+ udest->conn_flags = udest_compat->conn_flags;
+ udest->weight = udest_compat->weight;
+ udest->u_threshold = udest_compat->u_threshold;
+ udest->l_threshold = udest_compat->l_threshold;
+}
+
static int
do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
{
int ret;
unsigned char arg[MAX_ARG_LEN];
- struct ip_vs_service_user *usvc;
+ struct ip_vs_service_user *usvc_compat;
+ struct ip_vs_service_user_kern usvc;
struct ip_vs_service *svc;
- struct ip_vs_dest_user *udest;
+ struct ip_vs_dest_user *udest_compat;
+ struct ip_vs_dest_user_kern udest;
if (!capable(CAP_NET_ADMIN))
return -EPERM;
@@ -1951,35 +2118,40 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
goto out_unlock;
}
- usvc = (struct ip_vs_service_user *)arg;
- udest = (struct ip_vs_dest_user *)(usvc + 1);
+ usvc_compat = (struct ip_vs_service_user *)arg;
+ udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
+
+ /* We only use the new structs internally, so copy userspace compat
+ * structs to extended internal versions */
+ ip_vs_copy_usvc_compat(&usvc, usvc_compat);
+ ip_vs_copy_udest_compat(&udest, udest_compat);
if (cmd == IP_VS_SO_SET_ZERO) {
/* if no service address is set, zero counters in all */
- if (!usvc->fwmark && !usvc->addr && !usvc->port) {
+ if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
ret = ip_vs_zero_all();
goto out_unlock;
}
}
/* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
- if (usvc->protocol!=IPPROTO_TCP && usvc->protocol!=IPPROTO_UDP) {
+ if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) {
IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
- usvc->protocol, NIPQUAD(usvc->addr),
- ntohs(usvc->port), usvc->sched_name);
+ usvc.protocol, NIPQUAD(usvc.addr.ip),
+ ntohs(usvc.port), usvc.sched_name);
ret = -EFAULT;
goto out_unlock;
}
/* Lookup the exact service by <protocol, addr, port> or fwmark */
- if (usvc->fwmark == 0)
- svc = __ip_vs_service_get(usvc->protocol,
- usvc->addr, usvc->port);
+ if (usvc.fwmark == 0)
+ svc = __ip_vs_service_get(usvc.af, usvc.protocol,
+ &usvc.addr, usvc.port);
else
- svc = __ip_vs_svc_fwm_get(usvc->fwmark);
+ svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
if (cmd != IP_VS_SO_SET_ADD
- && (svc == NULL || svc->protocol != usvc->protocol)) {
+ && (svc == NULL || svc->protocol != usvc.protocol)) {
ret = -ESRCH;
goto out_unlock;
}
@@ -1989,10 +2161,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
if (svc != NULL)
ret = -EEXIST;
else
- ret = ip_vs_add_service(usvc, &svc);
+ ret = ip_vs_add_service(&usvc, &svc);
break;
case IP_VS_SO_SET_EDIT:
- ret = ip_vs_edit_service(svc, usvc);
+ ret = ip_vs_edit_service(svc, &usvc);
break;
case IP_VS_SO_SET_DEL:
ret = ip_vs_del_service(svc);
@@ -2003,13 +2175,13 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
ret = ip_vs_zero_service(svc);
break;
case IP_VS_SO_SET_ADDDEST:
- ret = ip_vs_add_dest(svc, udest);
+ ret = ip_vs_add_dest(svc, &udest);
break;
case IP_VS_SO_SET_EDITDEST:
- ret = ip_vs_edit_dest(svc, udest);
+ ret = ip_vs_edit_dest(svc, &udest);
break;
case IP_VS_SO_SET_DELDEST:
- ret = ip_vs_del_dest(svc, udest);
+ ret = ip_vs_del_dest(svc, &udest);
break;
default:
ret = -EINVAL;
@@ -2032,7 +2204,7 @@ static void
ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
{
spin_lock_bh(&src->lock);
- memcpy(dst, src, (char*)&src->lock - (char*)src);
+ memcpy(dst, &src->ustats, sizeof(*dst));
spin_unlock_bh(&src->lock);
}
@@ -2040,7 +2212,7 @@ static void
ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
{
dst->protocol = src->protocol;
- dst->addr = src->addr;
+ dst->addr = src->addr.ip;
dst->port = src->port;
dst->fwmark = src->fwmark;
strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
@@ -2062,6 +2234,10 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
+ /* Only expose IPv4 entries to old interface */
+ if (svc->af != AF_INET)
+ continue;
+
if (count >= get->num_services)
goto out;
memset(&entry, 0, sizeof(entry));
@@ -2077,6 +2253,10 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
+ /* Only expose IPv4 entries to old interface */
+ if (svc->af != AF_INET)
+ continue;
+
if (count >= get->num_services)
goto out;
memset(&entry, 0, sizeof(entry));
@@ -2098,13 +2278,15 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
struct ip_vs_get_dests __user *uptr)
{
struct ip_vs_service *svc;
+ union nf_inet_addr addr = { .ip = get->addr };
int ret = 0;
if (get->fwmark)
- svc = __ip_vs_svc_fwm_get(get->fwmark);
+ svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark);
else
- svc = __ip_vs_service_get(get->protocol,
- get->addr, get->port);
+ svc = __ip_vs_service_get(AF_INET, get->protocol, &addr,
+ get->port);
+
if (svc) {
int count = 0;
struct ip_vs_dest *dest;
@@ -2114,7 +2296,7 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
if (count >= get->num_dests)
break;
- entry.addr = dest->addr;
+ entry.addr = dest->addr.ip;
entry.port = dest->port;
entry.conn_flags = atomic_read(&dest->conn_flags);
entry.weight = atomic_read(&dest->weight);
@@ -2239,13 +2421,15 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
struct ip_vs_service_entry *entry;
struct ip_vs_service *svc;
+ union nf_inet_addr addr;
entry = (struct ip_vs_service_entry *)arg;
+ addr.ip = entry->addr;
if (entry->fwmark)
- svc = __ip_vs_svc_fwm_get(entry->fwmark);
+ svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark);
else
- svc = __ip_vs_service_get(entry->protocol,
- entry->addr, entry->port);
+ svc = __ip_vs_service_get(AF_INET, entry->protocol,
+ &addr, entry->port);
if (svc) {
ip_vs_copy_service(entry, svc);
if (copy_to_user(user, entry, sizeof(*entry)) != 0)
@@ -2396,16 +2580,16 @@ static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
spin_lock_bh(&stats->lock);
- NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns);
- NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts);
- NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts);
- NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes);
- NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes);
- NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps);
- NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps);
- NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps);
- NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps);
- NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps);
+ NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
+ NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
+ NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
+ NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
+ NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
+ NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
+ NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
+ NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
+ NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
+ NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
spin_unlock_bh(&stats->lock);
@@ -2430,7 +2614,7 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
if (!nl_service)
return -EMSGSIZE;
- NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, AF_INET);
+ NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
if (svc->fwmark) {
NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
@@ -2516,7 +2700,7 @@ nla_put_failure:
return skb->len;
}
-static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc,
+static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
struct nlattr *nla, int full_entry)
{
struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
@@ -2536,8 +2720,12 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc,
if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
return -EINVAL;
- /* For now, only support IPv4 */
- if (nla_get_u16(nla_af) != AF_INET)
+ usvc->af = nla_get_u16(nla_af);
+#ifdef CONFIG_IP_VS_IPV6
+ if (usvc->af != AF_INET && usvc->af != AF_INET6)
+#else
+ if (usvc->af != AF_INET)
+#endif
return -EAFNOSUPPORT;
if (nla_fwmark) {
@@ -2569,10 +2757,10 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc,
/* prefill flags from service if it already exists */
if (usvc->fwmark)
- svc = __ip_vs_svc_fwm_get(usvc->fwmark);
+ svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark);
else
- svc = __ip_vs_service_get(usvc->protocol, usvc->addr,
- usvc->port);
+ svc = __ip_vs_service_get(usvc->af, usvc->protocol,
+ &usvc->addr, usvc->port);
if (svc) {
usvc->flags = svc->flags;
ip_vs_service_put(svc);
@@ -2582,9 +2770,7 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc,
/* set new flags from userland */
usvc->flags = (usvc->flags & ~flags.mask) |
(flags.flags & flags.mask);
-
- strlcpy(usvc->sched_name, nla_data(nla_sched),
- sizeof(usvc->sched_name));
+ usvc->sched_name = nla_data(nla_sched);
usvc->timeout = nla_get_u32(nla_timeout);
usvc->netmask = nla_get_u32(nla_netmask);
}
@@ -2594,7 +2780,7 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc,
static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
{
- struct ip_vs_service_user usvc;
+ struct ip_vs_service_user_kern usvc;
int ret;
ret = ip_vs_genl_parse_service(&usvc, nla, 0);
@@ -2602,10 +2788,10 @@ static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
return ERR_PTR(ret);
if (usvc.fwmark)
- return __ip_vs_svc_fwm_get(usvc.fwmark);
+ return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
else
- return __ip_vs_service_get(usvc.protocol, usvc.addr,
- usvc.port);
+ return __ip_vs_service_get(usvc.af, usvc.protocol,
+ &usvc.addr, usvc.port);
}
static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
@@ -2704,7 +2890,7 @@ out_err:
return skb->len;
}
-static int ip_vs_genl_parse_dest(struct ip_vs_dest_user *udest,
+static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
struct nlattr *nla, int full_entry)
{
struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
@@ -2860,8 +3046,8 @@ static int ip_vs_genl_set_config(struct nlattr **attrs)
static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
{
struct ip_vs_service *svc = NULL;
- struct ip_vs_service_user usvc;
- struct ip_vs_dest_user udest;
+ struct ip_vs_service_user_kern usvc;
+ struct ip_vs_dest_user_kern udest;
int ret = 0, cmd;
int need_full_svc = 0, need_full_dest = 0;
@@ -2913,9 +3099,10 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
/* Lookup the exact service by <protocol, addr, port> or fwmark */
if (usvc.fwmark == 0)
- svc = __ip_vs_service_get(usvc.protocol, usvc.addr, usvc.port);
+ svc = __ip_vs_service_get(usvc.af, usvc.protocol,
+ &usvc.addr, usvc.port);
else
- svc = __ip_vs_svc_fwm_get(usvc.fwmark);
+ svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
/* Unless we're adding a new service, the service must already exist */
if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
diff --git a/net/ipv4/ipvs/ip_vs_dh.c b/net/ipv4/ipvs/ip_vs_dh.c
index fa66824d264..a16943fd72f 100644
--- a/net/ipv4/ipvs/ip_vs_dh.c
+++ b/net/ipv4/ipvs/ip_vs_dh.c
@@ -218,7 +218,7 @@ ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
IP_VS_DBG(6, "DH: destination IP address %u.%u.%u.%u "
"--> server %u.%u.%u.%u:%d\n",
NIPQUAD(iph->daddr),
- NIPQUAD(dest->addr),
+ NIPQUAD(dest->addr.ip),
ntohs(dest->port));
return dest;
@@ -234,6 +234,9 @@ static struct ip_vs_scheduler ip_vs_dh_scheduler =
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
.n_list = LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+ .supports_ipv6 = 0,
+#endif
.init_service = ip_vs_dh_init_svc,
.done_service = ip_vs_dh_done_svc,
.update_service = ip_vs_dh_update_svc,
diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c
index 4fb620ec208..2eb2860dabb 100644
--- a/net/ipv4/ipvs/ip_vs_est.c
+++ b/net/ipv4/ipvs/ip_vs_est.c
@@ -65,37 +65,37 @@ static void estimation_timer(unsigned long arg)
s = container_of(e, struct ip_vs_stats, est);
spin_lock(&s->lock);
- n_conns = s->conns;
- n_inpkts = s->inpkts;
- n_outpkts = s->outpkts;
- n_inbytes = s->inbytes;
- n_outbytes = s->outbytes;
+ n_conns = s->ustats.conns;
+ n_inpkts = s->ustats.inpkts;
+ n_outpkts = s->ustats.outpkts;
+ n_inbytes = s->ustats.inbytes;
+ n_outbytes = s->ustats.outbytes;
/* scaled by 2^10, but divided 2 seconds */
rate = (n_conns - e->last_conns)<<9;
e->last_conns = n_conns;
e->cps += ((long)rate - (long)e->cps)>>2;
- s->cps = (e->cps+0x1FF)>>10;
+ s->ustats.cps = (e->cps+0x1FF)>>10;
rate = (n_inpkts - e->last_inpkts)<<9;
e->last_inpkts = n_inpkts;
e->inpps += ((long)rate - (long)e->inpps)>>2;
- s->inpps = (e->inpps+0x1FF)>>10;
+ s->ustats.inpps = (e->inpps+0x1FF)>>10;
rate = (n_outpkts - e->last_outpkts)<<9;
e->last_outpkts = n_outpkts;
e->outpps += ((long)rate - (long)e->outpps)>>2;
- s->outpps = (e->outpps+0x1FF)>>10;
+ s->ustats.outpps = (e->outpps+0x1FF)>>10;
rate = (n_inbytes - e->last_inbytes)<<4;
e->last_inbytes = n_inbytes;
e->inbps += ((long)rate - (long)e->inbps)>>2;
- s->inbps = (e->inbps+0xF)>>5;
+ s->ustats.inbps = (e->inbps+0xF)>>5;
rate = (n_outbytes - e->last_outbytes)<<4;
e->last_outbytes = n_outbytes;
e->outbps += ((long)rate - (long)e->outbps)>>2;
- s->outbps = (e->outbps+0xF)>>5;
+ s->ustats.outbps = (e->outbps+0xF)>>5;
spin_unlock(&s->lock);
}
spin_unlock(&est_lock);
@@ -108,20 +108,20 @@ void ip_vs_new_estimator(struct ip_vs_stats *stats)
INIT_LIST_HEAD(&est->list);
- est->last_conns = stats->conns;
- est->cps = stats->cps<<10;
+ est->last_conns = stats->ustats.conns;
+ est->cps = stats->ustats.cps<<10;
- est->last_inpkts = stats->inpkts;
- est->inpps = stats->inpps<<10;
+ est->last_inpkts = stats->ustats.inpkts;
+ est->inpps = stats->ustats.inpps<<10;
- est->last_outpkts = stats->outpkts;
- est->outpps = stats->outpps<<10;
+ est->last_outpkts = stats->ustats.outpkts;
+ est->outpps = stats->ustats.outpps<<10;
- est->last_inbytes = stats->inbytes;
- est->inbps = stats->inbps<<5;
+ est->last_inbytes = stats->ustats.inbytes;
+ est->inbps = stats->ustats.inbps<<5;
- est->last_outbytes = stats->outbytes;
- est->outbps = stats->outbps<<5;
+ est->last_outbytes = stats->ustats.outbytes;
+ est->outbps = stats->ustats.outbps<<5;
spin_lock_bh(&est_lock);
list_add(&est->list, &est_list);
diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c
index c1c758e4f73..2e7dbd8b73a 100644
--- a/net/ipv4/ipvs/ip_vs_ftp.c
+++ b/net/ipv4/ipvs/ip_vs_ftp.c
@@ -140,13 +140,21 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
struct tcphdr *th;
char *data, *data_limit;
char *start, *end;
- __be32 from;
+ union nf_inet_addr from;
__be16 port;
struct ip_vs_conn *n_cp;
char buf[24]; /* xxx.xxx.xxx.xxx,ppp,ppp\000 */
unsigned buf_len;
int ret;
+#ifdef CONFIG_IP_VS_IPV6
+ /* This application helper doesn't work with IPv6 yet,
+ * so turn this into a no-op for IPv6 packets
+ */
+ if (cp->af == AF_INET6)
+ return 1;
+#endif
+
*diff = 0;
/* Only useful for established sessions */
@@ -166,24 +174,25 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
if (ip_vs_ftp_get_addrport(data, data_limit,
SERVER_STRING,
sizeof(SERVER_STRING)-1, ')',
- &from, &port,
+ &from.ip, &port,
&start, &end) != 1)
return 1;
IP_VS_DBG(7, "PASV response (%u.%u.%u.%u:%d) -> "
"%u.%u.%u.%u:%d detected\n",
- NIPQUAD(from), ntohs(port), NIPQUAD(cp->caddr), 0);
+ NIPQUAD(from.ip), ntohs(port),
+ NIPQUAD(cp->caddr.ip), 0);
/*
* Now update or create an connection entry for it
*/
- n_cp = ip_vs_conn_out_get(iph->protocol, from, port,
- cp->caddr, 0);
+ n_cp = ip_vs_conn_out_get(AF_INET, iph->protocol, &from, port,
+ &cp->caddr, 0);
if (!n_cp) {
- n_cp = ip_vs_conn_new(IPPROTO_TCP,
- cp->caddr, 0,
- cp->vaddr, port,
- from, port,
+ n_cp = ip_vs_conn_new(AF_INET, IPPROTO_TCP,
+ &cp->caddr, 0,
+ &cp->vaddr, port,
+ &from, port,
IP_VS_CONN_F_NO_CPORT,
cp->dest);
if (!n_cp)
@@ -196,9 +205,9 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
/*
* Replace the old passive address with the new one
*/
- from = n_cp->vaddr;
+ from.ip = n_cp->vaddr.ip;
port = n_cp->vport;
- sprintf(buf,"%d,%d,%d,%d,%d,%d", NIPQUAD(from),
+ sprintf(buf, "%d,%d,%d,%d,%d,%d", NIPQUAD(from.ip),
(ntohs(port)>>8)&255, ntohs(port)&255);
buf_len = strlen(buf);
@@ -243,10 +252,18 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
struct tcphdr *th;
char *data, *data_start, *data_limit;
char *start, *end;
- __be32 to;
+ union nf_inet_addr to;
__be16 port;
struct ip_vs_conn *n_cp;
+#ifdef CONFIG_IP_VS_IPV6
+ /* This application helper doesn't work with IPv6 yet,
+ * so turn this into a no-op for IPv6 packets
+ */
+ if (cp->af == AF_INET6)
+ return 1;
+#endif
+
/* no diff required for incoming packets */
*diff = 0;
@@ -291,12 +308,12 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
*/
if (ip_vs_ftp_get_addrport(data_start, data_limit,
CLIENT_STRING, sizeof(CLIENT_STRING)-1,
- '\r', &to, &port,
+ '\r', &to.ip, &port,
&start, &end) != 1)
return 1;
IP_VS_DBG(7, "PORT %u.%u.%u.%u:%d detected\n",
- NIPQUAD(to), ntohs(port));
+ NIPQUAD(to.ip), ntohs(port));
/* Passive mode off */
cp->app_data = NULL;
@@ -306,16 +323,16 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
*/
IP_VS_DBG(7, "protocol %s %u.%u.%u.%u:%d %u.%u.%u.%u:%d\n",
ip_vs_proto_name(iph->protocol),
- NIPQUAD(to), ntohs(port), NIPQUAD(cp->vaddr), 0);
+ NIPQUAD(to.ip), ntohs(port), NIPQUAD(cp->vaddr.ip), 0);
- n_cp = ip_vs_conn_in_get(iph->protocol,
- to, port,
- cp->vaddr, htons(ntohs(cp->vport)-1));
+ n_cp = ip_vs_conn_in_get(AF_INET, iph->protocol,
+ &to, port,
+ &cp->vaddr, htons(ntohs(cp->vport)-1));
if (!n_cp) {
- n_cp = ip_vs_conn_new(IPPROTO_TCP,
- to, port,
- cp->vaddr, htons(ntohs(cp->vport)-1),
- cp->daddr, htons(ntohs(cp->dport)-1),
+ n_cp = ip_vs_conn_new(AF_INET, IPPROTO_TCP,
+ &to, port,
+ &cp->vaddr, htons(ntohs(cp->vport)-1),
+ &cp->daddr, htons(ntohs(cp->dport)-1),
0,
cp->dest);
if (!n_cp)
diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c
index d2a43aa3fe4..6ecef3518ca 100644
--- a/net/ipv4/ipvs/ip_vs_lblc.c
+++ b/net/ipv4/ipvs/ip_vs_lblc.c
@@ -422,7 +422,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc, struct iphdr *iph)
IP_VS_DBG(6, "LBLC: server %d.%d.%d.%d:%d "
"activeconns %d refcnt %d weight %d overhead %d\n",
- NIPQUAD(least->addr), ntohs(least->port),
+ NIPQUAD(least->addr.ip), ntohs(least->port),
atomic_read(&least->activeconns),
atomic_read(&least->refcnt),
atomic_read(&least->weight), loh);
@@ -506,7 +506,7 @@ out:
IP_VS_DBG(6, "LBLC: destination IP address %u.%u.%u.%u "
"--> server %u.%u.%u.%u:%d\n",
NIPQUAD(iph->daddr),
- NIPQUAD(dest->addr),
+ NIPQUAD(dest->addr.ip),
ntohs(dest->port));
return dest;
@@ -522,6 +522,9 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler =
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
.n_list = LIST_HEAD_INIT(ip_vs_lblc_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+ .supports_ipv6 = 0,
+#endif
.init_service = ip_vs_lblc_init_svc,
.done_service = ip_vs_lblc_done_svc,
.schedule = ip_vs_lblc_schedule,
diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c
index 375a1ffb6b6..1f75ea83bcf 100644
--- a/net/ipv4/ipvs/ip_vs_lblcr.c
+++ b/net/ipv4/ipvs/ip_vs_lblcr.c
@@ -204,7 +204,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
IP_VS_DBG(6, "ip_vs_dest_set_min: server %d.%d.%d.%d:%d "
"activeconns %d refcnt %d weight %d overhead %d\n",
- NIPQUAD(least->addr), ntohs(least->port),
+ NIPQUAD(least->addr.ip), ntohs(least->port),
atomic_read(&least->activeconns),
atomic_read(&least->refcnt),
atomic_read(&least->weight), loh);
@@ -250,7 +250,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
IP_VS_DBG(6, "ip_vs_dest_set_max: server %d.%d.%d.%d:%d "
"activeconns %d refcnt %d weight %d overhead %d\n",
- NIPQUAD(most->addr), ntohs(most->port),
+ NIPQUAD(most->addr.ip), ntohs(most->port),
atomic_read(&most->activeconns),
atomic_read(&most->refcnt),
atomic_read(&most->weight), moh);
@@ -598,7 +598,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc, struct iphdr *iph)
IP_VS_DBG(6, "LBLCR: server %d.%d.%d.%d:%d "
"activeconns %d refcnt %d weight %d overhead %d\n",
- NIPQUAD(least->addr), ntohs(least->port),
+ NIPQUAD(least->addr.ip), ntohs(least->port),
atomic_read(&least->activeconns),
atomic_read(&least->refcnt),
atomic_read(&least->weight), loh);
@@ -706,7 +706,7 @@ out:
IP_VS_DBG(6, "LBLCR: destination IP address %u.%u.%u.%u "
"--> server %u.%u.%u.%u:%d\n",
NIPQUAD(iph->daddr),
- NIPQUAD(dest->addr),
+ NIPQUAD(dest->addr.ip),
ntohs(dest->port));
return dest;
@@ -722,6 +722,9 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler =
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
.n_list = LIST_HEAD_INIT(ip_vs_lblcr_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+ .supports_ipv6 = 0,
+#endif
.init_service = ip_vs_lblcr_init_svc,
.done_service = ip_vs_lblcr_done_svc,
.schedule = ip_vs_lblcr_schedule,
diff --git a/net/ipv4/ipvs/ip_vs_lc.c b/net/ipv4/ipvs/ip_vs_lc.c
index 2c3de1b6351..b69f808ac46 100644
--- a/net/ipv4/ipvs/ip_vs_lc.c
+++ b/net/ipv4/ipvs/ip_vs_lc.c
@@ -67,10 +67,10 @@ ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
}
if (least)
- IP_VS_DBG(6, "LC: server %u.%u.%u.%u:%u activeconns %d inactconns %d\n",
- NIPQUAD(least->addr), ntohs(least->port),
- atomic_read(&least->activeconns),
- atomic_read(&least->inactconns));
+ IP_VS_DBG_BUF(6, "LC: server %s:%u activeconns %d inactconns %d\n",
+ IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
+ atomic_read(&least->activeconns),
+ atomic_read(&least->inactconns));
return least;
}
@@ -81,6 +81,9 @@ static struct ip_vs_scheduler ip_vs_lc_scheduler = {
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
.n_list = LIST_HEAD_INIT(ip_vs_lc_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+ .supports_ipv6 = 1,
+#endif
.schedule = ip_vs_lc_schedule,
};
diff --git a/net/ipv4/ipvs/ip_vs_nq.c b/net/ipv4/ipvs/ip_vs_nq.c
index 5330d5a2de1..9a2d8033f08 100644
--- a/net/ipv4/ipvs/ip_vs_nq.c
+++ b/net/ipv4/ipvs/ip_vs_nq.c
@@ -99,12 +99,12 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
return NULL;
out:
- IP_VS_DBG(6, "NQ: server %u.%u.%u.%u:%u "
- "activeconns %d refcnt %d weight %d overhead %d\n",
- NIPQUAD(least->addr), ntohs(least->port),
- atomic_read(&least->activeconns),
- atomic_read(&least->refcnt),
- atomic_read(&least->weight), loh);
+ IP_VS_DBG_BUF(6, "NQ: server %s:%u "
+ "activeconns %d refcnt %d weight %d overhead %d\n",
+ IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
+ atomic_read(&least->activeconns),
+ atomic_read(&least->refcnt),
+ atomic_read(&least->weight), loh);
return least;
}
@@ -116,6 +116,9 @@ static struct ip_vs_scheduler ip_vs_nq_scheduler =
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
.n_list = LIST_HEAD_INIT(ip_vs_nq_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+ .supports_ipv6 = 1,
+#endif
.schedule = ip_vs_nq_schedule,
};
diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/ipv4/ipvs/ip_vs_proto.c
index 6099a88fc20..b06da1c3445 100644
--- a/net/ipv4/ipvs/ip_vs_proto.c
+++ b/net/ipv4/ipvs/ip_vs_proto.c
@@ -151,11 +151,11 @@ const char * ip_vs_state_name(__u16 proto, int state)
}
-void
-ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp,
- const struct sk_buff *skb,
- int offset,
- const char *msg)
+static void
+ip_vs_tcpudp_debug_packet_v4(struct ip_vs_protocol *pp,
+ const struct sk_buff *skb,
+ int offset,
+ const char *msg)
{
char buf[128];
struct iphdr _iph, *ih;
@@ -189,6 +189,61 @@ ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp,
printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
}
+#ifdef CONFIG_IP_VS_IPV6
+/*
+ * Print a one-line KERN_DEBUG summary of an IPv6 TCP/UDP packet.
+ *
+ * @pp:     protocol descriptor; only pp->name is used, as the summary prefix
+ * @skb:    packet being described
+ * @offset: offset of the IPv6 header inside @skb
+ * @msg:    caller-supplied text printed in front of the summary
+ *
+ * The port pair is read at @offset + sizeof(struct ipv6hdr), i.e. directly
+ * behind the fixed IPv6 header; apart from the explicit fragment check no
+ * extension headers are walked.  All formatting is bounded by sizeof(buf)
+ * so an unexpectedly long protocol name cannot overrun the stack buffer.
+ */
+static void
+ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp,
+			     const struct sk_buff *skb,
+			     int offset,
+			     const char *msg)
+{
+	char buf[192];
+	struct ipv6hdr _iph, *ih;
+
+	ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
+	if (ih == NULL)
+		snprintf(buf, sizeof(buf), "%s TRUNCATED", pp->name);
+	else if (ih->nexthdr == IPPROTO_FRAGMENT)
+		snprintf(buf, sizeof(buf),
+			 "%s " NIP6_FMT "->" NIP6_FMT " frag",
+			 pp->name, NIP6(ih->saddr),
+			 NIP6(ih->daddr));
+	else {
+		__be16 _ports[2], *pptr;
+
+		pptr = skb_header_pointer(skb, offset + sizeof(struct ipv6hdr),
+					  sizeof(_ports), _ports);
+		if (pptr == NULL)
+			snprintf(buf, sizeof(buf),
+				 "%s TRUNCATED " NIP6_FMT "->" NIP6_FMT,
+				 pp->name,
+				 NIP6(ih->saddr),
+				 NIP6(ih->daddr));
+		else
+			snprintf(buf, sizeof(buf),
+				 "%s " NIP6_FMT ":%u->" NIP6_FMT ":%u",
+				 pp->name,
+				 NIP6(ih->saddr),
+				 ntohs(pptr[0]),
+				 NIP6(ih->daddr),
+				 ntohs(pptr[1]));
+	}
+
+	printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
+}
+#endif
+
+
+/*
+ * Debug-print a TCP/UDP packet, picking the formatter from skb->protocol.
+ *
+ * With CONFIG_IP_VS_IPV6 enabled, packets whose skb->protocol is ETH_P_IPV6
+ * go to the IPv6 formatter; everything else (and every packet when IPv6
+ * support is compiled out) goes to the IPv4 formatter.
+ */
+void
+ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp,
+			  const struct sk_buff *skb,
+			  int offset,
+			  const char *msg)
+{
+#ifdef CONFIG_IP_VS_IPV6
+	if (skb->protocol == __constant_htons(ETH_P_IPV6)) {
+		ip_vs_tcpudp_debug_packet_v6(pp, skb, offset, msg);
+		return;
+	}
+#endif
+	ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg);
+}
+
int __init ip_vs_protocol_init(void)
{
diff --git a/net/ipv4/ipvs/ip_vs_proto_ah_esp.c b/net/ipv4/ipvs/ip_vs_proto_ah_esp.c
index 3f9ebd7639a..2b18a78d039 100644
--- a/net/ipv4/ipvs/ip_vs_proto_ah_esp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_ah_esp.c
@@ -39,25 +39,23 @@ struct isakmp_hdr {
static struct ip_vs_conn *
-ah_esp_conn_in_get(const struct sk_buff *skb,
- struct ip_vs_protocol *pp,
- const struct iphdr *iph,
- unsigned int proto_off,
+ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
+ const struct ip_vs_iphdr *iph, unsigned int proto_off,
int inverse)
{
struct ip_vs_conn *cp;
if (likely(!inverse)) {
- cp = ip_vs_conn_in_get(IPPROTO_UDP,
- iph->saddr,
+ cp = ip_vs_conn_in_get(af, IPPROTO_UDP,
+ &iph->saddr,
htons(PORT_ISAKMP),
- iph->daddr,
+ &iph->daddr,
htons(PORT_ISAKMP));
} else {
- cp = ip_vs_conn_in_get(IPPROTO_UDP,
- iph->daddr,
+ cp = ip_vs_conn_in_get(af, IPPROTO_UDP,
+ &iph->daddr,
htons(PORT_ISAKMP),
- iph->saddr,
+ &iph->saddr,
htons(PORT_ISAKMP));
}
@@ -66,12 +64,12 @@ ah_esp_conn_in_get(const struct sk_buff *skb,
* We are not sure if the packet is from our
* service, so our conn_schedule hook should return NF_ACCEPT
*/
- IP_VS_DBG(12, "Unknown ISAKMP entry for outin packet "
- "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n",
- inverse ? "ICMP+" : "",
- pp->name,
- NIPQUAD(iph->saddr),
- NIPQUAD(iph->daddr));
+ IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet "
+ "%s%s %s->%s\n",
+ inverse ? "ICMP+" : "",
+ pp->name,
+ IP_VS_DBG_ADDR(af, &iph->saddr),
+ IP_VS_DBG_ADDR(af, &iph->daddr));
}
return cp;
@@ -79,32 +77,35 @@ ah_esp_conn_in_get(const struct sk_buff *skb,
static struct ip_vs_conn *
-ah_esp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
- const struct iphdr *iph, unsigned int proto_off, int inverse)
+ah_esp_conn_out_get(int af, const struct sk_buff *skb,
+ struct ip_vs_protocol *pp,
+ const struct ip_vs_iphdr *iph,
+ unsigned int proto_off,
+ int inverse)
{
struct ip_vs_conn *cp;
if (likely(!inverse)) {
- cp = ip_vs_conn_out_get(IPPROTO_UDP,
- iph->saddr,
+ cp = ip_vs_conn_out_get(af, IPPROTO_UDP,
+ &iph->saddr,
htons(PORT_ISAKMP),
- iph->daddr,
+ &iph->daddr,
htons(PORT_ISAKMP));
} else {
- cp = ip_vs_conn_out_get(IPPROTO_UDP,
- iph->daddr,
+ cp = ip_vs_conn_out_get(af, IPPROTO_UDP,
+ &iph->daddr,
htons(PORT_ISAKMP),
- iph->saddr,
+ &iph->saddr,
htons(PORT_ISAKMP));
}
if (!cp) {
- IP_VS_DBG(12, "Unknown ISAKMP entry for inout packet "
- "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n",
- inverse ? "ICMP+" : "",
- pp->name,
- NIPQUAD(iph->saddr),
- NIPQUAD(iph->daddr));
+ IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet "
+ "%s%s %s->%s\n",
+ inverse ? "ICMP+" : "",
+ pp->name,
+ IP_VS_DBG_ADDR(af, &iph->saddr),
+ IP_VS_DBG_ADDR(af, &iph->daddr));
}
return cp;
@@ -112,8 +113,7 @@ ah_esp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
static int
-ah_esp_conn_schedule(struct sk_buff *skb,
- struct ip_vs_protocol *pp,
+ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
int *verdict, struct ip_vs_conn **cpp)
{
/*
@@ -125,8 +125,8 @@ ah_esp_conn_schedule(struct sk_buff *skb,
static void
-ah_esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
- int offset, const char *msg)
+ah_esp_debug_packet_v4(struct ip_vs_protocol *pp, const struct sk_buff *skb,
+ int offset, const char *msg)
{
char buf[256];
struct iphdr _iph, *ih;
@@ -142,6 +142,38 @@ ah_esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
}
+#ifdef CONFIG_IP_VS_IPV6
+/*
+ * Print a one-line KERN_DEBUG summary (protocol name, source and
+ * destination address) of an IPv6 AH/ESP packet.
+ *
+ * @pp:     protocol descriptor; only pp->name is used
+ * @skb:    packet being described
+ * @offset: offset of the IPv6 header inside @skb
+ * @msg:    caller-supplied text printed in front of the summary
+ *
+ * Formatting is bounded by sizeof(buf) so the stack buffer cannot be
+ * overrun regardless of the length of pp->name.
+ */
+static void
+ah_esp_debug_packet_v6(struct ip_vs_protocol *pp, const struct sk_buff *skb,
+		       int offset, const char *msg)
+{
+	char buf[256];
+	struct ipv6hdr _iph, *ih;
+
+	ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
+	if (ih == NULL)
+		snprintf(buf, sizeof(buf), "%s TRUNCATED", pp->name);
+	else
+		snprintf(buf, sizeof(buf), "%s " NIP6_FMT "->" NIP6_FMT,
+			 pp->name, NIP6(ih->saddr),
+			 NIP6(ih->daddr));
+
+	printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
+}
+#endif
+
+/*
+ * Debug-print an AH/ESP packet, picking the formatter from skb->protocol.
+ *
+ * With CONFIG_IP_VS_IPV6 enabled, ETH_P_IPV6 packets are handed to the
+ * IPv6 formatter; all other packets use the IPv4 formatter.
+ */
+static void
+ah_esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
+		    int offset, const char *msg)
+{
+#ifdef CONFIG_IP_VS_IPV6
+	if (skb->protocol == __constant_htons(ETH_P_IPV6)) {
+		ah_esp_debug_packet_v6(pp, skb, offset, msg);
+		return;
+	}
+#endif
+	ah_esp_debug_packet_v4(pp, skb, offset, msg);
+}
+
static void ah_esp_init(struct ip_vs_protocol *pp)
{
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index d0ea467986a..537f616776d 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -25,8 +25,9 @@
static struct ip_vs_conn *
-tcp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
- const struct iphdr *iph, unsigned int proto_off, int inverse)
+tcp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
+ const struct ip_vs_iphdr *iph, unsigned int proto_off,
+ int inverse)
{
__be16 _ports[2], *pptr;
@@ -35,19 +36,20 @@ tcp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
return NULL;
if (likely(!inverse)) {
- return ip_vs_conn_in_get(iph->protocol,
- iph->saddr, pptr[0],
- iph->daddr, pptr[1]);
+ return ip_vs_conn_in_get(af, iph->protocol,
+ &iph->saddr, pptr[0],
+ &iph->daddr, pptr[1]);
} else {
- return ip_vs_conn_in_get(iph->protocol,
- iph->daddr, pptr[1],
- iph->saddr, pptr[0]);
+ return ip_vs_conn_in_get(af, iph->protocol,
+ &iph->daddr, pptr[1],
+ &iph->saddr, pptr[0]);
}
}
static struct ip_vs_conn *
-tcp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
- const struct iphdr *iph, unsigned int proto_off, int inverse)
+tcp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
+ const struct ip_vs_iphdr *iph, unsigned int proto_off,
+ int inverse)
{
__be16 _ports[2], *pptr;
@@ -56,34 +58,36 @@ tcp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
return NULL;
if (likely(!inverse)) {
- return ip_vs_conn_out_get(iph->protocol,
- iph->saddr, pptr[0],
- iph->daddr, pptr[1]);
+ return ip_vs_conn_out_get(af, iph->protocol,
+ &iph->saddr, pptr[0],
+ &iph->daddr, pptr[1]);
} else {
- return ip_vs_conn_out_get(iph->protocol,
- iph->daddr, pptr[1],
- iph->saddr, pptr[0]);
+ return ip_vs_conn_out_get(af, iph->protocol,
+ &iph->daddr, pptr[1],
+ &iph->saddr, pptr[0]);
}
}
static int
-tcp_conn_schedule(struct sk_buff *skb,
- struct ip_vs_protocol *pp,
+tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
int *verdict, struct ip_vs_conn **cpp)
{
struct ip_vs_service *svc;
struct tcphdr _tcph, *th;
+ struct ip_vs_iphdr iph;
- th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
+ ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+
+ th = skb_header_pointer(skb, iph.len, sizeof(_tcph), &_tcph);
if (th == NULL) {
*verdict = NF_DROP;
return 0;
}
if (th->syn &&
- (svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol,
- ip_hdr(skb)->daddr, th->dest))) {
+ (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr,
+ th->dest))) {
if (ip_vs_todrop()) {
/*
* It seems that we are very loaded.
@@ -110,22 +114,62 @@ tcp_conn_schedule(struct sk_buff *skb,
static inline void
-tcp_fast_csum_update(struct tcphdr *tcph, __be32 oldip, __be32 newip,
+tcp_fast_csum_update(int af, struct tcphdr *tcph,
+ const union nf_inet_addr *oldip,
+ const union nf_inet_addr *newip,
__be16 oldport, __be16 newport)
{
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ tcph->check =
+ csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
+ ip_vs_check_diff2(oldport, newport,
+ ~csum_unfold(tcph->check))));
+ else
+#endif
tcph->check =
- csum_fold(ip_vs_check_diff4(oldip, newip,
+ csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
ip_vs_check_diff2(oldport, newport,
~csum_unfold(tcph->check))));
}
+/*
+ * Incrementally fix up tcph->check for a CHECKSUM_PARTIAL skb after the
+ * addresses (and possibly the TCP length) of the pseudo-header changed.
+ *
+ * @af:     address family selecting the 4-byte or 16-byte address diff
+ * @tcph:   TCP header whose check field is rewritten in place
+ * @oldip:  address that was covered by the current pseudo-header sum
+ * @newip:  address that replaces @oldip
+ * @oldlen: previous TCP length, 16-bit network byte order
+ * @newlen: new TCP length, 16-bit network byte order
+ *
+ * Unlike a finished checksum, the check field of a CHECKSUM_PARTIAL packet
+ * holds the folded pseudo-header sum *without* the final one's complement.
+ * The stored value is therefore unfolded as-is (no '~'), the old/new
+ * differences are added, and the result of csum_fold() - which returns the
+ * complemented sum - is inverted again before being stored.  Using the
+ * full-checksum recipe (csum_fold(... ~csum_unfold(check))) here would
+ * leave a wrong pseudo-header sum for the device to complete.
+ *
+ * NOTE: the length parameters are __be16; callers must convert with
+ * htons(), not htonl().
+ */
+static inline void
+tcp_partial_csum_update(int af, struct tcphdr *tcph,
+			const union nf_inet_addr *oldip,
+			const union nf_inet_addr *newip,
+			__be16 oldlen, __be16 newlen)
+{
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		tcph->check =
+			~csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
+					ip_vs_check_diff2(oldlen, newlen,
+						csum_unfold(tcph->check))));
+	else
+#endif
+	tcph->check =
+		~csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
+				ip_vs_check_diff2(oldlen, newlen,
+						csum_unfold(tcph->check))));
+}
+
+
static int
tcp_snat_handler(struct sk_buff *skb,
struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
struct tcphdr *tcph;
- const unsigned int tcphoff = ip_hdrlen(skb);
+ unsigned int tcphoff;
+ int oldlen;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->af == AF_INET6)
+ tcphoff = sizeof(struct ipv6hdr);
+ else
+#endif
+ tcphoff = ip_hdrlen(skb);
+ oldlen = skb->len - tcphoff;
/* csum_check requires unshared skb */
if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
@@ -133,7 +177,7 @@ tcp_snat_handler(struct sk_buff *skb,
if (unlikely(cp->app != NULL)) {
/* Some checks before mangling */
- if (pp->csum_check && !pp->csum_check(skb, pp))
+ if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
return 0;
/* Call application helper if needed */
@@ -141,13 +185,17 @@ tcp_snat_handler(struct sk_buff *skb,
return 0;
}
- tcph = (void *)ip_hdr(skb) + tcphoff;
+ tcph = (void *)skb_network_header(skb) + tcphoff;
tcph->source = cp->vport;
/* Adjust TCP checksums */
- if (!cp->app) {
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
+ htonl(oldlen),
+ htonl(skb->len - tcphoff));
+ } else if (!cp->app) {
/* Only port and addr are changed, do fast csum update */
- tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr,
+ tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
cp->dport, cp->vport);
if (skb->ip_summed == CHECKSUM_COMPLETE)
skb->ip_summed = CHECKSUM_NONE;
@@ -155,9 +203,20 @@ tcp_snat_handler(struct sk_buff *skb,
/* full checksum calculation */
tcph->check = 0;
skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
- tcph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
- skb->len - tcphoff,
- cp->protocol, skb->csum);
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->af == AF_INET6)
+ tcph->check = csum_ipv6_magic(&cp->vaddr.in6,
+ &cp->caddr.in6,
+ skb->len - tcphoff,
+ cp->protocol, skb->csum);
+ else
+#endif
+ tcph->check = csum_tcpudp_magic(cp->vaddr.ip,
+ cp->caddr.ip,
+ skb->len - tcphoff,
+ cp->protocol,
+ skb->csum);
+
IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
pp->name, tcph->check,
(char*)&(tcph->check) - (char*)tcph);
@@ -171,7 +230,16 @@ tcp_dnat_handler(struct sk_buff *skb,
struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
struct tcphdr *tcph;
- const unsigned int tcphoff = ip_hdrlen(skb);
+ unsigned int tcphoff;
+ int oldlen;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->af == AF_INET6)
+ tcphoff = sizeof(struct ipv6hdr);
+ else
+#endif
+ tcphoff = ip_hdrlen(skb);
+ oldlen = skb->len - tcphoff;
/* csum_check requires unshared skb */
if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
@@ -179,7 +247,7 @@ tcp_dnat_handler(struct sk_buff *skb,
if (unlikely(cp->app != NULL)) {
/* Some checks before mangling */
- if (pp->csum_check && !pp->csum_check(skb, pp))
+ if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
return 0;
/*
@@ -190,15 +258,19 @@ tcp_dnat_handler(struct sk_buff *skb,
return 0;
}
- tcph = (void *)ip_hdr(skb) + tcphoff;
+ tcph = (void *)skb_network_header(skb) + tcphoff;
tcph->dest = cp->dport;
/*
* Adjust TCP checksums
*/
- if (!cp->app) {
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
+ htonl(oldlen),
+ htonl(skb->len - tcphoff));
+ } else if (!cp->app) {
/* Only port and addr are changed, do fast csum update */
- tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr,
+ tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr,
cp->vport, cp->dport);
if (skb->ip_summed == CHECKSUM_COMPLETE)
skb->ip_summed = CHECKSUM_NONE;
@@ -206,9 +278,19 @@ tcp_dnat_handler(struct sk_buff *skb,
/* full checksum calculation */
tcph->check = 0;
skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
- tcph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
- skb->len - tcphoff,
- cp->protocol, skb->csum);
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->af == AF_INET6)
+ tcph->check = csum_ipv6_magic(&cp->caddr.in6,
+ &cp->daddr.in6,
+ skb->len - tcphoff,
+ cp->protocol, skb->csum);
+ else
+#endif
+ tcph->check = csum_tcpudp_magic(cp->caddr.ip,
+ cp->daddr.ip,
+ skb->len - tcphoff,
+ cp->protocol,
+ skb->csum);
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
return 1;
@@ -216,21 +298,43 @@ tcp_dnat_handler(struct sk_buff *skb,
static int
-tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
+tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
{
- const unsigned int tcphoff = ip_hdrlen(skb);
+ unsigned int tcphoff;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ tcphoff = sizeof(struct ipv6hdr);
+ else
+#endif
+ tcphoff = ip_hdrlen(skb);
switch (skb->ip_summed) {
case CHECKSUM_NONE:
skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
case CHECKSUM_COMPLETE:
- if (csum_tcpudp_magic(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
- skb->len - tcphoff,
- ip_hdr(skb)->protocol, skb->csum)) {
- IP_VS_DBG_RL_PKT(0, pp, skb, 0,
- "Failed checksum for");
- return 0;
- }
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6) {
+ if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr,
+ skb->len - tcphoff,
+ ipv6_hdr(skb)->nexthdr,
+ skb->csum)) {
+ IP_VS_DBG_RL_PKT(0, pp, skb, 0,
+ "Failed checksum for");
+ return 0;
+ }
+ } else
+#endif
+ if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
+ ip_hdr(skb)->daddr,
+ skb->len - tcphoff,
+ ip_hdr(skb)->protocol,
+ skb->csum)) {
+ IP_VS_DBG_RL_PKT(0, pp, skb, 0,
+ "Failed checksum for");
+ return 0;
+ }
break;
default:
/* No need to checksum. */
@@ -419,19 +523,23 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
if (new_state != cp->state) {
struct ip_vs_dest *dest = cp->dest;
- IP_VS_DBG(8, "%s %s [%c%c%c%c] %u.%u.%u.%u:%d->"
- "%u.%u.%u.%u:%d state: %s->%s conn->refcnt:%d\n",
- pp->name,
- (state_off==TCP_DIR_OUTPUT)?"output ":"input ",
- th->syn? 'S' : '.',
- th->fin? 'F' : '.',
- th->ack? 'A' : '.',
- th->rst? 'R' : '.',
- NIPQUAD(cp->daddr), ntohs(cp->dport),
- NIPQUAD(cp->caddr), ntohs(cp->cport),
- tcp_state_name(cp->state),
- tcp_state_name(new_state),
- atomic_read(&cp->refcnt));
+ IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->"
+ "%s:%d state: %s->%s conn->refcnt:%d\n",
+ pp->name,
+ ((state_off == TCP_DIR_OUTPUT) ?
+ "output " : "input "),
+ th->syn ? 'S' : '.',
+ th->fin ? 'F' : '.',
+ th->ack ? 'A' : '.',
+ th->rst ? 'R' : '.',
+ IP_VS_DBG_ADDR(cp->af, &cp->daddr),
+ ntohs(cp->dport),
+ IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+ ntohs(cp->cport),
+ tcp_state_name(cp->state),
+ tcp_state_name(new_state),
+ atomic_read(&cp->refcnt));
+
if (dest) {
if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
(new_state != IP_VS_TCP_S_ESTABLISHED)) {
@@ -461,7 +569,13 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction,
{
struct tcphdr _tcph, *th;
- th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
+#ifdef CONFIG_IP_VS_IPV6
+ int ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr);
+#else
+ int ihl = ip_hdrlen(skb);
+#endif
+
+ th = skb_header_pointer(skb, ihl, sizeof(_tcph), &_tcph);
if (th == NULL)
return 0;
@@ -546,12 +660,15 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
break;
spin_unlock(&tcp_app_lock);
- IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
- "%u.%u.%u.%u:%u to app %s on port %u\n",
- __func__,
- NIPQUAD(cp->caddr), ntohs(cp->cport),
- NIPQUAD(cp->vaddr), ntohs(cp->vport),
- inc->name, ntohs(inc->port));
+ IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
+ "%s:%u to app %s on port %u\n",
+ __func__,
+ IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+ ntohs(cp->cport),
+ IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
+ ntohs(cp->vport),
+ inc->name, ntohs(inc->port));
+
cp->app = inc;
if (inc->init_conn)
result = inc->init_conn(inc, cp);
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index c6be5d56823..e3ee26bd1de 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -24,8 +24,9 @@
#include <net/ip.h>
static struct ip_vs_conn *
-udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
- const struct iphdr *iph, unsigned int proto_off, int inverse)
+udp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
+ const struct ip_vs_iphdr *iph, unsigned int proto_off,
+ int inverse)
{
struct ip_vs_conn *cp;
__be16 _ports[2], *pptr;
@@ -35,13 +36,13 @@ udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
return NULL;
if (likely(!inverse)) {
- cp = ip_vs_conn_in_get(iph->protocol,
- iph->saddr, pptr[0],
- iph->daddr, pptr[1]);
+ cp = ip_vs_conn_in_get(af, iph->protocol,
+ &iph->saddr, pptr[0],
+ &iph->daddr, pptr[1]);
} else {
- cp = ip_vs_conn_in_get(iph->protocol,
- iph->daddr, pptr[1],
- iph->saddr, pptr[0]);
+ cp = ip_vs_conn_in_get(af, iph->protocol,
+ &iph->daddr, pptr[1],
+ &iph->saddr, pptr[0]);
}
return cp;
@@ -49,25 +50,25 @@ udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
static struct ip_vs_conn *
-udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
- const struct iphdr *iph, unsigned int proto_off, int inverse)
+udp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
+ const struct ip_vs_iphdr *iph, unsigned int proto_off,
+ int inverse)
{
struct ip_vs_conn *cp;
__be16 _ports[2], *pptr;
- pptr = skb_header_pointer(skb, ip_hdrlen(skb),
- sizeof(_ports), _ports);
+ pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
if (pptr == NULL)
return NULL;
if (likely(!inverse)) {
- cp = ip_vs_conn_out_get(iph->protocol,
- iph->saddr, pptr[0],
- iph->daddr, pptr[1]);
+ cp = ip_vs_conn_out_get(af, iph->protocol,
+ &iph->saddr, pptr[0],
+ &iph->daddr, pptr[1]);
} else {
- cp = ip_vs_conn_out_get(iph->protocol,
- iph->daddr, pptr[1],
- iph->saddr, pptr[0]);
+ cp = ip_vs_conn_out_get(af, iph->protocol,
+ &iph->daddr, pptr[1],
+ &iph->saddr, pptr[0]);
}
return cp;
@@ -75,21 +76,24 @@ udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
static int
-udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
+udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
int *verdict, struct ip_vs_conn **cpp)
{
struct ip_vs_service *svc;
struct udphdr _udph, *uh;
+ struct ip_vs_iphdr iph;
+
+ ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
- uh = skb_header_pointer(skb, ip_hdrlen(skb),
- sizeof(_udph), &_udph);
+ uh = skb_header_pointer(skb, iph.len, sizeof(_udph), &_udph);
if (uh == NULL) {
*verdict = NF_DROP;
return 0;
}
- if ((svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol,
- ip_hdr(skb)->daddr, uh->dest))) {
+ svc = ip_vs_service_get(af, skb->mark, iph.protocol,
+ &iph.daddr, uh->dest);
+ if (svc) {
if (ip_vs_todrop()) {
/*
* It seems that we are very loaded.
@@ -116,23 +120,63 @@ udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
+/*
+ * Update uhdr->check after an address and port rewrite without summing the
+ * payload again: ip_vs_check_diff2() is applied to the old/new port pair and
+ * ip_vs_check_diff4() (or ip_vs_check_diff16() for AF_INET6 when
+ * CONFIG_IP_VS_IPV6 is set) to the old/new address pair, starting from the
+ * complement of the current checksum.  A result of 0 is replaced by
+ * CSUM_MANGLED_0.
+ */
static inline void
-udp_fast_csum_update(struct udphdr *uhdr, __be32 oldip, __be32 newip,
+udp_fast_csum_update(int af, struct udphdr *uhdr,
+ const union nf_inet_addr *oldip,
+ const union nf_inet_addr *newip,
__be16 oldport, __be16 newport)
{
- uhdr->check =
- csum_fold(ip_vs_check_diff4(oldip, newip,
- ip_vs_check_diff2(oldport, newport,
- ~csum_unfold(uhdr->check))));
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ uhdr->check =
+ csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
+ ip_vs_check_diff2(oldport, newport,
+ ~csum_unfold(uhdr->check))));
+ else
+#endif
+ uhdr->check =
+ csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
+ ip_vs_check_diff2(oldport, newport,
+ ~csum_unfold(uhdr->check))));
if (!uhdr->check)
uhdr->check = CSUM_MANGLED_0;
}
+/*
+ * Adjust uhdr->check for a packet whose checksum will be completed later
+ * (the handlers call this when skb->ip_summed == CHECKSUM_PARTIAL).
+ *
+ * @af:      address family; AF_INET6 selects the 16-byte address delta when
+ *           CONFIG_IP_VS_IPV6 is enabled, anything else the 4-byte one
+ * @uhdr:    UDP header whose ->check field is rewritten in place
+ * @oldip:   address that was covered by the pseudo-header sum so far
+ * @newip:   address that replaces it
+ * @oldlen:  previous UDP length as used in the pseudo-header (big-endian)
+ * @newlen:  new UDP length as used in the pseudo-header (big-endian)
+ *
+ * In the CHECKSUM_PARTIAL case ->check holds the folded but *not*
+ * complemented pseudo-header sum, so the delta is applied to
+ * csum_unfold(check) directly and the complement that csum_fold() adds is
+ * undone again.  (The complete-checksum variant, udp_fast_csum_update(),
+ * uses the opposite placement of the complement.)
+ *
+ * NOTE(review): oldlen/newlen are 16 bit wide; the callers in this patch
+ * pass htonl() values, which get truncated on the way in -- they look like
+ * they should use htons(); confirm at the call sites.
+ */
+static inline void
+udp_partial_csum_update(int af, struct udphdr *uhdr,
+		     const union nf_inet_addr *oldip,
+		     const union nf_inet_addr *newip,
+		     __be16 oldlen, __be16 newlen)
+{
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		uhdr->check =
+			~csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
+					 ip_vs_check_diff2(oldlen, newlen,
+						csum_unfold(uhdr->check))));
+	else
+#endif
+	uhdr->check =
+		~csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
+				ip_vs_check_diff2(oldlen, newlen,
+						csum_unfold(uhdr->check))));
+}
+
+
static int
udp_snat_handler(struct sk_buff *skb,
struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
struct udphdr *udph;
- const unsigned int udphoff = ip_hdrlen(skb);
+ unsigned int udphoff;
+ int oldlen;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->af == AF_INET6)
+ udphoff = sizeof(struct ipv6hdr);
+ else
+#endif
+ udphoff = ip_hdrlen(skb);
+ oldlen = skb->len - udphoff;
/* csum_check requires unshared skb */
if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
@@ -140,7 +184,7 @@ udp_snat_handler(struct sk_buff *skb,
if (unlikely(cp->app != NULL)) {
/* Some checks before mangling */
- if (pp->csum_check && !pp->csum_check(skb, pp))
+ if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
return 0;
/*
@@ -150,15 +194,19 @@ udp_snat_handler(struct sk_buff *skb,
return 0;
}
- udph = (void *)ip_hdr(skb) + udphoff;
+ udph = (void *)skb_network_header(skb) + udphoff;
udph->source = cp->vport;
/*
* Adjust UDP checksums
*/
- if (!cp->app && (udph->check != 0)) {
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
+ htonl(oldlen),
+ htonl(skb->len - udphoff));
+ } else if (!cp->app && (udph->check != 0)) {
/* Only port and addr are changed, do fast csum update */
- udp_fast_csum_update(udph, cp->daddr, cp->vaddr,
+ udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
cp->dport, cp->vport);
if (skb->ip_summed == CHECKSUM_COMPLETE)
skb->ip_summed = CHECKSUM_NONE;
@@ -166,9 +214,19 @@ udp_snat_handler(struct sk_buff *skb,
/* full checksum calculation */
udph->check = 0;
skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
- udph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
- skb->len - udphoff,
- cp->protocol, skb->csum);
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->af == AF_INET6)
+ udph->check = csum_ipv6_magic(&cp->vaddr.in6,
+ &cp->caddr.in6,
+ skb->len - udphoff,
+ cp->protocol, skb->csum);
+ else
+#endif
+ udph->check = csum_tcpudp_magic(cp->vaddr.ip,
+ cp->caddr.ip,
+ skb->len - udphoff,
+ cp->protocol,
+ skb->csum);
if (udph->check == 0)
udph->check = CSUM_MANGLED_0;
IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
@@ -184,7 +242,16 @@ udp_dnat_handler(struct sk_buff *skb,
struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
struct udphdr *udph;
- unsigned int udphoff = ip_hdrlen(skb);
+ unsigned int udphoff;
+ int oldlen;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->af == AF_INET6)
+ udphoff = sizeof(struct ipv6hdr);
+ else
+#endif
+ udphoff = ip_hdrlen(skb);
+ oldlen = skb->len - udphoff;
/* csum_check requires unshared skb */
if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
@@ -192,7 +259,7 @@ udp_dnat_handler(struct sk_buff *skb,
if (unlikely(cp->app != NULL)) {
/* Some checks before mangling */
- if (pp->csum_check && !pp->csum_check(skb, pp))
+ if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
return 0;
/*
@@ -203,15 +270,19 @@ udp_dnat_handler(struct sk_buff *skb,
return 0;
}
- udph = (void *)ip_hdr(skb) + udphoff;
+ udph = (void *)skb_network_header(skb) + udphoff;
udph->dest = cp->dport;
/*
* Adjust UDP checksums
*/
- if (!cp->app && (udph->check != 0)) {
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
+ htonl(oldlen),
+ htonl(skb->len - udphoff));
+ } else if (!cp->app && (udph->check != 0)) {
/* Only port and addr are changed, do fast csum update */
- udp_fast_csum_update(udph, cp->vaddr, cp->daddr,
+ udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr,
cp->vport, cp->dport);
if (skb->ip_summed == CHECKSUM_COMPLETE)
skb->ip_summed = CHECKSUM_NONE;
@@ -219,9 +290,19 @@ udp_dnat_handler(struct sk_buff *skb,
/* full checksum calculation */
udph->check = 0;
skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
- udph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
- skb->len - udphoff,
- cp->protocol, skb->csum);
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->af == AF_INET6)
+ udph->check = csum_ipv6_magic(&cp->caddr.in6,
+ &cp->daddr.in6,
+ skb->len - udphoff,
+ cp->protocol, skb->csum);
+ else
+#endif
+ udph->check = csum_tcpudp_magic(cp->caddr.ip,
+ cp->daddr.ip,
+ skb->len - udphoff,
+ cp->protocol,
+ skb->csum);
if (udph->check == 0)
udph->check = CSUM_MANGLED_0;
skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -231,10 +312,17 @@ udp_dnat_handler(struct sk_buff *skb,
static int
-udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
+udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
{
struct udphdr _udph, *uh;
- const unsigned int udphoff = ip_hdrlen(skb);
+ unsigned int udphoff;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ udphoff = sizeof(struct ipv6hdr);
+ else
+#endif
+ udphoff = ip_hdrlen(skb);
uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph);
if (uh == NULL)
@@ -246,15 +334,28 @@ udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
skb->csum = skb_checksum(skb, udphoff,
skb->len - udphoff, 0);
case CHECKSUM_COMPLETE:
- if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
- ip_hdr(skb)->daddr,
- skb->len - udphoff,
- ip_hdr(skb)->protocol,
- skb->csum)) {
- IP_VS_DBG_RL_PKT(0, pp, skb, 0,
- "Failed checksum for");
- return 0;
- }
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6) {
+ if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr,
+ skb->len - udphoff,
+ ipv6_hdr(skb)->nexthdr,
+ skb->csum)) {
+ IP_VS_DBG_RL_PKT(0, pp, skb, 0,
+ "Failed checksum for");
+ return 0;
+ }
+ } else
+#endif
+ if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
+ ip_hdr(skb)->daddr,
+ skb->len - udphoff,
+ ip_hdr(skb)->protocol,
+ skb->csum)) {
+ IP_VS_DBG_RL_PKT(0, pp, skb, 0,
+ "Failed checksum for");
+ return 0;
+ }
break;
default:
/* No need to checksum. */
@@ -340,12 +441,15 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
break;
spin_unlock(&udp_app_lock);
- IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
- "%u.%u.%u.%u:%u to app %s on port %u\n",
- __func__,
- NIPQUAD(cp->caddr), ntohs(cp->cport),
- NIPQUAD(cp->vaddr), ntohs(cp->vport),
- inc->name, ntohs(inc->port));
+ IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
+ "%s:%u to app %s on port %u\n",
+ __func__,
+ IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+ ntohs(cp->cport),
+ IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
+ ntohs(cp->vport),
+ inc->name, ntohs(inc->port));
+
cp->app = inc;
if (inc->init_conn)
result = inc->init_conn(inc, cp);
diff --git a/net/ipv4/ipvs/ip_vs_rr.c b/net/ipv4/ipvs/ip_vs_rr.c
index f7492911753..a22195f68ac 100644
--- a/net/ipv4/ipvs/ip_vs_rr.c
+++ b/net/ipv4/ipvs/ip_vs_rr.c
@@ -74,11 +74,11 @@ ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
out:
svc->sched_data = q;
write_unlock(&svc->sched_lock);
- IP_VS_DBG(6, "RR: server %u.%u.%u.%u:%u "
- "activeconns %d refcnt %d weight %d\n",
- NIPQUAD(dest->addr), ntohs(dest->port),
- atomic_read(&dest->activeconns),
- atomic_read(&dest->refcnt), atomic_read(&dest->weight));
+ IP_VS_DBG_BUF(6, "RR: server %s:%u "
+ "activeconns %d refcnt %d weight %d\n",
+ IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),
+ atomic_read(&dest->activeconns),
+ atomic_read(&dest->refcnt), atomic_read(&dest->weight));
return dest;
}
@@ -89,6 +89,9 @@ static struct ip_vs_scheduler ip_vs_rr_scheduler = {
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
.n_list = LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+ .supports_ipv6 = 1,
+#endif
.init_service = ip_vs_rr_init_svc,
.update_service = ip_vs_rr_update_svc,
.schedule = ip_vs_rr_schedule,
diff --git a/net/ipv4/ipvs/ip_vs_sed.c b/net/ipv4/ipvs/ip_vs_sed.c
index 53f73bea66c..7d2f22f04b8 100644
--- a/net/ipv4/ipvs/ip_vs_sed.c
+++ b/net/ipv4/ipvs/ip_vs_sed.c
@@ -101,12 +101,12 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
}
}
- IP_VS_DBG(6, "SED: server %u.%u.%u.%u:%u "
- "activeconns %d refcnt %d weight %d overhead %d\n",
- NIPQUAD(least->addr), ntohs(least->port),
- atomic_read(&least->activeconns),
- atomic_read(&least->refcnt),
- atomic_read(&least->weight), loh);
+ IP_VS_DBG_BUF(6, "SED: server %s:%u "
+ "activeconns %d refcnt %d weight %d overhead %d\n",
+ IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
+ atomic_read(&least->activeconns),
+ atomic_read(&least->refcnt),
+ atomic_read(&least->weight), loh);
return least;
}
@@ -118,6 +118,9 @@ static struct ip_vs_scheduler ip_vs_sed_scheduler =
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
.n_list = LIST_HEAD_INIT(ip_vs_sed_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+ .supports_ipv6 = 1,
+#endif
.schedule = ip_vs_sed_schedule,
};
diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/ipv4/ipvs/ip_vs_sh.c
index 7b979e22805..1d96de27fef 100644
--- a/net/ipv4/ipvs/ip_vs_sh.c
+++ b/net/ipv4/ipvs/ip_vs_sh.c
@@ -215,7 +215,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
IP_VS_DBG(6, "SH: source IP address %u.%u.%u.%u "
"--> server %u.%u.%u.%u:%d\n",
NIPQUAD(iph->saddr),
- NIPQUAD(dest->addr),
+ NIPQUAD(dest->addr.ip),
ntohs(dest->port));
return dest;
@@ -231,6 +231,9 @@ static struct ip_vs_scheduler ip_vs_sh_scheduler =
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
.n_list = LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+ .supports_ipv6 = 0,
+#endif
.init_service = ip_vs_sh_init_svc,
.done_service = ip_vs_sh_done_svc,
.update_service = ip_vs_sh_update_svc,
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index a652da2c320..28237a5f62e 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -256,9 +256,9 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp)
s->cport = cp->cport;
s->vport = cp->vport;
s->dport = cp->dport;
- s->caddr = cp->caddr;
- s->vaddr = cp->vaddr;
- s->daddr = cp->daddr;
+ s->caddr = cp->caddr.ip;
+ s->vaddr = cp->vaddr.ip;
+ s->daddr = cp->daddr.ip;
s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED);
s->state = htons(cp->state);
if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
@@ -366,21 +366,28 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
}
if (!(flags & IP_VS_CONN_F_TEMPLATE))
- cp = ip_vs_conn_in_get(s->protocol,
- s->caddr, s->cport,
- s->vaddr, s->vport);
+ cp = ip_vs_conn_in_get(AF_INET, s->protocol,
+ (union nf_inet_addr *)&s->caddr,
+ s->cport,
+ (union nf_inet_addr *)&s->vaddr,
+ s->vport);
else
- cp = ip_vs_ct_in_get(s->protocol,
- s->caddr, s->cport,
- s->vaddr, s->vport);
+ cp = ip_vs_ct_in_get(AF_INET, s->protocol,
+ (union nf_inet_addr *)&s->caddr,
+ s->cport,
+ (union nf_inet_addr *)&s->vaddr,
+ s->vport);
if (!cp) {
/*
* Find the appropriate destination for the connection.
* If it is not found the connection will remain unbound
* but still handled.
*/
- dest = ip_vs_find_dest(s->daddr, s->dport,
- s->vaddr, s->vport,
+ dest = ip_vs_find_dest(AF_INET,
+ (union nf_inet_addr *)&s->daddr,
+ s->dport,
+ (union nf_inet_addr *)&s->vaddr,
+ s->vport,
s->protocol);
/* Set the approprite ativity flag */
if (s->protocol == IPPROTO_TCP) {
@@ -389,10 +396,13 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
else
flags &= ~IP_VS_CONN_F_INACTIVE;
}
- cp = ip_vs_conn_new(s->protocol,
- s->caddr, s->cport,
- s->vaddr, s->vport,
- s->daddr, s->dport,
+ cp = ip_vs_conn_new(AF_INET, s->protocol,
+ (union nf_inet_addr *)&s->caddr,
+ s->cport,
+ (union nf_inet_addr *)&s->vaddr,
+ s->vport,
+ (union nf_inet_addr *)&s->daddr,
+ s->dport,
flags, dest);
if (dest)
atomic_dec(&dest->refcnt);
diff --git a/net/ipv4/ipvs/ip_vs_wlc.c b/net/ipv4/ipvs/ip_vs_wlc.c
index df7ad8d7476..8c596e71259 100644
--- a/net/ipv4/ipvs/ip_vs_wlc.c
+++ b/net/ipv4/ipvs/ip_vs_wlc.c
@@ -89,12 +89,12 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
}
}
- IP_VS_DBG(6, "WLC: server %u.%u.%u.%u:%u "
- "activeconns %d refcnt %d weight %d overhead %d\n",
- NIPQUAD(least->addr), ntohs(least->port),
- atomic_read(&least->activeconns),
- atomic_read(&least->refcnt),
- atomic_read(&least->weight), loh);
+ IP_VS_DBG_BUF(6, "WLC: server %s:%u "
+ "activeconns %d refcnt %d weight %d overhead %d\n",
+ IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
+ atomic_read(&least->activeconns),
+ atomic_read(&least->refcnt),
+ atomic_read(&least->weight), loh);
return least;
}
@@ -106,6 +106,9 @@ static struct ip_vs_scheduler ip_vs_wlc_scheduler =
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
.n_list = LIST_HEAD_INIT(ip_vs_wlc_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+ .supports_ipv6 = 1,
+#endif
.schedule = ip_vs_wlc_schedule,
};
diff --git a/net/ipv4/ipvs/ip_vs_wrr.c b/net/ipv4/ipvs/ip_vs_wrr.c
index 0d86a79b87b..7ea92fed50b 100644
--- a/net/ipv4/ipvs/ip_vs_wrr.c
+++ b/net/ipv4/ipvs/ip_vs_wrr.c
@@ -195,12 +195,12 @@ ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
}
}
- IP_VS_DBG(6, "WRR: server %u.%u.%u.%u:%u "
- "activeconns %d refcnt %d weight %d\n",
- NIPQUAD(dest->addr), ntohs(dest->port),
- atomic_read(&dest->activeconns),
- atomic_read(&dest->refcnt),
- atomic_read(&dest->weight));
+ IP_VS_DBG_BUF(6, "WRR: server %s:%u "
+ "activeconns %d refcnt %d weight %d\n",
+ IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),
+ atomic_read(&dest->activeconns),
+ atomic_read(&dest->refcnt),
+ atomic_read(&dest->weight));
out:
write_unlock(&svc->sched_lock);
@@ -213,6 +213,9 @@ static struct ip_vs_scheduler ip_vs_wrr_scheduler = {
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
.n_list = LIST_HEAD_INIT(ip_vs_wrr_scheduler.n_list),
+#ifdef CONFIG_IP_VS_IPV6
+ .supports_ipv6 = 1,
+#endif
.init_service = ip_vs_wrr_init_svc,
.done_service = ip_vs_wrr_done_svc,
.update_service = ip_vs_wrr_update_svc,
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index 9892d4aca42..02ddc2b3ce2 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -20,6 +20,9 @@
#include <net/udp.h>
#include <net/icmp.h> /* for icmp_send */
#include <net/route.h> /* for ip_route_output */
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <linux/icmpv6.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
@@ -47,7 +50,8 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos, u32 cookie)
if (!dst)
return NULL;
- if ((dst->obsolete || rtos != dest->dst_rtos) &&
+ if ((dst->obsolete
+ || (dest->af == AF_INET && rtos != dest->dst_rtos)) &&
dst->ops->check(dst, cookie) == NULL) {
dest->dst_cache = NULL;
dst_release(dst);
@@ -71,7 +75,7 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
.oif = 0,
.nl_u = {
.ip4_u = {
- .daddr = dest->addr,
+ .daddr = dest->addr.ip,
.saddr = 0,
.tos = rtos, } },
};
@@ -80,12 +84,12 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
spin_unlock(&dest->dst_lock);
IP_VS_DBG_RL("ip_route_output error, "
"dest: %u.%u.%u.%u\n",
- NIPQUAD(dest->addr));
+ NIPQUAD(dest->addr.ip));
return NULL;
}
__ip_vs_dst_set(dest, rtos, dst_clone(&rt->u.dst));
IP_VS_DBG(10, "new dst %u.%u.%u.%u, refcnt=%d, rtos=%X\n",
- NIPQUAD(dest->addr),
+ NIPQUAD(dest->addr.ip),
atomic_read(&rt->u.dst.__refcnt), rtos);
}
spin_unlock(&dest->dst_lock);
@@ -94,14 +98,14 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
.oif = 0,
.nl_u = {
.ip4_u = {
- .daddr = cp->daddr,
+ .daddr = cp->daddr.ip,
.saddr = 0,
.tos = rtos, } },
};
if (ip_route_output_key(&init_net, &rt, &fl)) {
IP_VS_DBG_RL("ip_route_output error, dest: "
- "%u.%u.%u.%u\n", NIPQUAD(cp->daddr));
+ "%u.%u.%u.%u\n", NIPQUAD(cp->daddr.ip));
return NULL;
}
}
@@ -109,6 +113,70 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
return rt;
}
+#ifdef CONFIG_IP_VS_IPV6
+/*
+ * Find the IPv6 route used to reach the real server of connection cp.
+ *
+ * With a bound destination (cp->dest) the route cached in it is tried first
+ * through __ip_vs_dst_check() while dest->dst_lock is held; on a miss a
+ * lookup for dest->addr.in6 is made with ip6_route_output() and a
+ * dst_clone() of the result is stored with __ip_vs_dst_set().  Without a
+ * destination an uncached lookup for cp->daddr.in6 is made.
+ *
+ * Returns the route, or NULL when one of the "!rt" checks below triggers.
+ *
+ * NOTE(review): the "!rt" checks assume ip6_route_output() signals failure
+ * by returning NULL.  If it reports failure through an error field of the
+ * returned entry instead, these checks never fire -- confirm against the
+ * ip6_route_output() implementation.
+ */
+static struct rt6_info *
+__ip_vs_get_out_rt_v6(struct ip_vs_conn *cp)
+{
+ struct rt6_info *rt; /* Route to the other host */
+ struct ip_vs_dest *dest = cp->dest;
+
+ if (dest) {
+ spin_lock(&dest->dst_lock);
+ /* rtos and cookie are both passed as 0 on the IPv6 path */
+ rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0, 0);
+ if (!rt) {
+ struct flowi fl = {
+ .oif = 0,
+ .nl_u = {
+ .ip6_u = {
+ .daddr = dest->addr.in6,
+ .saddr = {
+ .s6_addr32 =
+ { 0, 0, 0, 0 },
+ },
+ },
+ },
+ };
+
+ rt = (struct rt6_info *)ip6_route_output(&init_net,
+ NULL, &fl);
+ if (!rt) {
+ spin_unlock(&dest->dst_lock);
+ IP_VS_DBG_RL("ip6_route_output error, "
+ "dest: " NIP6_FMT "\n",
+ NIP6(dest->addr.in6));
+ return NULL;
+ }
+ /* remember the new route in the destination's cache */
+ __ip_vs_dst_set(dest, 0, dst_clone(&rt->u.dst));
+ IP_VS_DBG(10, "new dst " NIP6_FMT ", refcnt=%d\n",
+ NIP6(dest->addr.in6),
+ atomic_read(&rt->u.dst.__refcnt));
+ }
+ spin_unlock(&dest->dst_lock);
+ } else {
+ /* no destination bound: route on the connection's own daddr */
+ struct flowi fl = {
+ .oif = 0,
+ .nl_u = {
+ .ip6_u = {
+ .daddr = cp->daddr.in6,
+ .saddr = {
+ .s6_addr32 = { 0, 0, 0, 0 },
+ },
+ },
+ },
+ };
+
+ rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
+ if (!rt) {
+ IP_VS_DBG_RL("ip6_route_output error, dest: "
+ NIP6_FMT "\n", NIP6(cp->daddr.in6));
+ return NULL;
+ }
+ }
+
+ return rt;
+}
+#endif
+
/*
* Release dest->dst_cache before a dest is removed
@@ -123,11 +191,11 @@ ip_vs_dst_reset(struct ip_vs_dest *dest)
dst_release(old_dst);
}
+/*
+ * Hand skb to the NF_INET_LOCAL_OUT hook of protocol family pf, with
+ * rt's device as the output device and dst_output as the continuation.
+ * The skb is flagged with ipvs_property = 1 and passed through
+ * skb_forward_csum() first.
+ */
-#define IP_VS_XMIT(skb, rt) \
+#define IP_VS_XMIT(pf, skb, rt) \
do { \
(skb)->ipvs_property = 1; \
skb_forward_csum(skb); \
- NF_HOOK(PF_INET, NF_INET_LOCAL_OUT, (skb), NULL, \
+ NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \
(rt)->u.dst.dev, dst_output); \
} while (0)
@@ -200,7 +268,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
- IP_VS_XMIT(skb, rt);
+ IP_VS_XMIT(PF_INET, skb, rt);
LeaveFunction(10);
return NF_STOLEN;
@@ -213,6 +281,70 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
return NF_STOLEN;
}
+#ifdef CONFIG_IP_VS_IPV6
+/*
+ * IPv6 bypass transmitter: route the packet on the destination address
+ * found in its own IPv6 header and send it out unmodified.  Neither cp nor
+ * pp is referenced in the body.
+ *
+ * Every path returns NF_STOLEN; on the error paths the skb is freed here.
+ *
+ * NOTE(review): the "!rt" check assumes ip6_route_output() returns NULL on
+ * failure -- confirm against its implementation.
+ */
+int
+ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
+ struct ip_vs_protocol *pp)
+{
+ struct rt6_info *rt; /* Route to the other host */
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+ int mtu;
+ struct flowi fl = {
+ .oif = 0,
+ .nl_u = {
+ .ip6_u = {
+ .daddr = iph->daddr,
+ .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
+ };
+
+ EnterFunction(10);
+
+ rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
+ if (!rt) {
+ IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): ip6_route_output error, "
+ "dest: " NIP6_FMT "\n", NIP6(iph->daddr));
+ goto tx_error_icmp;
+ }
+
+ /* MTU checking */
+ mtu = dst_mtu(&rt->u.dst);
+ if (skb->len > mtu) {
+ /* the route is dropped here, so tx_error only has to free the skb */
+ dst_release(&rt->u.dst);
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+ IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): frag needed\n");
+ goto tx_error;
+ }
+
+ /*
+ * skb_share_check() may hand back a different skb, or NULL on
+ * failure, in which case only the route is left to release.
+ * (The comment inherited from the IPv4 code spoke of
+ * ip_send_check(); nothing of the sort is called here.)
+ * Is copy-on-write needed?
+ */
+ skb = skb_share_check(skb, GFP_ATOMIC);
+ if (unlikely(skb == NULL)) {
+ dst_release(&rt->u.dst);
+ return NF_STOLEN;
+ }
+
+ /* drop old route */
+ dst_release(skb->dst);
+ skb->dst = &rt->u.dst;
+
+ /* Another hack: avoid icmp_send in ip_fragment */
+ skb->local_df = 1;
+
+ IP_VS_XMIT(PF_INET6, skb, rt);
+
+ LeaveFunction(10);
+ return NF_STOLEN;
+
+ tx_error_icmp:
+ dst_link_failure(skb);
+ tx_error:
+ kfree_skb(skb);
+ LeaveFunction(10);
+ return NF_STOLEN;
+}
+#endif
/*
* NAT transmitter (only for outside-to-inside nat forwarding)
@@ -264,7 +396,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* mangle the packet */
if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
goto tx_error;
- ip_hdr(skb)->daddr = cp->daddr;
+ ip_hdr(skb)->daddr = cp->daddr.ip;
ip_send_check(ip_hdr(skb));
IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
@@ -276,7 +408,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
- IP_VS_XMIT(skb, rt);
+ IP_VS_XMIT(PF_INET, skb, rt);
LeaveFunction(10);
return NF_STOLEN;
@@ -292,6 +424,83 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
goto tx_error;
}
+#ifdef CONFIG_IP_VS_IPV6
+/*
+ * IPv6 NAT transmitter: look up the route for cp, let the protocol's
+ * dnat_handler (if any) mangle the transport header, rewrite the IPv6
+ * destination address to cp->daddr.in6 and send the packet through
+ * IP_VS_XMIT(PF_INET6, ...).
+ *
+ * Every path returns NF_STOLEN; on the error paths the skb is freed here.
+ */
+int
+ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
+ struct ip_vs_protocol *pp)
+{
+ struct rt6_info *rt; /* Route to the other host */
+ int mtu;
+
+ EnterFunction(10);
+
+ /* check if it is a connection of no-client-port */
+ if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
+ __be16 _pt, *p;
+ /* read the first 16 bits found right behind a bare IPv6 header */
+ p = skb_header_pointer(skb, sizeof(struct ipv6hdr),
+ sizeof(_pt), &_pt);
+ if (p == NULL)
+ goto tx_error;
+ ip_vs_conn_fill_cport(cp, *p);
+ IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
+ }
+
+ rt = __ip_vs_get_out_rt_v6(cp);
+ if (!rt)
+ goto tx_error_icmp;
+
+ /* MTU checking */
+ mtu = dst_mtu(&rt->u.dst);
+ if (skb->len > mtu) {
+ dst_release(&rt->u.dst);
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+ IP_VS_DBG_RL_PKT(0, pp, skb, 0,
+ "ip_vs_nat_xmit_v6(): frag needed for");
+ goto tx_error;
+ }
+
+ /* copy-on-write the packet before mangling it */
+ if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
+ goto tx_error_put;
+
+ if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
+ goto tx_error_put;
+
+ /* drop old route */
+ dst_release(skb->dst);
+ skb->dst = &rt->u.dst;
+
+ /*
+  * rt is attached to the skb from here on, so a failure below goes
+  * to tx_error (no separate dst_release) rather than tx_error_put.
+  */
+ /* mangle the packet */
+ if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
+ goto tx_error;
+ ipv6_hdr(skb)->daddr = cp->daddr.in6;
+
+ IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
+
+ /* FIXME: when application helper enlarges the packet and the length
+ is larger than the MTU of outgoing device, there will be still
+ MTU problem. */
+
+ /* Another hack: avoid icmp_send in ip_fragment */
+ skb->local_df = 1;
+
+ IP_VS_XMIT(PF_INET6, skb, rt);
+
+ LeaveFunction(10);
+ return NF_STOLEN;
+
+tx_error_icmp:
+ dst_link_failure(skb);
+tx_error:
+ LeaveFunction(10);
+ kfree_skb(skb);
+ return NF_STOLEN;
+tx_error_put:
+ /* route obtained but not yet attached to the skb: release it here */
+ dst_release(&rt->u.dst);
+ goto tx_error;
+}
+#endif
+
/*
* IP Tunneling transmitter
@@ -423,6 +632,112 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
return NF_STOLEN;
}
+#ifdef CONFIG_IP_VS_IPV6
+/*
+ * IPv6-in-IPv6 tunneling transmitter: prepend a new IPv6 header
+ * (nexthdr = IPPROTO_IPV6) in front of the original packet and send the
+ * result with ip6_local_out().
+ *
+ * @skb: packet to encapsulate; must carry ETH_P_IPV6
+ * @cp:  connection; selects the route and supplies cp->vaddr.in6 as the
+ *       outer source address
+ * @pp:  protocol handler (not referenced in the body)
+ *
+ * Every path returns NF_STOLEN; on the error paths the skb is freed here.
+ */
+int
+ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
+		     struct ip_vs_protocol *pp)
+{
+	struct rt6_info *rt;		/* Route to the other host */
+	struct net_device *tdev;	/* Device to other host */
+	struct ipv6hdr *old_iph = ipv6_hdr(skb);
+	sk_buff_data_t old_transport_header = skb->transport_header;
+	struct ipv6hdr *iph;		/* Our new IP header */
+	unsigned int max_headroom;	/* The extra header space needed */
+	int mtu;
+
+	EnterFunction(10);
+
+	if (skb->protocol != htons(ETH_P_IPV6)) {
+		IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): protocol error, "
+			     "ETH_P_IPV6: %d, skb protocol: %d\n",
+			     htons(ETH_P_IPV6), skb->protocol);
+		goto tx_error;
+	}
+
+	rt = __ip_vs_get_out_rt_v6(cp);
+	if (!rt)
+		goto tx_error_icmp;
+
+	tdev = rt->u.dst.dev;
+
+	/* room left for the inner packet once the outer header is added */
+	mtu = dst_mtu(&rt->u.dst) - sizeof(struct ipv6hdr);
+	/* TODO IPv6: do we need this check in IPv6? */
+	if (mtu < 1280) {
+		dst_release(&rt->u.dst);
+		IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): mtu less than 1280\n");
+		goto tx_error;
+	}
+	if (skb->dst)
+		skb->dst->ops->update_pmtu(skb->dst, mtu);
+
+	if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
+		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+		dst_release(&rt->u.dst);
+		IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): frag needed\n");
+		goto tx_error;
+	}
+
+	/*
+	 * Okay, now see if we can stuff it in the buffer as-is.
+	 */
+	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
+
+	if (skb_headroom(skb) < max_headroom
+	    || skb_cloned(skb) || skb_shared(skb)) {
+		struct sk_buff *new_skb =
+			skb_realloc_headroom(skb, max_headroom);
+		if (!new_skb) {
+			dst_release(&rt->u.dst);
+			kfree_skb(skb);
+			IP_VS_ERR_RL("ip_vs_tunnel_xmit_v6(): no memory\n");
+			return NF_STOLEN;
+		}
+		kfree_skb(skb);
+		skb = new_skb;
+		/* the inner header now lives in the new buffer */
+		old_iph = ipv6_hdr(skb);
+	}
+
+	skb->transport_header = old_transport_header;
+
+	skb_push(skb, sizeof(struct ipv6hdr));
+	skb_reset_network_header(skb);
+	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
+	/* drop old route */
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+
+	/*
+	 *	Push down and install the IPIP header.
+	 */
+	iph			=	ipv6_hdr(skb);
+	iph->version		=	6;
+	iph->nexthdr		=	IPPROTO_IPV6;
+	/*
+	 * The outer payload is the inner payload plus the complete inner
+	 * IPv6 header.  payload_len is big-endian, so do the addition in
+	 * host order, and add the size of the header itself
+	 * (sizeof(*old_iph)), not the size of the pointer.
+	 */
+	iph->payload_len	=	htons(ntohs(old_iph->payload_len) +
+					      sizeof(*old_iph));
+	iph->priority		=	old_iph->priority;
+	memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
+	iph->daddr		=	rt->rt6i_dst.addr;
+	iph->saddr		=	cp->vaddr.in6; /* rt->rt6i_src.addr; */
+	iph->hop_limit		=	old_iph->hop_limit;
+
+	/* Another hack: avoid icmp_send in ip_fragment */
+	skb->local_df = 1;
+
+	ip6_local_out(skb);
+
+	LeaveFunction(10);
+
+	return NF_STOLEN;
+
+tx_error_icmp:
+	dst_link_failure(skb);
+tx_error:
+	kfree_skb(skb);
+	LeaveFunction(10);
+	return NF_STOLEN;
+}
+#endif
+
/*
* Direct Routing transmitter
@@ -467,7 +782,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
- IP_VS_XMIT(skb, rt);
+ IP_VS_XMIT(PF_INET, skb, rt);
LeaveFunction(10);
return NF_STOLEN;
@@ -480,6 +795,60 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
return NF_STOLEN;
}
+#ifdef CONFIG_IP_VS_IPV6
+/*
+ * IPv6 direct-routing transmitter: attach the route returned by
+ * __ip_vs_get_out_rt_v6(cp) to the skb and send the packet otherwise
+ * unmodified through IP_VS_XMIT(PF_INET6, ...).  pp is not referenced in
+ * the body.
+ *
+ * Every path returns NF_STOLEN; on the error paths the skb is freed here.
+ */
+int
+ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
+ struct ip_vs_protocol *pp)
+{
+ struct rt6_info *rt; /* Route to the other host */
+ int mtu;
+
+ EnterFunction(10);
+
+ rt = __ip_vs_get_out_rt_v6(cp);
+ if (!rt)
+ goto tx_error_icmp;
+
+ /* MTU checking */
+ mtu = dst_mtu(&rt->u.dst);
+ if (skb->len > mtu) {
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+ /* the route is dropped here, so tx_error only has to free the skb */
+ dst_release(&rt->u.dst);
+ IP_VS_DBG_RL("ip_vs_dr_xmit_v6(): frag needed\n");
+ goto tx_error;
+ }
+
+ /*
+ * skb_share_check() may hand back a different skb, or NULL on
+ * failure, in which case only the route is left to release.
+ * (The comment inherited from the IPv4 code spoke of
+ * ip_send_check(); nothing of the sort is called here.)
+ * Is copy-on-write needed?
+ */
+ skb = skb_share_check(skb, GFP_ATOMIC);
+ if (unlikely(skb == NULL)) {
+ dst_release(&rt->u.dst);
+ return NF_STOLEN;
+ }
+
+ /* drop old route */
+ dst_release(skb->dst);
+ skb->dst = &rt->u.dst;
+
+ /* Another hack: avoid icmp_send in ip_fragment */
+ skb->local_df = 1;
+
+ IP_VS_XMIT(PF_INET6, skb, rt);
+
+ LeaveFunction(10);
+ return NF_STOLEN;
+
+tx_error_icmp:
+ dst_link_failure(skb);
+tx_error:
+ kfree_skb(skb);
+ LeaveFunction(10);
+ return NF_STOLEN;
+}
+#endif
+
/*
* ICMP packet transmitter
@@ -540,7 +909,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
- IP_VS_XMIT(skb, rt);
+ IP_VS_XMIT(PF_INET, skb, rt);
rc = NF_STOLEN;
goto out;
@@ -557,3 +926,79 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
ip_rt_put(rt);
goto tx_error;
}
+
+#ifdef CONFIG_IP_VS_IPV6
+/*
+ * IPv6 ICMP packet transmitter.
+ *
+ * For every forwarding method other than IP_VS_CONN_F_MASQ the packet is
+ * passed to cp->packet_xmit (or accepted with NF_ACCEPT when that is not
+ * set).  For VS/NAT the packet is made writable up to offset, mangled by
+ * ip_vs_nat_icmp_v6() and sent through IP_VS_XMIT(PF_INET6, ...).
+ *
+ * Returns the verdict of cp->packet_xmit, NF_ACCEPT, or NF_STOLEN; on the
+ * error paths the skb is freed here.
+ */
+int
+ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
+ struct ip_vs_protocol *pp, int offset)
+{
+ struct rt6_info *rt; /* Route to the other host */
+ int mtu;
+ int rc;
+
+ EnterFunction(10);
+
+ /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
+ forwarded directly here, because there is no need to
+ translate address/port back */
+ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
+ if (cp->packet_xmit)
+ rc = cp->packet_xmit(skb, cp, pp);
+ else
+ rc = NF_ACCEPT;
+ /* do not touch skb anymore */
+ atomic_inc(&cp->in_pkts);
+ goto out;
+ }
+
+ /*
+ * mangle and send the packet here (only for VS/NAT)
+ */
+
+ rt = __ip_vs_get_out_rt_v6(cp);
+ if (!rt)
+ goto tx_error_icmp;
+
+ /* MTU checking */
+ mtu = dst_mtu(&rt->u.dst);
+ if (skb->len > mtu) {
+ dst_release(&rt->u.dst);
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+ /* NOTE(review): the message names ip_vs_in_icmp(), not this function */
+ IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n");
+ goto tx_error;
+ }
+
+ /* copy-on-write the packet before mangling it */
+ if (!skb_make_writable(skb, offset))
+ goto tx_error_put;
+
+ if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
+ goto tx_error_put;
+
+ /* drop the old route when skb is not shared */
+ dst_release(skb->dst);
+ skb->dst = &rt->u.dst;
+
+ ip_vs_nat_icmp_v6(skb, pp, cp, 0);
+
+ /* Another hack: avoid icmp_send in ip_fragment */
+ skb->local_df = 1;
+
+ IP_VS_XMIT(PF_INET6, skb, rt);
+
+ rc = NF_STOLEN;
+ goto out;
+
+tx_error_icmp:
+ dst_link_failure(skb);
+tx_error:
+ dev_kfree_skb(skb);
+ rc = NF_STOLEN;
+out:
+ LeaveFunction(10);
+ return rc;
+tx_error_put:
+ /* route obtained but not yet attached to the skb: release it here */
+ dst_release(&rt->u.dst);
+ goto tx_error;
+}
+#endif