summaryrefslogtreecommitdiffstats
path: root/include/net/ip_vs.h
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-05-21 10:03:46 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2012-05-21 10:03:46 -0700
commitcb62ab71fe2b16e8203a0f0a2ef4eda23d761338 (patch)
tree536ba39658e47d511a489c52f7aac60cd78967e5 /include/net/ip_vs.h
parent31ed8e6f93a27304c9e157dab0267772cd94eaad (diff)
parent74863948f925d9f3bb4e3d3a783e49e9c662d839 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking changes from David Miller: 1) Get rid of the error prone NLA_PUT*() macros that used an embedded goto. 2) Kill off the token-ring and MCA networking drivers, from Paul Gortmaker. 3) Reduce high-order allocations made by datagram AF_UNIX sockets, from Eric Dumazet. 4) Add PTP hardware clock support to IGB and IXGBE, from Richard Cochran and Jacob Keller. 5) Allow users to query timestamping capabilities of a card via ethtool, from Richard Cochran. 6) Add loadbalance mode to the teaming driver, from Jiri Pirko. Part of this is that we can now have BPF filters not attached to sockets, and the loadbalancing function is calculated using one. 7) Francois Romieu went through the network drivers removing gratuitous uses of netdev->base_addr, perhaps some day we can remove it completely but it's used for ISA probing still. 8) Add a BPF JIT for sparc. I know, who cares, right? :-) 9) Move networking sysctl registry away from using the compatability mode interfaces in the sysctl code. From Eric W Biederman. 10) Pavel Emelyanov added a way to save and restore TCP socket state via TCP_REPAIR, TCP_REPAIR_QUEUE, and TCP_QUEUE_SEQ socket options as well as a way to forcefully bind a socket to a port via the sk->sk_reuse value SK_FORCE_REUSE. There is also a TCP_REPAIR_OPTIONS which allows to reinstante the TCP options enabled on the connection. 11) Several enhancements from Eric Dumazet that, in particular, can enhance splice performance on TCP sockets significantly. a) Reset the offset of the per-socket sendmsg page when we know we're the only use of the page in linear_to_page(). b) Add facilities such that skb->data can be backed a page rather than SLAB kmalloc'd memory. In particular devices which were receiving into linear RX buffers can now end up providing paged data. The big result is that code like splice and GRO do not have to copy any more. 12) Allow a pure sender to more gracefully handle ACK backlogs in TCP. What can happen at high rates is that the sender hasn't grown his receive buffer limits at all (he's not receiving data so really doesn't need to), but the non-data ACKs consume receive buffer space. sk_add_backlog() is too aggressive in dropping frames in this case, so relax it's requirements by using the receive buffer plus the send buffer limit as the backlog limit instead of just the former. Also from Eric Dumazet. 13) Add ipv6 support to L2TP, from Benjamin LaHaise, James Chapman, and Chris Elston. 14) Implement TCP early retransmit (RFC 5827), from Yuchung Cheng. Basically, we can start fast retransmit before hiting the dupack threshold under certain conditions. 15) New CODEL active queue management packet scheduler, from Eric Dumazet based upon initial work by Dave Taht. Basically, the big feature is that packets are dropped (or ECN bits are set) based upon how long packets live in the queue, rather than the queue length (which is what RED uses). * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1341 commits) drivers/net/stmmac: seq_file fix memory leak ipv6/exthdrs: strict Pad1 and PadN check USB: qmi_wwan: Add ZTE (Vodafone) K3520-Z USB: qmi_wwan: Add ZTE (Vodafone) K3765-Z USB: qmi_wwan: Make forced int 4 whitelist generic net/ipv4: replace simple_strtoul with kstrtoul net/ipv4/ipconfig: neaten __setup placement net: qmi_wwan: Add Vodafone/Huawei K5005 support net: cdc_ether: Add ZTE WWAN matches before generic Ethernet ipv6: use skb coalescing in reassembly ipv4: use skb coalescing in defragmentation net: introduce skb_try_coalesce() net:ipv6:fixed space issues relating to operators. net:ipv6:fixed a trailing white space issue. ipv6: disable GSO on sockets hitting dst_allfrag tg3: use netdev_alloc_frag() API net: napi_frags_skb() is static ppp: avoid false drop_monitor false positives ipv6: bool/const conversions phase2 ipx: Remove spurious NULL checking in ipx_ioctl(). ...
Diffstat (limited to 'include/net/ip_vs.h')
-rw-r--r--include/net/ip_vs.h105
1 files changed, 87 insertions, 18 deletions
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 72522f08737..d6146b4811c 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -10,7 +10,6 @@
#include <asm/types.h> /* for __uXX types */
-#include <linux/sysctl.h> /* for ctl_path */
#include <linux/list.h> /* for struct list_head */
#include <linux/spinlock.h> /* for struct rwlock_t */
#include <linux/atomic.h> /* for struct atomic_t */
@@ -505,6 +504,7 @@ struct ip_vs_conn {
* state transition triggerd
* synchronization
*/
+ unsigned long sync_endtime; /* jiffies + sent_retries */
/* Control members */
struct ip_vs_conn *control; /* Master control connection */
@@ -580,8 +580,8 @@ struct ip_vs_service_user_kern {
/* virtual service options */
char *sched_name;
char *pe_name;
- unsigned flags; /* virtual service flags */
- unsigned timeout; /* persistent timeout in sec */
+ unsigned int flags; /* virtual service flags */
+ unsigned int timeout; /* persistent timeout in sec */
u32 netmask; /* persistent netmask */
};
@@ -592,7 +592,7 @@ struct ip_vs_dest_user_kern {
u16 port;
/* real server options */
- unsigned conn_flags; /* connection flags */
+ unsigned int conn_flags; /* connection flags */
int weight; /* destination weight */
/* thresholds for active connections */
@@ -616,8 +616,8 @@ struct ip_vs_service {
union nf_inet_addr addr; /* IP address for virtual service */
__be16 port; /* port number for the service */
__u32 fwmark; /* firewall mark of the service */
- unsigned flags; /* service status flags */
- unsigned timeout; /* persistent timeout in ticks */
+ unsigned int flags; /* service status flags */
+ unsigned int timeout; /* persistent timeout in ticks */
__be32 netmask; /* grouping granularity */
struct net *net;
@@ -647,7 +647,7 @@ struct ip_vs_dest {
u16 af; /* address family */
__be16 port; /* port number of the server */
union nf_inet_addr addr; /* IP address of the server */
- volatile unsigned flags; /* dest status flags */
+ volatile unsigned int flags; /* dest status flags */
atomic_t conn_flags; /* flags to copy to conn */
atomic_t weight; /* server weight */
@@ -784,6 +784,16 @@ struct ip_vs_app {
void (*timeout_change)(struct ip_vs_app *app, int flags);
};
+struct ipvs_master_sync_state {
+ struct list_head sync_queue;
+ struct ip_vs_sync_buff *sync_buff;
+ int sync_queue_len;
+ unsigned int sync_queue_delay;
+ struct task_struct *master_thread;
+ struct delayed_work master_wakeup_work;
+ struct netns_ipvs *ipvs;
+};
+
/* IPVS in network namespace */
struct netns_ipvs {
int gen; /* Generation */
@@ -870,10 +880,15 @@ struct netns_ipvs {
#endif
int sysctl_snat_reroute;
int sysctl_sync_ver;
+ int sysctl_sync_ports;
+ int sysctl_sync_qlen_max;
+ int sysctl_sync_sock_size;
int sysctl_cache_bypass;
int sysctl_expire_nodest_conn;
int sysctl_expire_quiescent_template;
int sysctl_sync_threshold[2];
+ unsigned int sysctl_sync_refresh_period;
+ int sysctl_sync_retries;
int sysctl_nat_icmp_send;
/* ip_vs_lblc */
@@ -889,13 +904,11 @@ struct netns_ipvs {
spinlock_t est_lock;
struct timer_list est_timer; /* Estimation timer */
/* ip_vs_sync */
- struct list_head sync_queue;
spinlock_t sync_lock;
- struct ip_vs_sync_buff *sync_buff;
+ struct ipvs_master_sync_state *ms;
spinlock_t sync_buff_lock;
- struct sockaddr_in sync_mcast_addr;
- struct task_struct *master_thread;
- struct task_struct *backup_thread;
+ struct task_struct **backup_threads;
+ int threads_mask;
int send_mesg_maxlen;
int recv_mesg_maxlen;
volatile int sync_state;
@@ -912,6 +925,14 @@ struct netns_ipvs {
#define DEFAULT_SYNC_THRESHOLD 3
#define DEFAULT_SYNC_PERIOD 50
#define DEFAULT_SYNC_VER 1
+#define DEFAULT_SYNC_REFRESH_PERIOD (0U * HZ)
+#define DEFAULT_SYNC_RETRIES 0
+#define IPVS_SYNC_WAKEUP_RATE 8
+#define IPVS_SYNC_QLEN_MAX (IPVS_SYNC_WAKEUP_RATE * 4)
+#define IPVS_SYNC_SEND_DELAY (HZ / 50)
+#define IPVS_SYNC_CHECK_PERIOD HZ
+#define IPVS_SYNC_FLUSH_TIME (HZ * 2)
+#define IPVS_SYNC_PORTS_MAX (1 << 6)
#ifdef CONFIG_SYSCTL
@@ -922,7 +943,17 @@ static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
static inline int sysctl_sync_period(struct netns_ipvs *ipvs)
{
- return ipvs->sysctl_sync_threshold[1];
+ return ACCESS_ONCE(ipvs->sysctl_sync_threshold[1]);
+}
+
+static inline unsigned int sysctl_sync_refresh_period(struct netns_ipvs *ipvs)
+{
+ return ACCESS_ONCE(ipvs->sysctl_sync_refresh_period);
+}
+
+static inline int sysctl_sync_retries(struct netns_ipvs *ipvs)
+{
+ return ipvs->sysctl_sync_retries;
}
static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
@@ -930,6 +961,21 @@ static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
return ipvs->sysctl_sync_ver;
}
+static inline int sysctl_sync_ports(struct netns_ipvs *ipvs)
+{
+ return ACCESS_ONCE(ipvs->sysctl_sync_ports);
+}
+
+static inline int sysctl_sync_qlen_max(struct netns_ipvs *ipvs)
+{
+ return ipvs->sysctl_sync_qlen_max;
+}
+
+static inline int sysctl_sync_sock_size(struct netns_ipvs *ipvs)
+{
+ return ipvs->sysctl_sync_sock_size;
+}
+
#else
static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
@@ -942,18 +988,43 @@ static inline int sysctl_sync_period(struct netns_ipvs *ipvs)
return DEFAULT_SYNC_PERIOD;
}
+static inline unsigned int sysctl_sync_refresh_period(struct netns_ipvs *ipvs)
+{
+ return DEFAULT_SYNC_REFRESH_PERIOD;
+}
+
+static inline int sysctl_sync_retries(struct netns_ipvs *ipvs)
+{
+ return DEFAULT_SYNC_RETRIES & 3;
+}
+
static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
{
return DEFAULT_SYNC_VER;
}
+static inline int sysctl_sync_ports(struct netns_ipvs *ipvs)
+{
+ return 1;
+}
+
+static inline int sysctl_sync_qlen_max(struct netns_ipvs *ipvs)
+{
+ return IPVS_SYNC_QLEN_MAX;
+}
+
+static inline int sysctl_sync_sock_size(struct netns_ipvs *ipvs)
+{
+ return 0;
+}
+
#endif
/*
* IPVS core functions
* (from ip_vs_core.c)
*/
-extern const char *ip_vs_proto_name(unsigned proto);
+extern const char *ip_vs_proto_name(unsigned int proto);
extern void ip_vs_init_hash_table(struct list_head *table, int rows);
#define IP_VS_INIT_HASH_TABLE(t) ip_vs_init_hash_table((t), ARRAY_SIZE((t)))
@@ -1014,7 +1085,7 @@ extern void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport);
struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p,
const union nf_inet_addr *daddr,
- __be16 dport, unsigned flags,
+ __be16 dport, unsigned int flags,
struct ip_vs_dest *dest, __u32 fwmark);
extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp);
@@ -1184,10 +1255,8 @@ extern void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg);
* IPVS control data and functions (from ip_vs_ctl.c)
*/
extern struct ip_vs_stats ip_vs_stats;
-extern const struct ctl_path net_vs_ctl_path[];
extern int sysctl_ip_vs_sync_ver;
-extern void ip_vs_sync_switch_mode(struct net *net, int mode);
extern struct ip_vs_service *
ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
const union nf_inet_addr *vaddr, __be16 vport);
@@ -1221,7 +1290,7 @@ extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
extern int start_sync_thread(struct net *net, int state, char *mcast_ifn,
__u8 syncid);
extern int stop_sync_thread(struct net *net, int state);
-extern void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp);
+extern void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts);
/*