diff options
Diffstat (limited to 'net')
31 files changed, 630 insertions, 830 deletions
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 05a4888dede..b3f627ac4ed 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -961,7 +961,7 @@ static void nl_fib_input(struct sk_buff *skb) nlmsg_len(nlh) < sizeof(*frn)) return; - skb = skb_clone(skb, GFP_KERNEL); + skb = netlink_skb_clone(skb, GFP_KERNEL); if (skb == NULL) return; nlh = nlmsg_hdr(skb); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 8f6cb7a87cd..d5dbca5ecf6 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -169,7 +169,8 @@ static void free_nh_exceptions(struct fib_nh *nh) next = rcu_dereference_protected(fnhe->fnhe_next, 1); - rt_fibinfo_free(&fnhe->fnhe_rth); + rt_fibinfo_free(&fnhe->fnhe_rth_input); + rt_fibinfo_free(&fnhe->fnhe_rth_output); kfree(fnhe); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f3fa42eac46..a9a54a23683 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -565,10 +565,25 @@ static inline void rt_free(struct rtable *rt) static DEFINE_SPINLOCK(fnhe_lock); +static void fnhe_flush_routes(struct fib_nh_exception *fnhe) +{ + struct rtable *rt; + + rt = rcu_dereference(fnhe->fnhe_rth_input); + if (rt) { + RCU_INIT_POINTER(fnhe->fnhe_rth_input, NULL); + rt_free(rt); + } + rt = rcu_dereference(fnhe->fnhe_rth_output); + if (rt) { + RCU_INIT_POINTER(fnhe->fnhe_rth_output, NULL); + rt_free(rt); + } +} + static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) { struct fib_nh_exception *fnhe, *oldest; - struct rtable *orig; oldest = rcu_dereference(hash->chain); for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe; @@ -576,11 +591,7 @@ static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) oldest = fnhe; } - orig = rcu_dereference(oldest->fnhe_rth); - if (orig) { - RCU_INIT_POINTER(oldest->fnhe_rth, NULL); - rt_free(orig); - } + fnhe_flush_routes(oldest); return oldest; } @@ -644,7 +655,10 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, fnhe->fnhe_expires = max(1UL, expires); } /* Update all cached dsts too */ - rt = rcu_dereference(fnhe->fnhe_rth); + rt = rcu_dereference(fnhe->fnhe_rth_input); + if (rt) + fill_route_from_fnhe(rt, fnhe); + rt = rcu_dereference(fnhe->fnhe_rth_output); if (rt) fill_route_from_fnhe(rt, fnhe); } else { @@ -668,6 +682,10 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, * stale, so anyone caching it rechecks if this exception * applies to them. */ + rt = rcu_dereference(nh->nh_rth_input); + if (rt) + rt->dst.obsolete = DST_OBSOLETE_KILL; + for_each_possible_cpu(i) { struct rtable __rcu **prt; prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i); @@ -1242,25 +1260,36 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, spin_lock_bh(&fnhe_lock); if (daddr == fnhe->fnhe_daddr) { + struct rtable __rcu **porig; + struct rtable *orig; int genid = fnhe_genid(dev_net(rt->dst.dev)); - struct rtable *orig = rcu_dereference(fnhe->fnhe_rth); + + if (rt_is_input_route(rt)) + porig = &fnhe->fnhe_rth_input; + else + porig = &fnhe->fnhe_rth_output; + orig = rcu_dereference(*porig); if (fnhe->fnhe_genid != genid) { fnhe->fnhe_genid = genid; fnhe->fnhe_gw = 0; fnhe->fnhe_pmtu = 0; fnhe->fnhe_expires = 0; + fnhe_flush_routes(fnhe); + orig = NULL; } fill_route_from_fnhe(rt, fnhe); if (!rt->rt_gateway) rt->rt_gateway = daddr; - rcu_assign_pointer(fnhe->fnhe_rth, rt); - if (orig) - rt_free(orig); + if (!(rt->dst.flags & DST_NOCACHE)) { + rcu_assign_pointer(*porig, rt); + if (orig) + rt_free(orig); + ret = true; + } fnhe->fnhe_stamp = jiffies; - ret = true; } spin_unlock_bh(&fnhe_lock); @@ -1492,6 +1521,7 @@ static int __mkroute_input(struct sk_buff *skb, struct in_device *in_dev, __be32 daddr, __be32 saddr, u32 tos) { + struct fib_nh_exception *fnhe; struct rtable *rth; int err; struct in_device *out_dev; @@ -1538,8 +1568,13 @@ static int __mkroute_input(struct sk_buff *skb, } } + fnhe = find_exception(&FIB_RES_NH(*res), daddr); if (do_cache) { - rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); + if (fnhe != NULL) + rth = rcu_dereference(fnhe->fnhe_rth_input); + else + rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); + if (rt_cache_valid(rth)) { skb_dst_set_noref(skb, &rth->dst); goto out; @@ -1567,7 +1602,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->dst.input = ip_forward; rth->dst.output = ip_output; - rt_set_nexthop(rth, daddr, res, NULL, res->fi, res->type, itag); + rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag); skb_dst_set(skb, &rth->dst); out: err = 0; @@ -1882,7 +1917,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, fnhe = find_exception(nh, fl4->daddr); if (fnhe) - prth = &fnhe->fnhe_rth; + prth = &fnhe->fnhe_rth_output; else { if (unlikely(fl4->flowi4_flags & FLOWI_FLAG_KNOWN_NH && diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4e4cc1fc26d..12dd2fec045 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3277,6 +3277,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) { struct net_device *dev = ifp->idev->dev; struct in6_addr lladdr; + bool send_rs, send_mld; addrconf_del_dad_timer(ifp); @@ -3290,20 +3291,32 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) router advertisements, start sending router solicitations. */ - if (ipv6_accept_ra(ifp->idev) && - ifp->idev->cnf.rtr_solicits > 0 && - (dev->flags&IFF_LOOPBACK) == 0 && - (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) { + read_lock_bh(&ifp->idev->lock); + spin_lock(&ifp->lock); + send_mld = ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL && + ifp->idev->valid_ll_addr_cnt == 1; + send_rs = send_mld && + ipv6_accept_ra(ifp->idev) && + ifp->idev->cnf.rtr_solicits > 0 && + (dev->flags&IFF_LOOPBACK) == 0; + spin_unlock(&ifp->lock); + read_unlock_bh(&ifp->idev->lock); + + /* While dad is in progress mld report's source address is in6_addrany. + * Resend with proper ll now. + */ + if (send_mld) + ipv6_mc_dad_complete(ifp->idev); + + if (send_rs) { /* * If a host as already performed a random delay * [...] as part of DAD [...] there is no need * to delay again before sending the first RS */ - if (!ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE)) - ndisc_send_rs(dev, &lladdr, - &in6addr_linklocal_allrouters); - else + if (ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE)) return; + ndisc_send_rs(dev, &lladdr, &in6addr_linklocal_allrouters); write_lock_bh(&ifp->idev->lock); spin_lock(&ifp->lock); @@ -4576,6 +4589,19 @@ errout: rtnl_set_sk_err(net, RTNLGRP_IPV6_PREFIX, err); } +static void update_valid_ll_addr_cnt(struct inet6_ifaddr *ifp, int count) +{ + write_lock_bh(&ifp->idev->lock); + spin_lock(&ifp->lock); + if (((ifp->flags & (IFA_F_PERMANENT|IFA_F_TENTATIVE|IFA_F_OPTIMISTIC| + IFA_F_DADFAILED)) == IFA_F_PERMANENT) && + (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) + ifp->idev->valid_ll_addr_cnt += count; + WARN_ON(ifp->idev->valid_ll_addr_cnt < 0); + spin_unlock(&ifp->lock); + write_unlock_bh(&ifp->idev->lock); +} + static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) { struct net *net = dev_net(ifp->idev->dev); @@ -4584,6 +4610,8 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) switch (event) { case RTM_NEWADDR: + update_valid_ll_addr_cnt(ifp, 1); + /* * If the address was optimistic * we inserted the route at the start of @@ -4599,6 +4627,8 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) ifp->idev->dev, 0, 0); break; case RTM_DELADDR: + update_valid_ll_addr_cnt(ifp, -1); + if (ifp->idev->cnf.forwarding) addrconf_leave_anycast(ifp); addrconf_leave_solict(ifp->idev, &ifp->addr); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 72c8bfe06bb..502c877cbf1 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -999,6 +999,14 @@ static void mld_ifc_start_timer(struct inet6_dev *idev, int delay) in6_dev_hold(idev); } +static void mld_dad_start_timer(struct inet6_dev *idev, int delay) +{ + int tv = net_random() % delay; + + if (!mod_timer(&idev->mc_dad_timer, jiffies+tv+2)) + in6_dev_hold(idev); +} + /* * IGMP handling (alias multicast ICMPv6 messages) */ @@ -1815,6 +1823,46 @@ err_out: goto out; } +static void mld_resend_report(struct inet6_dev *idev) +{ + if (MLD_V1_SEEN(idev)) { + struct ifmcaddr6 *mcaddr; + read_lock_bh(&idev->lock); + for (mcaddr = idev->mc_list; mcaddr; mcaddr = mcaddr->next) { + if (!(mcaddr->mca_flags & MAF_NOREPORT)) + igmp6_send(&mcaddr->mca_addr, idev->dev, + ICMPV6_MGM_REPORT); + } + read_unlock_bh(&idev->lock); + } else { + mld_send_report(idev, NULL); + } +} + +void ipv6_mc_dad_complete(struct inet6_dev *idev) +{ + idev->mc_dad_count = idev->mc_qrv; + if (idev->mc_dad_count) { + mld_resend_report(idev); + idev->mc_dad_count--; + if (idev->mc_dad_count) + mld_dad_start_timer(idev, idev->mc_maxdelay); + } +} + +static void mld_dad_timer_expire(unsigned long data) +{ + struct inet6_dev *idev = (struct inet6_dev *)data; + + mld_resend_report(idev); + if (idev->mc_dad_count) { + idev->mc_dad_count--; + if (idev->mc_dad_count) + mld_dad_start_timer(idev, idev->mc_maxdelay); + } + __in6_dev_put(idev); +} + static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode, const struct in6_addr *psfsrc) { @@ -2232,6 +2280,8 @@ void ipv6_mc_down(struct inet6_dev *idev) idev->mc_gq_running = 0; if (del_timer(&idev->mc_gq_timer)) __in6_dev_put(idev); + if (del_timer(&idev->mc_dad_timer)) + __in6_dev_put(idev); for (i = idev->mc_list; i; i=i->next) igmp6_group_dropped(i); @@ -2268,6 +2318,8 @@ void ipv6_mc_init_dev(struct inet6_dev *idev) idev->mc_ifc_count = 0; setup_timer(&idev->mc_ifc_timer, mld_ifc_timer_expire, (unsigned long)idev); + setup_timer(&idev->mc_dad_timer, mld_dad_timer_expire, + (unsigned long)idev); idev->mc_qrv = MLD_QRV_DEFAULT; idev->mc_maxdelay = IGMP6_UNSOLICITED_IVAL; idev->mc_v1_seen = 0; diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index c8c52a98590..4c8e5c0aa1a 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -1231,6 +1231,18 @@ void ip_vs_random_dropentry(struct net *net) default: continue; } + } else if (cp->protocol == IPPROTO_SCTP) { + switch (cp->state) { + case IP_VS_SCTP_S_INIT1: + case IP_VS_SCTP_S_INIT: + break; + case IP_VS_SCTP_S_ESTABLISHED: + if (todrop_entry(cp)) + break; + continue; + default: + continue; + } } else { if (!todrop_entry(cp)) continue; diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 05565d2b3a6..e9b0330f220 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -305,7 +305,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, * return *ignored=0 i.e. ICMP and NF_DROP */ sched = rcu_dereference(svc->scheduler); - dest = sched->schedule(svc, skb); + dest = sched->schedule(svc, skb, iph); if (!dest) { IP_VS_DBG(1, "p-schedule: no dest found.\n"); kfree(param.pe_data); @@ -452,7 +452,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, } sched = rcu_dereference(svc->scheduler); - dest = sched->schedule(svc, skb); + dest = sched->schedule(svc, skb, iph); if (dest == NULL) { IP_VS_DBG(1, "Schedule: no dest found.\n"); return NULL; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 47e510819f5..c8148e48738 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1715,6 +1715,12 @@ static struct ctl_table vs_vars[] = { .proc_handler = &proc_do_sync_ports, }, { + .procname = "sync_persist_mode", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { .procname = "sync_qlen_max", .maxlen = sizeof(unsigned long), .mode = 0644, @@ -1739,6 +1745,18 @@ static struct ctl_table vs_vars[] = { .proc_handler = proc_dointvec, }, { + .procname = "sloppy_tcp", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "sloppy_sctp", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { .procname = "expire_quiescent_template", .maxlen = sizeof(int), .mode = 0644, @@ -3717,12 +3735,15 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net) tbl[idx++].data = &ipvs->sysctl_sync_ver; ipvs->sysctl_sync_ports = 1; tbl[idx++].data = &ipvs->sysctl_sync_ports; + tbl[idx++].data = &ipvs->sysctl_sync_persist_mode; ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32; tbl[idx++].data = &ipvs->sysctl_sync_qlen_max; ipvs->sysctl_sync_sock_size = 0; tbl[idx++].data = &ipvs->sysctl_sync_sock_size; tbl[idx++].data = &ipvs->sysctl_cache_bypass; tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn; + tbl[idx++].data = &ipvs->sysctl_sloppy_tcp; + tbl[idx++].data = &ipvs->sysctl_sloppy_sctp; tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template; ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD; ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD; diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c index ccab120df45..c3b84546ea9 100644 --- a/net/netfilter/ipvs/ip_vs_dh.c +++ b/net/netfilter/ipvs/ip_vs_dh.c @@ -214,18 +214,16 @@ static inline int is_overloaded(struct ip_vs_dest *dest) * Destination hashing scheduling */ static struct ip_vs_dest * -ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest; struct ip_vs_dh_state *s; - struct ip_vs_iphdr iph; - - ip_vs_fill_iph_addr_only(svc->af, skb, &iph); IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); s = (struct ip_vs_dh_state *) svc->sched_data; - dest = ip_vs_dh_get(svc->af, s, &iph.daddr); + dest = ip_vs_dh_get(svc->af, s, &iph->daddr); if (!dest || !(dest->flags & IP_VS_DEST_F_AVAILABLE) || atomic_read(&dest->weight) <= 0 @@ -235,7 +233,7 @@ ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) } IP_VS_DBG_BUF(6, "DH: destination IP address %s --> server %s:%d\n", - IP_VS_DBG_ADDR(svc->af, &iph.daddr), + IP_VS_DBG_ADDR(svc->af, &iph->daddr), IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port)); diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 44595b8ae37..1383b0eadc0 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -487,19 +487,17 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc) * Locality-Based (weighted) Least-Connection scheduling */ static struct ip_vs_dest * -ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) { struct ip_vs_lblc_table *tbl = svc->sched_data; - struct ip_vs_iphdr iph; struct ip_vs_dest *dest = NULL; struct ip_vs_lblc_entry *en; - ip_vs_fill_iph_addr_only(svc->af, skb, &iph); - IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); /* First look in our cache */ - en = ip_vs_lblc_get(svc->af, tbl, &iph.daddr); + en = ip_vs_lblc_get(svc->af, tbl, &iph->daddr); if (en) { /* We only hold a read lock, but this is atomic */ en->lastuse = jiffies; @@ -529,12 +527,12 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) /* If we fail to create a cache entry, we'll just use the valid dest */ spin_lock_bh(&svc->sched_lock); if (!tbl->dead) - ip_vs_lblc_new(tbl, &iph.daddr, dest); + ip_vs_lblc_new(tbl, &iph->daddr, dest); spin_unlock_bh(&svc->sched_lock); out: IP_VS_DBG_BUF(6, "LBLC: destination IP address %s --> server %s:%d\n", - IP_VS_DBG_ADDR(svc->af, &iph.daddr), + IP_VS_DBG_ADDR(svc->af, &iph->daddr), IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port)); return dest; diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 876937db0bf..3cd85b2fc67 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -655,19 +655,17 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc) * Locality-Based (weighted) Least-Connection scheduling */ static struct ip_vs_dest * -ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) { struct ip_vs_lblcr_table *tbl = svc->sched_data; - struct ip_vs_iphdr iph; struct ip_vs_dest *dest; struct ip_vs_lblcr_entry *en; - ip_vs_fill_iph_addr_only(svc->af, skb, &iph); - IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); /* First look in our cache */ - en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr); + en = ip_vs_lblcr_get(svc->af, tbl, &iph->daddr); if (en) { en->lastuse = jiffies; @@ -718,12 +716,12 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) /* If we fail to create a cache entry, we'll just use the valid dest */ spin_lock_bh(&svc->sched_lock); if (!tbl->dead) - ip_vs_lblcr_new(tbl, &iph.daddr, dest); + ip_vs_lblcr_new(tbl, &iph->daddr, dest); spin_unlock_bh(&svc->sched_lock); out: IP_VS_DBG_BUF(6, "LBLCR: destination IP address %s --> server %s:%d\n", - IP_VS_DBG_ADDR(svc->af, &iph.daddr), + IP_VS_DBG_ADDR(svc->af, &iph->daddr), IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port)); return dest; diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c index 5128e338a74..2bdcb1cf212 100644 --- a/net/netfilter/ipvs/ip_vs_lc.c +++ b/net/netfilter/ipvs/ip_vs_lc.c @@ -26,7 +26,8 @@ * Least Connection scheduling */ static struct ip_vs_dest * -ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest, *least = NULL; unsigned int loh = 0, doh; diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c index 646cfd4baa7..d8d9860934f 100644 --- a/net/netfilter/ipvs/ip_vs_nq.c +++ b/net/netfilter/ipvs/ip_vs_nq.c @@ -55,7 +55,8 @@ ip_vs_nq_dest_overhead(struct ip_vs_dest *dest) * Weighted Least Connection scheduling */ static struct ip_vs_dest * -ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest, *least = NULL; unsigned int loh = 0, doh; diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 86464881cd2..3c0da872803 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -15,6 +15,7 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, { struct net *net; struct ip_vs_service *svc; + struct netns_ipvs *ipvs; sctp_chunkhdr_t _schunkh, *sch; sctp_sctphdr_t *sh, _sctph; @@ -27,13 +28,14 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, if (sch == NULL) return 0; net = skb_net(skb); + ipvs = net_ipvs(net); rcu_read_lock(); - if ((sch->type == SCTP_CID_INIT) && + if ((sch->type == SCTP_CID_INIT || sysctl_sloppy_sctp(ipvs)) && (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, &iph->daddr, sh->dest))) { int ignored; - if (ip_vs_todrop(net_ipvs(net))) { + if (ip_vs_todrop(ipvs)) { /* * It seems that we are very loaded. * We have to drop this packet :( @@ -183,710 +185,159 @@ sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) return 1; } -struct ipvs_sctp_nextstate { - int next_state; -}; enum ipvs_sctp_event_t { - IP_VS_SCTP_EVE_DATA_CLI, - IP_VS_SCTP_EVE_DATA_SER, - IP_VS_SCTP_EVE_INIT_CLI, - IP_VS_SCTP_EVE_INIT_SER, - IP_VS_SCTP_EVE_INIT_ACK_CLI, - IP_VS_SCTP_EVE_INIT_ACK_SER, - IP_VS_SCTP_EVE_COOKIE_ECHO_CLI, - IP_VS_SCTP_EVE_COOKIE_ECHO_SER, - IP_VS_SCTP_EVE_COOKIE_ACK_CLI, - IP_VS_SCTP_EVE_COOKIE_ACK_SER, - IP_VS_SCTP_EVE_ABORT_CLI, - IP_VS_SCTP_EVE__ABORT_SER, - IP_VS_SCTP_EVE_SHUT_CLI, - IP_VS_SCTP_EVE_SHUT_SER, - IP_VS_SCTP_EVE_SHUT_ACK_CLI, - IP_VS_SCTP_EVE_SHUT_ACK_SER, - IP_VS_SCTP_EVE_SHUT_COM_CLI, - IP_VS_SCTP_EVE_SHUT_COM_SER, - IP_VS_SCTP_EVE_LAST + IP_VS_SCTP_DATA = 0, /* DATA, SACK, HEARTBEATs */ + IP_VS_SCTP_INIT, + IP_VS_SCTP_INIT_ACK, + IP_VS_SCTP_COOKIE_ECHO, + IP_VS_SCTP_COOKIE_ACK, + IP_VS_SCTP_SHUTDOWN, + IP_VS_SCTP_SHUTDOWN_ACK, + IP_VS_SCTP_SHUTDOWN_COMPLETE, + IP_VS_SCTP_ERROR, + IP_VS_SCTP_ABORT, + IP_VS_SCTP_EVENT_LAST }; -static enum ipvs_sctp_event_t sctp_events[256] = { - IP_VS_SCTP_EVE_DATA_CLI, - IP_VS_SCTP_EVE_INIT_CLI, - IP_VS_SCTP_EVE_INIT_ACK_CLI, - IP_VS_SCTP_EVE_DATA_CLI, - IP_VS_SCTP_EVE_DATA_CLI, - IP_VS_SCTP_EVE_DATA_CLI, - IP_VS_SCTP_EVE_ABORT_CLI, - IP_VS_SCTP_EVE_SHUT_CLI, - IP_VS_SCTP_EVE_SHUT_ACK_CLI, - IP_VS_SCTP_EVE_DATA_CLI, - IP_VS_SCTP_EVE_COOKIE_ECHO_CLI, - IP_VS_SCTP_EVE_COOKIE_ACK_CLI, - IP_VS_SCTP_EVE_DATA_CLI, - IP_VS_SCTP_EVE_DATA_CLI, - IP_VS_SCTP_EVE_SHUT_COM_CLI, +/* RFC 2960, 3.2 Chunk Field Descriptions */ +static __u8 sctp_events[] = { + [SCTP_CID_DATA] = IP_VS_SCTP_DATA, + [SCTP_CID_INIT] = IP_VS_SCTP_INIT, + [SCTP_CID_INIT_ACK] = IP_VS_SCTP_INIT_ACK, + [SCTP_CID_SACK] = IP_VS_SCTP_DATA, + [SCTP_CID_HEARTBEAT] = IP_VS_SCTP_DATA, + [SCTP_CID_HEARTBEAT_ACK] = IP_VS_SCTP_DATA, + [SCTP_CID_ABORT] = IP_VS_SCTP_ABORT, + [SCTP_CID_SHUTDOWN] = IP_VS_SCTP_SHUTDOWN, + [SCTP_CID_SHUTDOWN_ACK] = IP_VS_SCTP_SHUTDOWN_ACK, + [SCTP_CID_ERROR] = IP_VS_SCTP_ERROR, + [SCTP_CID_COOKIE_ECHO] = IP_VS_SCTP_COOKIE_ECHO, + [SCTP_CID_COOKIE_ACK] = IP_VS_SCTP_COOKIE_ACK, + [SCTP_CID_ECN_ECNE] = IP_VS_SCTP_DATA, + [SCTP_CID_ECN_CWR] = IP_VS_SCTP_DATA, + [SCTP_CID_SHUTDOWN_COMPLETE] = IP_VS_SCTP_SHUTDOWN_COMPLETE, }; -static struct ipvs_sctp_nextstate - sctp_states_table[IP_VS_SCTP_S_LAST][IP_VS_SCTP_EVE_LAST] = { - /* - * STATE : IP_VS_SCTP_S_NONE - */ - /*next state *//*event */ - {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }, - }, - /* - * STATE : IP_VS_SCTP_S_INIT_CLI - * Cient sent INIT and is waiting for reply from server(In ECHO_WAIT) - */ - {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - {IP_VS_SCTP_S_INIT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ECHO_CLI */ }, - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_ECHO_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - /* - * State : IP_VS_SCTP_S_INIT_SER - * Server sent INIT and waiting for INIT ACK from the client - */ - {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - /* - * State : IP_VS_SCTP_S_INIT_ACK_CLI - * Client sent INIT ACK and waiting for ECHO from the server - */ - {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, - /* - * We have got an INIT from client. From the spec.âUpon receipt of - * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with - * an INIT ACK using the same parameters it sent in its original - * INIT chunk (including its Initiate Tag, unchangedâ). - */ - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - /* - * INIT_ACK has been resent by the client, let us stay is in - * the same state - */ - {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - /* - * INIT_ACK sent by the server, close the connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - /* - * ECHO by client, it should not happen, close the connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - /* - * ECHO by server, this is what we are expecting, move to ECHO_SER - */ - {IP_VS_SCTP_S_ECHO_SER /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - /* - * COOKIE ACK from client, it should not happen, close the connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - /* - * Unexpected COOKIE ACK from server, staty in the same state - */ - {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - /* - * State : IP_VS_SCTP_S_INIT_ACK_SER - * Server sent INIT ACK and waiting for ECHO from the client - */ - {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, - /* - * We have got an INIT from client. From the spec.âUpon receipt of - * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with - * an INIT ACK using the same parameters it sent in its original - * INIT chunk (including its Initiate Tag, unchangedâ). - */ - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - /* - * Unexpected INIT_ACK by the client, let us close the connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - /* - * INIT_ACK resent by the server, let us move to same state - */ - {IP_VS_SCTP_S_INIT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - /* - * Client send the ECHO, this is what we are expecting, - * move to ECHO_CLI - */ - {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - /* - * ECHO received from the server, Not sure what to do, - * let us close it - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - /* - * COOKIE ACK from client, let us stay in the same state - */ - {IP_VS_SCTP_S_INIT_ACK_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - /* - * COOKIE ACK from server, hmm... this should not happen, lets close - * the connection. - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - /* - * State : IP_VS_SCTP_S_ECHO_CLI - * Cient sent ECHO and waiting COOKEI ACK from the Server - */ - {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, - /* - * We have got an INIT from client. From the spec.âUpon receipt of - * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with - * an INIT ACK using the same parameters it sent in its original - * INIT chunk (including its Initiate Tag, unchangedâ). - */ - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - /* - * INIT_ACK has been by the client, let us close the connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - /* - * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, - * âIf an INIT ACK is received by an endpoint in any state other - * than the COOKIE-WAIT state, the endpoint should discard the - * INIT ACK chunkâ. Stay in the same state - */ - {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - /* - * Client resent the ECHO, let us stay in the same state - */ - {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - /* - * ECHO received from the server, Not sure what to do, - * let us close it - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - /* - * COOKIE ACK from client, this shoud not happen, let's close the - * connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - /* - * COOKIE ACK from server, this is what we are awaiting,lets move to - * ESTABLISHED. - */ - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - /* - * State : IP_VS_SCTP_S_ECHO_SER - * Server sent ECHO and waiting COOKEI ACK from the client - */ - {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, - /* - * We have got an INIT from client. From the spec.âUpon receipt of - * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with - * an INIT ACK using the same parameters it sent in its original - * INIT chunk (including its Initiate Tag, unchangedâ). - */ - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - /* - * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, - * âIf an INIT ACK is received by an endpoint in any state other - * than the COOKIE-WAIT state, the endpoint should discard the - * INIT ACK chunkâ. Stay in the same state - */ - {IP_VS_SCTP_S_ECHO_SER /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - /* - * INIT_ACK has been by the server, let us close the connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - /* - * Client sent the ECHO, not sure what to do, let's close the - * connection. - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - /* - * ECHO resent by the server, stay in the same state - */ - {IP_VS_SCTP_S_ECHO_SER /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - /* - * COOKIE ACK from client, this is what we are expecting, let's move - * to ESTABLISHED. - */ - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - /* - * COOKIE ACK from server, this should not happen, lets close the - * connection. - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - /* - * State : IP_VS_SCTP_S_ESTABLISHED - * Association established - */ - {{IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_DATA_SER */ }, - /* - * We have got an INIT from client. From the spec.âUpon receipt of - * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with - * an INIT ACK using the same parameters it sent in its original - * INIT chunk (including its Initiate Tag, unchangedâ). - */ - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - /* - * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, - * âIf an INIT ACK is received by an endpoint in any state other - * than the COOKIE-WAIT state, the endpoint should discard the - * INIT ACK chunkâ. Stay in the same state - */ - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - /* - * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the - * peer and peer shall move to the ESTABISHED. if it doesn't handle - * it will send ERROR chunk. So, stay in the same state - */ - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - /* - * COOKIE ACK from client, not sure what to do stay in the same state - */ - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - /* - * SHUTDOWN from the client, move to SHUDDOWN_CLI - */ - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - /* - * SHUTDOWN from the server, move to SHUTDOWN_SER - */ - {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ }, - /* - * client sent SHUDTDOWN_ACK, this should not happen, let's close - * the connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - /* - * State : IP_VS_SCTP_S_SHUT_CLI - * SHUTDOWN sent from the client, waitinf for SHUT ACK from the server - */ - /* - * We received the data chuck, keep the state unchanged. I assume - * that still data chuncks can be received by both the peers in - * SHUDOWN state - */ - - {{IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_DATA_SER */ }, - /* - * We have got an INIT from client. From the spec.âUpon receipt of - * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with - * an INIT ACK using the same parameters it sent in its original - * INIT chunk (including its Initiate Tag, unchangedâ). - */ - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - /* - * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, - * âIf an INIT ACK is received by an endpoint in any state other - * than the COOKIE-WAIT state, the endpoint should discard the - * INIT ACK chunkâ. Stay in the same state - */ - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - /* - * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the - * peer and peer shall move to the ESTABISHED. if it doesn't handle - * it will send ERROR chunk. So, stay in the same state - */ - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - /* - * COOKIE ACK from client, not sure what to do stay in the same state - */ - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - /* - * SHUTDOWN resent from the client, move to SHUDDOWN_CLI - */ - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - /* - * SHUTDOWN from the server, move to SHUTDOWN_SER - */ - {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ }, - /* - * client sent SHUDTDOWN_ACK, this should not happen, let's close - * the connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - /* - * Server sent SHUTDOWN ACK, this is what we are expecting, let's move - * to SHUDOWN_ACK_SER - */ - {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - /* - * SHUTDOWN COM from client, this should not happen, let's close the - * connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - /* - * State : IP_VS_SCTP_S_SHUT_SER - * SHUTDOWN sent from the server, waitinf for SHUTDOWN ACK from client - */ - /* - * We received the data chuck, keep the state unchanged. I assume - * that still data chuncks can be received by both the peers in - * SHUDOWN state - */ - - {{IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_DATA_SER */ }, - /* - * We have got an INIT from client. From the spec.âUpon receipt of - * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with - * an INIT ACK using the same parameters it sent in its original - * INIT chunk (including its Initiate Tag, unchangedâ). - */ - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - /* - * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, - * âIf an INIT ACK is received by an endpoint in any state other - * than the COOKIE-WAIT state, the endpoint should discard the - * INIT ACK chunkâ. Stay in the same state - */ - {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - /* - * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the - * peer and peer shall move to the ESTABISHED. if it doesn't handle - * it will send ERROR chunk. So, stay in the same state - */ - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - /* - * COOKIE ACK from client, not sure what to do stay in the same state - */ - {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - /* - * SHUTDOWN resent from the client, move to SHUDDOWN_CLI - */ - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - /* - * SHUTDOWN resent from the server, move to SHUTDOWN_SER - */ - {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ }, - /* - * client sent SHUDTDOWN_ACK, this is what we are expecting, let's - * move to SHUT_ACK_CLI - */ - {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - /* - * Server sent SHUTDOWN ACK, this should not happen, let's close the - * connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - /* - * SHUTDOWN COM from client, this should not happen, let's close the - * connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - - /* - * State : IP_VS_SCTP_S_SHUT_ACK_CLI - * SHUTDOWN ACK from the client, awaiting for SHUTDOWN COM from server - */ - /* - * We received the data chuck, keep the state unchanged. I assume - * that still data chuncks can be received by both the peers in - * SHUDOWN state - */ - - {{IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_DATA_SER */ }, - /* - * We have got an INIT from client. From the spec.âUpon receipt of - * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with - * an INIT ACK using the same parameters it sent in its original - * INIT chunk (including its Initiate Tag, unchangedâ). - */ - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - /* - * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, - * âIf an INIT ACK is received by an endpoint in any state other - * than the COOKIE-WAIT state, the endpoint should discard the - * INIT ACK chunkâ. Stay in the same state - */ - {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - /* - * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the - * peer and peer shall move to the ESTABISHED. if it doesn't handle - * it will send ERROR chunk. So, stay in the same state - */ - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - /* - * COOKIE ACK from client, not sure what to do stay in the same state - */ - {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - /* - * SHUTDOWN sent from the client, move to SHUDDOWN_CLI - */ - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - /* - * SHUTDOWN sent from the server, move to SHUTDOWN_SER - */ - {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ }, - /* - * client resent SHUDTDOWN_ACK, let's stay in the same state - */ - {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - /* - * Server sent SHUTDOWN ACK, this should not happen, let's close the - * connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - /* - * SHUTDOWN COM from client, this should not happen, let's close the - * connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - /* - * SHUTDOWN COMPLETE from server this is what we are expecting. - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - - /* - * State : IP_VS_SCTP_S_SHUT_ACK_SER - * SHUTDOWN ACK from the server, awaiting for SHUTDOWN COM from client - */ - /* - * We received the data chuck, keep the state unchanged. I assume - * that still data chuncks can be received by both the peers in - * SHUDOWN state - */ +/* SCTP States: + * See RFC 2960, 4. SCTP Association State Diagram + * + * New states (not in diagram): + * - INIT1 state: use shorter timeout for dropped INIT packets + * - REJECTED state: use shorter timeout if INIT is rejected with ABORT + * - INIT, COOKIE_SENT, COOKIE_REPLIED, COOKIE states: for better debugging + * + * The states are as seen in real server. In the diagram, INIT1, INIT, + * COOKIE_SENT and COOKIE_REPLIED processing happens in CLOSED state. + * + * States as per packets from client (C) and server (S): + * + * Setup of client connection: + * IP_VS_SCTP_S_INIT1: First C:INIT sent, wait for S:INIT-ACK + * IP_VS_SCTP_S_INIT: Next C:INIT sent, wait for S:INIT-ACK + * IP_VS_SCTP_S_COOKIE_SENT: S:INIT-ACK sent, wait for C:COOKIE-ECHO + * IP_VS_SCTP_S_COOKIE_REPLIED: C:COOKIE-ECHO sent, wait for S:COOKIE-ACK + * + * Setup of server connection: + * IP_VS_SCTP_S_COOKIE_WAIT: S:INIT sent, wait for C:INIT-ACK + * IP_VS_SCTP_S_COOKIE: C:INIT-ACK sent, wait for S:COOKIE-ECHO + * IP_VS_SCTP_S_COOKIE_ECHOED: S:COOKIE-ECHO sent, wait for C:COOKIE-ACK + */ - {{IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_DATA_SER */ }, - /* - * We have got an INIT from client. From the spec.âUpon receipt of - * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with - * an INIT ACK using the same parameters it sent in its original - * INIT chunk (including its Initiate Tag, unchangedâ). - */ - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - /* - * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, - * âIf an INIT ACK is received by an endpoint in any state other - * than the COOKIE-WAIT state, the endpoint should discard the - * INIT ACK chunkâ. Stay in the same state - */ - {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - /* - * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the - * peer and peer shall move to the ESTABISHED. if it doesn't handle - * it will send ERROR chunk. So, stay in the same state - */ - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - /* - * COOKIE ACK from client, not sure what to do stay in the same state - */ - {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - /* - * SHUTDOWN sent from the client, move to SHUDDOWN_CLI - */ - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - /* - * SHUTDOWN sent from the server, move to SHUTDOWN_SER - */ - {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ }, - /* - * client sent SHUDTDOWN_ACK, this should not happen let's close - * the connection. - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - /* - * Server resent SHUTDOWN ACK, stay in the same state - */ - {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - /* - * SHUTDOWN COM from client, this what we are expecting, let's close - * the connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - /* - * SHUTDOWN COMPLETE from server this should not happen. - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - /* - * State : IP_VS_SCTP_S_CLOSED - */ - {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - } +#define sNO IP_VS_SCTP_S_NONE +#define sI1 IP_VS_SCTP_S_INIT1 +#define sIN IP_VS_SCTP_S_INIT +#define sCS IP_VS_SCTP_S_COOKIE_SENT +#define sCR IP_VS_SCTP_S_COOKIE_REPLIED +#define sCW IP_VS_SCTP_S_COOKIE_WAIT +#define sCO IP_VS_SCTP_S_COOKIE +#define sCE IP_VS_SCTP_S_COOKIE_ECHOED +#define sES IP_VS_SCTP_S_ESTABLISHED +#define sSS IP_VS_SCTP_S_SHUTDOWN_SENT +#define sSR IP_VS_SCTP_S_SHUTDOWN_RECEIVED +#define sSA IP_VS_SCTP_S_SHUTDOWN_ACK_SENT +#define sRJ IP_VS_SCTP_S_REJECTED +#define sCL IP_VS_SCTP_S_CLOSED + +static const __u8 sctp_states + [IP_VS_DIR_LAST][IP_VS_SCTP_EVENT_LAST][IP_VS_SCTP_S_LAST] = { + { /* INPUT */ +/* sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/ +/* d */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* i */{sI1, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN}, +/* i_a */{sCW, sCW, sCW, sCS, sCR, sCO, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* c_e */{sCR, sIN, sIN, sCR, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* c_a */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sES, sES, sSS, sSR, sSA, sRJ, sCL}, +/* s */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL}, +/* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sCL, sSR, sCL, sRJ, sCL}, +/* s_c */{sCL, sCL, sCL, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sCL, sRJ, sCL}, +/* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCL, sES, sSS, sSR, sSA, sRJ, sCL}, +/* ab */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, + }, + { /* OUTPUT */ +/* sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/ +/* d */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* i */{sCW, sCW, sCW, sCW, sCW, sCW, sCW, sCW, sES, sCW, sCW, sCW, sCW, sCW}, +/* i_a */{sCS, sCS, sCS, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* c_e */{sCE, sCE, sCE, sCE, sCE, sCE, sCE, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* c_a */{sES, sES, sES, sES, sES, sES, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL}, +/* s */{sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSR, sSA, sRJ, sCL}, +/* s_a */{sSA, sSA, sSA, sSA, sSA, sCW, sCO, sCE, sES, sSA, sSA, sSA, sRJ, sCL}, +/* s_c */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* err */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* ab */{sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, + }, + { /* INPUT-ONLY */ +/* sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/ +/* d */{sES, sI1, sIN, sCS, sCR, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* i */{sI1, sIN, sIN, sIN, sIN, sIN, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN}, +/* i_a */{sCE, sCE, sCE, sCE, sCE, sCE, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* c_e */{sES, sES, sES, sES, sES, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* c_a */{sES, sI1, sIN, sES, sES, sCW, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL}, +/* s */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL}, +/* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sCL, sCL, sSR, sCL, sRJ, sCL}, +/* s_c */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sCL, sCL, sRJ, sCL}, +/* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* ab */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, + }, }; -/* - * Timeout table[state] - */ +#define IP_VS_SCTP_MAX_RTO ((60 + 1) * HZ) + +/* Timeout table[state] */ static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = { - [IP_VS_SCTP_S_NONE] = 2 * HZ, - [IP_VS_SCTP_S_INIT_CLI] = 1 * 60 * HZ, - [IP_VS_SCTP_S_INIT_SER] = 1 * 60 * HZ, - [IP_VS_SCTP_S_INIT_ACK_CLI] = 1 * 60 * HZ, - [IP_VS_SCTP_S_INIT_ACK_SER] = 1 * 60 * HZ, - [IP_VS_SCTP_S_ECHO_CLI] = 1 * 60 * HZ, - [IP_VS_SCTP_S_ECHO_SER] = 1 * 60 * HZ, - [IP_VS_SCTP_S_ESTABLISHED] = 15 * 60 * HZ, - [IP_VS_SCTP_S_SHUT_CLI] = 1 * 60 * HZ, - [IP_VS_SCTP_S_SHUT_SER] = 1 * 60 * HZ, - [IP_VS_SCTP_S_SHUT_ACK_CLI] = 1 * 60 * HZ, - [IP_VS_SCTP_S_SHUT_ACK_SER] = 1 * 60 * HZ, - [IP_VS_SCTP_S_CLOSED] = 10 * HZ, - [IP_VS_SCTP_S_LAST] = 2 * HZ, + [IP_VS_SCTP_S_NONE] = 2 * HZ, + [IP_VS_SCTP_S_INIT1] = (0 + 3 + 1) * HZ, + [IP_VS_SCTP_S_INIT] = IP_VS_SCTP_MAX_RTO, + [IP_VS_SCTP_S_COOKIE_SENT] = IP_VS_SCTP_MAX_RTO, + [IP_VS_SCTP_S_COOKIE_REPLIED] = IP_VS_SCTP_MAX_RTO, + [IP_VS_SCTP_S_COOKIE_WAIT] = IP_VS_SCTP_MAX_RTO, + [IP_VS_SCTP_S_COOKIE] = IP_VS_SCTP_MAX_RTO, + [IP_VS_SCTP_S_COOKIE_ECHOED] = IP_VS_SCTP_MAX_RTO, + [IP_VS_SCTP_S_ESTABLISHED] = 15 * 60 * HZ, + [IP_VS_SCTP_S_SHUTDOWN_SENT] = IP_VS_SCTP_MAX_RTO, + [IP_VS_SCTP_S_SHUTDOWN_RECEIVED] = IP_VS_SCTP_MAX_RTO, + [IP_VS_SCTP_S_SHUTDOWN_ACK_SENT] = IP_VS_SCTP_MAX_RTO, + [IP_VS_SCTP_S_REJECTED] = (0 + 3 + 1) * HZ, + [IP_VS_SCTP_S_CLOSED] = IP_VS_SCTP_MAX_RTO, + [IP_VS_SCTP_S_LAST] = 2 * HZ, }; static const char *sctp_state_name_table[IP_VS_SCTP_S_LAST + 1] = { - [IP_VS_SCTP_S_NONE] = "NONE", - [IP_VS_SCTP_S_INIT_CLI] = "INIT_CLI", - [IP_VS_SCTP_S_INIT_SER] = "INIT_SER", - [IP_VS_SCTP_S_INIT_ACK_CLI] = "INIT_ACK_CLI", - [IP_VS_SCTP_S_INIT_ACK_SER] = "INIT_ACK_SER", - [IP_VS_SCTP_S_ECHO_CLI] = "COOKIE_ECHO_CLI", - [IP_VS_SCTP_S_ECHO_SER] = "COOKIE_ECHO_SER", - [IP_VS_SCTP_S_ESTABLISHED] = "ESTABISHED", - [IP_VS_SCTP_S_SHUT_CLI] = "SHUTDOWN_CLI", - [IP_VS_SCTP_S_SHUT_SER] = "SHUTDOWN_SER", - [IP_VS_SCTP_S_SHUT_ACK_CLI] = "SHUTDOWN_ACK_CLI", - [IP_VS_SCTP_S_SHUT_ACK_SER] = "SHUTDOWN_ACK_SER", - [IP_VS_SCTP_S_CLOSED] = "CLOSED", - [IP_VS_SCTP_S_LAST] = "BUG!" + [IP_VS_SCTP_S_NONE] = "NONE", + [IP_VS_SCTP_S_INIT1] = "INIT1", + [IP_VS_SCTP_S_INIT] = "INIT", + [IP_VS_SCTP_S_COOKIE_SENT] = "C-SENT", + [IP_VS_SCTP_S_COOKIE_REPLIED] = "C-REPLIED", + [IP_VS_SCTP_S_COOKIE_WAIT] = "C-WAIT", + [IP_VS_SCTP_S_COOKIE] = "COOKIE", + [IP_VS_SCTP_S_COOKIE_ECHOED] = "C-ECHOED", + [IP_VS_SCTP_S_ESTABLISHED] = "ESTABLISHED", + [IP_VS_SCTP_S_SHUTDOWN_SENT] = "S-SENT", + [IP_VS_SCTP_S_SHUTDOWN_RECEIVED] = "S-RECEIVED", + [IP_VS_SCTP_S_SHUTDOWN_ACK_SENT] = "S-ACK-SENT", + [IP_VS_SCTP_S_REJECTED] = "REJECTED", + [IP_VS_SCTP_S_CLOSED] = "CLOSED", + [IP_VS_SCTP_S_LAST] = "BUG!", }; @@ -943,17 +394,20 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, } } - event = sctp_events[chunk_type]; + event = (chunk_type < sizeof(sctp_events)) ? + sctp_events[chunk_type] : IP_VS_SCTP_DATA; - /* - * If the direction is IP_VS_DIR_OUTPUT, this event is from server - */ - if (direction == IP_VS_DIR_OUTPUT) - event++; - /* - * get next state + /* Update direction to INPUT_ONLY if necessary + * or delete NO_OUTPUT flag if output packet detected */ - next_state = sctp_states_table[cp->state][event].next_state; + if (cp->flags & IP_VS_CONN_F_NOOUTPUT) { + if (direction == IP_VS_DIR_OUTPUT) + cp->flags &= ~IP_VS_CONN_F_NOOUTPUT; + else + direction = IP_VS_DIR_INPUT_ONLY; + } + + next_state = sctp_states[direction][event][cp->state]; if (next_state != cp->state) { struct ip_vs_dest *dest = cp->dest; diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 50a15944c6c..e3a697234a9 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -39,6 +39,7 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, struct net *net; struct ip_vs_service *svc; struct tcphdr _tcph, *th; + struct netns_ipvs *ipvs; th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph); if (th == NULL) { @@ -46,14 +47,15 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, return 0; } net = skb_net(skb); + ipvs = net_ipvs(net); /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ rcu_read_lock(); - if (th->syn && + if ((th->syn || sysctl_sloppy_tcp(ipvs)) && !th->rst && (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, &iph->daddr, th->dest))) { int ignored; - if (ip_vs_todrop(net_ipvs(net))) { + if (ip_vs_todrop(ipvs)) { /* * It seems that we are very loaded. * We have to drop this packet :( @@ -401,7 +403,7 @@ static struct tcp_states_t tcp_states [] = { /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */ /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }}, /*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sTW }}, -/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }}, +/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }}, /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sSR }}, /* OUTPUT */ @@ -415,7 +417,7 @@ static struct tcp_states_t tcp_states [] = { /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */ /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }}, /*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }}, -/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }}, +/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }}, /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }}, }; @@ -424,7 +426,7 @@ static struct tcp_states_t tcp_states_dos [] = { /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */ /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }}, /*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sSA }}, -/*ack*/ {{sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }}, +/*ack*/ {{sES, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }}, /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }}, /* OUTPUT */ @@ -438,7 +440,7 @@ static struct tcp_states_t tcp_states_dos [] = { /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */ /*syn*/ {{sSA, sES, sES, sSR, sSA, sSA, sSA, sSA, sSA, sSA, sSA }}, /*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }}, -/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }}, +/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }}, /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }}, }; diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c index c35986c793d..176b87c35e3 100644 --- a/net/netfilter/ipvs/ip_vs_rr.c +++ b/net/netfilter/ipvs/ip_vs_rr.c @@ -55,7 +55,8 @@ static int ip_vs_rr_del_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest) * Round-Robin Scheduling */ static struct ip_vs_dest * -ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) { struct list_head *p; struct ip_vs_dest *dest, *last; diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c index f3205925359..a5284cc3d88 100644 --- a/net/netfilter/ipvs/ip_vs_sed.c +++ b/net/netfilter/ipvs/ip_vs_sed.c @@ -59,7 +59,8 @@ ip_vs_sed_dest_overhead(struct ip_vs_dest *dest) * Weighted Least Connection scheduling */ static struct ip_vs_dest * -ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest, *least; unsigned int loh, doh; diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index a65edfe4b16..f16c027df15 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -48,6 +48,10 @@ #include <net/ip_vs.h> +#include <net/tcp.h> +#include <linux/udp.h> +#include <linux/sctp.h> + /* * IPVS SH bucket @@ -71,10 +75,19 @@ struct ip_vs_sh_state { struct ip_vs_sh_bucket buckets[IP_VS_SH_TAB_SIZE]; }; +/* Helper function to determine if server is unavailable */ +static inline bool is_unavailable(struct ip_vs_dest *dest) +{ + return atomic_read(&dest->weight) <= 0 || + dest->flags & IP_VS_DEST_F_OVERLOAD; +} + /* * Returns hash value for IPVS SH entry */ -static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *addr) +static inline unsigned int +ip_vs_sh_hashkey(int af, const union nf_inet_addr *addr, + __be16 port, unsigned int offset) { __be32 addr_fold = addr->ip; @@ -83,7 +96,8 @@ static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *ad addr_fold = addr->ip6[0]^addr->ip6[1]^ addr->ip6[2]^addr->ip6[3]; #endif - return (ntohl(addr_fold)*2654435761UL) & IP_VS_SH_TAB_MASK; + return (offset + (ntohs(port) + ntohl(addr_fold))*2654435761UL) & + IP_VS_SH_TAB_MASK; } @@ -91,12 +105,42 @@ static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *ad * Get ip_vs_dest associated with supplied parameters. */ static inline struct ip_vs_dest * -ip_vs_sh_get(int af, struct ip_vs_sh_state *s, const union nf_inet_addr *addr) +ip_vs_sh_get(struct ip_vs_service *svc, struct ip_vs_sh_state *s, + const union nf_inet_addr *addr, __be16 port) { - return rcu_dereference(s->buckets[ip_vs_sh_hashkey(af, addr)].dest); + unsigned int hash = ip_vs_sh_hashkey(svc->af, addr, port, 0); + struct ip_vs_dest *dest = rcu_dereference(s->buckets[hash].dest); + + return (!dest || is_unavailable(dest)) ? NULL : dest; } +/* As ip_vs_sh_get, but with fallback if selected server is unavailable */ +static inline struct ip_vs_dest * +ip_vs_sh_get_fallback(struct ip_vs_service *svc, struct ip_vs_sh_state *s, + const union nf_inet_addr *addr, __be16 port) +{ + unsigned int offset; + unsigned int hash; + struct ip_vs_dest *dest; + + for (offset = 0; offset < IP_VS_SH_TAB_SIZE; offset++) { + hash = ip_vs_sh_hashkey(svc->af, addr, port, offset); + dest = rcu_dereference(s->buckets[hash].dest); + if (!dest) + break; + if (is_unavailable(dest)) + IP_VS_DBG_BUF(6, "SH: selected unavailable server " + "%s:%d (offset %d)", + IP_VS_DBG_ADDR(svc->af, &dest->addr), + ntohs(dest->port), offset); + else + return dest; + } + + return NULL; +} + /* * Assign all the hash buckets of the specified table with the service. */ @@ -213,13 +257,33 @@ static int ip_vs_sh_dest_changed(struct ip_vs_service *svc, } -/* - * If the dest flags is set with IP_VS_DEST_F_OVERLOAD, - * consider that the server is overloaded here. - */ -static inline int is_overloaded(struct ip_vs_dest *dest) +/* Helper function to get port number */ +static inline __be16 +ip_vs_sh_get_port(const struct sk_buff *skb, struct ip_vs_iphdr *iph) { - return dest->flags & IP_VS_DEST_F_OVERLOAD; + __be16 port; + struct tcphdr _tcph, *th; + struct udphdr _udph, *uh; + sctp_sctphdr_t _sctph, *sh; + + switch (iph->protocol) { + case IPPROTO_TCP: + th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph); + port = th->source; + break; + case IPPROTO_UDP: + uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph); + port = uh->source; + break; + case IPPROTO_SCTP: + sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph); + port = sh->source; + break; + default: + port = 0; + } + + return port; } @@ -227,28 +291,32 @@ static inline int is_overloaded(struct ip_vs_dest *dest) * Source Hashing scheduling */ static struct ip_vs_dest * -ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest; struct ip_vs_sh_state *s; - struct ip_vs_iphdr iph; - - ip_vs_fill_iph_addr_only(svc->af, skb, &iph); + __be16 port = 0; IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); + if (svc->flags & IP_VS_SVC_F_SCHED_SH_PORT) + port = ip_vs_sh_get_port(skb, iph); + s = (struct ip_vs_sh_state *) svc->sched_data; - dest = ip_vs_sh_get(svc->af, s, &iph.saddr); - if (!dest - || !(dest->flags & IP_VS_DEST_F_AVAILABLE) - || atomic_read(&dest->weight) <= 0 - || is_overloaded(dest)) { + + if (svc->flags & IP_VS_SVC_F_SCHED_SH_FALLBACK) + dest = ip_vs_sh_get_fallback(svc, s, &iph->saddr, port); + else + dest = ip_vs_sh_get(svc, s, &iph->saddr, port); + + if (!dest) { ip_vs_scheduler_err(svc, "no destination available"); return NULL; } IP_VS_DBG_BUF(6, "SH: source IP address %s --> server %s:%d\n", - IP_VS_DBG_ADDR(svc->af, &iph.saddr), + IP_VS_DBG_ADDR(svc->af, &iph->saddr), IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port)); diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index f6046d9af8d..f4484719f3e 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -425,6 +425,16 @@ ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs) return sb; } +/* Check if connection is controlled by persistence */ +static inline bool in_persistence(struct ip_vs_conn *cp) +{ + for (cp = cp->control; cp; cp = cp->control) { + if (cp->flags & IP_VS_CONN_F_TEMPLATE) + return true; + } + return false; +} + /* Check if conn should be synced. * pkts: conn packets, use sysctl_sync_threshold to avoid packet check * - (1) sync_refresh_period: reduce sync rate. Additionally, retry @@ -447,6 +457,8 @@ static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs, /* Check if we sync in current state */ if (unlikely(cp->flags & IP_VS_CONN_F_TEMPLATE)) force = 0; + else if (unlikely(sysctl_sync_persist_mode(ipvs) && in_persistence(cp))) + return 0; else if (likely(cp->protocol == IPPROTO_TCP)) { if (!((1 << cp->state) & ((1 << IP_VS_TCP_S_ESTABLISHED) | @@ -461,9 +473,10 @@ static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs, } else if (unlikely(cp->protocol == IPPROTO_SCTP)) { if (!((1 << cp->state) & ((1 << IP_VS_SCTP_S_ESTABLISHED) | - (1 << IP_VS_SCTP_S_CLOSED) | - (1 << IP_VS_SCTP_S_SHUT_ACK_CLI) | - (1 << IP_VS_SCTP_S_SHUT_ACK_SER)))) + (1 << IP_VS_SCTP_S_SHUTDOWN_SENT) | + (1 << IP_VS_SCTP_S_SHUTDOWN_RECEIVED) | + (1 << IP_VS_SCTP_S_SHUTDOWN_ACK_SENT) | + (1 << IP_VS_SCTP_S_CLOSED)))) return 0; force = cp->state != cp->old_state; if (force && cp->state != IP_VS_SCTP_S_ESTABLISHED) diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c index c60a81c4ce9..6dc1fa12884 100644 --- a/net/netfilter/ipvs/ip_vs_wlc.c +++ b/net/netfilter/ipvs/ip_vs_wlc.c @@ -31,7 +31,8 @@ * Weighted Least Connection scheduling */ static struct ip_vs_dest * -ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest, *least; unsigned int loh, doh; diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c index 0e68555bceb..0546cd572d6 100644 --- a/net/netfilter/ipvs/ip_vs_wrr.c +++ b/net/netfilter/ipvs/ip_vs_wrr.c @@ -162,7 +162,8 @@ static int ip_vs_wrr_dest_changed(struct ip_vs_service *svc, * Weighted Round-Robin Scheduling */ static struct ip_vs_dest * -ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest, *last, *stop = NULL; struct ip_vs_wrr_mark *mark = svc->sched_data; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 6d0f8a17c5b..f83a52298ef 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -828,7 +828,9 @@ ctnetlink_parse_tuple_ip(struct nlattr *attr, struct nf_conntrack_tuple *tuple) struct nf_conntrack_l3proto *l3proto; int ret = 0; - nla_parse_nested(tb, CTA_IP_MAX, attr, NULL); + ret = nla_parse_nested(tb, CTA_IP_MAX, attr, NULL); + if (ret < 0) + return ret; rcu_read_lock(); l3proto = __nf_ct_l3proto_find(tuple->src.l3num); @@ -895,7 +897,9 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[], memset(tuple, 0, sizeof(*tuple)); - nla_parse_nested(tb, CTA_TUPLE_MAX, cda[type], tuple_nla_policy); + err = nla_parse_nested(tb, CTA_TUPLE_MAX, cda[type], tuple_nla_policy); + if (err < 0) + return err; if (!tb[CTA_TUPLE_IP]) return -EINVAL; @@ -946,9 +950,12 @@ static inline int ctnetlink_parse_help(const struct nlattr *attr, char **helper_name, struct nlattr **helpinfo) { + int err; struct nlattr *tb[CTA_HELP_MAX+1]; - nla_parse_nested(tb, CTA_HELP_MAX, attr, help_nla_policy); + err = nla_parse_nested(tb, CTA_HELP_MAX, attr, help_nla_policy); + if (err < 0) + return err; if (!tb[CTA_HELP_NAME]) return -EINVAL; @@ -1431,7 +1438,9 @@ ctnetlink_change_protoinfo(struct nf_conn *ct, const struct nlattr * const cda[] struct nf_conntrack_l4proto *l4proto; int err = 0; - nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, protoinfo_policy); + err = nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, protoinfo_policy); + if (err < 0) + return err; rcu_read_lock(); l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); @@ -1452,9 +1461,12 @@ static const struct nla_policy nat_seq_policy[CTA_NAT_SEQ_MAX+1] = { static inline int change_nat_seq_adj(struct nf_nat_seq *natseq, const struct nlattr * const attr) { + int err; struct nlattr *cda[CTA_NAT_SEQ_MAX+1]; - nla_parse_nested(cda, CTA_NAT_SEQ_MAX, attr, nat_seq_policy); + err = nla_parse_nested(cda, CTA_NAT_SEQ_MAX, attr, nat_seq_policy); + if (err < 0) + return err; if (!cda[CTA_NAT_SEQ_CORRECTION_POS]) return -EINVAL; @@ -2115,7 +2127,9 @@ ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct) struct nlattr *cda[CTA_MAX+1]; int ret; - nla_parse_nested(cda, CTA_MAX, attr, ct_nla_policy); + ret = nla_parse_nested(cda, CTA_MAX, attr, ct_nla_policy); + if (ret < 0) + return ret; spin_lock_bh(&nf_conntrack_lock); ret = ctnetlink_nfqueue_parse_ct((const struct nlattr **)cda, ct); @@ -2710,7 +2724,9 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr, struct nf_conntrack_tuple nat_tuple = {}; int err; - nla_parse_nested(tb, CTA_EXPECT_NAT_MAX, attr, exp_nat_nla_policy); + err = nla_parse_nested(tb, CTA_EXPECT_NAT_MAX, attr, exp_nat_nla_policy); + if (err < 0) + return err; if (!tb[CTA_EXPECT_NAT_DIR] || !tb[CTA_EXPECT_NAT_TUPLE]) return -EINVAL; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 4d4d8f1d01f..7dcc376eea5 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1043,6 +1043,12 @@ static int tcp_packet(struct nf_conn *ct, nf_ct_kill_acct(ct, ctinfo, skb); return NF_ACCEPT; } + /* ESTABLISHED without SEEN_REPLY, i.e. mid-connection + * pickup with loose=1. Avoid large ESTABLISHED timeout. + */ + if (new_state == TCP_CONNTRACK_ESTABLISHED && + timeout > timeouts[TCP_CONNTRACK_UNACK]) + timeout = timeouts[TCP_CONNTRACK_UNACK]; } else if (!test_bit(IPS_ASSURED_BIT, &ct->status) && (old_state == TCP_CONNTRACK_SYN_RECV || old_state == TCP_CONNTRACK_ESTABLISHED) diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index a191b6db657..9e287cb56a0 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -67,9 +67,12 @@ static int nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple, const struct nlattr *attr) { + int err; struct nlattr *tb[NFCTH_TUPLE_MAX+1]; - nla_parse_nested(tb, NFCTH_TUPLE_MAX, attr, nfnl_cthelper_tuple_pol); + err = nla_parse_nested(tb, NFCTH_TUPLE_MAX, attr, nfnl_cthelper_tuple_pol); + if (err < 0) + return err; if (!tb[NFCTH_TUPLE_L3PROTONUM] || !tb[NFCTH_TUPLE_L4PROTONUM]) return -EINVAL; @@ -121,9 +124,12 @@ static int nfnl_cthelper_expect_policy(struct nf_conntrack_expect_policy *expect_policy, const struct nlattr *attr) { + int err; struct nlattr *tb[NFCTH_POLICY_MAX+1]; - nla_parse_nested(tb, NFCTH_POLICY_MAX, attr, nfnl_cthelper_expect_pol); + err = nla_parse_nested(tb, NFCTH_POLICY_MAX, attr, nfnl_cthelper_expect_pol); + if (err < 0) + return err; if (!tb[NFCTH_POLICY_NAME] || !tb[NFCTH_POLICY_EXPECT_MAX] || @@ -153,8 +159,10 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper, struct nf_conntrack_expect_policy *expect_policy; struct nlattr *tb[NFCTH_POLICY_SET_MAX+1]; - nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr, - nfnl_cthelper_expect_policy_set); + ret = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr, + nfnl_cthelper_expect_policy_set); + if (ret < 0) + return ret; if (!tb[NFCTH_POLICY_SET_NUM]) return -EINVAL; diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 65074dfb938..50580494148 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -59,8 +59,10 @@ ctnl_timeout_parse_policy(struct ctnl_timeout *timeout, if (likely(l4proto->ctnl_timeout.nlattr_to_obj)) { struct nlattr *tb[l4proto->ctnl_timeout.nlattr_max+1]; - nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max, - attr, l4proto->ctnl_timeout.nla_policy); + ret = nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max, + attr, l4proto->ctnl_timeout.nla_policy); + if (ret < 0) + return ret; ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, &timeout->data); diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 299a48ae5dc..971ea145ab3 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -280,12 +280,17 @@ nfqnl_zcopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) skb_shinfo(to)->nr_frags = j; } -static int nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet) +static int +nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet, + bool csum_verify) { __u32 flags = 0; if (packet->ip_summed == CHECKSUM_PARTIAL) flags = NFQA_SKB_CSUMNOTREADY; + else if (csum_verify) + flags = NFQA_SKB_CSUM_NOTVERIFIED; + if (skb_is_gso(packet)) flags |= NFQA_SKB_GSO; @@ -310,6 +315,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, struct net_device *outdev; struct nf_conn *ct = NULL; enum ip_conntrack_info uninitialized_var(ctinfo); + bool csum_verify; size = nlmsg_total_size(sizeof(struct nfgenmsg)) + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr)) @@ -327,6 +333,12 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (entskb->tstamp.tv64) size += nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp)); + if (entry->hook <= NF_INET_FORWARD || + (entry->hook == NF_INET_POST_ROUTING && entskb->sk == NULL)) + csum_verify = !skb_csum_unnecessary(entskb); + else + csum_verify = false; + outdev = entry->outdev; switch ((enum nfqnl_config_mode)ACCESS_ONCE(queue->copy_mode)) { @@ -476,7 +488,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len))) goto nla_put_failure; - if (nfqnl_put_packet_info(skb, entskb)) + if (nfqnl_put_packet_info(skb, entskb, csum_verify)) goto nla_put_failure; if (data_len) { diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 02704245710..f8b71911037 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -163,8 +163,11 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, bool wildcard; bool transparent = true; - /* Ignore sockets listening on INADDR_ANY */ - wildcard = (sk->sk_state != TCP_TIME_WAIT && + /* Ignore sockets listening on INADDR_ANY, + * unless XT_SOCKET_NOWILDCARD is set + */ + wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) && + sk->sk_state != TCP_TIME_WAIT && inet_sk(sk)->inet_rcv_saddr == 0); /* Ignore non-transparent sockets, @@ -197,7 +200,7 @@ socket_mt4_v0(const struct sk_buff *skb, struct xt_action_param *par) } static bool -socket_mt4_v1(const struct sk_buff *skb, struct xt_action_param *par) +socket_mt4_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) { return socket_match(skb, par, par->matchinfo); } @@ -259,7 +262,7 @@ extract_icmp6_fields(const struct sk_buff *skb, } static bool -socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) +socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) { struct ipv6hdr *iph = ipv6_hdr(skb); struct udphdr _hdr, *hp = NULL; @@ -302,8 +305,11 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) bool wildcard; bool transparent = true; - /* Ignore sockets listening on INADDR_ANY */ - wildcard = (sk->sk_state != TCP_TIME_WAIT && + /* Ignore sockets listening on INADDR_ANY + * unless XT_SOCKET_NOWILDCARD is set + */ + wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) && + sk->sk_state != TCP_TIME_WAIT && ipv6_addr_any(&inet6_sk(sk)->rcv_saddr)); /* Ignore non-transparent sockets, @@ -331,6 +337,28 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) } #endif +static int socket_mt_v1_check(const struct xt_mtchk_param *par) +{ + const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo; + + if (info->flags & ~XT_SOCKET_FLAGS_V1) { + pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V1); + return -EINVAL; + } + return 0; +} + +static int socket_mt_v2_check(const struct xt_mtchk_param *par) +{ + const struct xt_socket_mtinfo2 *info = (struct xt_socket_mtinfo2 *) par->matchinfo; + + if (info->flags & ~XT_SOCKET_FLAGS_V2) { + pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V2); + return -EINVAL; + } + return 0; +} + static struct xt_match socket_mt_reg[] __read_mostly = { { .name = "socket", @@ -345,7 +373,8 @@ static struct xt_match socket_mt_reg[] __read_mostly = { .name = "socket", .revision = 1, .family = NFPROTO_IPV4, - .match = socket_mt4_v1, + .match = socket_mt4_v1_v2, + .checkentry = socket_mt_v1_check, .matchsize = sizeof(struct xt_socket_mtinfo1), .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN), @@ -356,7 +385,32 @@ static struct xt_match socket_mt_reg[] __read_mostly = { .name = "socket", .revision = 1, .family = NFPROTO_IPV6, - .match = socket_mt6_v1, + .match = socket_mt6_v1_v2, + .checkentry = socket_mt_v1_check, + .matchsize = sizeof(struct xt_socket_mtinfo1), + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN), + .me = THIS_MODULE, + }, +#endif + { + .name = "socket", + .revision = 2, + .family = NFPROTO_IPV4, + .match = socket_mt4_v1_v2, + .checkentry = socket_mt_v2_check, + .matchsize = sizeof(struct xt_socket_mtinfo1), + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN), + .me = THIS_MODULE, + }, +#ifdef XT_SOCKET_HAVE_IPV6 + { + .name = "socket", + .revision = 2, + .family = NFPROTO_IPV6, + .match = socket_mt6_v1_v2, + .checkentry = socket_mt_v2_check, .matchsize = sizeof(struct xt_socket_mtinfo1), .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN), diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 6967fbcca6c..0c61b59175d 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -849,7 +849,10 @@ static void netlink_skb_destructor(struct sk_buff *skb) } #endif if (is_vmalloc_addr(skb->head)) { - vfree(skb->head); + if (!skb->cloned || + !atomic_dec_return(&(skb_shinfo(skb)->dataref))) + vfree(skb->head); + skb->head = NULL; } if (skb->sk != NULL) @@ -1532,33 +1535,31 @@ struct sock *netlink_getsockbyfilp(struct file *filp) return sock; } -static struct sk_buff *netlink_alloc_large_skb(unsigned int size) +static struct sk_buff *netlink_alloc_large_skb(unsigned int size, + int broadcast) { struct sk_buff *skb; void *data; - if (size <= NLMSG_GOODSIZE) + if (size <= NLMSG_GOODSIZE || broadcast) return alloc_skb(size, GFP_KERNEL); - skb = alloc_skb_head(GFP_KERNEL); - if (skb == NULL) - return NULL; + size = SKB_DATA_ALIGN(size) + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); data = vmalloc(size); if (data == NULL) - goto err; + return NULL; - skb->head = data; - skb->data = data; - skb_reset_tail_pointer(skb); - skb->end = skb->tail + size; - skb->len = 0; - skb->destructor = netlink_skb_destructor; + skb = build_skb(data, size); + if (skb == NULL) + vfree(data); + else { + skb->head_frag = 0; + skb->destructor = netlink_skb_destructor; + } return skb; -err: - kfree_skb(skb); - return NULL; } /* @@ -2244,7 +2245,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (len > sk->sk_sndbuf - 32) goto out; err = -ENOBUFS; - skb = netlink_alloc_large_skb(len); + skb = netlink_alloc_large_skb(len, dst_group); if (skb == NULL) goto out; diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index 9fbc04a31ed..27ee56b688a 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -19,8 +19,6 @@ config OPENVSWITCH which is able to accept configuration from a variety of sources and translate it into packet processing rules. - Open vSwitch GRE support depends on CONFIG_NET_IPGRE_DEMUX. - See http://openvswitch.org for more information and userspace utilities. @@ -28,3 +26,17 @@ config OPENVSWITCH called openvswitch. If unsure, say N. + +config OPENVSWITCH_GRE + bool "Open vSwitch GRE tunneling support" + depends on INET + depends on OPENVSWITCH + depends on NET_IPGRE_DEMUX && !(OPENVSWITCH=y && NET_IPGRE_DEMUX=m) + default y + ---help--- + If you say Y here, then the Open vSwitch will be able create GRE + vport. + + Say N to exclude this support and reduce the binary size. + + If unsure, say Y. diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index 943e5c43135..493e9775dcd 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -16,7 +16,7 @@ * 02110-1301, USA */ -#if IS_ENABLED(CONFIG_NET_IPGRE_DEMUX) +#ifdef CONFIG_OPENVSWITCH_GRE #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/if.h> @@ -271,4 +271,5 @@ const struct vport_ops ovs_gre_vport_ops = { .get_name = gre_get_name, .send = gre_tnl_send, }; -#endif + +#endif /* OPENVSWITCH_GRE */ diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index ba81294219a..d4c7fa04ce0 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -39,7 +39,7 @@ static const struct vport_ops *vport_ops_list[] = { &ovs_netdev_vport_ops, &ovs_internal_vport_ops, -#if IS_ENABLED(CONFIG_NET_IPGRE_DEMUX) +#ifdef CONFIG_OPENVSWITCH_GRE &ovs_gre_vport_ops, #endif }; |