From c720c7e8383aff1cb219bddf474ed89d850336e3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 15 Oct 2009 06:30:45 +0000 Subject: inet: rename some inet_sock fields In order to have better cache layouts of struct sock (separate zones for rx/tx paths), we need this preliminary patch. Goal is to transfert fields used at lookup time in the first read-mostly cache line (inside struct sock_common) and move sk_refcnt to a separate cache line (only written by rx path) This patch adds inet_ prefix to daddr, rcv_saddr, dport, num, saddr, sport and id fields. This allows a future patch to define these fields as macros, like sk_refcnt, without name clashes. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sctp/protocol.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/sctp/protocol.c') diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 612dc878e05..d9f4cc2c786 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -296,19 +296,19 @@ static void sctp_v4_from_sk(union sctp_addr *addr, struct sock *sk) { addr->v4.sin_family = AF_INET; addr->v4.sin_port = 0; - addr->v4.sin_addr.s_addr = inet_sk(sk)->rcv_saddr; + addr->v4.sin_addr.s_addr = inet_sk(sk)->inet_rcv_saddr; } /* Initialize sk->sk_rcv_saddr from sctp_addr. */ static void sctp_v4_to_sk_saddr(union sctp_addr *addr, struct sock *sk) { - inet_sk(sk)->rcv_saddr = addr->v4.sin_addr.s_addr; + inet_sk(sk)->inet_rcv_saddr = addr->v4.sin_addr.s_addr; } /* Initialize sk->sk_daddr from sctp_addr. */ static void sctp_v4_to_sk_daddr(union sctp_addr *addr, struct sock *sk) { - inet_sk(sk)->daddr = addr->v4.sin_addr.s_addr; + inet_sk(sk)->inet_daddr = addr->v4.sin_addr.s_addr; } /* Initialize a sctp_addr from an address parameter. */ @@ -598,7 +598,7 @@ static struct sock *sctp_v4_create_accept_sk(struct sock *sk, newinet = inet_sk(newsk); - newinet->daddr = asoc->peer.primary_addr.v4.sin_addr.s_addr; + newinet->inet_daddr = asoc->peer.primary_addr.v4.sin_addr.s_addr; sk_refcnt_debug_inc(newsk); -- cgit v1.2.3-70-g09d2 From c6d14c84566d6b70ad9dc1618db0dec87cca9300 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 4 Nov 2009 05:43:23 -0800 Subject: net: Introduce for_each_netdev_rcu() iterator Adds RCU management to the list of netdevices. Convert some for_each_netdev() users to RCU version, if it can avoid read_lock-ing dev_base_lock Ie: read_lock(&dev_base_loack); for_each_netdev(net, dev) some_action(); read_unlock(&dev_base_lock); becomes : rcu_read_lock(); for_each_netdev_rcu(net, dev) some_action(); rcu_read_unlock(); Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ net/core/dev.c | 30 ++++++++++++++++-------------- net/decnet/af_decnet.c | 6 +++--- net/decnet/dn_fib.c | 6 +++--- net/decnet/dn_route.c | 6 +++--- net/ipv4/devinet.c | 30 +++++++++++------------------- net/ipv6/addrconf.c | 20 +++++++------------- net/ipv6/anycast.c | 6 +++--- net/netrom/nr_route.c | 15 ++++++++------- net/rose/rose_route.c | 18 +++++++++--------- net/sctp/protocol.c | 6 +++--- 11 files changed, 68 insertions(+), 77 deletions(-) (limited to 'net/sctp/protocol.c') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bcf1083857f..5077de02831 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1081,6 +1081,8 @@ extern rwlock_t dev_base_lock; /* Device list lock */ #define for_each_netdev(net, d) \ list_for_each_entry(d, &(net)->dev_base_head, dev_list) +#define for_each_netdev_rcu(net, d) \ + list_for_each_entry_rcu(d, &(net)->dev_base_head, dev_list) #define for_each_netdev_safe(net, d, n) \ list_for_each_entry_safe(d, n, &(net)->dev_base_head, dev_list) #define for_each_netdev_continue(net, d) \ diff --git a/net/core/dev.c b/net/core/dev.c index 76a1502efe6..bf629ac08b8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -175,7 +175,7 @@ static struct list_head ptype_all __read_mostly; /* Taps */ * The @dev_base_head list is protected by @dev_base_lock and the rtnl * semaphore. * - * Pure readers hold dev_base_lock for reading. + * Pure readers hold dev_base_lock for reading, or rcu_read_lock() * * Writers must hold the rtnl semaphore while they loop through the * dev_base_head list, and hold dev_base_lock for writing when they do the @@ -212,7 +212,7 @@ static int list_netdevice(struct net_device *dev) ASSERT_RTNL(); write_lock_bh(&dev_base_lock); - list_add_tail(&dev->dev_list, &net->dev_base_head); + list_add_tail_rcu(&dev->dev_list, &net->dev_base_head); hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name)); hlist_add_head_rcu(&dev->index_hlist, dev_index_hash(net, dev->ifindex)); @@ -229,7 +229,7 @@ static void unlist_netdevice(struct net_device *dev) /* Unlink dev from the device chain */ write_lock_bh(&dev_base_lock); - list_del(&dev->dev_list); + list_del_rcu(&dev->dev_list); hlist_del_rcu(&dev->name_hlist); hlist_del_rcu(&dev->index_hlist); write_unlock_bh(&dev_base_lock); @@ -799,15 +799,15 @@ struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags, struct net_device *dev, *ret; ret = NULL; - read_lock(&dev_base_lock); - for_each_netdev(net, dev) { + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { if (((dev->flags ^ if_flags) & mask) == 0) { dev_hold(dev); ret = dev; break; } } - read_unlock(&dev_base_lock); + rcu_read_unlock(); return ret; } EXPORT_SYMBOL(dev_get_by_flags); @@ -3077,18 +3077,18 @@ static int dev_ifconf(struct net *net, char __user *arg) * in detail. */ void *dev_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(dev_base_lock) + __acquires(RCU) { struct net *net = seq_file_net(seq); loff_t off; struct net_device *dev; - read_lock(&dev_base_lock); + rcu_read_lock(); if (!*pos) return SEQ_START_TOKEN; off = 1; - for_each_netdev(net, dev) + for_each_netdev_rcu(net, dev) if (off++ == *pos) return dev; @@ -3097,16 +3097,18 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos) void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct net *net = seq_file_net(seq); + struct net_device *dev = (v == SEQ_START_TOKEN) ? + first_net_device(seq_file_net(seq)) : + next_net_device((struct net_device *)v); + ++*pos; - return v == SEQ_START_TOKEN ? - first_net_device(net) : next_net_device((struct net_device *)v); + return rcu_dereference(dev); } void dev_seq_stop(struct seq_file *seq, void *v) - __releases(dev_base_lock) + __releases(RCU) { - read_unlock(&dev_base_lock); + rcu_read_unlock(); } static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 664965c87e1..2e355841ca9 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -749,9 +749,9 @@ static int dn_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (!(saddr->sdn_flags & SDF_WILD)) { if (le16_to_cpu(saddr->sdn_nodeaddrl)) { - read_lock(&dev_base_lock); + rcu_read_lock(); ldev = NULL; - for_each_netdev(&init_net, dev) { + for_each_netdev_rcu(&init_net, dev) { if (!dev->dn_ptr) continue; if (dn_dev_islocal(dev, dn_saddr2dn(saddr))) { @@ -759,7 +759,7 @@ static int dn_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) break; } } - read_unlock(&dev_base_lock); + rcu_read_unlock(); if (ldev == NULL) return -EADDRNOTAVAIL; } diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index 27ea2e9b080..fd641f65e09 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -607,8 +607,8 @@ static void dn_fib_del_ifaddr(struct dn_ifaddr *ifa) ASSERT_RTNL(); /* Scan device list */ - read_lock(&dev_base_lock); - for_each_netdev(&init_net, dev) { + rcu_read_lock(); + for_each_netdev_rcu(&init_net, dev) { dn_db = dev->dn_ptr; if (dn_db == NULL) continue; @@ -619,7 +619,7 @@ static void dn_fib_del_ifaddr(struct dn_ifaddr *ifa) } } } - read_unlock(&dev_base_lock); + rcu_read_unlock(); if (found_it == 0) { fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 16, ifa); diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 57662cabaf9..860286a3921 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -908,8 +908,8 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old dev_put(dev_out); goto out; } - read_lock(&dev_base_lock); - for_each_netdev(&init_net, dev) { + rcu_read_lock(); + for_each_netdev_rcu(&init_net, dev) { if (!dev->dn_ptr) continue; if (!dn_dev_islocal(dev, oldflp->fld_src)) @@ -922,7 +922,7 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old dev_out = dev; break; } - read_unlock(&dev_base_lock); + rcu_read_unlock(); if (dev_out == NULL) goto out; dev_hold(dev_out); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index ccccaae50b2..8aa7a134c1f 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -876,19 +876,16 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope) if (!addr) addr = ifa->ifa_local; } endfor_ifa(in_dev); -no_in_dev: - rcu_read_unlock(); +no_in_dev: if (addr) - goto out; + goto out_unlock; /* Not loopback addresses on loopback should be preferred in this case. It is importnat that lo is the first interface in dev_base list. */ - read_lock(&dev_base_lock); - rcu_read_lock(); - for_each_netdev(net, dev) { + for_each_netdev_rcu(net, dev) { if ((in_dev = __in_dev_get_rcu(dev)) == NULL) continue; @@ -896,12 +893,11 @@ no_in_dev: if (ifa->ifa_scope != RT_SCOPE_LINK && ifa->ifa_scope <= scope) { addr = ifa->ifa_local; - goto out_unlock_both; + goto out_unlock; } } endfor_ifa(in_dev); } -out_unlock_both: - read_unlock(&dev_base_lock); +out_unlock: rcu_read_unlock(); out: return addr; @@ -962,9 +958,8 @@ __be32 inet_confirm_addr(struct in_device *in_dev, return confirm_addr_indev(in_dev, dst, local, scope); net = dev_net(in_dev->dev); - read_lock(&dev_base_lock); rcu_read_lock(); - for_each_netdev(net, dev) { + for_each_netdev_rcu(net, dev) { if ((in_dev = __in_dev_get_rcu(dev))) { addr = confirm_addr_indev(in_dev, dst, local, scope); if (addr) @@ -972,7 +967,6 @@ __be32 inet_confirm_addr(struct in_device *in_dev, } } rcu_read_unlock(); - read_unlock(&dev_base_lock); return addr; } @@ -1240,18 +1234,18 @@ static void devinet_copy_dflt_conf(struct net *net, int i) { struct net_device *dev; - read_lock(&dev_base_lock); - for_each_netdev(net, dev) { + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { struct in_device *in_dev; - rcu_read_lock(); + in_dev = __in_dev_get_rcu(dev); if (in_dev && !test_bit(i, in_dev->cnf.state)) in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i]; - rcu_read_unlock(); } - read_unlock(&dev_base_lock); + rcu_read_unlock(); } +/* called with RTNL locked */ static void inet_forward_change(struct net *net) { struct net_device *dev; @@ -1260,7 +1254,6 @@ static void inet_forward_change(struct net *net) IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on; IPV4_DEVCONF_DFLT(net, FORWARDING) = on; - read_lock(&dev_base_lock); for_each_netdev(net, dev) { struct in_device *in_dev; if (on) @@ -1271,7 +1264,6 @@ static void inet_forward_change(struct net *net) IN_DEV_CONF_SET(in_dev, FORWARDING, on); rcu_read_unlock(); } - read_unlock(&dev_base_lock); } static int devinet_conf_proc(ctl_table *ctl, int write, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 91864840961..024bba30de2 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -481,9 +481,8 @@ static void addrconf_forward_change(struct net *net, __s32 newf) struct net_device *dev; struct inet6_dev *idev; - read_lock(&dev_base_lock); - for_each_netdev(net, dev) { - rcu_read_lock(); + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { idev = __in6_dev_get(dev); if (idev) { int changed = (!idev->cnf.forwarding) ^ (!newf); @@ -491,9 +490,8 @@ static void addrconf_forward_change(struct net *net, __s32 newf) if (changed) dev_forward_change(idev); } - rcu_read_unlock(); } - read_unlock(&dev_base_lock); + rcu_read_unlock(); } static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old) @@ -1137,10 +1135,9 @@ int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev, hiscore->rule = -1; hiscore->ifa = NULL; - read_lock(&dev_base_lock); rcu_read_lock(); - for_each_netdev(net, dev) { + for_each_netdev_rcu(net, dev) { struct inet6_dev *idev; /* Candidate Source Address (section 4) @@ -1235,7 +1232,6 @@ try_nextdev: read_unlock_bh(&idev->lock); } rcu_read_unlock(); - read_unlock(&dev_base_lock); if (!hiscore->ifa) return -EADDRNOTAVAIL; @@ -4052,9 +4048,8 @@ static void addrconf_disable_change(struct net *net, __s32 newf) struct net_device *dev; struct inet6_dev *idev; - read_lock(&dev_base_lock); - for_each_netdev(net, dev) { - rcu_read_lock(); + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { idev = __in6_dev_get(dev); if (idev) { int changed = (!idev->cnf.disable_ipv6) ^ (!newf); @@ -4062,9 +4057,8 @@ static void addrconf_disable_change(struct net *net, __s32 newf) if (changed) dev_disable_change(idev); } - rcu_read_unlock(); } - read_unlock(&dev_base_lock); + rcu_read_unlock(); } static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int old) diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 1ae58bec1de..2f00ca83f04 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -404,13 +404,13 @@ int ipv6_chk_acast_addr(struct net *net, struct net_device *dev, if (dev) return ipv6_chk_acast_dev(dev, addr); - read_lock(&dev_base_lock); - for_each_netdev(net, dev) + rcu_read_lock(); + for_each_netdev_rcu(net, dev) if (ipv6_chk_acast_dev(dev, addr)) { found = 1; break; } - read_unlock(&dev_base_lock); + rcu_read_unlock(); return found; } diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 4eb1ac9a767..aacba76070f 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -597,15 +597,15 @@ struct net_device *nr_dev_first(void) { struct net_device *dev, *first = NULL; - read_lock(&dev_base_lock); - for_each_netdev(&init_net, dev) { + rcu_read_lock(); + for_each_netdev_rcu(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM) if (first == NULL || strncmp(dev->name, first->name, 3) < 0) first = dev; } if (first) dev_hold(first); - read_unlock(&dev_base_lock); + rcu_read_unlock(); return first; } @@ -617,16 +617,17 @@ struct net_device *nr_dev_get(ax25_address *addr) { struct net_device *dev; - read_lock(&dev_base_lock); - for_each_netdev(&init_net, dev) { - if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM && ax25cmp(addr, (ax25_address *)dev->dev_addr) == 0) { + rcu_read_lock(); + for_each_netdev_rcu(&init_net, dev) { + if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM && + ax25cmp(addr, (ax25_address *)dev->dev_addr) == 0) { dev_hold(dev); goto out; } } dev = NULL; out: - read_unlock(&dev_base_lock); + rcu_read_unlock(); return dev; } diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 9478d9b3d97..d936226916f 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -600,13 +600,13 @@ struct net_device *rose_dev_first(void) { struct net_device *dev, *first = NULL; - read_lock(&dev_base_lock); - for_each_netdev(&init_net, dev) { + rcu_read_lock(); + for_each_netdev_rcu(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE) if (first == NULL || strncmp(dev->name, first->name, 3) < 0) first = dev; } - read_unlock(&dev_base_lock); + rcu_read_unlock(); return first; } @@ -618,8 +618,8 @@ struct net_device *rose_dev_get(rose_address *addr) { struct net_device *dev; - read_lock(&dev_base_lock); - for_each_netdev(&init_net, dev) { + rcu_read_lock(); + for_each_netdev_rcu(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0) { dev_hold(dev); goto out; @@ -627,7 +627,7 @@ struct net_device *rose_dev_get(rose_address *addr) } dev = NULL; out: - read_unlock(&dev_base_lock); + rcu_read_unlock(); return dev; } @@ -635,14 +635,14 @@ static int rose_dev_exists(rose_address *addr) { struct net_device *dev; - read_lock(&dev_base_lock); - for_each_netdev(&init_net, dev) { + rcu_read_lock(); + for_each_netdev_rcu(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0) goto out; } dev = NULL; out: - read_unlock(&dev_base_lock); + rcu_read_unlock(); return dev != NULL; } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index d9f4cc2c786..fe44c57101d 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -205,14 +205,14 @@ static void sctp_get_local_addr_list(void) struct list_head *pos; struct sctp_af *af; - read_lock(&dev_base_lock); - for_each_netdev(&init_net, dev) { + rcu_read_lock(); + for_each_netdev_rcu(&init_net, dev) { __list_for_each(pos, &sctp_address_families) { af = list_entry(pos, struct sctp_af, list); af->copy_addrlist(&sctp_local_addr_list, dev); } } - read_unlock(&dev_base_lock); + rcu_read_unlock(); } /* Free the existing local addresses. */ -- cgit v1.2.3-70-g09d2 From 13f18aa05f5abe135f47b6417537ae2b2fedc18c Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 5 Nov 2009 20:44:37 -0800 Subject: net: drop capability from protocol definitions struct can_proto had a capability field which wasn't ever used. It is dropped entirely. struct inet_protosw had a capability field which can be more clearly expressed in the code by just checking if sock->type = SOCK_RAW. Signed-off-by: Eric Paris Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/can/core.h | 2 -- include/net/protocol.h | 4 ---- net/can/af_can.c | 5 ----- net/can/bcm.c | 1 - net/can/raw.c | 1 - net/dccp/ipv4.c | 1 - net/dccp/ipv6.c | 1 - net/ipv4/af_inet.c | 5 +---- net/ipv4/udplite.c | 1 - net/ipv6/af_inet6.c | 2 +- net/ipv6/raw.c | 1 - net/ipv6/tcp_ipv6.c | 1 - net/ipv6/udp.c | 1 - net/ipv6/udplite.c | 1 - net/sctp/ipv6.c | 2 -- net/sctp/protocol.c | 2 -- 16 files changed, 2 insertions(+), 29 deletions(-) (limited to 'net/sctp/protocol.c') diff --git a/include/linux/can/core.h b/include/linux/can/core.h index 25085cbadcf..6c507bea275 100644 --- a/include/linux/can/core.h +++ b/include/linux/can/core.h @@ -32,14 +32,12 @@ * struct can_proto - CAN protocol structure * @type: type argument in socket() syscall, e.g. SOCK_DGRAM. * @protocol: protocol number in socket() syscall. - * @capability: capability needed to open the socket, or -1 for no restriction. * @ops: pointer to struct proto_ops for sock->ops. * @prot: pointer to struct proto structure. */ struct can_proto { int type; int protocol; - int capability; struct proto_ops *ops; struct proto *prot; }; diff --git a/include/net/protocol.h b/include/net/protocol.h index 89932d45da5..f1effdd3c26 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -82,10 +82,6 @@ struct inet_protosw { struct proto *prot; const struct proto_ops *ops; - int capability; /* Which (if any) capability do - * we need to use this socket - * interface? - */ char no_check; /* checksum on rcv/xmit/none? */ unsigned char flags; /* See INET_PROTOSW_* below. */ }; diff --git a/net/can/af_can.c b/net/can/af_can.c index 3f2eb27e1ff..9c0426dc318 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -160,11 +160,6 @@ static int can_create(struct net *net, struct socket *sock, int protocol) goto errout; } - if (cp->capability >= 0 && !capable(cp->capability)) { - err = -EPERM; - goto errout; - } - sock->ops = cp->ops; sk = sk_alloc(net, PF_CAN, GFP_KERNEL, cp->prot); diff --git a/net/can/bcm.c b/net/can/bcm.c index 2f47039c79d..67b5433db13 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1576,7 +1576,6 @@ static struct proto bcm_proto __read_mostly = { static struct can_proto bcm_can_proto __read_mostly = { .type = SOCK_DGRAM, .protocol = CAN_BCM, - .capability = -1, .ops = &bcm_ops, .prot = &bcm_proto, }; diff --git a/net/can/raw.c b/net/can/raw.c index 6e77db58b9e..abca920440b 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -742,7 +742,6 @@ static struct proto raw_proto __read_mostly = { static struct can_proto raw_can_proto __read_mostly = { .type = SOCK_RAW, .protocol = CAN_RAW, - .capability = -1, .ops = &raw_ops, .prot = &raw_proto, }; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 00028d4b09d..2423a086673 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -991,7 +991,6 @@ static struct inet_protosw dccp_v4_protosw = { .protocol = IPPROTO_DCCP, .prot = &dccp_v4_prot, .ops = &inet_dccp_ops, - .capability = -1, .no_check = 0, .flags = INET_PROTOSW_ICSK, }; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 6d89f9f7d5d..50ea91a7770 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -1185,7 +1185,6 @@ static struct inet_protosw dccp_v6_protosw = { .protocol = IPPROTO_DCCP, .prot = &dccp_v6_prot, .ops = &inet6_dccp_ops, - .capability = -1, .flags = INET_PROTOSW_ICSK, }; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 538e84d0bcb..180ec4c9491 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -325,7 +325,7 @@ lookup_protocol: } err = -EPERM; - if (answer->capability > 0 && !capable(answer->capability)) + if (sock->type == SOCK_RAW && !capable(CAP_NET_RAW)) goto out_rcu_unlock; err = -EAFNOSUPPORT; @@ -947,7 +947,6 @@ static struct inet_protosw inetsw_array[] = .protocol = IPPROTO_TCP, .prot = &tcp_prot, .ops = &inet_stream_ops, - .capability = -1, .no_check = 0, .flags = INET_PROTOSW_PERMANENT | INET_PROTOSW_ICSK, @@ -958,7 +957,6 @@ static struct inet_protosw inetsw_array[] = .protocol = IPPROTO_UDP, .prot = &udp_prot, .ops = &inet_dgram_ops, - .capability = -1, .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_PERMANENT, }, @@ -969,7 +967,6 @@ static struct inet_protosw inetsw_array[] = .protocol = IPPROTO_IP, /* wild card */ .prot = &raw_prot, .ops = &inet_sockraw_ops, - .capability = CAP_NET_RAW, .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_REUSE, } diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 470c504b955..66f79513f4a 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -64,7 +64,6 @@ static struct inet_protosw udplite4_protosw = { .protocol = IPPROTO_UDPLITE, .prot = &udplite_prot, .ops = &inet_dgram_ops, - .capability = -1, .no_check = 0, /* must checksum (RFC 3828) */ .flags = INET_PROTOSW_PERMANENT, }; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 9105b25defe..1b388935659 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -158,7 +158,7 @@ lookup_protocol: } err = -EPERM; - if (answer->capability > 0 && !capable(answer->capability)) + if (sock->type == SOCK_RAW && !capable(CAP_NET_RAW)) goto out_rcu_unlock; sock->ops = answer->ops; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index cb834ab7f07..818ef21ba76 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1336,7 +1336,6 @@ static struct inet_protosw rawv6_protosw = { .protocol = IPPROTO_IP, /* wild card */ .prot = &rawv6_prot, .ops = &inet6_sockraw_ops, - .capability = CAP_NET_RAW, .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_REUSE, }; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 34925f089e0..696a22f034e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2112,7 +2112,6 @@ static struct inet_protosw tcpv6_protosw = { .protocol = IPPROTO_TCP, .prot = &tcpv6_prot, .ops = &inet6_stream_ops, - .capability = -1, .no_check = 0, .flags = INET_PROTOSW_PERMANENT | INET_PROTOSW_ICSK, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index d3b59d73f50..bbe2f3e445f 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1286,7 +1286,6 @@ static struct inet_protosw udpv6_protosw = { .protocol = IPPROTO_UDP, .prot = &udpv6_prot, .ops = &inet6_dgram_ops, - .capability =-1, .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_PERMANENT, }; diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index d737a27ee01..6ea6938919e 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -62,7 +62,6 @@ static struct inet_protosw udplite6_protosw = { .protocol = IPPROTO_UDPLITE, .prot = &udplitev6_prot, .ops = &inet6_dgram_ops, - .capability = -1, .no_check = 0, .flags = INET_PROTOSW_PERMANENT, }; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index bb280e60e00..bacd6a7318b 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -930,7 +930,6 @@ static struct inet_protosw sctpv6_seqpacket_protosw = { .protocol = IPPROTO_SCTP, .prot = &sctpv6_prot, .ops = &inet6_seqpacket_ops, - .capability = -1, .no_check = 0, .flags = SCTP_PROTOSW_FLAG }; @@ -939,7 +938,6 @@ static struct inet_protosw sctpv6_stream_protosw = { .protocol = IPPROTO_SCTP, .prot = &sctpv6_prot, .ops = &inet6_seqpacket_ops, - .capability = -1, .no_check = 0, .flags = SCTP_PROTOSW_FLAG, }; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index fe44c57101d..08ef203d36a 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -909,7 +909,6 @@ static struct inet_protosw sctp_seqpacket_protosw = { .protocol = IPPROTO_SCTP, .prot = &sctp_prot, .ops = &inet_seqpacket_ops, - .capability = -1, .no_check = 0, .flags = SCTP_PROTOSW_FLAG }; @@ -918,7 +917,6 @@ static struct inet_protosw sctp_stream_protosw = { .protocol = IPPROTO_SCTP, .prot = &sctp_prot, .ops = &inet_seqpacket_ops, - .capability = -1, .no_check = 0, .flags = SCTP_PROTOSW_FLAG }; -- cgit v1.2.3-70-g09d2 From 90f2f5318b3a5b0898fef0fec9b91376c7de7a2c Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 23 Nov 2009 15:53:57 -0500 Subject: sctp: Update SWS avaoidance receiver side algorithm We currently send window update SACKs every time we free up 1 PMTU worth of data. That a lot more SACKs then necessary. Instead, we'll now send back the actuall window every time we send a sack, and do window-update SACKs when a fraction of the receive buffer has been opened. The fraction is controlled with a sysctl. Signed-off-by: Vlad Yasevich --- include/net/sctp/constants.h | 4 ++++ include/net/sctp/structs.h | 6 ++++++ net/sctp/associola.c | 5 +++-- net/sctp/protocol.c | 3 +++ net/sctp/sm_sideeffect.c | 3 +-- net/sctp/sysctl.c | 13 +++++++++++++ 6 files changed, 30 insertions(+), 4 deletions(-) (limited to 'net/sctp/protocol.c') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 58f714a3b67..63908840eef 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -308,6 +308,10 @@ enum { SCTP_MAX_GABS = 16 }; #define SCTP_DEFAULT_MINWINDOW 1500 /* default minimum rwnd size */ #define SCTP_DEFAULT_MAXWINDOW 65535 /* default rwnd size */ +#define SCTP_DEFAULT_RWND_SHIFT 4 /* by default, update on 1/16 of + * rcvbuf, which is 1/8 of initial + * window + */ #define SCTP_DEFAULT_MAXSEGMENT 1500 /* MTU size, this is the limit * to which we will raise the P-MTU. */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index cd2e18778f8..90876b65777 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -231,6 +231,11 @@ extern struct sctp_globals { /* Flag to indicate whether computing and verifying checksum * is disabled. */ int checksum_disable; + + /* Threshold for rwnd update SACKS. Receive buffer shifted this many + * bits is an indicator of when to send and window update SACK. + */ + int rwnd_update_shift; } sctp_globals; #define sctp_rto_initial (sctp_globals.rto_initial) @@ -267,6 +272,7 @@ extern struct sctp_globals { #define sctp_prsctp_enable (sctp_globals.prsctp_enable) #define sctp_auth_enable (sctp_globals.auth_enable) #define sctp_checksum_disable (sctp_globals.checksum_disable) +#define sctp_rwnd_upd_shift (sctp_globals.rwnd_update_shift) /* SCTP Socket type: UDP or TCP style. */ typedef enum { diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 8e755ebff3b..37e982510be 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1383,8 +1383,9 @@ static inline int sctp_peer_needs_update(struct sctp_association *asoc) case SCTP_STATE_SHUTDOWN_RECEIVED: case SCTP_STATE_SHUTDOWN_SENT: if ((asoc->rwnd > asoc->a_rwnd) && - ((asoc->rwnd - asoc->a_rwnd) >= - min_t(__u32, (asoc->base.sk->sk_rcvbuf >> 1), asoc->pathmtu))) + ((asoc->rwnd - asoc->a_rwnd) >= max_t(__u32, + (asoc->base.sk->sk_rcvbuf >> sctp_rwnd_upd_shift), + asoc->pathmtu))) return 1; break; default: diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 08ef203d36a..a3c8988758b 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1258,6 +1258,9 @@ SCTP_STATIC __init int sctp_init(void) /* Set SCOPE policy to enabled */ sctp_scope_policy = SCTP_SCOPE_POLICY_ENABLE; + /* Set the default rwnd update threshold */ + sctp_rwnd_upd_shift = SCTP_DEFAULT_RWND_SHIFT; + sctp_sysctl_register(); INIT_LIST_HEAD(&sctp_address_families); diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index eda4fe783be..8ae67098e09 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -217,8 +217,7 @@ static int sctp_gen_sack(struct sctp_association *asoc, int force, sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_SACK)); } else { - if (asoc->a_rwnd > asoc->rwnd) - asoc->a_rwnd = asoc->rwnd; + asoc->a_rwnd = asoc->rwnd; sack = sctp_make_sack(asoc); if (!sack) goto nomem; diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index ab7151da120..ae03ded2bf1 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -52,6 +52,7 @@ static int int_max = INT_MAX; static int sack_timer_min = 1; static int sack_timer_max = 500; static int addr_scope_max = 3; /* check sctp_scope_policy_t in include/net/sctp/constants.h for max entries */ +static int rwnd_scale_max = 16; extern int sysctl_sctp_mem[3]; extern int sysctl_sctp_rmem[3]; @@ -284,6 +285,18 @@ static ctl_table sctp_table[] = { .extra1 = &zero, .extra2 = &addr_scope_max, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "rwnd_update_shift", + .data = &sctp_rwnd_upd_shift, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &one, + .extra2 = &rwnd_scale_max, + }, + { .ctl_name = 0 } }; -- cgit v1.2.3-70-g09d2