diff options
Diffstat (limited to 'net')
41 files changed, 381 insertions, 375 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 1583c5ef963..2a546919d6f 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -562,8 +562,6 @@ static int register_vlan_device(struct net_device *real_dev, if (err < 0) goto out_free_newdev; - /* Account for reference in struct vlan_dev_info */ - dev_hold(real_dev); #ifdef VLAN_DEBUG printk(VLAN_DBG "Allocated new device successfully, returning.\n"); #endif diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 63caa414945..18e3afc964d 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -183,6 +183,7 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt) { struct sk_buff *skb; __le16 param; + __u8 flt_type; BT_DBG("%s %ld", hdev->name, opt); @@ -233,11 +234,8 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt) /* Optional initialization */ /* Clear Event Filters */ - { - struct hci_cp_set_event_flt cp; - cp.flt_type = HCI_FLT_CLEAR_ALL; - hci_send_cmd(hdev, OGF_HOST_CTL, OCF_SET_EVENT_FLT, sizeof(cp), &cp); - } + flt_type = HCI_FLT_CLEAR_ALL; + hci_send_cmd(hdev, OGF_HOST_CTL, OCF_SET_EVENT_FLT, 1, &flt_type); /* Page timeout ~20 secs */ param = cpu_to_le16(0x8000); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 1dae3dfc66a..5ccea5fbd23 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -37,6 +37,7 @@ #include <linux/skbuff.h> #include <linux/workqueue.h> #include <linux/interrupt.h> +#include <linux/compat.h> #include <linux/socket.h> #include <linux/ioctl.h> #include <net/sock.h> @@ -70,15 +71,15 @@ static struct hci_sec_filter hci_sec_filter = { { { 0x0 }, /* OGF_LINK_CTL */ - { 0xbe000006, 0x00000001, 0x000000, 0x00 }, + { 0xbe000006, 0x00000001, 0x00000000, 0x00 }, /* OGF_LINK_POLICY */ - { 0x00005200, 0x00000000, 0x000000, 0x00 }, + { 0x00005200, 0x00000000, 0x00000000, 0x00 }, /* OGF_HOST_CTL */ - { 0xaab00200, 0x2b402aaa, 0x020154, 0x00 }, + { 0xaab00200, 0x2b402aaa, 0x05220154, 0x00 }, /* OGF_INFO_PARAM */ - { 0x000002be, 0x00000000, 0x000000, 0x00 }, + { 0x000002be, 0x00000000, 0x00000000, 0x00 }, /* OGF_STATUS_PARAM */ - { 0x000000ea, 0x00000000, 0x000000, 0x00 } + { 0x000000ea, 0x00000000, 0x00000000, 0x00 } } }; @@ -342,9 +343,24 @@ static inline void hci_sock_cmsg(struct sock *sk, struct msghdr *msg, struct sk_ if (mask & HCI_CMSG_TSTAMP) { struct timeval tv; + void *data; + int len; skb_get_timestamp(skb, &tv); - put_cmsg(msg, SOL_HCI, HCI_CMSG_TSTAMP, sizeof(tv), &tv); + + data = &tv; + len = sizeof(tv); +#ifdef CONFIG_COMPAT + if (msg->msg_flags & MSG_CMSG_COMPAT) { + struct compat_timeval ctv; + ctv.tv_sec = tv.tv_sec; + ctv.tv_usec = tv.tv_usec; + data = &ctv; + len = sizeof(ctv); + } +#endif + + put_cmsg(msg, SOL_HCI, HCI_CMSG_TSTAMP, len, data); } } diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 0eded176ce9..99292e8e1d0 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -41,11 +41,11 @@ int br_dev_xmit(struct sk_buff *skb, struct net_device *dev) skb_pull(skb, ETH_HLEN); if (dest[0] & 1) - br_flood_deliver(br, skb, 0); + br_flood_deliver(br, skb); else if ((dst = __br_fdb_get(br, dest)) != NULL) br_deliver(dst->dst, skb); else - br_flood_deliver(br, skb, 0); + br_flood_deliver(br, skb); return 0; } diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index ada7f495445..bdd7c35c3c7 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -100,24 +100,13 @@ void br_forward(const struct net_bridge_port *to, struct sk_buff *skb) } /* called under bridge lock */ -static void br_flood(struct net_bridge *br, struct sk_buff *skb, int clone, +static void br_flood(struct net_bridge *br, struct sk_buff *skb, void (*__packet_hook)(const struct net_bridge_port *p, struct sk_buff *skb)) { struct net_bridge_port *p; struct net_bridge_port *prev; - if (clone) { - struct sk_buff *skb2; - - if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) { - br->statistics.tx_dropped++; - return; - } - - skb = skb2; - } - prev = NULL; list_for_each_entry_rcu(p, &br->port_list, list) { @@ -148,13 +137,13 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb, int clone, /* called with rcu_read_lock */ -void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb, int clone) +void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb) { - br_flood(br, skb, clone, __br_deliver); + br_flood(br, skb, __br_deliver); } /* called under bridge lock */ -void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, int clone) +void br_flood_forward(struct net_bridge *br, struct sk_buff *skb) { - br_flood(br, skb, clone, __br_forward); + br_flood(br, skb, __br_forward); } diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 6f468fc3357..3a8a015c92e 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -43,7 +43,7 @@ int br_handle_frame_finish(struct sk_buff *skb) struct net_bridge_port *p = rcu_dereference(skb->dev->br_port); struct net_bridge *br; struct net_bridge_fdb_entry *dst; - int passedup = 0; + struct sk_buff *skb2; if (!p || p->state == BR_STATE_DISABLED) goto drop; @@ -55,39 +55,35 @@ int br_handle_frame_finish(struct sk_buff *skb) if (p->state == BR_STATE_LEARNING) goto drop; - if (br->dev->flags & IFF_PROMISC) { - struct sk_buff *skb2; + /* The packet skb2 goes to the local host (NULL to skip). */ + skb2 = NULL; - skb2 = skb_clone(skb, GFP_ATOMIC); - if (skb2 != NULL) { - passedup = 1; - br_pass_frame_up(br, skb2); - } - } + if (br->dev->flags & IFF_PROMISC) + skb2 = skb; + + dst = NULL; if (is_multicast_ether_addr(dest)) { br->statistics.multicast++; - br_flood_forward(br, skb, !passedup); - if (!passedup) - br_pass_frame_up(br, skb); - goto out; + skb2 = skb; + } else if ((dst = __br_fdb_get(br, dest)) && dst->is_local) { + skb2 = skb; + /* Do not forward the packet since it's local. */ + skb = NULL; } - dst = __br_fdb_get(br, dest); - if (dst != NULL && dst->is_local) { - if (!passedup) - br_pass_frame_up(br, skb); - else - kfree_skb(skb); - goto out; - } + if (skb2 == skb) + skb2 = skb_clone(skb, GFP_ATOMIC); - if (dst != NULL) { - br_forward(dst->dst, skb); - goto out; - } + if (skb2) + br_pass_frame_up(br, skb2); - br_flood_forward(br, skb, 0); + if (skb) { + if (dst) + br_forward(dst->dst, skb); + else + br_flood_forward(br, skb); + } out: return 0; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 3ee2022928e..fc13130035e 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -183,7 +183,7 @@ int nf_bridge_copy_header(struct sk_buff *skb) int err; int header_size = ETH_HLEN + nf_bridge_encap_header_len(skb); - err = skb_cow(skb, header_size); + err = skb_cow_head(skb, header_size); if (err) return err; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 21bf3a9a03f..e6dc6f52990 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -170,12 +170,8 @@ extern int br_dev_queue_push_xmit(struct sk_buff *skb); extern void br_forward(const struct net_bridge_port *to, struct sk_buff *skb); extern int br_forward_finish(struct sk_buff *skb); -extern void br_flood_deliver(struct net_bridge *br, - struct sk_buff *skb, - int clone); -extern void br_flood_forward(struct net_bridge *br, - struct sk_buff *skb, - int clone); +extern void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb); +extern void br_flood_forward(struct net_bridge *br, struct sk_buff *skb); /* br_if.c */ extern void br_port_carrier_check(struct net_bridge_port *p); diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 4169a2a89a3..6018d0e5193 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1513,6 +1513,7 @@ static struct nf_sockopt_ops ebt_sockopts = .get_optmin = EBT_BASE_CTL, .get_optmax = EBT_SO_GET_MAX + 1, .get = do_ebt_get_ctl, + .owner = THIS_MODULE, }; static int __init ebtables_init(void) diff --git a/net/core/datagram.c b/net/core/datagram.c index cb056f47612..029b93e246b 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -450,6 +450,9 @@ int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, __wsum csum; int chunk = skb->len - hlen; + if (!chunk) + return 0; + /* Skip filled elements. * Pretty silly, look at memcpy_toiovec, though 8) */ diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 36fdea71d74..803d0c8826a 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -111,6 +111,9 @@ * * 802.1Q/Q-in-Q support by Francesco Fondelli (FF) <francesco.fondelli@gmail.com> * + * Fixed src_mac command to set source mac of packet to value specified in + * command by Adit Ranadive <adit.262@gmail.com> + * */ #include <linux/sys.h> #include <linux/types.h> @@ -1451,8 +1454,11 @@ static ssize_t pktgen_if_write(struct file *file, } if (!strcmp(name, "src_mac")) { char *v = valstr; + unsigned char old_smac[ETH_ALEN]; unsigned char *m = pkt_dev->src_mac; + memcpy(old_smac, pkt_dev->src_mac, ETH_ALEN); + len = strn_len(&user_buffer[i], sizeof(valstr) - 1); if (len < 0) { return len; @@ -1481,6 +1487,10 @@ static ssize_t pktgen_if_write(struct file *file, } } + /* Set up Src MAC */ + if (compare_ether_addr(old_smac, pkt_dev->src_mac)) + memcpy(&(pkt_dev->hh[6]), pkt_dev->src_mac, ETH_ALEN); + sprintf(pg_result, "OK: srcmac"); return count; } diff --git a/net/core/sock.c b/net/core/sock.c index cfed7d42c48..190de61cd64 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -362,6 +362,61 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie) } EXPORT_SYMBOL(sk_dst_check); +static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen) +{ + int ret = -ENOPROTOOPT; +#ifdef CONFIG_NETDEVICES + char devname[IFNAMSIZ]; + int index; + + /* Sorry... */ + ret = -EPERM; + if (!capable(CAP_NET_RAW)) + goto out; + + ret = -EINVAL; + if (optlen < 0) + goto out; + + /* Bind this socket to a particular device like "eth0", + * as specified in the passed interface name. If the + * name is "" or the option length is zero the socket + * is not bound. + */ + if (optlen > IFNAMSIZ - 1) + optlen = IFNAMSIZ - 1; + memset(devname, 0, sizeof(devname)); + + ret = -EFAULT; + if (copy_from_user(devname, optval, optlen)) + goto out; + + if (devname[0] == '\0') { + index = 0; + } else { + struct net_device *dev = dev_get_by_name(devname); + + ret = -ENODEV; + if (!dev) + goto out; + + index = dev->ifindex; + dev_put(dev); + } + + lock_sock(sk); + sk->sk_bound_dev_if = index; + sk_dst_reset(sk); + release_sock(sk); + + ret = 0; + +out: +#endif + + return ret; +} + /* * This is meant for all protocols to use and covers goings on * at the socket level. Everything here is generic. @@ -390,6 +445,9 @@ int sock_setsockopt(struct socket *sock, int level, int optname, } #endif + if (optname == SO_BINDTODEVICE) + return sock_bindtodevice(sk, optval, optlen); + if (optlen < sizeof(int)) return -EINVAL; @@ -578,54 +636,6 @@ set_rcvbuf: ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen); break; -#ifdef CONFIG_NETDEVICES - case SO_BINDTODEVICE: - { - char devname[IFNAMSIZ]; - - /* Sorry... */ - if (!capable(CAP_NET_RAW)) { - ret = -EPERM; - break; - } - - /* Bind this socket to a particular device like "eth0", - * as specified in the passed interface name. If the - * name is "" or the option length is zero the socket - * is not bound. - */ - - if (!valbool) { - sk->sk_bound_dev_if = 0; - } else { - if (optlen > IFNAMSIZ - 1) - optlen = IFNAMSIZ - 1; - memset(devname, 0, sizeof(devname)); - if (copy_from_user(devname, optval, optlen)) { - ret = -EFAULT; - break; - } - - /* Remove any cached route for this socket. */ - sk_dst_reset(sk); - - if (devname[0] == '\0') { - sk->sk_bound_dev_if = 0; - } else { - struct net_device *dev = dev_get_by_name(devname); - if (!dev) { - ret = -ENODEV; - break; - } - sk->sk_bound_dev_if = dev->ifindex; - dev_put(dev); - } - } - break; - } -#endif - - case SO_ATTACH_FILTER: ret = -EINVAL; if (optlen == sizeof(struct sock_fprog)) { diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index fa6604fcf0e..8def68209ed 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -814,7 +814,7 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) for (ifa = dn_db->ifa_list, dn_idx = 0; ifa; ifa = ifa->ifa_next, dn_idx++) { if (dn_idx < skip_naddr) - goto cont; + continue; if (dn_nl_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWADDR, diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 5b77bdaa57d..5dbe5803b7d 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1193,7 +1193,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) for (ifa = in_dev->ifa_list, ip_idx = 0; ifa; ifa = ifa->ifa_next, ip_idx++) { if (ip_idx < s_ip_idx) - goto cont; + continue; if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWADDR, NLM_F_MULTI) <= 0) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index dbeacd8b0f9..def007ec1d6 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -836,12 +836,16 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return inet_diag_get_exact(skb, nlh); } +static DEFINE_MUTEX(inet_diag_mutex); + static void inet_diag_rcv(struct sock *sk, int len) { unsigned int qlen = 0; do { + mutex_lock(&inet_diag_mutex); netlink_run_queue(sk, &qlen, &inet_diag_rcv_msg); + mutex_unlock(&inet_diag_mutex); } while (qlen); } diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index 902fd578aa3..f656d41d8d4 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c @@ -2339,6 +2339,7 @@ static struct nf_sockopt_ops ip_vs_sockopts = { .get_optmin = IP_VS_BASE_CTL, .get_optmax = IP_VS_SO_GET_MAX+1, .get = do_ip_vs_get_ctl, + .owner = THIS_MODULE, }; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index d1149aba935..29114a9ccd1 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1161,6 +1161,7 @@ static struct nf_sockopt_ops arpt_sockopts = { .get_optmin = ARPT_BASE_CTL, .get_optmax = ARPT_SO_GET_MAX+1, .get = do_arpt_get_ctl, + .owner = THIS_MODULE, }; static int __init arp_tables_init(void) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index e1b402c6b85..6486894f450 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -2296,6 +2296,7 @@ static struct nf_sockopt_ops ipt_sockopts = { #ifdef CONFIG_COMPAT .compat_get = compat_do_ipt_get_ctl, #endif + .owner = THIS_MODULE, }; static struct xt_match icmp_matchstruct __read_mostly = { diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index d9b5177989c..f813e02aab3 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -87,14 +87,10 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, if (iph == NULL) return -NF_DROP; - /* Never happen */ - if (iph->frag_off & htons(IP_OFFSET)) { - if (net_ratelimit()) { - printk(KERN_ERR "ipv4_get_l4proto: Frag of proto %u\n", - iph->protocol); - } + /* Conntrack defragments packets, we might still see fragments + * inside ICMP packets though. */ + if (iph->frag_off & htons(IP_OFFSET)) return -NF_DROP; - } *dataoff = nhoff + (iph->ihl << 2); *protonum = iph->protocol; @@ -403,6 +399,7 @@ static struct nf_sockopt_ops so_getorigdst = { .get_optmin = SO_ORIGINAL_DST, .get_optmax = SO_ORIGINAL_DST+1, .get = &getorigdst, + .owner = THIS_MODULE, }; struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 1ee72127462..bbad2cdb74b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -560,7 +560,7 @@ static u32 tcp_rto_min(struct sock *sk) struct dst_entry *dst = __sk_dst_get(sk); u32 rto_min = TCP_RTO_MIN; - if (dst_metric_locked(dst, RTAX_RTO_MIN)) + if (dst && dst_metric_locked(dst, RTAX_RTO_MIN)) rto_min = dst->metrics[RTAX_RTO_MIN-1]; return rto_min; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 28355350fb6..69d4bd10f9c 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -505,6 +505,8 @@ send: out: up->len = 0; up->pending = 0; + if (!err) + UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, up->pcflag); return err; } @@ -693,10 +695,8 @@ out: ip_rt_put(rt); if (free) kfree(ipc.opt); - if (!err) { - UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite); + if (!err) return len; - } /* * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting * ENOBUFS might not be good (it's not tunable per se), but otherwise diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 91ef3be5aba..45b4c82148a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1021,7 +1021,7 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, hiscore.rule++; } if (ipv6_saddr_preferred(score.addr_type) || - (((ifa_result->flags & + (((ifa->flags & (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)) == 0))) { score.attrs |= IPV6_SADDR_SCORE_PREFERRED; if (!(hiscore.attrs & IPV6_SADDR_SCORE_PREFERRED)) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 5dead399fe6..26de3c0ea31 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1427,8 +1427,9 @@ void ip6_flush_pending_frames(struct sock *sk) struct sk_buff *skb; while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) { - IP6_INC_STATS(ip6_dst_idev(skb->dst), - IPSTATS_MIB_OUTDISCARDS); + if (skb->dst) + IP6_INC_STATS(ip6_dst_idev(skb->dst), + IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 0358e6066a4..73a894a2152 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -736,7 +736,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) * so fail our DAD process */ addrconf_dad_failure(ifp); - goto out; + return; } else { /* * This is not a dad solicitation. diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index aeda617246b..cd9df02bb85 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1462,6 +1462,7 @@ static struct nf_sockopt_ops ip6t_sockopts = { .get_optmin = IP6T_BASE_CTL, .get_optmax = IP6T_SO_GET_MAX+1, .get = do_ip6t_get_ctl, + .owner = THIS_MODULE, }; static struct xt_match icmp6_matchstruct __read_mostly = { diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index e27383d855d..77167afa345 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -882,11 +882,10 @@ back_from_confirm: ip6_flush_pending_frames(sk); else if (!(msg->msg_flags & MSG_MORE)) err = rawv6_push_pending_frames(sk, &fl, rp); + release_sock(sk); } done: dst_release(dst); - if (!inet->hdrincl) - release_sock(sk); out: fl6_sock_release(flowlabel); return err<0?err:len; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 4210951edb6..c347f3e30e2 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -555,6 +555,8 @@ static int udp_v6_push_pending_frames(struct sock *sk) out: up->len = 0; up->pending = 0; + if (!err) + UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, up->pcflag); return err; } @@ -823,10 +825,8 @@ do_append_data: release_sock(sk); out: fl6_sock_release(flowlabel); - if (!err) { - UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite); + if (!err) return len; - } /* * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting * ENOBUFS might not be good (it's not tunable per se), but otherwise diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c index 8b8ece75031..e32761ce260 100644 --- a/net/netfilter/nf_sockopt.c +++ b/net/netfilter/nf_sockopt.c @@ -55,18 +55,7 @@ EXPORT_SYMBOL(nf_register_sockopt); void nf_unregister_sockopt(struct nf_sockopt_ops *reg) { - /* No point being interruptible: we're probably in cleanup_module() */ - restart: mutex_lock(&nf_sockopt_mutex); - if (reg->use != 0) { - /* To be woken by nf_sockopt call... */ - /* FIXME: Stuart Young's name appears gratuitously. */ - set_current_state(TASK_UNINTERRUPTIBLE); - reg->cleanup_task = current; - mutex_unlock(&nf_sockopt_mutex); - schedule(); - goto restart; - } list_del(®->list); mutex_unlock(&nf_sockopt_mutex); } @@ -86,10 +75,11 @@ static int nf_sockopt(struct sock *sk, int pf, int val, list_for_each(i, &nf_sockopts) { ops = (struct nf_sockopt_ops *)i; if (ops->pf == pf) { + if (!try_module_get(ops->owner)) + goto out_nosup; if (get) { if (val >= ops->get_optmin && val < ops->get_optmax) { - ops->use++; mutex_unlock(&nf_sockopt_mutex); ret = ops->get(sk, val, opt, len); goto out; @@ -97,23 +87,20 @@ static int nf_sockopt(struct sock *sk, int pf, int val, } else { if (val >= ops->set_optmin && val < ops->set_optmax) { - ops->use++; mutex_unlock(&nf_sockopt_mutex); ret = ops->set(sk, val, opt, *len); goto out; } } + module_put(ops->owner); } } + out_nosup: mutex_unlock(&nf_sockopt_mutex); return -ENOPROTOOPT; out: - mutex_lock(&nf_sockopt_mutex); - ops->use--; - if (ops->cleanup_task) - wake_up_process(ops->cleanup_task); - mutex_unlock(&nf_sockopt_mutex); + module_put(ops->owner); return ret; } @@ -144,10 +131,12 @@ static int compat_nf_sockopt(struct sock *sk, int pf, int val, list_for_each(i, &nf_sockopts) { ops = (struct nf_sockopt_ops *)i; if (ops->pf == pf) { + if (!try_module_get(ops->owner)) + goto out_nosup; + if (get) { if (val >= ops->get_optmin && val < ops->get_optmax) { - ops->use++; mutex_unlock(&nf_sockopt_mutex); if (ops->compat_get) ret = ops->compat_get(sk, @@ -160,7 +149,6 @@ static int compat_nf_sockopt(struct sock *sk, int pf, int val, } else { if (val >= ops->set_optmin && val < ops->set_optmax) { - ops->use++; mutex_unlock(&nf_sockopt_mutex); if (ops->compat_set) ret = ops->compat_set(sk, @@ -171,17 +159,15 @@ static int compat_nf_sockopt(struct sock *sk, int pf, int val, goto out; } } + module_put(ops->owner); } } + out_nosup: mutex_unlock(&nf_sockopt_mutex); return -ENOPROTOOPT; out: - mutex_lock(&nf_sockopt_mutex); - ops->use--; - if (ops->cleanup_task) - wake_up_process(ops->cleanup_task); - mutex_unlock(&nf_sockopt_mutex); + module_put(ops->owner); return ret; } diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index e185a5b5591..2351533a850 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -58,7 +58,6 @@ struct nfulnl_instance { unsigned int qlen; /* number of nlmsgs in skb */ struct sk_buff *skb; /* pre-allocatd skb */ - struct nlmsghdr *lastnlh; /* netlink header of last msg in skb */ struct timer_list timer; int peer_pid; /* PID of the peer process */ @@ -345,10 +344,12 @@ static struct sk_buff *nfulnl_alloc_skb(unsigned int inst_size, static int __nfulnl_send(struct nfulnl_instance *inst) { - int status; + int status = -1; if (inst->qlen > 1) - inst->lastnlh->nlmsg_type = NLMSG_DONE; + NLMSG_PUT(inst->skb, 0, 0, + NLMSG_DONE, + sizeof(struct nfgenmsg)); status = nfnetlink_unicast(inst->skb, inst->peer_pid, MSG_DONTWAIT); if (status < 0) { @@ -358,8 +359,8 @@ __nfulnl_send(struct nfulnl_instance *inst) inst->qlen = 0; inst->skb = NULL; - inst->lastnlh = NULL; +nlmsg_failure: return status; } @@ -538,7 +539,6 @@ __build_packet_message(struct nfulnl_instance *inst, } nlh->nlmsg_len = inst->skb->tail - old_tail; - inst->lastnlh = nlh; return 0; nlmsg_failure: @@ -644,7 +644,8 @@ nfulnl_log_packet(unsigned int pf, } if (inst->qlen >= qthreshold || - (inst->skb && size > skb_tailroom(inst->skb))) { + (inst->skb && size > + skb_tailroom(inst->skb) - sizeof(struct nfgenmsg))) { /* either the queue len is too high or we don't have * enough room in the skb left. flush to userspace. */ UDEBUG("flushing old skb\n"); diff --git a/net/sched/act_api.c b/net/sched/act_api.c index feef366cad5..72cdb0fade2 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -68,7 +68,7 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb, int err = 0, index = -1,i = 0, s_i = 0, n_i = 0; struct rtattr *r ; - read_lock(hinfo->lock); + read_lock_bh(hinfo->lock); s_i = cb->args[0]; @@ -96,7 +96,7 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb, } } done: - read_unlock(hinfo->lock); + read_unlock_bh(hinfo->lock); if (n_i) cb->args[0] += n_i; return n_i; @@ -156,13 +156,13 @@ struct tcf_common *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo) { struct tcf_common *p; - read_lock(hinfo->lock); + read_lock_bh(hinfo->lock); for (p = hinfo->htab[tcf_hash(index, hinfo->hmask)]; p; p = p->tcfc_next) { if (p->tcfc_index == index) break; } - read_unlock(hinfo->lock); + read_unlock_bh(hinfo->lock); return p; } diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 6085be57845..17f6f27e28a 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -56,7 +56,7 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; struct rtattr *r; - read_lock(&police_lock); + read_lock_bh(&police_lock); s_i = cb->args[0]; @@ -85,7 +85,7 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c } } done: - read_unlock(&police_lock); + read_unlock_bh(&police_lock); if (n_i) cb->args[0] += n_i; return n_i; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index e38c2839b25..cbef3bbfc20 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -380,7 +380,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct cbq_sched_data *q = qdisc_priv(sch); int len = skb->len; - int ret; + int uninitialized_var(ret); struct cbq_class *cl = cbq_classify(skb, sch, &ret); #ifdef CONFIG_NET_CLS_ACT diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 95795730985..3a23e30bc79 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -270,7 +270,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch) q->tail = x; } } - if (++sch->q.qlen < q->limit-1) { + if (++sch->q.qlen <= q->limit) { sch->bstats.bytes += skb->len; sch->bstats.packets++; return 0; @@ -306,7 +306,7 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc* sch) q->tail = x; } } - if (++sch->q.qlen < q->limit - 1) { + if (++sch->q.qlen <= q->limit) { sch->qstats.requeues++; return 0; } @@ -391,10 +391,10 @@ static int sfq_change(struct Qdisc *sch, struct rtattr *opt) q->quantum = ctl->quantum ? : psched_mtu(sch->dev); q->perturb_period = ctl->perturb_period*HZ; if (ctl->limit) - q->limit = min_t(u32, ctl->limit, SFQ_DEPTH); + q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 2); qlen = sch->q.qlen; - while (sch->q.qlen >= q->limit-1) + while (sch->q.qlen > q->limit) sfq_drop(sch); qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen); @@ -423,7 +423,7 @@ static int sfq_init(struct Qdisc *sch, struct rtattr *opt) q->dep[i+SFQ_DEPTH].next = i+SFQ_DEPTH; q->dep[i+SFQ_DEPTH].prev = i+SFQ_DEPTH; } - q->limit = SFQ_DEPTH; + q->limit = SFQ_DEPTH - 2; q->max_depth = 0; q->tail = SFQ_DEPTH; if (opt == NULL) { diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 2ad1caf1ea4..9bad8ba0fed 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -99,7 +99,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a /* Initialize the bind addr area. */ sctp_bind_addr_init(&asoc->base.bind_addr, ep->base.bind_addr.port); - rwlock_init(&asoc->base.addr_lock); asoc->state = SCTP_STATE_CLOSED; @@ -937,8 +936,6 @@ struct sctp_transport *sctp_assoc_is_match(struct sctp_association *asoc, { struct sctp_transport *transport; - sctp_read_lock(&asoc->base.addr_lock); - if ((htons(asoc->base.bind_addr.port) == laddr->v4.sin_port) && (htons(asoc->peer.port) == paddr->v4.sin_port)) { transport = sctp_assoc_lookup_paddr(asoc, paddr); @@ -952,7 +949,6 @@ struct sctp_transport *sctp_assoc_is_match(struct sctp_association *asoc, transport = NULL; out: - sctp_read_unlock(&asoc->base.addr_lock); return transport; } @@ -1376,19 +1372,13 @@ int sctp_assoc_set_bind_addr_from_cookie(struct sctp_association *asoc, int sctp_assoc_lookup_laddr(struct sctp_association *asoc, const union sctp_addr *laddr) { - int found; + int found = 0; - sctp_read_lock(&asoc->base.addr_lock); if ((asoc->base.bind_addr.port == ntohs(laddr->v4.sin_port)) && sctp_bind_addr_match(&asoc->base.bind_addr, laddr, - sctp_sk(asoc->base.sk))) { + sctp_sk(asoc->base.sk))) found = 1; - goto out; - } - found = 0; -out: - sctp_read_unlock(&asoc->base.addr_lock); return found; } diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index fdb287a9e2e..d35cbf5aae3 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -163,9 +163,15 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new, addr->a.v4.sin_port = htons(bp->port); addr->use_as_src = use_as_src; + addr->valid = 1; INIT_LIST_HEAD(&addr->list); - list_add_tail(&addr->list, &bp->address_list); + INIT_RCU_HEAD(&addr->rcu); + + /* We always hold a socket lock when calling this function, + * and that acts as a writer synchronizing lock. + */ + list_add_tail_rcu(&addr->list, &bp->address_list); SCTP_DBG_OBJCNT_INC(addr); return 0; @@ -174,23 +180,35 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new, /* Delete an address from the bind address list in the SCTP_bind_addr * structure. */ -int sctp_del_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *del_addr) +int sctp_del_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *del_addr, + void (*rcu_call)(struct rcu_head *head, + void (*func)(struct rcu_head *head))) { - struct list_head *pos, *temp; - struct sctp_sockaddr_entry *addr; + struct sctp_sockaddr_entry *addr, *temp; - list_for_each_safe(pos, temp, &bp->address_list) { - addr = list_entry(pos, struct sctp_sockaddr_entry, list); + /* We hold the socket lock when calling this function, + * and that acts as a writer synchronizing lock. + */ + list_for_each_entry_safe(addr, temp, &bp->address_list, list) { if (sctp_cmp_addr_exact(&addr->a, del_addr)) { /* Found the exact match. */ - list_del(pos); - kfree(addr); - SCTP_DBG_OBJCNT_DEC(addr); - - return 0; + addr->valid = 0; + list_del_rcu(&addr->list); + break; } } + /* Call the rcu callback provided in the args. This function is + * called by both BH packet processing and user side socket option + * processing, but it works on different lists in those 2 contexts. + * Each context provides it's own callback, whether call_rcu_bh() + * or call_rcu(), to make sure that we wait for an appropriate time. + */ + if (addr && !addr->valid) { + rcu_call(&addr->rcu, sctp_local_addr_free); + SCTP_DBG_OBJCNT_DEC(addr); + } + return -EINVAL; } @@ -300,15 +318,20 @@ int sctp_bind_addr_match(struct sctp_bind_addr *bp, struct sctp_sock *opt) { struct sctp_sockaddr_entry *laddr; - struct list_head *pos; - - list_for_each(pos, &bp->address_list) { - laddr = list_entry(pos, struct sctp_sockaddr_entry, list); - if (opt->pf->cmp_addr(&laddr->a, addr, opt)) - return 1; + int match = 0; + + rcu_read_lock(); + list_for_each_entry_rcu(laddr, &bp->address_list, list) { + if (!laddr->valid) + continue; + if (opt->pf->cmp_addr(&laddr->a, addr, opt)) { + match = 1; + break; + } } + rcu_read_unlock(); - return 0; + return match; } /* Find the first address in the bind address list that is not present in @@ -323,18 +346,19 @@ union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp, union sctp_addr *addr; void *addr_buf; struct sctp_af *af; - struct list_head *pos; int i; - list_for_each(pos, &bp->address_list) { - laddr = list_entry(pos, struct sctp_sockaddr_entry, list); - + /* This is only called sctp_send_asconf_del_ip() and we hold + * the socket lock in that code patch, so that address list + * can't change. + */ + list_for_each_entry(laddr, &bp->address_list, list) { addr_buf = (union sctp_addr *)addrs; for (i = 0; i < addrcnt; i++) { addr = (union sctp_addr *)addr_buf; af = sctp_get_af_specific(addr->v4.sin_family); if (!af) - return NULL; + break; if (opt->pf->cmp_addr(&laddr->a, addr, opt)) break; diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 1404a9e2e78..8f485a0d14b 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -92,7 +92,6 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, /* Initialize the bind addr area */ sctp_bind_addr_init(&ep->base.bind_addr, 0); - rwlock_init(&ep->base.addr_lock); /* Remember who we are attached to. */ ep->base.sk = sk; @@ -225,21 +224,14 @@ void sctp_endpoint_put(struct sctp_endpoint *ep) struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *ep, const union sctp_addr *laddr) { - struct sctp_endpoint *retval; + struct sctp_endpoint *retval = NULL; - sctp_read_lock(&ep->base.addr_lock); if (htons(ep->base.bind_addr.port) == laddr->v4.sin_port) { if (sctp_bind_addr_match(&ep->base.bind_addr, laddr, - sctp_sk(ep->base.sk))) { + sctp_sk(ep->base.sk))) retval = ep; - goto out; - } } - retval = NULL; - -out: - sctp_read_unlock(&ep->base.addr_lock); return retval; } @@ -261,9 +253,7 @@ static struct sctp_association *__sctp_endpoint_lookup_assoc( list_for_each(pos, &ep->asocs) { asoc = list_entry(pos, struct sctp_association, asocs); if (rport == asoc->peer.port) { - sctp_read_lock(&asoc->base.addr_lock); *transport = sctp_assoc_lookup_paddr(asoc, paddr); - sctp_read_unlock(&asoc->base.addr_lock); if (*transport) return asoc; @@ -295,20 +285,17 @@ struct sctp_association *sctp_endpoint_lookup_assoc( int sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep, const union sctp_addr *paddr) { - struct list_head *pos; struct sctp_sockaddr_entry *addr; struct sctp_bind_addr *bp; - sctp_read_lock(&ep->base.addr_lock); bp = &ep->base.bind_addr; - list_for_each(pos, &bp->address_list) { - addr = list_entry(pos, struct sctp_sockaddr_entry, list); - if (sctp_has_association(&addr->a, paddr)) { - sctp_read_unlock(&ep->base.addr_lock); + /* This function is called with the socket lock held, + * so the address_list can not change. + */ + list_for_each_entry(addr, &bp->address_list, list) { + if (sctp_has_association(&addr->a, paddr)) return 1; - } } - sctp_read_unlock(&ep->base.addr_lock); return 0; } diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index f8aa23dda1c..670fd2740b8 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -77,13 +77,18 @@ #include <asm/uaccess.h> -/* Event handler for inet6 address addition/deletion events. */ +/* Event handler for inet6 address addition/deletion events. + * The sctp_local_addr_list needs to be protocted by a spin lock since + * multiple notifiers (say IPv4 and IPv6) may be running at the same + * time and thus corrupt the list. + * The reader side is protected with RCU. + */ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, void *ptr) { struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr; - struct sctp_sockaddr_entry *addr; - struct list_head *pos, *temp; + struct sctp_sockaddr_entry *addr = NULL; + struct sctp_sockaddr_entry *temp; switch (ev) { case NETDEV_UP: @@ -94,19 +99,26 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, memcpy(&addr->a.v6.sin6_addr, &ifa->addr, sizeof(struct in6_addr)); addr->a.v6.sin6_scope_id = ifa->idev->dev->ifindex; - list_add_tail(&addr->list, &sctp_local_addr_list); + addr->valid = 1; + spin_lock_bh(&sctp_local_addr_lock); + list_add_tail_rcu(&addr->list, &sctp_local_addr_list); + spin_unlock_bh(&sctp_local_addr_lock); } break; case NETDEV_DOWN: - list_for_each_safe(pos, temp, &sctp_local_addr_list) { - addr = list_entry(pos, struct sctp_sockaddr_entry, list); - if (ipv6_addr_equal(&addr->a.v6.sin6_addr, &ifa->addr)) { - list_del(pos); - kfree(addr); + spin_lock_bh(&sctp_local_addr_lock); + list_for_each_entry_safe(addr, temp, + &sctp_local_addr_list, list) { + if (ipv6_addr_equal(&addr->a.v6.sin6_addr, + &ifa->addr)) { + addr->valid = 0; + list_del_rcu(&addr->list); break; } } - + spin_unlock_bh(&sctp_local_addr_lock); + if (addr && !addr->valid) + call_rcu(&addr->rcu, sctp_local_addr_free); break; } @@ -290,9 +302,7 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc, union sctp_addr *saddr) { struct sctp_bind_addr *bp; - rwlock_t *addr_lock; struct sctp_sockaddr_entry *laddr; - struct list_head *pos; sctp_scope_t scope; union sctp_addr *baddr = NULL; __u8 matchlen = 0; @@ -312,14 +322,14 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc, scope = sctp_scope(daddr); bp = &asoc->base.bind_addr; - addr_lock = &asoc->base.addr_lock; /* Go through the bind address list and find the best source address * that matches the scope of the destination address. */ - sctp_read_lock(addr_lock); - list_for_each(pos, &bp->address_list) { - laddr = list_entry(pos, struct sctp_sockaddr_entry, list); + rcu_read_lock(); + list_for_each_entry_rcu(laddr, &bp->address_list, list) { + if (!laddr->valid) + continue; if ((laddr->use_as_src) && (laddr->a.sa.sa_family == AF_INET6) && (scope <= sctp_scope(&laddr->a))) { @@ -341,7 +351,7 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc, __FUNCTION__, asoc, NIP6(daddr->v6.sin6_addr)); } - sctp_read_unlock(addr_lock); + rcu_read_unlock(); } /* Make a copy of all potential local addresses. */ @@ -367,7 +377,9 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist, addr->a.v6.sin6_port = 0; addr->a.v6.sin6_addr = ifp->addr; addr->a.v6.sin6_scope_id = dev->ifindex; + addr->valid = 1; INIT_LIST_HEAD(&addr->list); + INIT_RCU_HEAD(&addr->rcu); list_add_tail(&addr->list, addrlist); } } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index e98579b788b..3d036cdfae4 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -153,6 +153,9 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist, addr->a.v4.sin_family = AF_INET; addr->a.v4.sin_port = 0; addr->a.v4.sin_addr.s_addr = ifa->ifa_local; + addr->valid = 1; + INIT_LIST_HEAD(&addr->list); + INIT_RCU_HEAD(&addr->rcu); list_add_tail(&addr->list, addrlist); } } @@ -192,16 +195,24 @@ static void sctp_free_local_addr_list(void) } } +void sctp_local_addr_free(struct rcu_head *head) +{ + struct sctp_sockaddr_entry *e = container_of(head, + struct sctp_sockaddr_entry, rcu); + kfree(e); +} + /* Copy the local addresses which are valid for 'scope' into 'bp'. */ int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope, gfp_t gfp, int copy_flags) { struct sctp_sockaddr_entry *addr; int error = 0; - struct list_head *pos, *temp; - list_for_each_safe(pos, temp, &sctp_local_addr_list) { - addr = list_entry(pos, struct sctp_sockaddr_entry, list); + rcu_read_lock(); + list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) { + if (!addr->valid) + continue; if (sctp_in_scope(&addr->a, scope)) { /* Now that the address is in scope, check to see if * the address type is really supported by the local @@ -213,7 +224,7 @@ int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope, (copy_flags & SCTP_ADDR6_ALLOWED) && (copy_flags & SCTP_ADDR6_PEERSUPP)))) { error = sctp_add_bind_addr(bp, &addr->a, 1, - GFP_ATOMIC); + GFP_ATOMIC); if (error) goto end_copy; } @@ -221,6 +232,7 @@ int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope, } end_copy: + rcu_read_unlock(); return error; } @@ -416,9 +428,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, struct rtable *rt; struct flowi fl; struct sctp_bind_addr *bp; - rwlock_t *addr_lock; struct sctp_sockaddr_entry *laddr; - struct list_head *pos; struct dst_entry *dst = NULL; union sctp_addr dst_saddr; @@ -447,23 +457,20 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, goto out; bp = &asoc->base.bind_addr; - addr_lock = &asoc->base.addr_lock; if (dst) { /* Walk through the bind address list and look for a bind * address that matches the source address of the returned dst. */ - sctp_read_lock(addr_lock); - list_for_each(pos, &bp->address_list) { - laddr = list_entry(pos, struct sctp_sockaddr_entry, - list); - if (!laddr->use_as_src) + rcu_read_lock(); + list_for_each_entry_rcu(laddr, &bp->address_list, list) { + if (!laddr->valid || !laddr->use_as_src) continue; sctp_v4_dst_saddr(&dst_saddr, dst, htons(bp->port)); if (sctp_v4_cmp_addr(&dst_saddr, &laddr->a)) goto out_unlock; } - sctp_read_unlock(addr_lock); + rcu_read_unlock(); /* None of the bound addresses match the source address of the * dst. So release it. @@ -475,10 +482,10 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, /* Walk through the bind address list and try to get a dst that * matches a bind address as the source address. */ - sctp_read_lock(addr_lock); - list_for_each(pos, &bp->address_list) { - laddr = list_entry(pos, struct sctp_sockaddr_entry, list); - + rcu_read_lock(); + list_for_each_entry_rcu(laddr, &bp->address_list, list) { + if (!laddr->valid) + continue; if ((laddr->use_as_src) && (AF_INET == laddr->a.sa.sa_family)) { fl.fl4_src = laddr->a.v4.sin_addr.s_addr; @@ -490,7 +497,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, } out_unlock: - sctp_read_unlock(addr_lock); + rcu_read_unlock(); out: if (dst) SCTP_DEBUG_PRINTK("rt_dst:%u.%u.%u.%u, rt_src:%u.%u.%u.%u\n", @@ -600,13 +607,18 @@ static void sctp_v4_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr) seq_printf(seq, "%d.%d.%d.%d ", NIPQUAD(addr->v4.sin_addr)); } -/* Event handler for inet address addition/deletion events. */ +/* Event handler for inet address addition/deletion events. + * The sctp_local_addr_list needs to be protocted by a spin lock since + * multiple notifiers (say IPv4 and IPv6) may be running at the same + * time and thus corrupt the list. + * The reader side is protected with RCU. + */ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, void *ptr) { struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; - struct sctp_sockaddr_entry *addr; - struct list_head *pos, *temp; + struct sctp_sockaddr_entry *addr = NULL; + struct sctp_sockaddr_entry *temp; switch (ev) { case NETDEV_UP: @@ -615,19 +627,25 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, addr->a.v4.sin_family = AF_INET; addr->a.v4.sin_port = 0; addr->a.v4.sin_addr.s_addr = ifa->ifa_local; - list_add_tail(&addr->list, &sctp_local_addr_list); + addr->valid = 1; + spin_lock_bh(&sctp_local_addr_lock); + list_add_tail_rcu(&addr->list, &sctp_local_addr_list); + spin_unlock_bh(&sctp_local_addr_lock); } break; case NETDEV_DOWN: - list_for_each_safe(pos, temp, &sctp_local_addr_list) { - addr = list_entry(pos, struct sctp_sockaddr_entry, list); + spin_lock_bh(&sctp_local_addr_lock); + list_for_each_entry_safe(addr, temp, + &sctp_local_addr_list, list) { if (addr->a.v4.sin_addr.s_addr == ifa->ifa_local) { - list_del(pos); - kfree(addr); + addr->valid = 0; + list_del_rcu(&addr->list); break; } } - + spin_unlock_bh(&sctp_local_addr_lock); + if (addr && !addr->valid) + call_rcu(&addr->rcu, sctp_local_addr_free); break; } @@ -1160,6 +1178,7 @@ SCTP_STATIC __init int sctp_init(void) /* Initialize the local address list. */ INIT_LIST_HEAD(&sctp_local_addr_list); + spin_lock_init(&sctp_local_addr_lock); sctp_get_local_addr_list(); /* Register notifier for inet address additions/deletions. */ @@ -1227,6 +1246,9 @@ SCTP_STATIC __exit void sctp_exit(void) sctp_v6_del_protocol(); inet_del_protocol(&sctp_protocol, IPPROTO_SCTP); + /* Unregister notifier for inet address additions/deletions. */ + unregister_inetaddr_notifier(&sctp_inetaddr_notifier); + /* Free the local address list. */ sctp_free_local_addr_list(); @@ -1240,9 +1262,6 @@ SCTP_STATIC __exit void sctp_exit(void) inet_unregister_protosw(&sctp_stream_protosw); inet_unregister_protosw(&sctp_seqpacket_protosw); - /* Unregister notifier for inet address additions/deletions. */ - unregister_inetaddr_notifier(&sctp_inetaddr_notifier); - sctp_sysctl_unregister(); list_del(&sctp_ipv4_specific.list); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 79856c92452..2e34220d94c 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1531,7 +1531,7 @@ no_hmac: /* Also, add the destination address. */ if (list_empty(&retval->base.bind_addr.address_list)) { sctp_add_bind_addr(&retval->base.bind_addr, &chunk->dest, 1, - GFP_ATOMIC); + GFP_ATOMIC); } retval->next_tsn = retval->c.initial_tsn; @@ -2613,22 +2613,16 @@ static int sctp_asconf_param_success(struct sctp_association *asoc, switch (asconf_param->param_hdr.type) { case SCTP_PARAM_ADD_IP: - sctp_local_bh_disable(); - sctp_write_lock(&asoc->base.addr_lock); - list_for_each(pos, &bp->address_list) { - saddr = list_entry(pos, struct sctp_sockaddr_entry, list); + /* This is always done in BH context with a socket lock + * held, so the list can not change. + */ + list_for_each_entry(saddr, &bp->address_list, list) { if (sctp_cmp_addr_exact(&saddr->a, &addr)) saddr->use_as_src = 1; } - sctp_write_unlock(&asoc->base.addr_lock); - sctp_local_bh_enable(); break; case SCTP_PARAM_DEL_IP: - sctp_local_bh_disable(); - sctp_write_lock(&asoc->base.addr_lock); - retval = sctp_del_bind_addr(bp, &addr); - sctp_write_unlock(&asoc->base.addr_lock); - sctp_local_bh_enable(); + retval = sctp_del_bind_addr(bp, &addr, call_rcu_bh); list_for_each(pos, &asoc->peer.transport_addr_list) { transport = list_entry(pos, struct sctp_transport, transports); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 33354602ae8..772fbfb4bfd 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -367,14 +367,10 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) if (!bp->port) bp->port = inet_sk(sk)->num; - /* Add the address to the bind address list. */ - sctp_local_bh_disable(); - sctp_write_lock(&ep->base.addr_lock); - - /* Use GFP_ATOMIC since BHs are disabled. */ + /* Add the address to the bind address list. + * Use GFP_ATOMIC since BHs will be disabled. + */ ret = sctp_add_bind_addr(bp, addr, 1, GFP_ATOMIC); - sctp_write_unlock(&ep->base.addr_lock); - sctp_local_bh_enable(); /* Copy back into socket for getsockname() use. */ if (!ret) { @@ -544,15 +540,12 @@ static int sctp_send_asconf_add_ip(struct sock *sk, if (i < addrcnt) continue; - /* Use the first address in bind addr list of association as - * Address Parameter of ASCONF CHUNK. + /* Use the first valid address in bind addr list of + * association as Address Parameter of ASCONF CHUNK. */ - sctp_read_lock(&asoc->base.addr_lock); bp = &asoc->base.bind_addr; p = bp->address_list.next; laddr = list_entry(p, struct sctp_sockaddr_entry, list); - sctp_read_unlock(&asoc->base.addr_lock); - chunk = sctp_make_asconf_update_ip(asoc, &laddr->a, addrs, addrcnt, SCTP_PARAM_ADD_IP); if (!chunk) { @@ -567,8 +560,6 @@ static int sctp_send_asconf_add_ip(struct sock *sk, /* Add the new addresses to the bind address list with * use_as_src set to 0. */ - sctp_local_bh_disable(); - sctp_write_lock(&asoc->base.addr_lock); addr_buf = addrs; for (i = 0; i < addrcnt; i++) { addr = (union sctp_addr *)addr_buf; @@ -578,8 +569,6 @@ static int sctp_send_asconf_add_ip(struct sock *sk, GFP_ATOMIC); addr_buf += af->sockaddr_len; } - sctp_write_unlock(&asoc->base.addr_lock); - sctp_local_bh_enable(); } out: @@ -651,13 +640,7 @@ static int sctp_bindx_rem(struct sock *sk, struct sockaddr *addrs, int addrcnt) * socket routing and failover schemes. Refer to comments in * sctp_do_bind(). -daisy */ - sctp_local_bh_disable(); - sctp_write_lock(&ep->base.addr_lock); - - retval = sctp_del_bind_addr(bp, sa_addr); - - sctp_write_unlock(&ep->base.addr_lock); - sctp_local_bh_enable(); + retval = sctp_del_bind_addr(bp, sa_addr, call_rcu); addr_buf += af->sockaddr_len; err_bindx_rem: @@ -748,14 +731,16 @@ static int sctp_send_asconf_del_ip(struct sock *sk, * make sure that we do not delete all the addresses in the * association. */ - sctp_read_lock(&asoc->base.addr_lock); bp = &asoc->base.bind_addr; laddr = sctp_find_unmatch_addr(bp, (union sctp_addr *)addrs, addrcnt, sp); - sctp_read_unlock(&asoc->base.addr_lock); if (!laddr) continue; + /* We do not need RCU protection throughout this loop + * because this is done under a socket lock from the + * setsockopt call. + */ chunk = sctp_make_asconf_update_ip(asoc, laddr, addrs, addrcnt, SCTP_PARAM_DEL_IP); if (!chunk) { @@ -766,23 +751,16 @@ static int sctp_send_asconf_del_ip(struct sock *sk, /* Reset use_as_src flag for the addresses in the bind address * list that are to be deleted. */ - sctp_local_bh_disable(); - sctp_write_lock(&asoc->base.addr_lock); addr_buf = addrs; for (i = 0; i < addrcnt; i++) { laddr = (union sctp_addr *)addr_buf; af = sctp_get_af_specific(laddr->v4.sin_family); - list_for_each(pos1, &bp->address_list) { - saddr = list_entry(pos1, - struct sctp_sockaddr_entry, - list); + list_for_each_entry(saddr, &bp->address_list, list) { if (sctp_cmp_addr_exact(&saddr->a, laddr)) saddr->use_as_src = 0; } addr_buf += af->sockaddr_len; } - sctp_write_unlock(&asoc->base.addr_lock); - sctp_local_bh_enable(); /* Update the route and saddr entries for all the transports * as some of the addresses in the bind address list are @@ -4059,9 +4037,7 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len, sctp_assoc_t id; struct sctp_bind_addr *bp; struct sctp_association *asoc; - struct list_head *pos, *temp; struct sctp_sockaddr_entry *addr; - rwlock_t *addr_lock; int cnt = 0; if (len < sizeof(sctp_assoc_t)) @@ -4078,17 +4054,13 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len, */ if (0 == id) { bp = &sctp_sk(sk)->ep->base.bind_addr; - addr_lock = &sctp_sk(sk)->ep->base.addr_lock; } else { asoc = sctp_id2assoc(sk, id); if (!asoc) return -EINVAL; bp = &asoc->base.bind_addr; - addr_lock = &asoc->base.addr_lock; } - sctp_read_lock(addr_lock); - /* If the endpoint is bound to 0.0.0.0 or ::0, count the valid * addresses from the global local address list. */ @@ -4096,27 +4068,33 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len, addr = list_entry(bp->address_list.next, struct sctp_sockaddr_entry, list); if (sctp_is_any(&addr->a)) { - list_for_each_safe(pos, temp, &sctp_local_addr_list) { - addr = list_entry(pos, - struct sctp_sockaddr_entry, - list); + rcu_read_lock(); + list_for_each_entry_rcu(addr, + &sctp_local_addr_list, list) { + if (!addr->valid) + continue; + if ((PF_INET == sk->sk_family) && (AF_INET6 == addr->a.sa.sa_family)) continue; + cnt++; } + rcu_read_unlock(); } else { cnt = 1; } goto done; } - list_for_each(pos, &bp->address_list) { + /* Protection on the bound address list is not needed, + * since in the socket option context we hold the socket lock, + * so there is no way that the bound address list can change. + */ + list_for_each_entry(addr, &bp->address_list, list) { cnt ++; } - done: - sctp_read_unlock(addr_lock); return cnt; } @@ -4127,14 +4105,16 @@ static int sctp_copy_laddrs_old(struct sock *sk, __u16 port, int max_addrs, void *to, int *bytes_copied) { - struct list_head *pos, *next; struct sctp_sockaddr_entry *addr; union sctp_addr temp; int cnt = 0; int addrlen; - list_for_each_safe(pos, next, &sctp_local_addr_list) { - addr = list_entry(pos, struct sctp_sockaddr_entry, list); + rcu_read_lock(); + list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) { + if (!addr->valid) + continue; + if ((PF_INET == sk->sk_family) && (AF_INET6 == addr->a.sa.sa_family)) continue; @@ -4149,6 +4129,7 @@ static int sctp_copy_laddrs_old(struct sock *sk, __u16 port, cnt ++; if (cnt >= max_addrs) break; } + rcu_read_unlock(); return cnt; } @@ -4156,14 +4137,16 @@ static int sctp_copy_laddrs_old(struct sock *sk, __u16 port, static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to, size_t space_left, int *bytes_copied) { - struct list_head *pos, *next; struct sctp_sockaddr_entry *addr; union sctp_addr temp; int cnt = 0; int addrlen; - list_for_each_safe(pos, next, &sctp_local_addr_list) { - addr = list_entry(pos, struct sctp_sockaddr_entry, list); + rcu_read_lock(); + list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) { + if (!addr->valid) + continue; + if ((PF_INET == sk->sk_family) && (AF_INET6 == addr->a.sa.sa_family)) continue; @@ -4171,8 +4154,10 @@ static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to, sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk), &temp); addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; - if (space_left < addrlen) - return -ENOMEM; + if (space_left < addrlen) { + cnt = -ENOMEM; + break; + } memcpy(to, &temp, addrlen); to += addrlen; @@ -4180,6 +4165,7 @@ static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to, space_left -= addrlen; *bytes_copied += addrlen; } + rcu_read_unlock(); return cnt; } @@ -4192,7 +4178,6 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len, { struct sctp_bind_addr *bp; struct sctp_association *asoc; - struct list_head *pos; int cnt = 0; struct sctp_getaddrs_old getaddrs; struct sctp_sockaddr_entry *addr; @@ -4200,7 +4185,6 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len, union sctp_addr temp; struct sctp_sock *sp = sctp_sk(sk); int addrlen; - rwlock_t *addr_lock; int err = 0; void *addrs; void *buf; @@ -4222,13 +4206,11 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len, */ if (0 == getaddrs.assoc_id) { bp = &sctp_sk(sk)->ep->base.bind_addr; - addr_lock = &sctp_sk(sk)->ep->base.addr_lock; } else { asoc = sctp_id2assoc(sk, getaddrs.assoc_id); if (!asoc) return -EINVAL; bp = &asoc->base.bind_addr; - addr_lock = &asoc->base.addr_lock; } to = getaddrs.addrs; @@ -4242,8 +4224,6 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len, if (!addrs) return -ENOMEM; - sctp_read_lock(addr_lock); - /* If the endpoint is bound to 0.0.0.0 or ::0, get the valid * addresses from the global local address list. */ @@ -4259,8 +4239,11 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len, } buf = addrs; - list_for_each(pos, &bp->address_list) { - addr = list_entry(pos, struct sctp_sockaddr_entry, list); + /* Protection on the bound address list is not needed since + * in the socket option context we hold a socket lock and + * thus the bound address list can't change. + */ + list_for_each_entry(addr, &bp->address_list, list) { memcpy(&temp, &addr->a, sizeof(temp)); sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp); addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; @@ -4272,8 +4255,6 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len, } copy_getaddrs: - sctp_read_unlock(addr_lock); - /* copy the entire address list into the user provided space */ if (copy_to_user(to, addrs, bytes_copied)) { err = -EFAULT; @@ -4295,7 +4276,6 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, { struct sctp_bind_addr *bp; struct sctp_association *asoc; - struct list_head *pos; int cnt = 0; struct sctp_getaddrs getaddrs; struct sctp_sockaddr_entry *addr; @@ -4303,7 +4283,6 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, union sctp_addr temp; struct sctp_sock *sp = sctp_sk(sk); int addrlen; - rwlock_t *addr_lock; int err = 0; size_t space_left; int bytes_copied = 0; @@ -4324,13 +4303,11 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, */ if (0 == getaddrs.assoc_id) { bp = &sctp_sk(sk)->ep->base.bind_addr; - addr_lock = &sctp_sk(sk)->ep->base.addr_lock; } else { asoc = sctp_id2assoc(sk, getaddrs.assoc_id); if (!asoc) return -EINVAL; bp = &asoc->base.bind_addr; - addr_lock = &asoc->base.addr_lock; } to = optval + offsetof(struct sctp_getaddrs,addrs); @@ -4340,8 +4317,6 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, if (!addrs) return -ENOMEM; - sctp_read_lock(addr_lock); - /* If the endpoint is bound to 0.0.0.0 or ::0, get the valid * addresses from the global local address list. */ @@ -4353,21 +4328,24 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, space_left, &bytes_copied); if (cnt < 0) { err = cnt; - goto error_lock; + goto out; } goto copy_getaddrs; } } buf = addrs; - list_for_each(pos, &bp->address_list) { - addr = list_entry(pos, struct sctp_sockaddr_entry, list); + /* Protection on the bound address list is not needed since + * in the socket option context we hold a socket lock and + * thus the bound address list can't change. + */ + list_for_each_entry(addr, &bp->address_list, list) { memcpy(&temp, &addr->a, sizeof(temp)); sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp); addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; if (space_left < addrlen) { err = -ENOMEM; /*fixme: right error?*/ - goto error_lock; + goto out; } memcpy(buf, &temp, addrlen); buf += addrlen; @@ -4377,8 +4355,6 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, } copy_getaddrs: - sctp_read_unlock(addr_lock); - if (copy_to_user(to, addrs, bytes_copied)) { err = -EFAULT; goto out; @@ -4389,12 +4365,6 @@ copy_getaddrs: } if (put_user(bytes_copied, optlen)) err = -EFAULT; - - goto out; - -error_lock: - sctp_read_unlock(addr_lock); - out: kfree(addrs); return err; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 12ff5da8160..036ab520df2 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1110,7 +1110,8 @@ svc_tcp_accept(struct svc_sock *svsk) serv->sv_name); printk(KERN_NOTICE "%s: last TCP connect from %s\n", - serv->sv_name, buf); + serv->sv_name, __svc_print_addr(sin, + buf, sizeof(buf))); } /* * Always select the oldest socket. It's not fair, @@ -1592,7 +1593,7 @@ svc_age_temp_sockets(unsigned long closure) if (!test_and_set_bit(SK_OLD, &svsk->sk_flags)) continue; - if (atomic_read(&svsk->sk_inuse) || test_bit(SK_BUSY, &svsk->sk_flags)) + if (atomic_read(&svsk->sk_inuse) > 1 || test_bit(SK_BUSY, &svsk->sk_flags)) continue; atomic_inc(&svsk->sk_inuse); list_move(le, &to_be_aged); |