diff options
author | Rusty Russell <rusty@rustcorp.com.au> | 2008-12-30 08:02:35 +1030 |
---|---|---|
committer | Rusty Russell <rusty@rustcorp.com.au> | 2008-12-30 08:02:35 +1030 |
commit | 33edcf133ba93ecba2e4b6472e97b689895d805c (patch) | |
tree | 327d7a20acef64005e7c5ccbfa1265be28aeb6ac /net | |
parent | be4d638c1597580ed2294d899d9f1a2cd10e462c (diff) | |
parent | 3c92ec8ae91ecf59d88c798301833d7cf83f2179 (diff) |
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'net')
444 files changed, 15010 insertions, 12396 deletions
diff --git a/net/802/fddi.c b/net/802/fddi.c index 0549317b935..f1611a1e06a 100644 --- a/net/802/fddi.c +++ b/net/802/fddi.c @@ -167,23 +167,27 @@ __be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev) EXPORT_SYMBOL(fddi_type_trans); -static int fddi_change_mtu(struct net_device *dev, int new_mtu) +int fddi_change_mtu(struct net_device *dev, int new_mtu) { if ((new_mtu < FDDI_K_SNAP_HLEN) || (new_mtu > FDDI_K_SNAP_DLEN)) return(-EINVAL); dev->mtu = new_mtu; return(0); } +EXPORT_SYMBOL(fddi_change_mtu); static const struct header_ops fddi_header_ops = { .create = fddi_header, .rebuild = fddi_rebuild_header, }; + static void fddi_setup(struct net_device *dev) { - dev->change_mtu = fddi_change_mtu; dev->header_ops = &fddi_header_ops; +#ifdef CONFIG_COMPAT_NET_DEV_OPS + dev->change_mtu = fddi_change_mtu, +#endif dev->type = ARPHRD_FDDI; dev->hard_header_len = FDDI_K_SNAP_HLEN+3; /* Assume 802.2 SNAP hdr len + 3 pad bytes */ diff --git a/net/802/hippi.c b/net/802/hippi.c index e35dc1e0915..313b9ebf92e 100644 --- a/net/802/hippi.c +++ b/net/802/hippi.c @@ -144,7 +144,7 @@ __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev) EXPORT_SYMBOL(hippi_type_trans); -static int hippi_change_mtu(struct net_device *dev, int new_mtu) +int hippi_change_mtu(struct net_device *dev, int new_mtu) { /* * HIPPI's got these nice large MTUs. @@ -154,12 +154,13 @@ static int hippi_change_mtu(struct net_device *dev, int new_mtu) dev->mtu = new_mtu; return(0); } +EXPORT_SYMBOL(hippi_change_mtu); /* * For HIPPI we will actually use the lower 4 bytes of the hardware * address as the I-FIELD rather than the actual hardware address. */ -static int hippi_mac_addr(struct net_device *dev, void *p) +int hippi_mac_addr(struct net_device *dev, void *p) { struct sockaddr *addr = p; if (netif_running(dev)) @@ -167,8 +168,9 @@ static int hippi_mac_addr(struct net_device *dev, void *p) memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); return 0; } +EXPORT_SYMBOL(hippi_mac_addr); -static int hippi_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p) +int hippi_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p) { /* Never send broadcast/multicast ARP messages */ p->mcast_probes = 0; @@ -181,6 +183,7 @@ static int hippi_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p) p->ucast_probes = 0; return 0; } +EXPORT_SYMBOL(hippi_neigh_setup_dev); static const struct header_ops hippi_header_ops = { .create = hippi_header, @@ -190,11 +193,12 @@ static const struct header_ops hippi_header_ops = { static void hippi_setup(struct net_device *dev) { - dev->set_multicast_list = NULL; +#ifdef CONFIG_COMPAT_NET_DEV_OPS dev->change_mtu = hippi_change_mtu; - dev->header_ops = &hippi_header_ops; dev->set_mac_address = hippi_mac_addr; dev->neigh_setup = hippi_neigh_setup_dev; +#endif + dev->header_ops = &hippi_header_ops; /* * We don't support HIPPI `ARP' for the time being, and probably diff --git a/net/802/tr.c b/net/802/tr.c index 18c66475d8c..158150fee46 100644 --- a/net/802/tr.c +++ b/net/802/tr.c @@ -285,10 +285,7 @@ void tr_source_route(struct sk_buff *skb,struct trh_hdr *trh, if(entry) { #if TR_SR_DEBUG -{ -DECLARE_MAC_BUF(mac); -printk("source routing for %s\n",print_mac(mac, trh->daddr)); -} +printk("source routing for %pM\n", trh->daddr); #endif if(!entry->local_ring && (ntohs(entry->rcf) & TR_RCF_LEN_MASK) >> 8) { @@ -370,9 +367,8 @@ static void tr_add_rif_info(struct trh_hdr *trh, struct net_device *dev) if(entry==NULL) { #if TR_SR_DEBUG - DECLARE_MAC_BUF(mac); - printk("adding rif_entry: addr:%s rcf:%04X\n", - print_mac(mac, trh->saddr), ntohs(trh->rcf)); + printk("adding rif_entry: addr:%pM rcf:%04X\n", + trh->saddr, ntohs(trh->rcf)); #endif /* * Allocate our new entry. A failure to allocate loses @@ -417,11 +413,8 @@ static void tr_add_rif_info(struct trh_hdr *trh, struct net_device *dev) !(trh->rcf & htons(TR_RCF_BROADCAST_MASK))) { #if TR_SR_DEBUG -{ -DECLARE_MAC_BUF(mac); -printk("updating rif_entry: addr:%s rcf:%04X\n", - print_mac(mac, trh->saddr), ntohs(trh->rcf)); -} +printk("updating rif_entry: addr:%pM rcf:%04X\n", + trh->saddr, ntohs(trh->rcf)); #endif entry->rcf = trh->rcf & htons((unsigned short)~TR_RCF_BROADCAST_MASK); memcpy(&(entry->rseg[0]),&(trh->rseg[0]),8*sizeof(unsigned short)); @@ -532,7 +525,6 @@ static int rif_seq_show(struct seq_file *seq, void *v) { int j, rcf_len, segment, brdgnmb; struct rif_cache *entry = v; - DECLARE_MAC_BUF(mac); if (v == SEQ_START_TOKEN) seq_puts(seq, @@ -542,9 +534,9 @@ static int rif_seq_show(struct seq_file *seq, void *v) long ttl = (long) (entry->last_used + sysctl_tr_rif_timeout) - (long) jiffies; - seq_printf(seq, "%s %s %7li ", + seq_printf(seq, "%s %pM %7li ", dev?dev->name:"?", - print_mac(mac, entry->addr), + entry->addr, ttl/HZ); if (entry->local_ring) @@ -643,7 +635,7 @@ static struct ctl_table tr_table[] = { .data = &sysctl_tr_rif_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { 0 }, }; diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index f0e335aa20d..41e8f65bd3f 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -46,10 +46,10 @@ int vlan_net_id; /* Our listing of VLAN group(s) */ static struct hlist_head vlan_group_hash[VLAN_GRP_HASH_SIZE]; -static char vlan_fullname[] = "802.1Q VLAN Support"; -static char vlan_version[] = DRV_VERSION; -static char vlan_copyright[] = "Ben Greear <greearb@candelatech.com>"; -static char vlan_buggyright[] = "David S. Miller <davem@redhat.com>"; +const char vlan_fullname[] = "802.1Q VLAN Support"; +const char vlan_version[] = DRV_VERSION; +static const char vlan_copyright[] = "Ben Greear <greearb@candelatech.com>"; +static const char vlan_buggyright[] = "David S. Miller <davem@redhat.com>"; static struct packet_type vlan_packet_type = { .type = __constant_htons(ETH_P_8021Q), @@ -144,6 +144,7 @@ void unregister_vlan_dev(struct net_device *dev) { struct vlan_dev_info *vlan = vlan_dev_info(dev); struct net_device *real_dev = vlan->real_dev; + const struct net_device_ops *ops = real_dev->netdev_ops; struct vlan_group *grp; u16 vlan_id = vlan->vlan_id; @@ -156,7 +157,7 @@ void unregister_vlan_dev(struct net_device *dev) * HW accelerating devices or SW vlan input packet processing. */ if (real_dev->features & NETIF_F_HW_VLAN_FILTER) - real_dev->vlan_rx_kill_vid(real_dev, vlan_id); + ops->ndo_vlan_rx_kill_vid(real_dev, vlan_id); vlan_group_set_device(grp, vlan_id, NULL); grp->nr_vlans--; @@ -170,7 +171,7 @@ void unregister_vlan_dev(struct net_device *dev) vlan_gvrp_uninit_applicant(real_dev); if (real_dev->features & NETIF_F_HW_VLAN_RX) - real_dev->vlan_rx_register(real_dev, NULL); + ops->ndo_vlan_rx_register(real_dev, NULL); hlist_del_rcu(&grp->hlist); @@ -205,21 +206,21 @@ static void vlan_transfer_operstate(const struct net_device *dev, int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id) { - char *name = real_dev->name; + const char *name = real_dev->name; + const struct net_device_ops *ops = real_dev->netdev_ops; if (real_dev->features & NETIF_F_VLAN_CHALLENGED) { pr_info("8021q: VLANs not supported on %s\n", name); return -EOPNOTSUPP; } - if ((real_dev->features & NETIF_F_HW_VLAN_RX) && - !real_dev->vlan_rx_register) { + if ((real_dev->features & NETIF_F_HW_VLAN_RX) && !ops->ndo_vlan_rx_register) { pr_info("8021q: device %s has buggy VLAN hw accel\n", name); return -EOPNOTSUPP; } if ((real_dev->features & NETIF_F_HW_VLAN_FILTER) && - (!real_dev->vlan_rx_add_vid || !real_dev->vlan_rx_kill_vid)) { + (!ops->ndo_vlan_rx_add_vid || !ops->ndo_vlan_rx_kill_vid)) { pr_info("8021q: Device %s has buggy VLAN hw accel\n", name); return -EOPNOTSUPP; } @@ -240,6 +241,7 @@ int register_vlan_dev(struct net_device *dev) { struct vlan_dev_info *vlan = vlan_dev_info(dev); struct net_device *real_dev = vlan->real_dev; + const struct net_device_ops *ops = real_dev->netdev_ops; u16 vlan_id = vlan->vlan_id; struct vlan_group *grp, *ngrp = NULL; int err; @@ -275,9 +277,9 @@ int register_vlan_dev(struct net_device *dev) grp->nr_vlans++; if (ngrp && real_dev->features & NETIF_F_HW_VLAN_RX) - real_dev->vlan_rx_register(real_dev, ngrp); + ops->ndo_vlan_rx_register(real_dev, ngrp); if (real_dev->features & NETIF_F_HW_VLAN_FILTER) - real_dev->vlan_rx_add_vid(real_dev, vlan_id); + ops->ndo_vlan_rx_add_vid(real_dev, vlan_id); return 0; diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index a6603a4d917..82570bc2a18 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -108,8 +108,10 @@ static inline int vlan_gvrp_init(void) { return 0; } static inline void vlan_gvrp_uninit(void) {} #endif -int vlan_netlink_init(void); -void vlan_netlink_fini(void); +extern const char vlan_fullname[]; +extern const char vlan_version[]; +extern int vlan_netlink_init(void); +extern void vlan_netlink_fini(void); extern struct rtnl_link_ops vlan_link_ops; diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 68ced4bf158..dd86a1dc4cd 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -47,8 +47,6 @@ int vlan_hwaccel_do_receive(struct sk_buff *skb) skb->priority = vlan_get_ingress_priority(dev, skb->vlan_tci); skb->vlan_tci = 0; - dev->last_rx = jiffies; - stats = &dev->stats; stats->rx_packets++; stats->rx_bytes += skb->len; diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 8883e9c8a22..89a3bbdfca3 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -163,8 +163,6 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, goto err_unlock; } - skb->dev->last_rx = jiffies; - stats = &skb->dev->stats; stats->rx_packets++; stats->rx_bytes += skb->len; @@ -526,6 +524,7 @@ out: static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct net_device *real_dev = vlan_dev_info(dev)->real_dev; + const struct net_device_ops *ops = real_dev->netdev_ops; struct ifreq ifrr; int err = -EOPNOTSUPP; @@ -536,8 +535,8 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: - if (real_dev->do_ioctl && netif_device_present(real_dev)) - err = real_dev->do_ioctl(real_dev, &ifrr, cmd); + if (netif_device_present(real_dev) && ops->ndo_do_ioctl) + err = ops->ndo_do_ioctl(real_dev, &ifrr, cmd); break; } @@ -594,6 +593,8 @@ static const struct header_ops vlan_header_ops = { .parse = eth_header_parse, }; +static const struct net_device_ops vlan_netdev_ops, vlan_netdev_accel_ops; + static int vlan_dev_init(struct net_device *dev) { struct net_device *real_dev = vlan_dev_info(dev)->real_dev; @@ -620,11 +621,11 @@ static int vlan_dev_init(struct net_device *dev) if (real_dev->features & NETIF_F_HW_VLAN_TX) { dev->header_ops = real_dev->header_ops; dev->hard_header_len = real_dev->hard_header_len; - dev->hard_start_xmit = vlan_dev_hwaccel_hard_start_xmit; + dev->netdev_ops = &vlan_netdev_accel_ops; } else { dev->header_ops = &vlan_header_ops; dev->hard_header_len = real_dev->hard_header_len + VLAN_HLEN; - dev->hard_start_xmit = vlan_dev_hard_start_xmit; + dev->netdev_ops = &vlan_netdev_ops; } if (is_vlan_dev(real_dev)) @@ -648,6 +649,26 @@ static void vlan_dev_uninit(struct net_device *dev) } } +static int vlan_ethtool_get_settings(struct net_device *dev, + struct ethtool_cmd *cmd) +{ + const struct vlan_dev_info *vlan = vlan_dev_info(dev); + struct net_device *real_dev = vlan->real_dev; + + if (!real_dev->ethtool_ops->get_settings) + return -EOPNOTSUPP; + + return real_dev->ethtool_ops->get_settings(real_dev, cmd); +} + +static void vlan_ethtool_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *info) +{ + strcpy(info->driver, vlan_fullname); + strcpy(info->version, vlan_version); + strcpy(info->fw_version, "N/A"); +} + static u32 vlan_ethtool_get_rx_csum(struct net_device *dev) { const struct vlan_dev_info *vlan = vlan_dev_info(dev); @@ -672,11 +693,43 @@ static u32 vlan_ethtool_get_flags(struct net_device *dev) } static const struct ethtool_ops vlan_ethtool_ops = { + .get_settings = vlan_ethtool_get_settings, + .get_drvinfo = vlan_ethtool_get_drvinfo, .get_link = ethtool_op_get_link, .get_rx_csum = vlan_ethtool_get_rx_csum, .get_flags = vlan_ethtool_get_flags, }; +static const struct net_device_ops vlan_netdev_ops = { + .ndo_change_mtu = vlan_dev_change_mtu, + .ndo_init = vlan_dev_init, + .ndo_uninit = vlan_dev_uninit, + .ndo_open = vlan_dev_open, + .ndo_stop = vlan_dev_stop, + .ndo_start_xmit = vlan_dev_hard_start_xmit, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_mac_address = vlan_dev_set_mac_address, + .ndo_set_rx_mode = vlan_dev_set_rx_mode, + .ndo_set_multicast_list = vlan_dev_set_rx_mode, + .ndo_change_rx_flags = vlan_dev_change_rx_flags, + .ndo_do_ioctl = vlan_dev_ioctl, +}; + +static const struct net_device_ops vlan_netdev_accel_ops = { + .ndo_change_mtu = vlan_dev_change_mtu, + .ndo_init = vlan_dev_init, + .ndo_uninit = vlan_dev_uninit, + .ndo_open = vlan_dev_open, + .ndo_stop = vlan_dev_stop, + .ndo_start_xmit = vlan_dev_hwaccel_hard_start_xmit, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_mac_address = vlan_dev_set_mac_address, + .ndo_set_rx_mode = vlan_dev_set_rx_mode, + .ndo_set_multicast_list = vlan_dev_set_rx_mode, + .ndo_change_rx_flags = vlan_dev_change_rx_flags, + .ndo_do_ioctl = vlan_dev_ioctl, +}; + void vlan_setup(struct net_device *dev) { ether_setup(dev); @@ -684,16 +737,7 @@ void vlan_setup(struct net_device *dev) dev->priv_flags |= IFF_802_1Q_VLAN; dev->tx_queue_len = 0; - dev->change_mtu = vlan_dev_change_mtu; - dev->init = vlan_dev_init; - dev->uninit = vlan_dev_uninit; - dev->open = vlan_dev_open; - dev->stop = vlan_dev_stop; - dev->set_mac_address = vlan_dev_set_mac_address; - dev->set_rx_mode = vlan_dev_set_rx_mode; - dev->set_multicast_list = vlan_dev_set_rx_mode; - dev->change_rx_flags = vlan_dev_change_rx_flags; - dev->do_ioctl = vlan_dev_ioctl; + dev->netdev_ops = &vlan_netdev_ops; dev->destructor = free_netdev; dev->ethtool_ops = &vlan_ethtool_ops; diff --git a/net/9p/client.c b/net/9p/client.c index 4b529454616..821f1ec0b2c 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -627,7 +627,7 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt) memset(&fid->qid, 0, sizeof(struct p9_qid)); fid->mode = -1; fid->rdir_fpos = 0; - fid->uid = current->fsuid; + fid->uid = current_fsuid(); fid->clnt = clnt; fid->aux = NULL; diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 2f1fe5fc122..7fa0eb20b2f 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -528,8 +528,6 @@ static void rdma_close(struct p9_client *client) /** * alloc_rdma - Allocate and initialize the rdma transport structure - * @msize: MTU - * @dotu: Extension attribute * @opts: Mount options structure */ static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts) diff --git a/net/Kconfig b/net/Kconfig index d789d79551a..6ec2cce7c16 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -27,11 +27,14 @@ menu "Networking options" config NET_NS bool "Network namespace support" default n - depends on EXPERIMENTAL && !SYSFS && NAMESPACES + depends on EXPERIMENTAL && NAMESPACES help Allow user space to create what appear to be multiple instances of the network stack. +config COMPAT_NET_DEV_OPS + def_bool y + source "net/packet/Kconfig" source "net/unix/Kconfig" source "net/xfrm/Kconfig" @@ -191,6 +194,7 @@ source "net/lapb/Kconfig" source "net/econet/Kconfig" source "net/wanrouter/Kconfig" source "net/sched/Kconfig" +source "net/dcb/Kconfig" menu "Network testing" @@ -247,7 +251,6 @@ if WIRELESS source "net/wireless/Kconfig" source "net/mac80211/Kconfig" -source "net/ieee80211/Kconfig" endif # WIRELESS diff --git a/net/Makefile b/net/Makefile index 27d1f10dc0e..ba4460432b7 100644 --- a/net/Makefile +++ b/net/Makefile @@ -51,12 +51,14 @@ obj-$(CONFIG_IP_DCCP) += dccp/ obj-$(CONFIG_IP_SCTP) += sctp/ obj-y += wireless/ obj-$(CONFIG_MAC80211) += mac80211/ -obj-$(CONFIG_IEEE80211) += ieee80211/ obj-$(CONFIG_TIPC) += tipc/ obj-$(CONFIG_NETLABEL) += netlabel/ obj-$(CONFIG_IUCV) += iucv/ obj-$(CONFIG_RFKILL) += rfkill/ obj-$(CONFIG_NET_9P) += 9p/ +ifneq ($(CONFIG_DCB),) +obj-y += dcb/ +endif ifeq ($(CONFIG_NET),y) obj-$(CONFIG_SYSCTL) += sysctl_net.o diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index b25c1e909d1..b03ff58e930 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -995,7 +995,6 @@ static int aarp_seq_show(struct seq_file *seq, void *v) struct aarp_iter_state *iter = seq->private; struct aarp_entry *entry = v; unsigned long now = jiffies; - DECLARE_MAC_BUF(mac); if (v == SEQ_START_TOKEN) seq_puts(seq, @@ -1006,7 +1005,7 @@ static int aarp_seq_show(struct seq_file *seq, void *v) ntohs(entry->target_addr.s_net), (unsigned int) entry->target_addr.s_node, entry->dev ? entry->dev->name : "????"); - seq_printf(seq, "%s", print_mac(mac, entry->hwaddr)); + seq_printf(seq, "%pM", entry->hwaddr); seq_printf(seq, " %8s", dt2str((long)entry->expires_at - (long)now)); if (iter->table == unresolved) diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index d3134e7e6ee..5abce07fb50 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -815,9 +815,6 @@ static int atif_ioctl(int cmd, void __user *arg) return -EPERM; if (sa->sat_family != AF_APPLETALK) return -EINVAL; - if (!atif) - return -EADDRNOTAVAIL; - /* * for now, we only support proxy AARP on ELAP; * we should be able to do it for LocalTalk, too. @@ -1284,7 +1281,7 @@ static int handle_ip_over_ddp(struct sk_buff *skb) skb->dev = dev; skb_reset_transport_header(skb); - stats = dev->priv; + stats = netdev_priv(dev); stats->rx_packets++; stats->rx_bytes += skb->len + 13; netif_rx(skb); /* Send the SKB up to a higher place. */ diff --git a/net/appletalk/sysctl_net_atalk.c b/net/appletalk/sysctl_net_atalk.c index 621805dfa2f..8d237b15183 100644 --- a/net/appletalk/sysctl_net_atalk.c +++ b/net/appletalk/sysctl_net_atalk.c @@ -17,8 +17,8 @@ static struct ctl_table atalk_table[] = { .data = &sysctl_aarp_expiry_time, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_ATALK_AARP_TICK_TIME, @@ -26,8 +26,8 @@ static struct ctl_table atalk_table[] = { .data = &sysctl_aarp_tick_time, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_ATALK_AARP_RETRANSMIT_LIMIT, @@ -35,7 +35,7 @@ static struct ctl_table atalk_table[] = { .data = &sysctl_aarp_retransmit_limit, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_ATALK_AARP_RESOLVE_TIME, @@ -43,8 +43,8 @@ static struct ctl_table atalk_table[] = { .data = &sysctl_aarp_resolve_time, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { 0 }, }; diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c index 1b88311f213..b5674dc2083 100644 --- a/net/atm/atm_sysfs.c +++ b/net/atm/atm_sysfs.c @@ -149,7 +149,7 @@ int atm_register_sysfs(struct atm_dev *adev) cdev->class = &atm_class; dev_set_drvdata(cdev, adev); - snprintf(cdev->bus_id, BUS_ID_SIZE, "%s%d", adev->type, adev->number); + dev_set_name(cdev, "%s%d", adev->type, adev->number); err = device_register(cdev); if (err < 0) return err; diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 280de481edc..ea9438fc685 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -101,7 +101,7 @@ static LIST_HEAD(br2684_devs); static inline struct br2684_dev *BRPRIV(const struct net_device *net_dev) { - return (struct br2684_dev *)net_dev->priv; + return (struct br2684_dev *)netdev_priv(net_dev); } static inline struct net_device *list_entry_brdev(const struct list_head *le) @@ -698,12 +698,11 @@ static int br2684_seq_show(struct seq_file *seq, void *v) br2684_devs); const struct net_device *net_dev = brdev->net_dev; const struct br2684_vcc *brvcc; - DECLARE_MAC_BUF(mac); - seq_printf(seq, "dev %.16s: num=%d, mac=%s (%s)\n", + seq_printf(seq, "dev %.16s: num=%d, mac=%pM (%s)\n", net_dev->name, brdev->number, - print_mac(mac, net_dev->dev_addr), + net_dev->dev_addr, brdev->mac_was_set ? "set" : "auto"); list_for_each_entry(brvcc, &brdev->brvccs, brvccs) { diff --git a/net/atm/clip.c b/net/atm/clip.c index 5b5b96344ce..2d33a83be79 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -822,8 +822,8 @@ static void atmarp_info(struct seq_file *seq, struct net_device *dev, seq_printf(seq, "%-6s%-4s%-4s%5ld ", dev->name, svc ? "SVC" : "PVC", llc ? "LLC" : "NULL", exp); - off = scnprintf(buf, sizeof(buf) - 1, "%d.%d.%d.%d", - NIPQUAD(entry->ip)); + off = scnprintf(buf, sizeof(buf) - 1, "%pI4", + &entry->ip); while (off < 16) buf[off++] = ' '; buf[off] = '\0'; diff --git a/net/atm/common.h b/net/atm/common.h index 16f32c1fa1c..92e2981f479 100644 --- a/net/atm/common.h +++ b/net/atm/common.h @@ -19,6 +19,7 @@ int vcc_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t total_len); unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait); int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); +int vcc_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int vcc_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen); int vcc_getsockopt(struct socket *sock, int level, int optname, diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c index 7afd8e7754f..76ed3c8d26e 100644 --- a/net/atm/ioctl.c +++ b/net/atm/ioctl.c @@ -19,6 +19,7 @@ #include <linux/atmlec.h> #include <linux/mutex.h> #include <asm/ioctls.h> +#include <net/compat.h> #include "resources.h" #include "signaling.h" /* for WAITING and sigd_attach */ @@ -46,7 +47,7 @@ void deregister_atm_ioctl(struct atm_ioctl *ioctl) EXPORT_SYMBOL(register_atm_ioctl); EXPORT_SYMBOL(deregister_atm_ioctl); -int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +static int do_vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg, int compat) { struct sock *sk = sock->sk; struct atm_vcc *vcc; @@ -80,13 +81,25 @@ int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) goto done; } case SIOCGSTAMP: /* borrowed from IP */ - error = sock_get_timestamp(sk, argp); +#ifdef CONFIG_COMPAT + if (compat) + error = compat_sock_get_timestamp(sk, argp); + else +#endif + error = sock_get_timestamp(sk, argp); goto done; case SIOCGSTAMPNS: /* borrowed from IP */ - error = sock_get_timestampns(sk, argp); +#ifdef CONFIG_COMPAT + if (compat) + error = compat_sock_get_timestampns(sk, argp); + else +#endif + error = sock_get_timestampns(sk, argp); goto done; case ATM_SETSC: - printk(KERN_WARNING "ATM_SETSC is obsolete\n"); + if (net_ratelimit()) + printk(KERN_WARNING "ATM_SETSC is obsolete; used by %s:%d\n", + current->comm, task_pid_nr(current)); error = 0; goto done; case ATMSIGD_CTRL: @@ -99,12 +112,23 @@ int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) * info uses kernel pointers as opaque references, * so the holder of the file descriptor can scribble * on the kernel... so we should make sure that we - * have the same privledges that /proc/kcore needs + * have the same privileges that /proc/kcore needs */ if (!capable(CAP_SYS_RAWIO)) { error = -EPERM; goto done; } +#ifdef CONFIG_COMPAT + /* WTF? I don't even want to _think_ about making this + work for 32-bit userspace. TBH I don't really want + to think about it at all. dwmw2. */ + if (compat) { + if (net_ratelimit()) + printk(KERN_WARNING "32-bit task cannot be atmsigd\n"); + error = -EINVAL; + goto done; + } +#endif error = sigd_attach(vcc); if (!error) sock->state = SS_CONNECTED; @@ -155,8 +179,21 @@ int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) if (error != -ENOIOCTLCMD) goto done; - error = atm_dev_ioctl(cmd, argp); + error = atm_dev_ioctl(cmd, argp, compat); done: return error; } + + +int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + return do_vcc_ioctl(sock, cmd, arg, 0); +} + +#ifdef CONFIG_COMPAT +int vcc_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + return do_vcc_ioctl(sock, cmd, arg, 1); +} +#endif diff --git a/net/atm/lec.c b/net/atm/lec.c index 8f701cde594..e5e301550e8 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -152,7 +152,7 @@ static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev) buff += 4; mesg->content.normal.flag = *buff & 0x01; /* 0x01 is topology change */ - priv = (struct lec_priv *)dev->priv; + priv = netdev_priv(dev); atm_force_charge(priv->lecd, skb2->truesize); sk = sk_atm(priv->lecd); skb_queue_tail(&sk->sk_receive_queue, skb2); @@ -218,7 +218,7 @@ static unsigned char *get_tr_dst(unsigned char *packet, unsigned char *rdesc) static int lec_open(struct net_device *dev) { - struct lec_priv *priv = (struct lec_priv *)dev->priv; + struct lec_priv *priv = netdev_priv(dev); netif_start_queue(dev); memset(&priv->stats, 0, sizeof(struct net_device_stats)); @@ -252,7 +252,7 @@ static void lec_tx_timeout(struct net_device *dev) static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct sk_buff *skb2; - struct lec_priv *priv = (struct lec_priv *)dev->priv; + struct lec_priv *priv = netdev_priv(dev); struct lecdatahdr_8023 *lec_h; struct atm_vcc *vcc; struct lec_arp_table *entry; @@ -373,19 +373,13 @@ static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev) if (entry && (entry->tx_wait.qlen < LEC_UNRES_QUE_LEN)) { pr_debug("%s:lec_start_xmit: queuing packet, ", dev->name); - pr_debug("MAC address " MAC_FMT "\n", - lec_h->h_dest[0], lec_h->h_dest[1], - lec_h->h_dest[2], lec_h->h_dest[3], - lec_h->h_dest[4], lec_h->h_dest[5]); + pr_debug("MAC address %pM\n", lec_h->h_dest); skb_queue_tail(&entry->tx_wait, skb); } else { pr_debug ("%s:lec_start_xmit: tx queue full or no arp entry, dropping, ", dev->name); - pr_debug("MAC address " MAC_FMT "\n", - lec_h->h_dest[0], lec_h->h_dest[1], - lec_h->h_dest[2], lec_h->h_dest[3], - lec_h->h_dest[4], lec_h->h_dest[5]); + pr_debug("MAC address %pM\n", lec_h->h_dest); priv->stats.tx_dropped++; dev_kfree_skb(skb); } @@ -397,10 +391,7 @@ static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev) while (entry && (skb2 = skb_dequeue(&entry->tx_wait))) { pr_debug("lec.c: emptying tx queue, "); - pr_debug("MAC address " MAC_FMT "\n", - lec_h->h_dest[0], lec_h->h_dest[1], - lec_h->h_dest[2], lec_h->h_dest[3], - lec_h->h_dest[4], lec_h->h_dest[5]); + pr_debug("MAC address %pM\n", lec_h->h_dest); lec_send(vcc, skb2, priv); } @@ -442,14 +433,14 @@ static int lec_close(struct net_device *dev) */ static struct net_device_stats *lec_get_stats(struct net_device *dev) { - return &((struct lec_priv *)dev->priv)->stats; + return &((struct lec_priv *)netdev_priv(dev))->stats; } static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) { unsigned long flags; struct net_device *dev = (struct net_device *)vcc->proto_data; - struct lec_priv *priv = (struct lec_priv *)dev->priv; + struct lec_priv *priv = netdev_priv(dev); struct atmlec_msg *mesg; struct lec_arp_table *entry; int i; @@ -539,15 +530,8 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) { struct net_bridge_fdb_entry *f; - pr_debug - ("%s: bridge zeppelin asks about " MAC_FMT "\n", - dev->name, - mesg->content.proxy.mac_addr[0], - mesg->content.proxy.mac_addr[1], - mesg->content.proxy.mac_addr[2], - mesg->content.proxy.mac_addr[3], - mesg->content.proxy.mac_addr[4], - mesg->content.proxy.mac_addr[5]); + pr_debug("%s: bridge zeppelin asks about %pM\n", + dev->name, mesg->content.proxy.mac_addr); if (br_fdb_get_hook == NULL || dev->br_port == NULL) break; @@ -596,7 +580,7 @@ static void lec_atm_close(struct atm_vcc *vcc) { struct sk_buff *skb; struct net_device *dev = (struct net_device *)vcc->proto_data; - struct lec_priv *priv = (struct lec_priv *)dev->priv; + struct lec_priv *priv = netdev_priv(dev); priv->lecd = NULL; /* Do something needful? */ @@ -727,7 +711,7 @@ static void lec_push(struct atm_vcc *vcc, struct sk_buff *skb) { unsigned long flags; struct net_device *dev = (struct net_device *)vcc->proto_data; - struct lec_priv *priv = (struct lec_priv *)dev->priv; + struct lec_priv *priv = netdev_priv(dev); #if DUMP_PACKETS >0 int i = 0; @@ -874,7 +858,7 @@ static int lec_vcc_attach(struct atm_vcc *vcc, void __user *arg) vpriv->old_pop = vcc->pop; vcc->user_back = vpriv; vcc->pop = lec_pop; - lec_vcc_added(dev_lec[ioc_data.dev_num]->priv, + lec_vcc_added(netdev_priv(dev_lec[ioc_data.dev_num]), &ioc_data, vcc, vcc->push); vcc->proto_data = dev_lec[ioc_data.dev_num]; vcc->push = lec_push; @@ -886,7 +870,8 @@ static int lec_mcast_attach(struct atm_vcc *vcc, int arg) if (arg < 0 || arg >= MAX_LEC_ITF || !dev_lec[arg]) return -EINVAL; vcc->proto_data = dev_lec[arg]; - return (lec_mcast_make((struct lec_priv *)dev_lec[arg]->priv, vcc)); + return lec_mcast_make((struct lec_priv *)netdev_priv(dev_lec[arg]), + vcc); } /* Initialize device. */ @@ -928,11 +913,11 @@ static int lecd_attach(struct atm_vcc *vcc, int arg) return -EINVAL; } - priv = dev_lec[i]->priv; + priv = netdev_priv(dev_lec[i]); priv->is_trdev = is_trdev; lec_init(dev_lec[i]); } else { - priv = dev_lec[i]->priv; + priv = netdev_priv(dev_lec[i]); if (priv->lecd) return -EADDRINUSE; } @@ -1093,7 +1078,8 @@ static void *lec_itf_walk(struct lec_state *state, loff_t *l) void *v; dev = state->dev ? state->dev : dev_lec[state->itf]; - v = (dev && dev->priv) ? lec_priv_walk(state, l, dev->priv) : NULL; + v = (dev && netdev_priv(dev)) ? + lec_priv_walk(state, l, netdev_priv(dev)) : NULL; if (!v && dev) { dev_put(dev); /* Partial state reset for the next time we get called */ @@ -1255,7 +1241,7 @@ static void __exit lane_module_cleanup(void) for (i = 0; i < MAX_LEC_ITF; i++) { if (dev_lec[i] != NULL) { - priv = (struct lec_priv *)dev_lec[i]->priv; + priv = netdev_priv(dev_lec[i]); unregister_netdev(dev_lec[i]); free_netdev(dev_lec[i]); dev_lec[i] = NULL; @@ -1279,7 +1265,7 @@ static int lane2_resolve(struct net_device *dev, const u8 *dst_mac, int force, u8 **tlvs, u32 *sizeoftlvs) { unsigned long flags; - struct lec_priv *priv = (struct lec_priv *)dev->priv; + struct lec_priv *priv = netdev_priv(dev); struct lec_arp_table *table; struct sk_buff *skb; int retval; @@ -1326,7 +1312,7 @@ static int lane2_associate_req(struct net_device *dev, const u8 *lan_dst, { int retval; struct sk_buff *skb; - struct lec_priv *priv = (struct lec_priv *)dev->priv; + struct lec_priv *priv = netdev_priv(dev); if (compare_ether_addr(lan_dst, dev->dev_addr)) return (0); /* not our mac address */ @@ -1363,7 +1349,7 @@ static void lane2_associate_ind(struct net_device *dev, const u8 *mac_addr, #if 0 int i = 0; #endif - struct lec_priv *priv = (struct lec_priv *)dev->priv; + struct lec_priv *priv = netdev_priv(dev); #if 0 /* * Why have the TLVs in LE_ARP entries * since we do not use them? When you diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 11b16d16661..039d5cc72c3 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -232,8 +232,8 @@ void atm_mpoa_disp_qos(struct seq_file *m) seq_printf(m, "IP address\n TX:max_pcr pcr min_pcr max_cdv max_sdu\n RX:max_pcr pcr min_pcr max_cdv max_sdu\n"); while (qos != NULL) { - seq_printf(m, "%u.%u.%u.%u\n %-7d %-7d %-7d %-7d %-7d\n %-7d %-7d %-7d %-7d %-7d\n", - NIPQUAD(qos->ipaddr), + seq_printf(m, "%pI4\n %-7d %-7d %-7d %-7d %-7d\n %-7d %-7d %-7d %-7d %-7d\n", + &qos->ipaddr, qos->qos.txtp.max_pcr, qos->qos.txtp.pcr, qos->qos.txtp.min_pcr, qos->qos.txtp.max_cdv, qos->qos.txtp.max_sdu, qos->qos.rxtp.max_pcr, qos->qos.rxtp.pcr, qos->qos.rxtp.min_pcr, qos->qos.rxtp.max_cdv, qos->qos.rxtp.max_sdu); qos = qos->next; @@ -341,8 +341,8 @@ static const char *mpoa_device_type_string(char type) } /* - * lec device calls this via its dev->priv->lane2_ops->associate_indicator() - * when it sees a TLV in LE_ARP packet. + * lec device calls this via its netdev_priv(dev)->lane2_ops + * ->associate_indicator() when it sees a TLV in LE_ARP packet. * We fill in the pointer above when we see a LANE2 lec initializing * See LANE2 spec 3.1.5 * @@ -595,8 +595,8 @@ static int atm_mpoa_vcc_attach(struct atm_vcc *vcc, void __user *arg) if (in_entry != NULL) mpc->in_ops->put(in_entry); return -EINVAL; } - printk("mpoa: (%s) mpc_vcc_attach: attaching ingress SVC, entry = %u.%u.%u.%u\n", - mpc->dev->name, NIPQUAD(in_entry->ctrl_info.in_dst_ip)); + printk("mpoa: (%s) mpc_vcc_attach: attaching ingress SVC, entry = %pI4\n", + mpc->dev->name, &in_entry->ctrl_info.in_dst_ip); in_entry->shortcut = vcc; mpc->in_ops->put(in_entry); } else { @@ -627,8 +627,8 @@ static void mpc_vcc_close(struct atm_vcc *vcc, struct net_device *dev) dprintk("mpoa: (%s) mpc_vcc_close:\n", dev->name); in_entry = mpc->in_ops->get_by_vcc(vcc, mpc); if (in_entry) { - dprintk("mpoa: (%s) mpc_vcc_close: ingress SVC closed ip = %u.%u.%u.%u\n", - mpc->dev->name, NIPQUAD(in_entry->ctrl_info.in_dst_ip)); + dprintk("mpoa: (%s) mpc_vcc_close: ingress SVC closed ip = %pI4\n", + mpc->dev->name, &in_entry->ctrl_info.in_dst_ip); in_entry->shortcut = NULL; mpc->in_ops->put(in_entry); } @@ -785,7 +785,7 @@ static int atm_mpoa_mpoad_attach (struct atm_vcc *vcc, int arg) } if (mpc->dev) { /* check if the lec is LANE2 capable */ - priv = (struct lec_priv *)mpc->dev->priv; + priv = netdev_priv(mpc->dev); if (priv->lane_version < 2) { dev_put(mpc->dev); mpc->dev = NULL; @@ -845,7 +845,7 @@ static void mpoad_close(struct atm_vcc *vcc) mpc->mpoad_vcc = NULL; if (mpc->dev) { - struct lec_priv *priv = (struct lec_priv *)mpc->dev->priv; + struct lec_priv *priv = netdev_priv(mpc->dev); priv->lane2_ops->associate_indicator = NULL; stop_mpc(mpc); dev_put(mpc->dev); @@ -976,7 +976,7 @@ static int mpoa_event_listener(struct notifier_block *mpoa_notifier, unsigned lo switch (event) { case NETDEV_REGISTER: /* a new lec device was allocated */ - priv = (struct lec_priv *)dev->priv; + priv = netdev_priv(dev); if (priv->lane_version < 2) break; priv->lane2_ops->associate_indicator = lane2_assoc_ind; @@ -1098,7 +1098,8 @@ static void check_qos_and_open_shortcut(struct k_message *msg, struct mpoa_clien entry->shortcut = eg_entry->shortcut; } if(entry->shortcut){ - dprintk("mpoa: (%s) using egress SVC to reach %u.%u.%u.%u\n",client->dev->name, NIPQUAD(dst_ip)); + dprintk("mpoa: (%s) using egress SVC to reach %pI4\n", + client->dev->name, &dst_ip); client->eg_ops->put(eg_entry); return; } @@ -1123,7 +1124,8 @@ static void MPOA_res_reply_rcvd(struct k_message *msg, struct mpoa_client *mpc) __be32 dst_ip = msg->content.in_info.in_dst_ip; in_cache_entry *entry = mpc->in_ops->get(dst_ip, mpc); - dprintk("mpoa: (%s) MPOA_res_reply_rcvd: ip %u.%u.%u.%u\n", mpc->dev->name, NIPQUAD(dst_ip)); + dprintk("mpoa: (%s) MPOA_res_reply_rcvd: ip %pI4\n", + mpc->dev->name, &dst_ip); ddprintk("mpoa: (%s) MPOA_res_reply_rcvd() entry = %p", mpc->dev->name, entry); if(entry == NULL){ printk("\nmpoa: (%s) ARGH, received res. reply for an entry that doesn't exist.\n", mpc->dev->name); @@ -1171,14 +1173,14 @@ static void ingress_purge_rcvd(struct k_message *msg, struct mpoa_client *mpc) in_cache_entry *entry = mpc->in_ops->get_with_mask(dst_ip, mpc, mask); if(entry == NULL){ - printk("mpoa: (%s) ingress_purge_rcvd: purge for a non-existing entry, ", mpc->dev->name); - printk("ip = %u.%u.%u.%u\n", NIPQUAD(dst_ip)); + printk("mpoa: (%s) ingress_purge_rcvd: purge for a non-existing entry, ip = %pI4\n", + mpc->dev->name, &dst_ip); return; } do { - dprintk("mpoa: (%s) ingress_purge_rcvd: removing an ingress entry, ip = %u.%u.%u.%u\n" , - mpc->dev->name, NIPQUAD(dst_ip)); + dprintk("mpoa: (%s) ingress_purge_rcvd: removing an ingress entry, ip = %pI4\n", + mpc->dev->name, &dst_ip); write_lock_bh(&mpc->ingress_lock); mpc->in_ops->remove_entry(entry, mpc); write_unlock_bh(&mpc->ingress_lock); @@ -1322,7 +1324,7 @@ static void set_mpc_ctrl_addr_rcvd(struct k_message *mesg, struct mpoa_client *m dprintk("\n"); if (mpc->dev) { - priv = (struct lec_priv *)mpc->dev->priv; + priv = netdev_priv(mpc->dev); retval = priv->lane2_ops->associate_req(mpc->dev, mpc->dev->dev_addr, tlv, sizeof(tlv)); if (retval == 0) printk("mpoa: (%s) MPOA device type TLV association failed\n", mpc->dev->name); @@ -1472,7 +1474,7 @@ static void __exit atm_mpoa_cleanup(void) tmp = mpc->next; if (mpc->dev != NULL) { stop_mpc(mpc); - priv = (struct lec_priv *)mpc->dev->priv; + priv = netdev_priv(mpc->dev); if (priv->lane2_ops != NULL) priv->lane2_ops->associate_indicator = NULL; } diff --git a/net/atm/mpoa_caches.c b/net/atm/mpoa_caches.c index 24799e3e78f..4504a4b339b 100644 --- a/net/atm/mpoa_caches.c +++ b/net/atm/mpoa_caches.c @@ -94,7 +94,7 @@ static in_cache_entry *in_cache_add_entry(__be32 dst_ip, return NULL; } - dprintk("mpoa: mpoa_caches.c: adding an ingress entry, ip = %u.%u.%u.%u\n", NIPQUAD(dst_ip)); + dprintk("mpoa: mpoa_caches.c: adding an ingress entry, ip = %pI4\n", &dst_ip); atomic_set(&entry->use, 1); dprintk("mpoa: mpoa_caches.c: new_in_cache_entry: about to lock\n"); @@ -150,7 +150,8 @@ static int cache_hit(in_cache_entry *entry, struct mpoa_client *mpc) if( entry->count > mpc->parameters.mpc_p1 && entry->entry_state == INGRESS_INVALID){ - dprintk("mpoa: (%s) mpoa_caches.c: threshold exceeded for ip %u.%u.%u.%u, sending MPOA res req\n", mpc->dev->name, NIPQUAD(entry->ctrl_info.in_dst_ip)); + dprintk("mpoa: (%s) mpoa_caches.c: threshold exceeded for ip %pI4, sending MPOA res req\n", + mpc->dev->name, &entry->ctrl_info.in_dst_ip); entry->entry_state = INGRESS_RESOLVING; msg.type = SND_MPOA_RES_RQST; memcpy(msg.MPS_ctrl, mpc->mps_ctrl_addr, ATM_ESA_LEN ); @@ -184,7 +185,8 @@ static void in_cache_remove_entry(in_cache_entry *entry, struct k_message msg; vcc = entry->shortcut; - dprintk("mpoa: mpoa_caches.c: removing an ingress entry, ip = %u.%u.%u.%u\n",NIPQUAD(entry->ctrl_info.in_dst_ip)); + dprintk("mpoa: mpoa_caches.c: removing an ingress entry, ip = %pI4\n", + &entry->ctrl_info.in_dst_ip); if (entry->prev != NULL) entry->prev->next = entry->next; @@ -228,7 +230,8 @@ static void clear_count_and_expired(struct mpoa_client *client) next_entry = entry->next; if((now.tv_sec - entry->tv.tv_sec) > entry->ctrl_info.holding_time){ - dprintk("mpoa: mpoa_caches.c: holding time expired, ip = %u.%u.%u.%u\n", NIPQUAD(entry->ctrl_info.in_dst_ip)); + dprintk("mpoa: mpoa_caches.c: holding time expired, ip = %pI4\n", + &entry->ctrl_info.in_dst_ip); client->in_ops->remove_entry(entry, client); } entry = next_entry; @@ -453,7 +456,8 @@ static eg_cache_entry *eg_cache_add_entry(struct k_message *msg, struct mpoa_cli return NULL; } - dprintk("mpoa: mpoa_caches.c: adding an egress entry, ip = %u.%u.%u.%u, this should be our IP\n", NIPQUAD(msg->content.eg_info.eg_dst_ip)); + dprintk("mpoa: mpoa_caches.c: adding an egress entry, ip = %pI4, this should be our IP\n", + &msg->content.eg_info.eg_dst_ip); atomic_set(&entry->use, 1); dprintk("mpoa: mpoa_caches.c: new_eg_cache_entry: about to lock\n"); @@ -469,8 +473,8 @@ static eg_cache_entry *eg_cache_add_entry(struct k_message *msg, struct mpoa_cli do_gettimeofday(&(entry->tv)); entry->entry_state = EGRESS_RESOLVED; dprintk("mpoa: mpoa_caches.c: new_eg_cache_entry cache_id %lu\n", ntohl(entry->ctrl_info.cache_id)); - dprintk("mpoa: mpoa_caches.c: mps_ip = %u.%u.%u.%u\n", - NIPQUAD(entry->ctrl_info.mps_ip)); + dprintk("mpoa: mpoa_caches.c: mps_ip = %pI4\n", + &entry->ctrl_info.mps_ip); atomic_inc(&entry->use); write_unlock_irq(&client->egress_lock); diff --git a/net/atm/pvc.c b/net/atm/pvc.c index 43e8bf5ed00..e1d22d9430d 100644 --- a/net/atm/pvc.c +++ b/net/atm/pvc.c @@ -113,6 +113,9 @@ static const struct proto_ops pvc_proto_ops = { .getname = pvc_getname, .poll = vcc_poll, .ioctl = vcc_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = vcc_compat_ioctl, +#endif .listen = sock_no_listen, .shutdown = pvc_shutdown, .setsockopt = pvc_setsockopt, diff --git a/net/atm/resources.c b/net/atm/resources.c index a34ba948af9..56b7322ff46 100644 --- a/net/atm/resources.c +++ b/net/atm/resources.c @@ -195,20 +195,39 @@ static int fetch_stats(struct atm_dev *dev, struct atm_dev_stats __user *arg, in } -int atm_dev_ioctl(unsigned int cmd, void __user *arg) +int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat) { void __user *buf; int error, len, number, size = 0; struct atm_dev *dev; struct list_head *p; int *tmp_buf, *tmp_p; - struct atm_iobuf __user *iobuf = arg; - struct atmif_sioc __user *sioc = arg; + int __user *sioc_len; + int __user *iobuf_len; + +#ifndef CONFIG_COMPAT + compat = 0; /* Just so the compiler _knows_ */ +#endif + switch (cmd) { case ATM_GETNAMES: - if (get_user(buf, &iobuf->buffer)) - return -EFAULT; - if (get_user(len, &iobuf->length)) + + if (compat) { +#ifdef CONFIG_COMPAT + struct compat_atm_iobuf __user *ciobuf = arg; + compat_uptr_t cbuf; + iobuf_len = &ciobuf->length; + if (get_user(cbuf, &ciobuf->buffer)) + return -EFAULT; + buf = compat_ptr(cbuf); +#endif + } else { + struct atm_iobuf __user *iobuf = arg; + iobuf_len = &iobuf->length; + if (get_user(buf, &iobuf->buffer)) + return -EFAULT; + } + if (get_user(len, iobuf_len)) return -EFAULT; mutex_lock(&atm_dev_mutex); list_for_each(p, &atm_devs) @@ -229,7 +248,7 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg) } mutex_unlock(&atm_dev_mutex); error = ((copy_to_user(buf, tmp_buf, size)) || - put_user(size, &iobuf->length)) + put_user(size, iobuf_len)) ? -EFAULT : 0; kfree(tmp_buf); return error; @@ -237,13 +256,32 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg) break; } - if (get_user(buf, &sioc->arg)) - return -EFAULT; - if (get_user(len, &sioc->length)) - return -EFAULT; - if (get_user(number, &sioc->number)) - return -EFAULT; - + if (compat) { +#ifdef CONFIG_COMPAT + struct compat_atmif_sioc __user *csioc = arg; + compat_uptr_t carg; + + sioc_len = &csioc->length; + if (get_user(carg, &csioc->arg)) + return -EFAULT; + buf = compat_ptr(carg); + + if (get_user(len, &csioc->length)) + return -EFAULT; + if (get_user(number, &csioc->number)) + return -EFAULT; +#endif + } else { + struct atmif_sioc __user *sioc = arg; + + sioc_len = &sioc->length; + if (get_user(buf, &sioc->arg)) + return -EFAULT; + if (get_user(len, &sioc->length)) + return -EFAULT; + if (get_user(number, &sioc->number)) + return -EFAULT; + } if (!(dev = try_then_request_module(atm_dev_lookup(number), "atm-device-%d", number))) return -ENODEV; @@ -358,7 +396,7 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg) size = error; /* may return 0, but later on size == 0 means "don't write the length" */ - error = put_user(size, &sioc->length) + error = put_user(size, sioc_len) ? -EFAULT : 0; goto done; case ATM_SETLOOP: @@ -380,11 +418,21 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg) } /* fall through */ default: - if (!dev->ops->ioctl) { - error = -EINVAL; - goto done; + if (compat) { +#ifdef CONFIG_COMPAT + if (!dev->ops->compat_ioctl) { + error = -EINVAL; + goto done; + } + size = dev->ops->compat_ioctl(dev, cmd, buf); +#endif + } else { + if (!dev->ops->ioctl) { + error = -EINVAL; + goto done; + } + size = dev->ops->ioctl(dev, cmd, buf); } - size = dev->ops->ioctl(dev, cmd, buf); if (size < 0) { error = (size == -ENOIOCTLCMD ? -EINVAL : size); goto done; @@ -392,7 +440,7 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg) } if (size) - error = put_user(size, &sioc->length) + error = put_user(size, sioc_len) ? -EFAULT : 0; else error = 0; diff --git a/net/atm/resources.h b/net/atm/resources.h index 1d004aaaeec..126fb1840df 100644 --- a/net/atm/resources.h +++ b/net/atm/resources.h @@ -13,7 +13,7 @@ extern struct list_head atm_devs; extern struct mutex atm_dev_mutex; -int atm_dev_ioctl(unsigned int cmd, void __user *arg); +int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat); #ifdef CONFIG_PROC_FS diff --git a/net/atm/svc.c b/net/atm/svc.c index 8fb54dc870b..7b831b526d0 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -608,6 +608,22 @@ static int svc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return error; } +#ifdef CONFIG_COMPAT +static int svc_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + /* The definition of ATM_ADDPARTY uses the size of struct atm_iobuf. + But actually it takes a struct sockaddr_atmsvc, which doesn't need + compat handling. So all we have to do is fix up cmd... */ + if (cmd == COMPAT_ATM_ADDPARTY) + cmd = ATM_ADDPARTY; + + if (cmd == ATM_ADDPARTY || cmd == ATM_DROPPARTY) + return svc_ioctl(sock, cmd, arg); + else + return vcc_compat_ioctl(sock, cmd, arg); +} +#endif /* CONFIG_COMPAT */ + static const struct proto_ops svc_proto_ops = { .family = PF_ATMSVC, .owner = THIS_MODULE, @@ -620,6 +636,9 @@ static const struct proto_ops svc_proto_ops = { .getname = svc_getname, .poll = vcc_poll, .ioctl = svc_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = svc_compat_ioctl, +#endif .listen = svc_listen, .shutdown = svc_shutdown, .setsockopt = svc_setsockopt, diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 28c71574a78..00d9e5e1315 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1045,7 +1045,7 @@ static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (addr->fsa_ax25.sax25_family != AF_AX25) return -EINVAL; - user = ax25_findbyuid(current->euid); + user = ax25_findbyuid(current_euid()); if (user) { call = user->call; ax25_uid_put(user); diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c index 4a5ba978a80..5f1d2107a1d 100644 --- a/net/ax25/ax25_in.c +++ b/net/ax25/ax25_in.c @@ -200,19 +200,15 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev, skb_reset_transport_header(skb); - if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) { - kfree_skb(skb); - return 0; - } + if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) + goto free; /* * Parse the address header. */ - if (ax25_addr_parse(skb->data, skb->len, &src, &dest, &dp, &type, &dama) == NULL) { - kfree_skb(skb); - return 0; - } + if (ax25_addr_parse(skb->data, skb->len, &src, &dest, &dp, &type, &dama) == NULL) + goto free; /* * Ours perhaps ? @@ -239,10 +235,8 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev, ax25_send_to_raw(&dest, skb, skb->data[1]); - if (!mine && ax25cmp(&dest, (ax25_address *)dev->broadcast) != 0) { - kfree_skb(skb); - return 0; - } + if (!mine && ax25cmp(&dest, (ax25_address *)dev->broadcast) != 0) + goto free; /* Now we are pointing at the pid byte */ switch (skb->data[1]) { @@ -301,10 +295,8 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev, * If not, should we DM the incoming frame (except DMs) or * silently ignore them. For now we stay quiet. */ - if (ax25_dev->values[AX25_VALUES_CONMODE] == 0) { - kfree_skb(skb); - return 0; - } + if (ax25_dev->values[AX25_VALUES_CONMODE] == 0) + goto free; /* LAPB */ @@ -339,8 +331,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev, if ((*skb->data & ~AX25_PF) != AX25_DM && mine) ax25_return_dm(dev, &src, &dest, &dp); - kfree_skb(skb); - return 0; + goto free; } /* b) received SABM(E) */ @@ -372,15 +363,12 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev, sk->sk_ack_backlog++; bh_unlock_sock(sk); } else { - if (!mine) { - kfree_skb(skb); - return 0; - } + if (!mine) + goto free; if ((ax25 = ax25_create_cb()) == NULL) { ax25_return_dm(dev, &src, &dest, &dp); - kfree_skb(skb); - return 0; + goto free; } ax25_fillin_cb(ax25, ax25_dev); @@ -436,9 +424,10 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev, if (!sock_flag(sk, SOCK_DEAD)) sk->sk_data_ready(sk, skb->len); sock_put(sk); - } else + } else { +free: kfree_skb(skb); - + } return 0; } diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c index 8672cd84fdf..c833ba4c45a 100644 --- a/net/ax25/ax25_route.c +++ b/net/ax25/ax25_route.c @@ -421,7 +421,7 @@ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr) goto put; } - user = ax25_findbyuid(current->euid); + user = ax25_findbyuid(current_euid()); if (user) { ax25->source_addr = user->call; ax25_uid_put(user); diff --git a/net/ax25/sysctl_net_ax25.c b/net/ax25/sysctl_net_ax25.c index f288fc4aef9..62ee3fb3473 100644 --- a/net/ax25/sysctl_net_ax25.c +++ b/net/ax25/sysctl_net_ax25.c @@ -24,7 +24,9 @@ static int min_idle[1], max_idle[] = {65535000}; static int min_n2[] = {1}, max_n2[] = {31}; static int min_paclen[] = {1}, max_paclen[] = {512}; static int min_proto[1], max_proto[] = { AX25_PROTO_MAX }; +#ifdef CONFIG_AX25_DAMA_SLAVE static int min_ds_timeout[1], max_ds_timeout[] = {65535000}; +#endif static struct ctl_table_header *ax25_table_header; @@ -43,8 +45,8 @@ static const ctl_table ax25_param_table[] = { .procname = "ip_default_mode", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_ipdefmode, .extra2 = &max_ipdefmode }, @@ -53,8 +55,8 @@ static const ctl_table ax25_param_table[] = { .procname = "ax25_default_mode", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_axdefmode, .extra2 = &max_axdefmode }, @@ -63,8 +65,8 @@ static const ctl_table ax25_param_table[] = { .procname = "backoff_type", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_backoff, .extra2 = &max_backoff }, @@ -73,8 +75,8 @@ static const ctl_table ax25_param_table[] = { .procname = "connect_mode", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_conmode, .extra2 = &max_conmode }, @@ -83,8 +85,8 @@ static const ctl_table ax25_param_table[] = { .procname = "standard_window_size", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_window, .extra2 = &max_window }, @@ -93,8 +95,8 @@ static const ctl_table ax25_param_table[] = { .procname = "extended_window_size", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_ewindow, .extra2 = &max_ewindow }, @@ -103,8 +105,8 @@ static const ctl_table ax25_param_table[] = { .procname = "t1_timeout", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_t1, .extra2 = &max_t1 }, @@ -113,8 +115,8 @@ static const ctl_table ax25_param_table[] = { .procname = "t2_timeout", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_t2, .extra2 = &max_t2 }, @@ -123,8 +125,8 @@ static const ctl_table ax25_param_table[] = { .procname = "t3_timeout", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_t3, .extra2 = &max_t3 }, @@ -133,8 +135,8 @@ static const ctl_table ax25_param_table[] = { .procname = "idle_timeout", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_idle, .extra2 = &max_idle }, @@ -143,8 +145,8 @@ static const ctl_table ax25_param_table[] = { .procname = "maximum_retry_count", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_n2, .extra2 = &max_n2 }, @@ -153,8 +155,8 @@ static const ctl_table ax25_param_table[] = { .procname = "maximum_packet_length", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_paclen, .extra2 = &max_paclen }, @@ -163,8 +165,8 @@ static const ctl_table ax25_param_table[] = { .procname = "protocol", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_proto, .extra2 = &max_proto }, @@ -174,8 +176,8 @@ static const ctl_table ax25_param_table[] = { .procname = "dama_slave_timeout", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_ds_timeout, .extra2 = &max_ds_timeout }, diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 8f9431a12c6..744ed3f07ef 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -41,18 +41,14 @@ #include <net/bluetooth/bluetooth.h> -#ifndef CONFIG_BT_SOCK_DEBUG -#undef BT_DBG -#define BT_DBG(D...) -#endif - -#define VERSION "2.13" +#define VERSION "2.14" /* Bluetooth sockets */ #define BT_MAX_PROTO 8 static struct net_proto_family *bt_proto[BT_MAX_PROTO]; +static DEFINE_RWLOCK(bt_proto_lock); -static struct lock_class_key bt_slock_key[BT_MAX_PROTO]; +#ifdef CONFIG_DEBUG_LOCK_ALLOC static struct lock_class_key bt_lock_key[BT_MAX_PROTO]; static const char *bt_key_strings[BT_MAX_PROTO] = { "sk_lock-AF_BLUETOOTH-BTPROTO_L2CAP", @@ -65,6 +61,7 @@ static const char *bt_key_strings[BT_MAX_PROTO] = { "sk_lock-AF_BLUETOOTH-BTPROTO_AVDTP", }; +static struct lock_class_key bt_slock_key[BT_MAX_PROTO]; static const char *bt_slock_key_strings[BT_MAX_PROTO] = { "slock-AF_BLUETOOTH-BTPROTO_L2CAP", "slock-AF_BLUETOOTH-BTPROTO_HCI", @@ -75,7 +72,25 @@ static const char *bt_slock_key_strings[BT_MAX_PROTO] = { "slock-AF_BLUETOOTH-BTPROTO_HIDP", "slock-AF_BLUETOOTH-BTPROTO_AVDTP", }; -static DEFINE_RWLOCK(bt_proto_lock); + +static inline void bt_sock_reclassify_lock(struct socket *sock, int proto) +{ + struct sock *sk = sock->sk; + + if (!sk) + return; + + BUG_ON(sock_owned_by_user(sk)); + + sock_lock_init_class_and_name(sk, + bt_slock_key_strings[proto], &bt_slock_key[proto], + bt_key_strings[proto], &bt_lock_key[proto]); +} +#else +static inline void bt_sock_reclassify_lock(struct socket *sock, int proto) +{ +} +#endif int bt_sock_register(int proto, struct net_proto_family *ops) { @@ -117,21 +132,6 @@ int bt_sock_unregister(int proto) } EXPORT_SYMBOL(bt_sock_unregister); -static void bt_reclassify_sock_lock(struct socket *sock, int proto) -{ - struct sock *sk = sock->sk; - - if (!sk) - return; - BUG_ON(sock_owned_by_user(sk)); - - sock_lock_init_class_and_name(sk, - bt_slock_key_strings[proto], - &bt_slock_key[proto], - bt_key_strings[proto], - &bt_lock_key[proto]); -} - static int bt_sock_create(struct net *net, struct socket *sock, int proto) { int err; @@ -151,7 +151,7 @@ static int bt_sock_create(struct net *net, struct socket *sock, int proto) if (bt_proto[proto] && try_module_get(bt_proto[proto]->owner)) { err = bt_proto[proto]->create(net, sock, proto); - bt_reclassify_sock_lock(sock, proto); + bt_sock_reclassify_lock(sock, proto); module_put(bt_proto[proto]->owner); } @@ -240,7 +240,7 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, size_t copied; int err; - BT_DBG("sock %p sk %p len %d", sock, sk, len); + BT_DBG("sock %p sk %p len %zu", sock, sk, len); if (flags & (MSG_OOB)) return -EOPNOTSUPP; diff --git a/net/bluetooth/bnep/bnep.h b/net/bluetooth/bnep/bnep.h index b69bf4e7c48..d20f8a40f36 100644 --- a/net/bluetooth/bnep/bnep.h +++ b/net/bluetooth/bnep/bnep.h @@ -161,7 +161,7 @@ struct bnep_session { struct msghdr msg; struct bnep_proto_filter proto_filter[BNEP_MAX_PROTO_FILTERS]; - u64 mc_filter; + unsigned long long mc_filter; struct socket *sock; struct net_device *dev; diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index 80ba30cf4b6..70fea8bdb4e 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -52,11 +52,6 @@ #include "bnep.h" -#ifndef CONFIG_BT_BNEP_DEBUG -#undef BT_DBG -#define BT_DBG(D...) -#endif - #define VERSION "1.3" static int compress_src = 1; @@ -311,7 +306,6 @@ static inline int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb) struct sk_buff *nskb; u8 type; - dev->last_rx = jiffies; s->stats.rx_bytes += skb->len; type = *(u8 *) skb->data; skb_pull(skb, 1); @@ -566,7 +560,7 @@ int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock) goto failed; } - s = dev->priv; + s = netdev_priv(dev); /* This is rx header therefore addresses are swapped. * ie eh.h_dest is our local address. */ diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index d9fa0ab2c87..f897da6e044 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -41,11 +41,6 @@ #include "bnep.h" -#ifndef CONFIG_BT_BNEP_DEBUG -#undef BT_DBG -#define BT_DBG( A... ) -#endif - #define BNEP_TX_QUEUE_LEN 20 static int bnep_net_open(struct net_device *dev) @@ -62,14 +57,14 @@ static int bnep_net_close(struct net_device *dev) static struct net_device_stats *bnep_net_get_stats(struct net_device *dev) { - struct bnep_session *s = dev->priv; + struct bnep_session *s = netdev_priv(dev); return &s->stats; } static void bnep_net_set_mc_list(struct net_device *dev) { #ifdef CONFIG_BT_BNEP_MC_FILTER - struct bnep_session *s = dev->priv; + struct bnep_session *s = netdev_priv(dev); struct sock *sk = s->sock->sk; struct bnep_set_filter_req *r; struct sk_buff *skb; @@ -183,7 +178,7 @@ static inline int bnep_net_proto_filter(struct sk_buff *skb, struct bnep_session static int bnep_net_xmit(struct sk_buff *skb, struct net_device *dev) { - struct bnep_session *s = dev->priv; + struct bnep_session *s = netdev_priv(dev); struct sock *sk = s->sock->sk; BT_DBG("skb %p, dev %p", skb, dev); diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index 8ffb57f2303..e857628b0b2 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c @@ -46,11 +46,6 @@ #include "bnep.h" -#ifndef CONFIG_BT_BNEP_DEBUG -#undef BT_DBG -#define BT_DBG( A... ) -#endif - static int bnep_sock_release(struct socket *sock) { struct sock *sk = sock->sk; diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c index 3e9d5bb3fef..78958c0f9a4 100644 --- a/net/bluetooth/cmtp/capi.c +++ b/net/bluetooth/cmtp/capi.c @@ -42,11 +42,6 @@ #include "cmtp.h" -#ifndef CONFIG_BT_CMTP_DEBUG -#undef BT_DBG -#define BT_DBG(D...) -#endif - #define CAPI_INTEROPERABILITY 0x20 #define CAPI_INTEROPERABILITY_REQ CAPICMD(CAPI_INTEROPERABILITY, CAPI_REQ) diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index ca60a4517fd..c9cac7719ef 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -44,11 +44,6 @@ #include "cmtp.h" -#ifndef CONFIG_BT_CMTP_DEBUG -#undef BT_DBG -#define BT_DBG(D...) -#endif - #define VERSION "1.0" static DECLARE_RWSEM(cmtp_session_sem); diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c index 8c7f7bc4e0b..16b0fad74f6 100644 --- a/net/bluetooth/cmtp/sock.c +++ b/net/bluetooth/cmtp/sock.c @@ -43,11 +43,6 @@ #include "cmtp.h" -#ifndef CONFIG_BT_CMTP_DEBUG -#undef BT_DBG -#define BT_DBG(D...) -#endif - static int cmtp_sock_release(struct socket *sock) { struct sock *sk = sock->sk; diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index b7002429f15..a4a789f24c8 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -45,11 +45,6 @@ #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> -#ifndef CONFIG_BT_HCI_CORE_DEBUG -#undef BT_DBG -#define BT_DBG(D...) -#endif - void hci_acl_connect(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 278a3ace14f..ba78cc1eb8d 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -48,11 +48,6 @@ #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> -#ifndef CONFIG_BT_HCI_CORE_DEBUG -#undef BT_DBG -#define BT_DBG(D...) -#endif - static void hci_cmd_task(unsigned long arg); static void hci_rx_task(unsigned long arg); static void hci_tx_task(unsigned long arg); @@ -205,7 +200,7 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt) /* Mandatory initialization */ /* Reset */ - if (test_bit(HCI_QUIRK_RESET_ON_INIT, &hdev->quirks)) + if (!test_bit(HCI_QUIRK_NO_RESET, &hdev->quirks)) hci_send_cmd(hdev, HCI_OP_RESET, 0, NULL); /* Read Local Supported Features */ @@ -290,7 +285,7 @@ static void hci_linkpol_req(struct hci_dev *hdev, unsigned long opt) { __le16 policy = cpu_to_le16(opt); - BT_DBG("%s %x", hdev->name, opt); + BT_DBG("%s %x", hdev->name, policy); /* Default link policy */ hci_send_cmd(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, 2, &policy); @@ -756,7 +751,7 @@ int hci_get_dev_list(void __user *arg) size = sizeof(*dl) + dev_num * sizeof(*dr); - if (!(dl = kmalloc(size, GFP_KERNEL))) + if (!(dl = kzalloc(size, GFP_KERNEL))) return -ENOMEM; dr = dl->dev_req; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index ad7a553d771..f91ba690f5d 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -45,11 +45,6 @@ #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> -#ifndef CONFIG_BT_HCI_CORE_DEBUG -#undef BT_DBG -#define BT_DBG(D...) -#endif - /* Handle HCI Event packets */ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index d62579b6795..4f9621f759a 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -49,11 +49,6 @@ #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> -#ifndef CONFIG_BT_HCI_SOCK_DEBUG -#undef BT_DBG -#define BT_DBG(D...) -#endif - /* ----- HCI socket interface ----- */ static inline int hci_test_bit(int nr, void *addr) diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index f4f6615cad9..1a1f916be44 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -6,11 +6,6 @@ #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> -#ifndef CONFIG_BT_HCI_CORE_DEBUG -#undef BT_DBG -#define BT_DBG(D...) -#endif - struct class *bt_class = NULL; EXPORT_SYMBOL_GPL(bt_class); @@ -113,8 +108,7 @@ void hci_conn_add_sysfs(struct hci_conn *conn) conn->dev.class = bt_class; conn->dev.parent = &hdev->dev; - snprintf(conn->dev.bus_id, BUS_ID_SIZE, "%s:%d", - hdev->name, conn->handle); + dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle); dev_set_drvdata(&conn->dev, conn); @@ -132,7 +126,7 @@ void hci_conn_add_sysfs(struct hci_conn *conn) */ static int __match_tty(struct device *dev, void *data) { - return !strncmp(dev->bus_id, "rfcomm", 6); + return !strncmp(dev_name(dev), "rfcomm", 6); } static void del_conn(struct work_struct *work) @@ -421,7 +415,7 @@ int hci_register_sysfs(struct hci_dev *hdev) dev->class = bt_class; dev->parent = hdev->parent; - strlcpy(dev->bus_id, hdev->name, BUS_ID_SIZE); + dev_set_name(dev, "%s", hdev->name); dev_set_drvdata(dev, hdev); diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index acdeab3d980..b18676870d5 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -47,11 +47,6 @@ #include "hidp.h" -#ifndef CONFIG_BT_HIDP_DEBUG -#undef BT_DBG -#define BT_DBG(D...) -#endif - #define VERSION "1.2" static DECLARE_RWSEM(hidp_session_sem); diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c index f4dd02ca9a9..37c9d7d2e68 100644 --- a/net/bluetooth/hidp/sock.c +++ b/net/bluetooth/hidp/sock.c @@ -39,11 +39,6 @@ #include "hidp.h" -#ifndef CONFIG_BT_HIDP_DEBUG -#undef BT_DBG -#define BT_DBG(D...) -#endif - static int hidp_sock_release(struct socket *sock) { struct sock *sk = sock->sk; diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 9610a9c85b9..b93748e224f 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -50,11 +50,6 @@ #include <net/bluetooth/hci_core.h> #include <net/bluetooth/l2cap.h> -#ifndef CONFIG_BT_L2CAP_DEBUG -#undef BT_DBG -#define BT_DBG(D...) -#endif - #define VERSION "2.11" static u32 l2cap_feat_mask = 0x0000; diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index ba537fae0a4..acd84fd524b 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -46,11 +46,6 @@ #include <net/bluetooth/l2cap.h> #include <net/bluetooth/rfcomm.h> -#ifndef CONFIG_BT_RFCOMM_DEBUG -#undef BT_DBG -#define BT_DBG(D...) -#endif - #define VERSION "1.10" static int disable_cfc = 0; @@ -1786,8 +1781,6 @@ static inline void rfcomm_accept_connection(struct rfcomm_session *s) if (err < 0) return; - __module_get(nsock->ops->owner); - /* Set our callbacks */ nsock->sk->sk_data_ready = rfcomm_l2data_ready; nsock->sk->sk_state_change = rfcomm_l2state_change; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 8a972b6ba85..d3fc6fca38d 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -50,11 +50,6 @@ #include <net/bluetooth/l2cap.h> #include <net/bluetooth/rfcomm.h> -#ifndef CONFIG_BT_RFCOMM_DEBUG -#undef BT_DBG -#define BT_DBG(D...) -#endif - static const struct proto_ops rfcomm_sock_ops; static struct bt_sock_list rfcomm_sk_list = { @@ -644,7 +639,7 @@ static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock, msg->msg_namelen = 0; - BT_DBG("sk %p size %d", sk, size); + BT_DBG("sk %p size %zu", sk, size); lock_sock(sk); @@ -792,7 +787,7 @@ static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, c static int rfcomm_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { - struct sock *sk = sock->sk; + struct sock *sk __maybe_unused = sock->sk; int err; BT_DBG("sk %p cmd %x arg %lx", sk, cmd, arg); diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index d3340dd52bc..d030c69cb5a 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -39,11 +39,6 @@ #include <net/bluetooth/hci_core.h> #include <net/bluetooth/rfcomm.h> -#ifndef CONFIG_BT_RFCOMM_DEBUG -#undef BT_DBG -#define BT_DBG(D...) -#endif - #define RFCOMM_TTY_MAGIC 0x6d02 /* magic number for rfcomm struct */ #define RFCOMM_TTY_PORTS RFCOMM_MAX_DEV /* whole lotta rfcomm devices */ #define RFCOMM_TTY_MAJOR 216 /* device node major id of the usb/bluetooth.c driver */ @@ -58,7 +53,7 @@ struct rfcomm_dev { char name[12]; int id; unsigned long flags; - int opened; + atomic_t opened; int err; bdaddr_t src; @@ -261,6 +256,8 @@ static int rfcomm_dev_add(struct rfcomm_dev_req *req, struct rfcomm_dlc *dlc) dev->flags = req->flags & ((1 << RFCOMM_RELEASE_ONHUP) | (1 << RFCOMM_REUSE_DLC)); + atomic_set(&dev->opened, 0); + init_waitqueue_head(&dev->wait); tasklet_init(&dev->wakeup_task, rfcomm_tty_wakeup, (unsigned long) dev); @@ -301,18 +298,15 @@ static int rfcomm_dev_add(struct rfcomm_dev_req *req, struct rfcomm_dlc *dlc) out: write_unlock_bh(&rfcomm_dev_lock); - if (err < 0) { - kfree(dev); - return err; - } + if (err < 0) + goto free; dev->tty_dev = tty_register_device(rfcomm_tty_driver, dev->id, NULL); if (IS_ERR(dev->tty_dev)) { err = PTR_ERR(dev->tty_dev); list_del(&dev->list); - kfree(dev); - return err; + goto free; } dev_set_drvdata(dev->tty_dev, dev); @@ -324,16 +318,20 @@ out: BT_ERR("Failed to create channel attribute"); return dev->id; + +free: + kfree(dev); + return err; } static void rfcomm_dev_del(struct rfcomm_dev *dev) { BT_DBG("dev %p", dev); - if (test_bit(RFCOMM_TTY_RELEASED, &dev->flags)) - BUG_ON(1); - else - set_bit(RFCOMM_TTY_RELEASED, &dev->flags); + BUG_ON(test_and_set_bit(RFCOMM_TTY_RELEASED, &dev->flags)); + + if (atomic_read(&dev->opened) > 0) + return; write_lock_bh(&rfcomm_dev_lock); list_del_init(&dev->list); @@ -689,9 +687,10 @@ static int rfcomm_tty_open(struct tty_struct *tty, struct file *filp) if (!dev) return -ENODEV; - BT_DBG("dev %p dst %s channel %d opened %d", dev, batostr(&dev->dst), dev->channel, dev->opened); + BT_DBG("dev %p dst %s channel %d opened %d", dev, batostr(&dev->dst), + dev->channel, atomic_read(&dev->opened)); - if (dev->opened++ != 0) + if (atomic_inc_return(&dev->opened) > 1) return 0; dlc = dev->dlc; @@ -747,9 +746,10 @@ static void rfcomm_tty_close(struct tty_struct *tty, struct file *filp) if (!dev) return; - BT_DBG("tty %p dev %p dlc %p opened %d", tty, dev, dev->dlc, dev->opened); + BT_DBG("tty %p dev %p dlc %p opened %d", tty, dev, dev->dlc, + atomic_read(&dev->opened)); - if (--dev->opened == 0) { + if (atomic_dec_and_test(&dev->opened)) { if (dev->tty_dev->parent) device_move(dev->tty_dev, NULL); @@ -763,6 +763,14 @@ static void rfcomm_tty_close(struct tty_struct *tty, struct file *filp) tty->driver_data = NULL; dev->tty = NULL; rfcomm_dlc_unlock(dev->dlc); + + if (test_bit(RFCOMM_TTY_RELEASED, &dev->flags)) { + write_lock_bh(&rfcomm_dev_lock); + list_del_init(&dev->list); + write_unlock_bh(&rfcomm_dev_lock); + + rfcomm_dev_put(dev); + } } rfcomm_dev_put(dev); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 0cc91e6da76..46fd8bf9a69 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -48,11 +48,6 @@ #include <net/bluetooth/hci_core.h> #include <net/bluetooth/sco.h> -#ifndef CONFIG_BT_SCO_DEBUG -#undef BT_DBG -#define BT_DBG(D...) -#endif - #define VERSION "0.6" static int disable_esco = 0; diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 6c023f0f825..18538d7460d 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -147,7 +147,7 @@ static int br_set_tx_csum(struct net_device *dev, u32 data) return 0; } -static struct ethtool_ops br_ethtool_ops = { +static const struct ethtool_ops br_ethtool_ops = { .get_drvinfo = br_getinfo, .get_link = ethtool_op_get_link, .get_tx_csum = ethtool_op_get_tx_csum, @@ -160,21 +160,25 @@ static struct ethtool_ops br_ethtool_ops = { .get_flags = ethtool_op_get_flags, }; +static const struct net_device_ops br_netdev_ops = { + .ndo_open = br_dev_open, + .ndo_stop = br_dev_stop, + .ndo_start_xmit = br_dev_xmit, + .ndo_set_mac_address = br_set_mac_address, + .ndo_set_multicast_list = br_dev_set_multicast_list, + .ndo_change_mtu = br_change_mtu, + .ndo_do_ioctl = br_dev_ioctl, +}; + void br_dev_setup(struct net_device *dev) { random_ether_addr(dev->dev_addr); ether_setup(dev); - dev->do_ioctl = br_dev_ioctl; - dev->hard_start_xmit = br_dev_xmit; - dev->open = br_dev_open; - dev->set_multicast_list = br_dev_set_multicast_list; - dev->change_mtu = br_change_mtu; + dev->netdev_ops = &br_netdev_ops; dev->destructor = free_netdev; SET_ETHTOOL_OPS(dev, &br_ethtool_ops); - dev->stop = br_dev_stop; dev->tx_queue_len = 0; - dev->set_mac_address = br_set_mac_address; dev->priv_flags = IFF_EBRIDGE; dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 0a09ccf68c1..727c5c510a6 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -373,7 +373,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) if (dev->flags & IFF_LOOPBACK || dev->type != ARPHRD_ETHER) return -EINVAL; - if (dev->hard_start_xmit == br_dev_xmit) + if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit) return -ELOOP; if (dev->br_port != NULL) @@ -460,7 +460,7 @@ void br_net_exit(struct net *net) restart: for_each_netdev(net, dev) { if (dev->priv_flags & IFF_EBRIDGE) { - del_br(dev->priv); + del_br(netdev_priv(dev)); goto restart; } } diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 45f61c348e3..a65e43a17fb 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -109,7 +109,6 @@ static struct dst_ops fake_dst_ops = { .family = AF_INET, .protocol = __constant_htons(ETH_P_IP), .update_pmtu = fake_update_pmtu, - .entry_size = sizeof(struct rtable), .entries = ATOMIC_INIT(0), }; @@ -370,7 +369,7 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb) if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev)) goto free_skb; - if (!ip_route_output_key(&init_net, &rt, &fl)) { + if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { /* - Bridged-and-DNAT'ed traffic doesn't * require ip_forwarding. */ if (((struct dst_entry *)rt)->dev == dev) { @@ -951,35 +950,35 @@ static ctl_table brnf_table[] = { .data = &brnf_call_arptables, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &brnf_sysctl_call_tables, + .proc_handler = brnf_sysctl_call_tables, }, { .procname = "bridge-nf-call-iptables", .data = &brnf_call_iptables, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &brnf_sysctl_call_tables, + .proc_handler = brnf_sysctl_call_tables, }, { .procname = "bridge-nf-call-ip6tables", .data = &brnf_call_ip6tables, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &brnf_sysctl_call_tables, + .proc_handler = brnf_sysctl_call_tables, }, { .procname = "bridge-nf-filter-vlan-tagged", .data = &brnf_filter_vlan_tagged, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &brnf_sysctl_call_tables, + .proc_handler = brnf_sysctl_call_tables, }, { .procname = "bridge-nf-filter-pppoe-tagged", .data = &brnf_filter_pppoe_tagged, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &brnf_sysctl_call_tables, + .proc_handler = brnf_sysctl_call_tables, }, { .ctl_name = 0 } }; diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 158dee8b496..603d89248e7 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -22,7 +22,7 @@ #include "br_private.h" #define to_dev(obj) container_of(obj, struct device, kobj) -#define to_bridge(cd) ((struct net_bridge *)(to_net_dev(cd)->priv)) +#define to_bridge(cd) ((struct net_bridge *)netdev_priv(to_net_dev(cd))) /* * Common code for storing bridge parameters. diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 3d33c608906..d44cbf8c374 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -79,7 +79,6 @@ print_ports(const struct sk_buff *skb, uint8_t protocol, int offset) } } -#define myNIPQUAD(a) a[0], a[1], a[2], a[3] static void ebt_log_packet(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, @@ -113,9 +112,8 @@ ebt_log_packet(u_int8_t pf, unsigned int hooknum, printk(" INCOMPLETE IP header"); goto out; } - printk(" IP SRC=%u.%u.%u.%u IP DST=%u.%u.%u.%u, IP " - "tos=0x%02X, IP proto=%d", NIPQUAD(ih->saddr), - NIPQUAD(ih->daddr), ih->tos, ih->protocol); + printk(" IP SRC=%pI4 IP DST=%pI4, IP tos=0x%02X, IP proto=%d", + &ih->saddr, &ih->daddr, ih->tos, ih->protocol); print_ports(skb, ih->protocol, ih->ihl*4); goto out; } @@ -133,10 +131,8 @@ ebt_log_packet(u_int8_t pf, unsigned int hooknum, printk(" INCOMPLETE IPv6 header"); goto out; } - printk(" IPv6 SRC=%x:%x:%x:%x:%x:%x:%x:%x " - "IPv6 DST=%x:%x:%x:%x:%x:%x:%x:%x, IPv6 " - "priority=0x%01X, Next Header=%d", NIP6(ih->saddr), - NIP6(ih->daddr), ih->priority, ih->nexthdr); + printk(" IPv6 SRC=%pI6 IPv6 DST=%pI6, IPv6 priority=0x%01X, Next Header=%d", + &ih->saddr, &ih->daddr, ih->priority, ih->nexthdr); nexthdr = ih->nexthdr; offset_ph = ipv6_skip_exthdr(skb, sizeof(_iph), &nexthdr); if (offset_ph == -1) @@ -177,12 +173,10 @@ ebt_log_packet(u_int8_t pf, unsigned int hooknum, } printk(" ARP MAC SRC="); print_MAC(ap->mac_src); - printk(" ARP IP SRC=%u.%u.%u.%u", - myNIPQUAD(ap->ip_src)); + printk(" ARP IP SRC=%pI4", ap->ip_src); printk(" ARP MAC DST="); print_MAC(ap->mac_dst); - printk(" ARP IP DST=%u.%u.%u.%u", - myNIPQUAD(ap->ip_dst)); + printk(" ARP IP DST=%pI4", ap->ip_dst); } } out: diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 246626bb0c8..8604dfc1fc3 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -56,29 +56,47 @@ static int ebt_broute(struct sk_buff *skb) int ret; ret = ebt_do_table(NF_BR_BROUTING, skb, skb->dev, NULL, - &broute_table); + dev_net(skb->dev)->xt.broute_table); if (ret == NF_DROP) return 1; /* route it */ return 0; /* bridge it */ } +static int __net_init broute_net_init(struct net *net) +{ + net->xt.broute_table = ebt_register_table(net, &broute_table); + if (IS_ERR(net->xt.broute_table)) + return PTR_ERR(net->xt.broute_table); + return 0; +} + +static void __net_exit broute_net_exit(struct net *net) +{ + ebt_unregister_table(net->xt.broute_table); +} + +static struct pernet_operations broute_net_ops = { + .init = broute_net_init, + .exit = broute_net_exit, +}; + static int __init ebtable_broute_init(void) { int ret; - ret = ebt_register_table(&broute_table); + ret = register_pernet_subsys(&broute_net_ops); if (ret < 0) return ret; /* see br_input.c */ rcu_assign_pointer(br_should_route_hook, ebt_broute); - return ret; + return 0; } static void __exit ebtable_broute_fini(void) { rcu_assign_pointer(br_should_route_hook, NULL); synchronize_net(); - ebt_unregister_table(&broute_table); + unregister_pernet_subsys(&broute_net_ops); } module_init(ebtable_broute_init); diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 1a58af51a2e..2b2e8040a9c 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -61,29 +61,36 @@ static struct ebt_table frame_filter = }; static unsigned int -ebt_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in, +ebt_in_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ebt_do_table(hook, skb, in, out, &frame_filter); + return ebt_do_table(hook, skb, in, out, dev_net(in)->xt.frame_filter); +} + +static unsigned int +ebt_out_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in, + const struct net_device *out, int (*okfn)(struct sk_buff *)) +{ + return ebt_do_table(hook, skb, in, out, dev_net(out)->xt.frame_filter); } static struct nf_hook_ops ebt_ops_filter[] __read_mostly = { { - .hook = ebt_hook, + .hook = ebt_in_hook, .owner = THIS_MODULE, .pf = PF_BRIDGE, .hooknum = NF_BR_LOCAL_IN, .priority = NF_BR_PRI_FILTER_BRIDGED, }, { - .hook = ebt_hook, + .hook = ebt_in_hook, .owner = THIS_MODULE, .pf = PF_BRIDGE, .hooknum = NF_BR_FORWARD, .priority = NF_BR_PRI_FILTER_BRIDGED, }, { - .hook = ebt_hook, + .hook = ebt_out_hook, .owner = THIS_MODULE, .pf = PF_BRIDGE, .hooknum = NF_BR_LOCAL_OUT, @@ -91,23 +98,41 @@ static struct nf_hook_ops ebt_ops_filter[] __read_mostly = { }, }; +static int __net_init frame_filter_net_init(struct net *net) +{ + net->xt.frame_filter = ebt_register_table(net, &frame_filter); + if (IS_ERR(net->xt.frame_filter)) + return PTR_ERR(net->xt.frame_filter); + return 0; +} + +static void __net_exit frame_filter_net_exit(struct net *net) +{ + ebt_unregister_table(net->xt.frame_filter); +} + +static struct pernet_operations frame_filter_net_ops = { + .init = frame_filter_net_init, + .exit = frame_filter_net_exit, +}; + static int __init ebtable_filter_init(void) { int ret; - ret = ebt_register_table(&frame_filter); + ret = register_pernet_subsys(&frame_filter_net_ops); if (ret < 0) return ret; ret = nf_register_hooks(ebt_ops_filter, ARRAY_SIZE(ebt_ops_filter)); if (ret < 0) - ebt_unregister_table(&frame_filter); + unregister_pernet_subsys(&frame_filter_net_ops); return ret; } static void __exit ebtable_filter_fini(void) { nf_unregister_hooks(ebt_ops_filter, ARRAY_SIZE(ebt_ops_filter)); - ebt_unregister_table(&frame_filter); + unregister_pernet_subsys(&frame_filter_net_ops); } module_init(ebtable_filter_init); diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index f60c1e78e57..3fe1ae87e35 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -61,36 +61,36 @@ static struct ebt_table frame_nat = }; static unsigned int -ebt_nat_dst(unsigned int hook, struct sk_buff *skb, const struct net_device *in +ebt_nat_in(unsigned int hook, struct sk_buff *skb, const struct net_device *in , const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ebt_do_table(hook, skb, in, out, &frame_nat); + return ebt_do_table(hook, skb, in, out, dev_net(in)->xt.frame_nat); } static unsigned int -ebt_nat_src(unsigned int hook, struct sk_buff *skb, const struct net_device *in +ebt_nat_out(unsigned int hook, struct sk_buff *skb, const struct net_device *in , const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ebt_do_table(hook, skb, in, out, &frame_nat); + return ebt_do_table(hook, skb, in, out, dev_net(out)->xt.frame_nat); } static struct nf_hook_ops ebt_ops_nat[] __read_mostly = { { - .hook = ebt_nat_dst, + .hook = ebt_nat_out, .owner = THIS_MODULE, .pf = PF_BRIDGE, .hooknum = NF_BR_LOCAL_OUT, .priority = NF_BR_PRI_NAT_DST_OTHER, }, { - .hook = ebt_nat_src, + .hook = ebt_nat_out, .owner = THIS_MODULE, .pf = PF_BRIDGE, .hooknum = NF_BR_POST_ROUTING, .priority = NF_BR_PRI_NAT_SRC, }, { - .hook = ebt_nat_dst, + .hook = ebt_nat_in, .owner = THIS_MODULE, .pf = PF_BRIDGE, .hooknum = NF_BR_PRE_ROUTING, @@ -98,23 +98,41 @@ static struct nf_hook_ops ebt_ops_nat[] __read_mostly = { }, }; +static int __net_init frame_nat_net_init(struct net *net) +{ + net->xt.frame_nat = ebt_register_table(net, &frame_nat); + if (IS_ERR(net->xt.frame_nat)) + return PTR_ERR(net->xt.frame_nat); + return 0; +} + +static void __net_exit frame_nat_net_exit(struct net *net) +{ + ebt_unregister_table(net->xt.frame_nat); +} + +static struct pernet_operations frame_nat_net_ops = { + .init = frame_nat_net_init, + .exit = frame_nat_net_exit, +}; + static int __init ebtable_nat_init(void) { int ret; - ret = ebt_register_table(&frame_nat); + ret = register_pernet_subsys(&frame_nat_net_ops); if (ret < 0) return ret; ret = nf_register_hooks(ebt_ops_nat, ARRAY_SIZE(ebt_ops_nat)); if (ret < 0) - ebt_unregister_table(&frame_nat); + unregister_pernet_subsys(&frame_nat_net_ops); return ret; } static void __exit ebtable_nat_fini(void) { nf_unregister_hooks(ebt_ops_nat, ARRAY_SIZE(ebt_ops_nat)); - ebt_unregister_table(&frame_nat); + unregister_pernet_subsys(&frame_nat_net_ops); } module_init(ebtable_nat_init); diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 0fa208e8640..fa108c46e85 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -55,7 +55,6 @@ static DEFINE_MUTEX(ebt_mutex); -static LIST_HEAD(ebt_tables); static struct xt_target ebt_standard_target = { .name = "standard", @@ -315,9 +314,11 @@ find_inlist_lock(struct list_head *head, const char *name, const char *prefix, } static inline struct ebt_table * -find_table_lock(const char *name, int *error, struct mutex *mutex) +find_table_lock(struct net *net, const char *name, int *error, + struct mutex *mutex) { - return find_inlist_lock(&ebt_tables, name, "ebtable_", error, mutex); + return find_inlist_lock(&net->xt.tables[NFPROTO_BRIDGE], name, + "ebtable_", error, mutex); } static inline int @@ -944,7 +945,7 @@ static void get_counters(struct ebt_counter *oldcounters, } /* replace the table */ -static int do_replace(void __user *user, unsigned int len) +static int do_replace(struct net *net, void __user *user, unsigned int len) { int ret, i, countersize; struct ebt_table_info *newinfo; @@ -1016,7 +1017,7 @@ static int do_replace(void __user *user, unsigned int len) if (ret != 0) goto free_counterstmp; - t = find_table_lock(tmp.name, &ret, &ebt_mutex); + t = find_table_lock(net, tmp.name, &ret, &ebt_mutex); if (!t) { ret = -ENOENT; goto free_iterate; @@ -1097,7 +1098,7 @@ free_newinfo: return ret; } -int ebt_register_table(struct ebt_table *table) +struct ebt_table *ebt_register_table(struct net *net, struct ebt_table *table) { struct ebt_table_info *newinfo; struct ebt_table *t; @@ -1109,14 +1110,21 @@ int ebt_register_table(struct ebt_table *table) repl->entries_size == 0 || repl->counters || table->private) { BUGPRINT("Bad table data for ebt_register_table!!!\n"); - return -EINVAL; + return ERR_PTR(-EINVAL); + } + + /* Don't add one table to multiple lists. */ + table = kmemdup(table, sizeof(struct ebt_table), GFP_KERNEL); + if (!table) { + ret = -ENOMEM; + goto out; } countersize = COUNTER_OFFSET(repl->nentries) * nr_cpu_ids; newinfo = vmalloc(sizeof(*newinfo) + countersize); ret = -ENOMEM; if (!newinfo) - return -ENOMEM; + goto free_table; p = vmalloc(repl->entries_size); if (!p) @@ -1148,7 +1156,7 @@ int ebt_register_table(struct ebt_table *table) if (table->check && table->check(newinfo, table->valid_hooks)) { BUGPRINT("The table doesn't like its own initial data, lol\n"); - return -EINVAL; + return ERR_PTR(-EINVAL); } table->private = newinfo; @@ -1157,7 +1165,7 @@ int ebt_register_table(struct ebt_table *table) if (ret != 0) goto free_chainstack; - list_for_each_entry(t, &ebt_tables, list) { + list_for_each_entry(t, &net->xt.tables[NFPROTO_BRIDGE], list) { if (strcmp(t->name, table->name) == 0) { ret = -EEXIST; BUGPRINT("Table name already exists\n"); @@ -1170,9 +1178,9 @@ int ebt_register_table(struct ebt_table *table) ret = -ENOENT; goto free_unlock; } - list_add(&table->list, &ebt_tables); + list_add(&table->list, &net->xt.tables[NFPROTO_BRIDGE]); mutex_unlock(&ebt_mutex); - return 0; + return table; free_unlock: mutex_unlock(&ebt_mutex); free_chainstack: @@ -1184,7 +1192,10 @@ free_chainstack: vfree(newinfo->entries); free_newinfo: vfree(newinfo); - return ret; +free_table: + kfree(table); +out: + return ERR_PTR(ret); } void ebt_unregister_table(struct ebt_table *table) @@ -1198,6 +1209,10 @@ void ebt_unregister_table(struct ebt_table *table) mutex_lock(&ebt_mutex); list_del(&table->list); mutex_unlock(&ebt_mutex); + EBT_ENTRY_ITERATE(table->private->entries, table->private->entries_size, + ebt_cleanup_entry, NULL); + if (table->private->nentries) + module_put(table->me); vfree(table->private->entries); if (table->private->chainstack) { for_each_possible_cpu(i) @@ -1205,10 +1220,11 @@ void ebt_unregister_table(struct ebt_table *table) vfree(table->private->chainstack); } vfree(table->private); + kfree(table); } /* userspace just supplied us with counters */ -static int update_counters(void __user *user, unsigned int len) +static int update_counters(struct net *net, void __user *user, unsigned int len) { int i, ret; struct ebt_counter *tmp; @@ -1228,7 +1244,7 @@ static int update_counters(void __user *user, unsigned int len) return -ENOMEM; } - t = find_table_lock(hlp.name, &ret, &ebt_mutex); + t = find_table_lock(net, hlp.name, &ret, &ebt_mutex); if (!t) goto free_tmp; @@ -1386,10 +1402,10 @@ static int do_ebt_set_ctl(struct sock *sk, switch(cmd) { case EBT_SO_SET_ENTRIES: - ret = do_replace(user, len); + ret = do_replace(sock_net(sk), user, len); break; case EBT_SO_SET_COUNTERS: - ret = update_counters(user, len); + ret = update_counters(sock_net(sk), user, len); break; default: ret = -EINVAL; @@ -1406,7 +1422,7 @@ static int do_ebt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) if (copy_from_user(&tmp, user, sizeof(tmp))) return -EFAULT; - t = find_table_lock(tmp.name, &ret, &ebt_mutex); + t = find_table_lock(sock_net(sk), tmp.name, &ret, &ebt_mutex); if (!t) return ret; diff --git a/net/can/raw.c b/net/can/raw.c index 6e0663faaf9..27aab63df46 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -641,17 +641,12 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock, skb = sock_alloc_send_skb(sk, size, msg->msg_flags & MSG_DONTWAIT, &err); - if (!skb) { - dev_put(dev); - return err; - } + if (!skb) + goto put_dev; err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); - if (err < 0) { - kfree_skb(skb); - dev_put(dev); - return err; - } + if (err < 0) + goto free_skb; skb->dev = dev; skb->sk = sk; @@ -660,9 +655,16 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock, dev_put(dev); if (err) - return err; + goto send_failed; return size; + +free_skb: + kfree_skb(skb); +put_dev: + dev_put(dev); +send_failed: + return err; } static int raw_recvmsg(struct kiocb *iocb, struct socket *sock, diff --git a/net/core/datagram.c b/net/core/datagram.c index ee631843c2f..5e2ac0c4b07 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -209,7 +209,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, void skb_free_datagram(struct sock *sk, struct sk_buff *skb) { kfree_skb(skb); - sk_mem_reclaim(sk); + sk_mem_reclaim_partial(sk); } /** @@ -248,8 +248,7 @@ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags) spin_unlock_bh(&sk->sk_receive_queue.lock); } - kfree_skb(skb); - sk_mem_reclaim(sk); + skb_free_datagram(sk, skb); return err; } diff --git a/net/core/dev.c b/net/core/dev.c index 9174c77d311..446424027d2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -108,7 +108,6 @@ #include <linux/init.h> #include <linux/kmod.h> #include <linux/module.h> -#include <linux/kallsyms.h> #include <linux/netpoll.h> #include <linux/rcupdate.h> #include <linux/delay.h> @@ -130,6 +129,9 @@ #include "net-sysfs.h" +/* Instead of increasing this, you should create a hash table. */ +#define MAX_GRO_SKBS 8 + /* * The list of packet types we will receive (as opposed to discard) * and the routines to invoke. @@ -281,8 +283,8 @@ static const unsigned short netdev_lock_type[] = ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET, ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211, - ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID, - ARPHRD_NONE}; + ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, + ARPHRD_PHONET_PIPE, ARPHRD_VOID, ARPHRD_NONE}; static const char *netdev_lock_name[] = {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25", @@ -298,8 +300,8 @@ static const char *netdev_lock_name[] = "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET", "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL", "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211", - "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID", - "_xmit_NONE"}; + "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", + "_xmit_PHONET_PIPE", "_xmit_VOID", "_xmit_NONE"}; static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)]; @@ -924,10 +926,15 @@ int dev_change_name(struct net_device *dev, const char *newname) strlcpy(dev->name, newname, IFNAMSIZ); rollback: - ret = device_rename(&dev->dev, dev->name); - if (ret) { - memcpy(dev->name, oldname, IFNAMSIZ); - return ret; + /* For now only devices in the initial network namespace + * are in sysfs. + */ + if (net == &init_net) { + ret = device_rename(&dev->dev, dev->name); + if (ret) { + memcpy(dev->name, oldname, IFNAMSIZ); + return ret; + } } write_lock_bh(&dev_base_lock); @@ -1055,6 +1062,7 @@ void dev_load(struct net *net, const char *name) */ int dev_open(struct net_device *dev) { + const struct net_device_ops *ops = dev->netdev_ops; int ret = 0; ASSERT_RTNL(); @@ -1077,11 +1085,11 @@ int dev_open(struct net_device *dev) */ set_bit(__LINK_STATE_START, &dev->state); - if (dev->validate_addr) - ret = dev->validate_addr(dev); + if (ops->ndo_validate_addr) + ret = ops->ndo_validate_addr(dev); - if (!ret && dev->open) - ret = dev->open(dev); + if (!ret && ops->ndo_open) + ret = ops->ndo_open(dev); /* * If it went open OK then: @@ -1125,6 +1133,7 @@ int dev_open(struct net_device *dev) */ int dev_close(struct net_device *dev) { + const struct net_device_ops *ops = dev->netdev_ops; ASSERT_RTNL(); might_sleep(); @@ -1157,8 +1166,8 @@ int dev_close(struct net_device *dev) * We allow it to be called even after a DETACH hot-plug * event. */ - if (dev->stop) - dev->stop(dev); + if (ops->ndo_stop) + ops->ndo_stop(dev); /* * Device is now down. @@ -1527,8 +1536,6 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) __be16 type = skb->protocol; int err; - BUG_ON(skb_shinfo(skb)->frag_list); - skb_reset_mac_header(skb); skb->mac_len = skb->network_header - skb->mac_header; __skb_pull(skb, skb->mac_len); @@ -1654,6 +1661,9 @@ static int dev_gso_segment(struct sk_buff *skb) int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq) { + const struct net_device_ops *ops = dev->netdev_ops; + + prefetch(&dev->netdev_ops->ndo_start_xmit); if (likely(!skb->next)) { if (!list_empty(&ptype_all)) dev_queue_xmit_nit(skb, dev); @@ -1665,7 +1675,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, goto gso; } - return dev->hard_start_xmit(skb, dev); + return ops->ndo_start_xmit(skb, dev); } gso: @@ -1675,7 +1685,7 @@ gso: skb->next = nskb->next; nskb->next = NULL; - rc = dev->hard_start_xmit(nskb, dev); + rc = ops->ndo_start_xmit(nskb, dev); if (unlikely(rc)) { nskb->next = skb->next; skb->next = nskb; @@ -1749,10 +1759,11 @@ static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb) static struct netdev_queue *dev_pick_tx(struct net_device *dev, struct sk_buff *skb) { + const struct net_device_ops *ops = dev->netdev_ops; u16 queue_index = 0; - if (dev->select_queue) - queue_index = dev->select_queue(dev, skb); + if (ops->ndo_select_queue) + queue_index = ops->ndo_select_queue(dev, skb); else if (dev->real_num_tx_queues > 1) queue_index = simple_tx_hash(dev, skb); @@ -2251,8 +2262,10 @@ int netif_receive_skb(struct sk_buff *skb) rcu_read_lock(); /* Don't receive packets in an exiting network namespace */ - if (!net_alive(dev_net(skb->dev))) + if (!net_alive(dev_net(skb->dev))) { + kfree_skb(skb); goto out; + } #ifdef CONFIG_NET_CLS_ACT if (skb->tc_verd & TC_NCLS) { @@ -2325,6 +2338,125 @@ static void flush_backlog(void *arg) } } +static int napi_gro_complete(struct sk_buff *skb) +{ + struct packet_type *ptype; + __be16 type = skb->protocol; + struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; + int err = -ENOENT; + + if (!skb_shinfo(skb)->frag_list) + goto out; + + rcu_read_lock(); + list_for_each_entry_rcu(ptype, head, list) { + if (ptype->type != type || ptype->dev || !ptype->gro_complete) + continue; + + err = ptype->gro_complete(skb); + break; + } + rcu_read_unlock(); + + if (err) { + WARN_ON(&ptype->list == head); + kfree_skb(skb); + return NET_RX_SUCCESS; + } + +out: + __skb_push(skb, -skb_network_offset(skb)); + return netif_receive_skb(skb); +} + +void napi_gro_flush(struct napi_struct *napi) +{ + struct sk_buff *skb, *next; + + for (skb = napi->gro_list; skb; skb = next) { + next = skb->next; + skb->next = NULL; + napi_gro_complete(skb); + } + + napi->gro_list = NULL; +} +EXPORT_SYMBOL(napi_gro_flush); + +int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) +{ + struct sk_buff **pp = NULL; + struct packet_type *ptype; + __be16 type = skb->protocol; + struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; + int count = 0; + int same_flow; + int mac_len; + + if (!(skb->dev->features & NETIF_F_GRO)) + goto normal; + + rcu_read_lock(); + list_for_each_entry_rcu(ptype, head, list) { + struct sk_buff *p; + + if (ptype->type != type || ptype->dev || !ptype->gro_receive) + continue; + + skb_reset_network_header(skb); + mac_len = skb->network_header - skb->mac_header; + skb->mac_len = mac_len; + NAPI_GRO_CB(skb)->same_flow = 0; + NAPI_GRO_CB(skb)->flush = 0; + + for (p = napi->gro_list; p; p = p->next) { + count++; + NAPI_GRO_CB(p)->same_flow = + p->mac_len == mac_len && + !memcmp(skb_mac_header(p), skb_mac_header(skb), + mac_len); + NAPI_GRO_CB(p)->flush = 0; + } + + pp = ptype->gro_receive(&napi->gro_list, skb); + break; + } + rcu_read_unlock(); + + if (&ptype->list == head) + goto normal; + + same_flow = NAPI_GRO_CB(skb)->same_flow; + + if (pp) { + struct sk_buff *nskb = *pp; + + *pp = nskb->next; + nskb->next = NULL; + napi_gro_complete(nskb); + count--; + } + + if (same_flow) + goto ok; + + if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) { + __skb_push(skb, -skb_network_offset(skb)); + goto normal; + } + + NAPI_GRO_CB(skb)->count = 1; + skb->next = napi->gro_list; + napi->gro_list = skb; + +ok: + return NET_RX_SUCCESS; + +normal: + return netif_receive_skb(skb); +} +EXPORT_SYMBOL(napi_gro_receive); + static int process_backlog(struct napi_struct *napi, int quota) { int work = 0; @@ -2344,9 +2476,11 @@ static int process_backlog(struct napi_struct *napi, int quota) } local_irq_enable(); - netif_receive_skb(skb); + napi_gro_receive(napi, skb); } while (++work < quota && jiffies == start_time); + napi_gro_flush(napi); + return work; } @@ -2367,11 +2501,73 @@ void __napi_schedule(struct napi_struct *n) } EXPORT_SYMBOL(__napi_schedule); +void __napi_complete(struct napi_struct *n) +{ + BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); + BUG_ON(n->gro_list); + + list_del(&n->poll_list); + smp_mb__before_clear_bit(); + clear_bit(NAPI_STATE_SCHED, &n->state); +} +EXPORT_SYMBOL(__napi_complete); + +void napi_complete(struct napi_struct *n) +{ + unsigned long flags; + + /* + * don't let napi dequeue from the cpu poll list + * just in case its running on a different cpu + */ + if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state))) + return; + + napi_gro_flush(n); + local_irq_save(flags); + __napi_complete(n); + local_irq_restore(flags); +} +EXPORT_SYMBOL(napi_complete); + +void netif_napi_add(struct net_device *dev, struct napi_struct *napi, + int (*poll)(struct napi_struct *, int), int weight) +{ + INIT_LIST_HEAD(&napi->poll_list); + napi->gro_list = NULL; + napi->poll = poll; + napi->weight = weight; + list_add(&napi->dev_list, &dev->napi_list); +#ifdef CONFIG_NETPOLL + napi->dev = dev; + spin_lock_init(&napi->poll_lock); + napi->poll_owner = -1; +#endif + set_bit(NAPI_STATE_SCHED, &napi->state); +} +EXPORT_SYMBOL(netif_napi_add); + +void netif_napi_del(struct napi_struct *napi) +{ + struct sk_buff *skb, *next; + + list_del_init(&napi->dev_list); + + for (skb = napi->gro_list; skb; skb = next) { + next = skb->next; + skb->next = NULL; + kfree_skb(skb); + } + + napi->gro_list = NULL; +} +EXPORT_SYMBOL(netif_napi_del); + static void net_rx_action(struct softirq_action *h) { struct list_head *list = &__get_cpu_var(softnet_data).poll_list; - unsigned long start_time = jiffies; + unsigned long time_limit = jiffies + 2; int budget = netdev_budget; void *have; @@ -2382,13 +2578,10 @@ static void net_rx_action(struct softirq_action *h) int work, weight; /* If softirq window is exhuasted then punt. - * - * Note that this is a slight policy change from the - * previous NAPI code, which would allow up to 2 - * jiffies to pass before breaking out. The test - * used to be "jiffies - start_time > 1". + * Allow this to run for 2 jiffies since which will allow + * an average latency of 1.5/HZ. */ - if (unlikely(budget <= 0 || jiffies != start_time)) + if (unlikely(budget <= 0 || time_after(jiffies, time_limit))) goto softnet_break; local_irq_enable(); @@ -2615,7 +2808,7 @@ void dev_seq_stop(struct seq_file *seq, void *v) static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) { - struct net_device_stats *stats = dev->get_stats(dev); + const struct net_device_stats *stats = dev_get_stats(dev); seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu " "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n", @@ -2797,31 +2990,6 @@ static void ptype_seq_stop(struct seq_file *seq, void *v) rcu_read_unlock(); } -static void ptype_seq_decode(struct seq_file *seq, void *sym) -{ -#ifdef CONFIG_KALLSYMS - unsigned long offset = 0, symsize; - const char *symname; - char *modname; - char namebuf[128]; - - symname = kallsyms_lookup((unsigned long)sym, &symsize, &offset, - &modname, namebuf); - - if (symname) { - char *delim = ":"; - - if (!modname) - modname = delim = ""; - seq_printf(seq, "%s%s%s%s+0x%lx", delim, modname, delim, - symname, offset); - return; - } -#endif - - seq_printf(seq, "[%p]", sym); -} - static int ptype_seq_show(struct seq_file *seq, void *v) { struct packet_type *pt = v; @@ -2834,10 +3002,8 @@ static int ptype_seq_show(struct seq_file *seq, void *v) else seq_printf(seq, "%04x", ntohs(pt->type)); - seq_printf(seq, " %-8s ", - pt->dev ? pt->dev->name : ""); - ptype_seq_decode(seq, pt->func); - seq_putc(seq, '\n'); + seq_printf(seq, " %-8s %pF\n", + pt->dev ? pt->dev->name : "", pt->func); } return 0; @@ -2954,13 +3120,17 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) static void dev_change_rx_flags(struct net_device *dev, int flags) { - if (dev->flags & IFF_UP && dev->change_rx_flags) - dev->change_rx_flags(dev, flags); + const struct net_device_ops *ops = dev->netdev_ops; + + if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags) + ops->ndo_change_rx_flags(dev, flags); } static int __dev_set_promiscuity(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; + uid_t uid; + gid_t gid; ASSERT_RTNL(); @@ -2985,15 +3155,17 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc) printk(KERN_INFO "device %s %s promiscuous mode\n", dev->name, (dev->flags & IFF_PROMISC) ? "entered" : "left"); - if (audit_enabled) + if (audit_enabled) { + current_uid_gid(&uid, &gid); audit_log(current->audit_context, GFP_ATOMIC, AUDIT_ANOM_PROMISCUOUS, "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u", dev->name, (dev->flags & IFF_PROMISC), (old_flags & IFF_PROMISC), audit_get_loginuid(current), - current->uid, current->gid, + uid, gid, audit_get_sessionid(current)); + } dev_change_rx_flags(dev, IFF_PROMISC); } @@ -3075,6 +3247,8 @@ int dev_set_allmulti(struct net_device *dev, int inc) */ void __dev_set_rx_mode(struct net_device *dev) { + const struct net_device_ops *ops = dev->netdev_ops; + /* dev_open will call this function so the list will stay sane. */ if (!(dev->flags&IFF_UP)) return; @@ -3082,8 +3256,8 @@ void __dev_set_rx_mode(struct net_device *dev) if (!netif_device_present(dev)) return; - if (dev->set_rx_mode) - dev->set_rx_mode(dev); + if (ops->ndo_set_rx_mode) + ops->ndo_set_rx_mode(dev); else { /* Unicast addresses changes may only happen under the rtnl, * therefore calling __dev_set_promiscuity here is safe. @@ -3096,8 +3270,8 @@ void __dev_set_rx_mode(struct net_device *dev) dev->uc_promisc = 0; } - if (dev->set_multicast_list) - dev->set_multicast_list(dev); + if (ops->ndo_set_multicast_list) + ops->ndo_set_multicast_list(dev); } } @@ -3456,6 +3630,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags) */ int dev_set_mtu(struct net_device *dev, int new_mtu) { + const struct net_device_ops *ops = dev->netdev_ops; int err; if (new_mtu == dev->mtu) @@ -3469,10 +3644,11 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) return -ENODEV; err = 0; - if (dev->change_mtu) - err = dev->change_mtu(dev, new_mtu); + if (ops->ndo_change_mtu) + err = ops->ndo_change_mtu(dev, new_mtu); else dev->mtu = new_mtu; + if (!err && dev->flags & IFF_UP) call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); return err; @@ -3487,15 +3663,16 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) */ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) { + const struct net_device_ops *ops = dev->netdev_ops; int err; - if (!dev->set_mac_address) + if (!ops->ndo_set_mac_address) return -EOPNOTSUPP; if (sa->sa_family != dev->type) return -EINVAL; if (!netif_device_present(dev)) return -ENODEV; - err = dev->set_mac_address(dev, sa); + err = ops->ndo_set_mac_address(dev, sa); if (!err) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); return err; @@ -3575,10 +3752,13 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) { int err; struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); + const struct net_device_ops *ops; if (!dev) return -ENODEV; + ops = dev->netdev_ops; + switch (cmd) { case SIOCSIFFLAGS: /* Set interface flags */ return dev_change_flags(dev, ifr->ifr_flags); @@ -3602,15 +3782,15 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) return 0; case SIOCSIFMAP: - if (dev->set_config) { + if (ops->ndo_set_config) { if (!netif_device_present(dev)) return -ENODEV; - return dev->set_config(dev, &ifr->ifr_map); + return ops->ndo_set_config(dev, &ifr->ifr_map); } return -EOPNOTSUPP; case SIOCADDMULTI: - if ((!dev->set_multicast_list && !dev->set_rx_mode) || + if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || ifr->ifr_hwaddr.sa_family != AF_UNSPEC) return -EINVAL; if (!netif_device_present(dev)) @@ -3619,7 +3799,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) dev->addr_len, 1); case SIOCDELMULTI: - if ((!dev->set_multicast_list && !dev->set_rx_mode) || + if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || ifr->ifr_hwaddr.sa_family != AF_UNSPEC) return -EINVAL; if (!netif_device_present(dev)) @@ -3657,10 +3837,9 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) cmd == SIOCBRDELIF || cmd == SIOCWANDEV) { err = -EOPNOTSUPP; - if (dev->do_ioctl) { + if (ops->ndo_do_ioctl) { if (netif_device_present(dev)) - err = dev->do_ioctl(dev, ifr, - cmd); + err = ops->ndo_do_ioctl(dev, ifr, cmd); else err = -ENODEV; } @@ -3921,8 +4100,8 @@ static void rollback_registered(struct net_device *dev) */ dev_addr_discard(dev); - if (dev->uninit) - dev->uninit(dev); + if (dev->netdev_ops->ndo_uninit) + dev->netdev_ops->ndo_uninit(dev); /* Notifier chain MUST detach us from master device. */ WARN_ON(dev->master); @@ -4012,7 +4191,7 @@ int register_netdevice(struct net_device *dev) struct hlist_head *head; struct hlist_node *p; int ret; - struct net *net; + struct net *net = dev_net(dev); BUG_ON(dev_boot_phase); ASSERT_RTNL(); @@ -4021,8 +4200,7 @@ int register_netdevice(struct net_device *dev) /* When net_device's are persistent, this will be fatal. */ BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); - BUG_ON(!dev_net(dev)); - net = dev_net(dev); + BUG_ON(!net); spin_lock_init(&dev->addr_list_lock); netdev_set_addr_lockdep_class(dev); @@ -4030,9 +4208,46 @@ int register_netdevice(struct net_device *dev) dev->iflink = -1; +#ifdef CONFIG_COMPAT_NET_DEV_OPS + /* Netdevice_ops API compatiability support. + * This is temporary until all network devices are converted. + */ + if (dev->netdev_ops) { + const struct net_device_ops *ops = dev->netdev_ops; + + dev->init = ops->ndo_init; + dev->uninit = ops->ndo_uninit; + dev->open = ops->ndo_open; + dev->change_rx_flags = ops->ndo_change_rx_flags; + dev->set_rx_mode = ops->ndo_set_rx_mode; + dev->set_multicast_list = ops->ndo_set_multicast_list; + dev->set_mac_address = ops->ndo_set_mac_address; + dev->validate_addr = ops->ndo_validate_addr; + dev->do_ioctl = ops->ndo_do_ioctl; + dev->set_config = ops->ndo_set_config; + dev->change_mtu = ops->ndo_change_mtu; + dev->tx_timeout = ops->ndo_tx_timeout; + dev->get_stats = ops->ndo_get_stats; + dev->vlan_rx_register = ops->ndo_vlan_rx_register; + dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid; + dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid; +#ifdef CONFIG_NET_POLL_CONTROLLER + dev->poll_controller = ops->ndo_poll_controller; +#endif + } else { + char drivername[64]; + pr_info("%s (%s): not using net_device_ops yet\n", + dev->name, netdev_drivername(dev, drivername, 64)); + + /* This works only because net_device_ops and the + compatiablity structure are the same. */ + dev->netdev_ops = (void *) &(dev->init); + } +#endif + /* Init, if this function is available */ - if (dev->init) { - ret = dev->init(dev); + if (dev->netdev_ops->ndo_init) { + ret = dev->netdev_ops->ndo_init(dev); if (ret) { if (ret > 0) ret = -EIO; @@ -4110,8 +4325,8 @@ out: return ret; err_uninit: - if (dev->uninit) - dev->uninit(dev); + if (dev->netdev_ops->ndo_uninit) + dev->netdev_ops->ndo_uninit(dev); goto out; } @@ -4267,10 +4482,24 @@ void netdev_run_todo(void) } } -static struct net_device_stats *internal_stats(struct net_device *dev) -{ - return &dev->stats; +/** + * dev_get_stats - get network device statistics + * @dev: device to get statistics from + * + * Get network statistics from device. The device driver may provide + * its own method by setting dev->netdev_ops->get_stats; otherwise + * the internal statistics structure is used. + */ +const struct net_device_stats *dev_get_stats(struct net_device *dev) + { + const struct net_device_ops *ops = dev->netdev_ops; + + if (ops->ndo_get_stats) + return ops->ndo_get_stats(dev); + else + return &dev->stats; } +EXPORT_SYMBOL(dev_get_stats); static void netdev_init_one_queue(struct net_device *dev, struct netdev_queue *queue, @@ -4339,18 +4568,11 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, dev->num_tx_queues = queue_count; dev->real_num_tx_queues = queue_count; - if (sizeof_priv) { - dev->priv = ((char *)dev + - ((sizeof(struct net_device) + NETDEV_ALIGN_CONST) - & ~NETDEV_ALIGN_CONST)); - } - dev->gso_max_size = GSO_MAX_SIZE; netdev_init_queues(dev); - dev->get_stats = internal_stats; - netpoll_netdev_init(dev); + INIT_LIST_HEAD(&dev->napi_list); setup(dev); strcpy(dev->name, name); return dev; @@ -4367,10 +4589,15 @@ EXPORT_SYMBOL(alloc_netdev_mq); */ void free_netdev(struct net_device *dev) { + struct napi_struct *p, *n; + release_net(dev_net(dev)); kfree(dev->_tx); + list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) + netif_napi_del(p); + /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED) { kfree((char *)dev - dev->padded); @@ -4463,6 +4690,15 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char if (dev->features & NETIF_F_NETNS_LOCAL) goto out; +#ifdef CONFIG_SYSFS + /* Don't allow real devices to be moved when sysfs + * is enabled. + */ + err = -EINVAL; + if (dev->dev.parent) + goto out; +#endif + /* Ensure the device has been registrered */ err = -EINVAL; if (dev->reg_state != NETREG_REGISTERED) @@ -4520,6 +4756,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char */ dev_addr_discard(dev); + netdev_unregister_kobject(dev); + /* Actually switch the network namespace */ dev_net_set(dev, net); @@ -4536,7 +4774,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char } /* Fixup kobjects */ - netdev_unregister_kobject(dev); err = netdev_register_kobject(dev); WARN_ON(err); @@ -4843,6 +5080,12 @@ static void __net_exit default_device_exit(struct net *net) if (dev->features & NETIF_F_NETNS_LOCAL) continue; + /* Delete virtual devices */ + if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) { + dev->rtnl_link_ops->dellink(dev); + continue; + } + /* Push remaing network devices to init_net */ snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); err = dev_change_net_namespace(dev, &init_net, fb_name); @@ -4889,9 +5132,6 @@ static int __init net_dev_init(void) if (register_pernet_subsys(&netdev_net_ops)) goto out; - if (register_pernet_device(&default_device_ops)) - goto out; - /* * Initialise the packet receive queues. */ @@ -4906,12 +5146,28 @@ static int __init net_dev_init(void) queue->backlog.poll = process_backlog; queue->backlog.weight = weight_p; + queue->backlog.gro_list = NULL; } - netdev_dma_register(); - dev_boot_phase = 0; + /* The loopback device is special if any other network devices + * is present in a network namespace the loopback device must + * be present. Since we now dynamically allocate and free the + * loopback device ensure this invariant is maintained by + * keeping the loopback device as the first device on the + * list of network devices. Ensuring the loopback devices + * is the first device that appears and the last network device + * that disappears. + */ + if (register_pernet_device(&loopback_net_ops)) + goto out; + + if (register_pernet_device(&default_device_ops)) + goto out; + + netdev_dma_register(); + open_softirq(NET_TX_SOFTIRQ, net_tx_action); open_softirq(NET_RX_SOFTIRQ, net_rx_action); diff --git a/net/core/dst.c b/net/core/dst.c index 09c1530f468..57bc4d5b8d0 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -263,9 +263,11 @@ again: void dst_release(struct dst_entry *dst) { if (dst) { - WARN_ON(atomic_read(&dst->__refcnt) < 1); + int newrefcnt; + smp_mb__before_atomic_dec(); - atomic_dec(&dst->__refcnt); + newrefcnt = atomic_dec_return(&dst->__refcnt); + WARN_ON(newrefcnt < 0); } } EXPORT_SYMBOL(dst_release); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 14ada537f89..947710a36ce 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -528,6 +528,22 @@ static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr) return dev->ethtool_ops->set_tx_csum(dev, edata.data); } +static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr) +{ + struct ethtool_value edata; + + if (!dev->ethtool_ops->set_rx_csum) + return -EOPNOTSUPP; + + if (copy_from_user(&edata, useraddr, sizeof(edata))) + return -EFAULT; + + if (!edata.data && dev->ethtool_ops->set_sg) + dev->features &= ~NETIF_F_GRO; + + return dev->ethtool_ops->set_rx_csum(dev, edata.data); +} + static int ethtool_set_sg(struct net_device *dev, char __user *useraddr) { struct ethtool_value edata; @@ -599,6 +615,34 @@ static int ethtool_set_gso(struct net_device *dev, char __user *useraddr) return 0; } +static int ethtool_get_gro(struct net_device *dev, char __user *useraddr) +{ + struct ethtool_value edata = { ETHTOOL_GGRO }; + + edata.data = dev->features & NETIF_F_GRO; + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; +} + +static int ethtool_set_gro(struct net_device *dev, char __user *useraddr) +{ + struct ethtool_value edata; + + if (copy_from_user(&edata, useraddr, sizeof(edata))) + return -EFAULT; + + if (edata.data) { + if (!dev->ethtool_ops->get_rx_csum || + !dev->ethtool_ops->get_rx_csum(dev)) + return -EINVAL; + dev->features |= NETIF_F_GRO; + } else + dev->features &= ~NETIF_F_GRO; + + return 0; +} + static int ethtool_self_test(struct net_device *dev, char __user *useraddr) { struct ethtool_test test; @@ -932,8 +976,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) dev->ethtool_ops->get_rx_csum); break; case ETHTOOL_SRXCSUM: - rc = ethtool_set_value(dev, useraddr, - dev->ethtool_ops->set_rx_csum); + rc = ethtool_set_rx_csum(dev, useraddr); break; case ETHTOOL_GTXCSUM: rc = ethtool_get_value(dev, useraddr, ethcmd, @@ -1014,6 +1057,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SRXFH: rc = ethtool_set_rxhash(dev, useraddr); break; + case ETHTOOL_GGRO: + rc = ethtool_get_gro(dev, useraddr); + break; + case ETHTOOL_SGRO: + rc = ethtool_set_gro(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 79de3b14a8d..32b3a0152d7 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -664,17 +664,18 @@ static int __init fib_rules_init(void) rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL); rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule); - err = register_netdevice_notifier(&fib_rules_notifier); + err = register_pernet_subsys(&fib_rules_net_ops); if (err < 0) goto fail; - err = register_pernet_subsys(&fib_rules_net_ops); + err = register_netdevice_notifier(&fib_rules_notifier); if (err < 0) goto fail_unregister; + return 0; fail_unregister: - unregister_netdevice_notifier(&fib_rules_notifier); + unregister_pernet_subsys(&fib_rules_net_ops); fail: rtnl_unregister(PF_UNSPEC, RTM_NEWRULE); rtnl_unregister(PF_UNSPEC, RTM_DELRULE); diff --git a/net/core/filter.c b/net/core/filter.c index df374435583..d1d779ca096 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -319,6 +319,25 @@ load_b: A = 0; continue; } + case SKF_AD_NLATTR_NEST: { + struct nlattr *nla; + + if (skb_is_nonlinear(skb)) + return 0; + if (A > skb->len - sizeof(struct nlattr)) + return 0; + + nla = (struct nlattr *)&skb->data[A]; + if (nla->nla_len > A - skb->len) + return 0; + + nla = nla_find_nested(nla, X); + if (nla) + A = (void *)nla - (void *)skb->data; + else + A = 0; + continue; + } default: return 0; } diff --git a/net/core/flow.c b/net/core/flow.c index 5cf81052d04..96015871ece 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -165,7 +165,7 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2) return 0; } -void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, +void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, flow_resolve_t resolver) { struct flow_cache_entry *fle, **head; @@ -225,7 +225,7 @@ nocache: void *obj; atomic_t *obj_ref; - err = resolver(key, family, dir, &obj, &obj_ref); + err = resolver(net, key, family, dir, &obj, &obj_ref); if (fle && !err) { fle->genid = atomic_read(&flow_cache_genid); @@ -307,7 +307,7 @@ void flow_cache_flush(void) put_online_cpus(); } -static void __devinit flow_cache_cpu_prepare(int cpu) +static void __init flow_cache_cpu_prepare(int cpu) { struct tasklet_struct *tasklet; unsigned long order; diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 57abe8266be..9cc9f95b109 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -31,6 +31,7 @@ #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <linux/init.h> +#include <linux/rbtree.h> #include <net/sock.h> #include <net/gen_stats.h> @@ -89,6 +90,7 @@ struct gen_estimator u32 avpps; u32 avbps; struct rcu_head e_rcu; + struct rb_node node; }; struct gen_estimator_head @@ -102,6 +104,9 @@ static struct gen_estimator_head elist[EST_MAX_INTERVAL+1]; /* Protects against NULL dereference */ static DEFINE_RWLOCK(est_lock); +/* Protects against soft lockup during large deletion */ +static struct rb_root est_root = RB_ROOT; + static void est_timer(unsigned long arg) { int idx = (int)arg; @@ -139,6 +144,46 @@ skip: rcu_read_unlock(); } +static void gen_add_node(struct gen_estimator *est) +{ + struct rb_node **p = &est_root.rb_node, *parent = NULL; + + while (*p) { + struct gen_estimator *e; + + parent = *p; + e = rb_entry(parent, struct gen_estimator, node); + + if (est->bstats > e->bstats) + p = &parent->rb_right; + else + p = &parent->rb_left; + } + rb_link_node(&est->node, parent, p); + rb_insert_color(&est->node, &est_root); +} + +static +struct gen_estimator *gen_find_node(const struct gnet_stats_basic *bstats, + const struct gnet_stats_rate_est *rate_est) +{ + struct rb_node *p = est_root.rb_node; + + while (p) { + struct gen_estimator *e; + + e = rb_entry(p, struct gen_estimator, node); + + if (bstats > e->bstats) + p = p->rb_right; + else if (bstats < e->bstats || rate_est != e->rate_est) + p = p->rb_left; + else + return e; + } + return NULL; +} + /** * gen_new_estimator - create a new rate estimator * @bstats: basic statistics @@ -194,8 +239,11 @@ int gen_new_estimator(struct gnet_stats_basic *bstats, mod_timer(&elist[idx].timer, jiffies + ((HZ/4) << idx)); list_add_rcu(&est->list, &elist[idx].list); + gen_add_node(est); + return 0; } +EXPORT_SYMBOL(gen_new_estimator); static void __gen_kill_estimator(struct rcu_head *head) { @@ -209,36 +257,27 @@ static void __gen_kill_estimator(struct rcu_head *head) * @bstats: basic statistics * @rate_est: rate estimator statistics * - * Removes the rate estimator specified by &bstats and &rate_est - * and deletes the timer. + * Removes the rate estimator specified by &bstats and &rate_est. * * NOTE: Called under rtnl_mutex */ void gen_kill_estimator(struct gnet_stats_basic *bstats, - struct gnet_stats_rate_est *rate_est) + struct gnet_stats_rate_est *rate_est) { - int idx; - struct gen_estimator *e, *n; - - for (idx=0; idx <= EST_MAX_INTERVAL; idx++) { - - /* Skip non initialized indexes */ - if (!elist[idx].timer.function) - continue; + struct gen_estimator *e; - list_for_each_entry_safe(e, n, &elist[idx].list, list) { - if (e->rate_est != rate_est || e->bstats != bstats) - continue; + while ((e = gen_find_node(bstats, rate_est))) { + rb_erase(&e->node, &est_root); - write_lock_bh(&est_lock); - e->bstats = NULL; - write_unlock_bh(&est_lock); + write_lock_bh(&est_lock); + e->bstats = NULL; + write_unlock_bh(&est_lock); - list_del_rcu(&e->list); - call_rcu(&e->e_rcu, __gen_kill_estimator); - } + list_del_rcu(&e->list); + call_rcu(&e->e_rcu, __gen_kill_estimator); } } +EXPORT_SYMBOL(gen_kill_estimator); /** * gen_replace_estimator - replace rate estimator configuration @@ -259,8 +298,20 @@ int gen_replace_estimator(struct gnet_stats_basic *bstats, gen_kill_estimator(bstats, rate_est); return gen_new_estimator(bstats, rate_est, stats_lock, opt); } +EXPORT_SYMBOL(gen_replace_estimator); +/** + * gen_estimator_active - test if estimator is currently in use + * @bstats: basic statistics + * @rate_est: rate estimator statistics + * + * Returns true if estimator is active, and false if not. + */ +bool gen_estimator_active(const struct gnet_stats_basic *bstats, + const struct gnet_stats_rate_est *rate_est) +{ + ASSERT_RTNL(); -EXPORT_SYMBOL(gen_kill_estimator); -EXPORT_SYMBOL(gen_new_estimator); -EXPORT_SYMBOL(gen_replace_estimator); + return gen_find_node(bstats, rate_est) != NULL; +} +EXPORT_SYMBOL(gen_estimator_active); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 1dc728b3858..9c3717a23cf 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -531,9 +531,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, if (!n) goto out; -#ifdef CONFIG_NET_NS - n->net = hold_net(net); -#endif + write_pnet(&n->net, hold_net(net)); memcpy(n->key, pkey, key_len); n->dev = dev; if (dev) @@ -1329,9 +1327,9 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, struct neigh_table *tbl) { struct neigh_parms *p, *ref; - struct net *net; + struct net *net = dev_net(dev); + const struct net_device_ops *ops = dev->netdev_ops; - net = dev_net(dev); ref = lookup_neigh_params(tbl, net, 0); if (!ref) return NULL; @@ -1340,20 +1338,17 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, if (p) { p->tbl = tbl; atomic_set(&p->refcnt, 1); - INIT_RCU_HEAD(&p->rcu_head); p->reachable_time = neigh_rand_reach_time(p->base_reachable_time); - if (dev->neigh_setup && dev->neigh_setup(dev, p)) { + if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) { kfree(p); return NULL; } dev_hold(dev); p->dev = dev; -#ifdef CONFIG_NET_NS - p->net = hold_net(net); -#endif + write_pnet(&p->net, hold_net(net)); p->sysctl_table = NULL; write_lock_bh(&tbl->lock); p->next = tbl->parms.next; @@ -1408,11 +1403,8 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) unsigned long now = jiffies; unsigned long phsize; -#ifdef CONFIG_NET_NS - tbl->parms.net = &init_net; -#endif + write_pnet(&tbl->parms.net, &init_net); atomic_set(&tbl->parms.refcnt, 1); - INIT_RCU_HEAD(&tbl->parms.rcu_head); tbl->parms.reachable_time = neigh_rand_reach_time(tbl->parms.base_reachable_time); @@ -1426,9 +1418,8 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) panic("cannot create neighbour cache statistics"); #ifdef CONFIG_PROC_FS - tbl->pde = proc_create_data(tbl->id, 0, init_net.proc_net_stat, - &neigh_stat_seq_fops, tbl); - if (!tbl->pde) + if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat, + &neigh_stat_seq_fops, tbl)) panic("cannot create neighbour proc dir entry"); #endif @@ -2568,128 +2559,128 @@ static struct neigh_sysctl_table { .procname = "mcast_solicit", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_NEIGH_UCAST_SOLICIT, .procname = "ucast_solicit", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_NEIGH_APP_SOLICIT, .procname = "app_solicit", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .procname = "retrans_time", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_userhz_jiffies, + .proc_handler = proc_dointvec_userhz_jiffies, }, { .ctl_name = NET_NEIGH_REACHABLE_TIME, .procname = "base_reachable_time", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_NEIGH_DELAY_PROBE_TIME, .procname = "delay_first_probe_time", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_NEIGH_GC_STALE_TIME, .procname = "gc_stale_time", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_NEIGH_UNRES_QLEN, .procname = "unres_qlen", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_NEIGH_PROXY_QLEN, .procname = "proxy_qlen", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .procname = "anycast_delay", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_userhz_jiffies, + .proc_handler = proc_dointvec_userhz_jiffies, }, { .procname = "proxy_delay", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_userhz_jiffies, + .proc_handler = proc_dointvec_userhz_jiffies, }, { .procname = "locktime", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_userhz_jiffies, + .proc_handler = proc_dointvec_userhz_jiffies, }, { .ctl_name = NET_NEIGH_RETRANS_TIME_MS, .procname = "retrans_time_ms", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_ms_jiffies, - .strategy = &sysctl_ms_jiffies, + .proc_handler = proc_dointvec_ms_jiffies, + .strategy = sysctl_ms_jiffies, }, { .ctl_name = NET_NEIGH_REACHABLE_TIME_MS, .procname = "base_reachable_time_ms", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_ms_jiffies, - .strategy = &sysctl_ms_jiffies, + .proc_handler = proc_dointvec_ms_jiffies, + .strategy = sysctl_ms_jiffies, }, { .ctl_name = NET_NEIGH_GC_INTERVAL, .procname = "gc_interval", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_NEIGH_GC_THRESH1, .procname = "gc_thresh1", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_NEIGH_GC_THRESH2, .procname = "gc_thresh2", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_NEIGH_GC_THRESH3, .procname = "gc_thresh3", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, {}, }, diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 92d6b946731..6ac29a46e23 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -270,7 +270,6 @@ static ssize_t netstat_show(const struct device *d, unsigned long offset) { struct net_device *dev = to_net_dev(d); - struct net_device_stats *stats; ssize_t ret = -EINVAL; WARN_ON(offset > sizeof(struct net_device_stats) || @@ -278,7 +277,7 @@ static ssize_t netstat_show(const struct device *d, read_lock(&dev_base_lock); if (dev_isalive(dev)) { - stats = dev->get_stats(dev); + const struct net_device_stats *stats = dev_get_stats(dev); ret = sprintf(buf, fmt_ulong, *(unsigned long *)(((u8 *) stats) + offset)); } @@ -428,6 +427,9 @@ static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) struct net_device *dev = to_net_dev(d); int retval; + if (!net_eq(dev_net(dev), &init_net)) + return 0; + /* pass interface to uevent. */ retval = add_uevent_var(env, "INTERFACE=%s", dev->name); if (retval) @@ -476,6 +478,10 @@ void netdev_unregister_kobject(struct net_device * net) struct device *dev = &(net->dev); kobject_get(&dev->kobj); + + if (dev_net(net) != &init_net) + return; + device_del(dev); } @@ -490,7 +496,7 @@ int netdev_register_kobject(struct net_device *net) dev->groups = groups; BUILD_BUG_ON(BUS_ID_SIZE < IFNAMSIZ); - strlcpy(dev->bus_id, net->name, BUS_ID_SIZE); + dev_set_name(dev, net->name); #ifdef CONFIG_SYSFS *groups++ = &netstat_group; @@ -501,6 +507,9 @@ int netdev_register_kobject(struct net_device *net) #endif #endif /* CONFIG_SYSFS */ + if (dev_net(net) != &init_net) + return 0; + return device_add(dev); } diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 1895a4ca9c4..55cffad2f32 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -47,7 +47,6 @@ static __net_init int setup_net(struct net *net) goto out; ng->len = INITIAL_NET_GEN_PTRS; - INIT_RCU_HEAD(&ng->rcu); rcu_assign_pointer(net->gen, ng); error = 0; @@ -478,7 +477,6 @@ int net_assign_generic(struct net *net, int id, void *data) */ ng->len = id; - INIT_RCU_HEAD(&ng->rcu); memcpy(&ng->ptr, &old_ng->ptr, old_ng->len); rcu_assign_pointer(net->gen, ng); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 6c7af390be0..755414cd49d 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -58,6 +58,7 @@ static void queue_process(struct work_struct *work) while ((skb = skb_dequeue(&npinfo->txq))) { struct net_device *dev = skb->dev; + const struct net_device_ops *ops = dev->netdev_ops; struct netdev_queue *txq; if (!netif_device_present(dev) || !netif_running(dev)) { @@ -71,7 +72,7 @@ static void queue_process(struct work_struct *work) __netif_tx_lock(txq, smp_processor_id()); if (netif_tx_queue_stopped(txq) || netif_tx_queue_frozen(txq) || - dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) { + ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) { skb_queue_head(&npinfo->txq, skb); __netif_tx_unlock(txq); local_irq_restore(flags); @@ -133,9 +134,11 @@ static int poll_one_napi(struct netpoll_info *npinfo, npinfo->rx_flags |= NETPOLL_RX_DROP; atomic_inc(&trapped); + set_bit(NAPI_STATE_NPSVC, &napi->state); work = napi->poll(napi, budget); + clear_bit(NAPI_STATE_NPSVC, &napi->state); atomic_dec(&trapped); npinfo->rx_flags &= ~NETPOLL_RX_DROP; @@ -172,12 +175,13 @@ static void service_arp_queue(struct netpoll_info *npi) void netpoll_poll(struct netpoll *np) { struct net_device *dev = np->dev; + const struct net_device_ops *ops = dev->netdev_ops; - if (!dev || !netif_running(dev) || !dev->poll_controller) + if (!dev || !netif_running(dev) || !ops->ndo_poll_controller) return; /* Process pending work on NIC */ - dev->poll_controller(dev); + ops->ndo_poll_controller(dev); poll_napi(dev); @@ -272,6 +276,7 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) int status = NETDEV_TX_BUSY; unsigned long tries; struct net_device *dev = np->dev; + const struct net_device_ops *ops = dev->netdev_ops; struct netpoll_info *npinfo = np->dev->npinfo; if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) { @@ -292,7 +297,7 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) tries > 0; --tries) { if (__netif_tx_trylock(txq)) { if (!netif_tx_queue_stopped(txq)) - status = dev->hard_start_xmit(skb, dev); + status = ops->ndo_start_xmit(skb, dev); __netif_tx_unlock(txq); if (status == NETDEV_TX_OK) @@ -343,7 +348,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) udph->check = csum_tcpudp_magic(htonl(np->local_ip), htonl(np->remote_ip), udp_len, IPPROTO_UDP, - csum_partial((unsigned char *)udph, udp_len, 0)); + csum_partial(udph, udp_len, 0)); if (udph->check == 0) udph->check = CSUM_MANGLED_0; @@ -553,7 +558,6 @@ out: void netpoll_print_options(struct netpoll *np) { - DECLARE_MAC_BUF(mac); printk(KERN_INFO "%s: local port %d\n", np->name, np->local_port); printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n", @@ -564,8 +568,8 @@ void netpoll_print_options(struct netpoll *np) np->name, np->remote_port); printk(KERN_INFO "%s: remote IP %d.%d.%d.%d\n", np->name, HIPQUAD(np->remote_ip)); - printk(KERN_INFO "%s: remote ethernet address %s\n", - np->name, print_mac(mac, np->remote_mac)); + printk(KERN_INFO "%s: remote ethernet address %pM\n", + np->name, np->remote_mac); } int netpoll_parse_options(struct netpoll *np, char *opt) @@ -695,7 +699,7 @@ int netpoll_setup(struct netpoll *np) atomic_inc(&npinfo->refcnt); } - if (!ndev->poll_controller) { + if (!ndev->netdev_ops->ndo_poll_controller) { printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n", np->name, np->dev_name); err = -ENOTSUPP; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 8997e912aaa..65498483325 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -422,6 +422,7 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, const char *ifname); static int pktgen_device_event(struct notifier_block *, unsigned long, void *); static void pktgen_run_all_threads(void); +static void pktgen_reset_all_threads(void); static void pktgen_stop_all_threads_ifs(void); static int pktgen_stop_device(struct pktgen_dev *pkt_dev); static void pktgen_stop(struct pktgen_thread *t); @@ -480,6 +481,9 @@ static ssize_t pgctrl_write(struct file *file, const char __user * buf, else if (!strcmp(data, "start")) pktgen_run_all_threads(); + else if (!strcmp(data, "reset")) + pktgen_reset_all_threads(); + else printk(KERN_WARNING "pktgen: Unknown command: %s\n", data); @@ -509,7 +513,6 @@ static int pktgen_if_show(struct seq_file *seq, void *v) __u64 sa; __u64 stopped; __u64 now = getCurUs(); - DECLARE_MAC_BUF(mac); seq_printf(seq, "Params: count %llu min_pkt_size: %u max_pkt_size: %u\n", @@ -554,12 +557,12 @@ static int pktgen_if_show(struct seq_file *seq, void *v) seq_puts(seq, " src_mac: "); - seq_printf(seq, "%s ", - print_mac(mac, is_zero_ether_addr(pkt_dev->src_mac) ? - pkt_dev->odev->dev_addr : pkt_dev->src_mac)); + seq_printf(seq, "%pM ", + is_zero_ether_addr(pkt_dev->src_mac) ? + pkt_dev->odev->dev_addr : pkt_dev->src_mac); seq_printf(seq, "dst_mac: "); - seq_printf(seq, "%s\n", print_mac(mac, pkt_dev->dst_mac)); + seq_printf(seq, "%pM\n", pkt_dev->dst_mac); seq_printf(seq, " udp_src_min: %d udp_src_max: %d udp_dst_min: %d udp_dst_max: %d\n", @@ -2162,7 +2165,8 @@ static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow) struct xfrm_state *x = pkt_dev->flows[flow].x; if (!x) { /*slow path: we dont already have xfrm_state*/ - x = xfrm_stateonly_find((xfrm_address_t *)&pkt_dev->cur_daddr, + x = xfrm_stateonly_find(&init_net, + (xfrm_address_t *)&pkt_dev->cur_daddr, (xfrm_address_t *)&pkt_dev->cur_saddr, AF_INET, pkt_dev->ipsmode, @@ -3169,6 +3173,24 @@ static void pktgen_run_all_threads(void) pktgen_wait_all_threads_run(); } +static void pktgen_reset_all_threads(void) +{ + struct pktgen_thread *t; + + pr_debug("pktgen: entering pktgen_reset_all_threads.\n"); + + mutex_lock(&pktgen_thread_lock); + + list_for_each_entry(t, &pktgen_threads, th_list) + t->control |= (T_REMDEVALL); + + mutex_unlock(&pktgen_thread_lock); + + schedule_timeout_interruptible(msecs_to_jiffies(125)); /* Propagate thread->control */ + + pktgen_wait_all_threads_run(); +} + static void show_results(struct pktgen_dev *pkt_dev, int nr_frags) { __u64 total_us, bps, mbps, pps, idle; @@ -3331,14 +3353,14 @@ static void pktgen_rem_thread(struct pktgen_thread *t) static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) { - struct net_device *odev = NULL; + struct net_device *odev = pkt_dev->odev; + int (*xmit)(struct sk_buff *, struct net_device *) + = odev->netdev_ops->ndo_start_xmit; struct netdev_queue *txq; __u64 idle_start = 0; u16 queue_map; int ret; - odev = pkt_dev->odev; - if (pkt_dev->delay_us || pkt_dev->delay_ns) { u64 now; @@ -3419,7 +3441,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) atomic_inc(&(pkt_dev->skb->users)); retry_now: - ret = odev->hard_start_xmit(pkt_dev->skb, odev); + ret = (*xmit)(pkt_dev->skb, odev); if (likely(ret == NETDEV_TX_OK)) { pkt_dev->last_ok = 1; pkt_dev->sofar++; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 4dfb6b4d455..790dd205bb5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -551,7 +551,7 @@ static void set_operstate(struct net_device *dev, unsigned char transition) } static void copy_rtnl_link_stats(struct rtnl_link_stats *a, - struct net_device_stats *b) + const struct net_device_stats *b) { a->rx_packets = b->rx_packets; a->tx_packets = b->tx_packets; @@ -609,7 +609,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq; struct ifinfomsg *ifm; struct nlmsghdr *nlh; - struct net_device_stats *stats; + const struct net_device_stats *stats; struct nlattr *attr; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); @@ -666,7 +666,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (attr == NULL) goto nla_put_failure; - stats = dev->get_stats(dev); + stats = dev_get_stats(dev); copy_rtnl_link_stats(nla_data(attr), stats); if (dev->rtnl_link_ops) { @@ -762,6 +762,7 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, struct nlattr **tb, char *ifname, int modified) { + const struct net_device_ops *ops = dev->netdev_ops; int send_addr_notify = 0; int err; @@ -783,7 +784,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, struct rtnl_link_ifmap *u_map; struct ifmap k_map; - if (!dev->set_config) { + if (!ops->ndo_set_config) { err = -EOPNOTSUPP; goto errout; } @@ -801,7 +802,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, k_map.dma = (unsigned char) u_map->dma; k_map.port = (unsigned char) u_map->port; - err = dev->set_config(dev, &k_map); + err = ops->ndo_set_config(dev, &k_map); if (err < 0) goto errout; @@ -812,7 +813,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, struct sockaddr *sa; int len; - if (!dev->set_mac_address) { + if (!ops->ndo_set_mac_address) { err = -EOPNOTSUPP; goto errout; } @@ -831,7 +832,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, sa->sa_family = dev->type; memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]), dev->addr_len); - err = dev->set_mac_address(dev, sa); + err = ops->ndo_set_mac_address(dev, sa); kfree(sa); if (err) goto errout; diff --git a/net/core/scm.c b/net/core/scm.c index b12303dd39d..b7ba91b074b 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -44,11 +44,13 @@ static __inline__ int scm_check_creds(struct ucred *creds) { + const struct cred *cred = current_cred(); + if ((creds->pid == task_tgid_vnr(current) || capable(CAP_SYS_ADMIN)) && - ((creds->uid == current->uid || creds->uid == current->euid || - creds->uid == current->suid) || capable(CAP_SETUID)) && - ((creds->gid == current->gid || creds->gid == current->egid || - creds->gid == current->sgid) || capable(CAP_SETGID))) { + ((creds->uid == cred->uid || creds->uid == cred->euid || + creds->uid == cred->suid) || capable(CAP_SETUID)) && + ((creds->gid == cred->gid || creds->gid == cred->egid || + creds->gid == cred->sgid) || capable(CAP_SETGID))) { return 0; } return -EPERM; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 65f7757465b..b8d0abb2643 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -501,7 +501,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->network_header = old->network_header; new->mac_header = old->mac_header; new->dst = dst_clone(old->dst); -#ifdef CONFIG_INET +#ifdef CONFIG_XFRM new->sp = secpath_get(old->sp); #endif memcpy(new->cb, old->cb, sizeof(old->cb)); @@ -556,6 +556,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) C(truesize); #if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE) C(do_not_encrypt); + C(requeue); #endif atomic_set(&n->users, 1); @@ -2017,6 +2018,148 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) skb_split_no_header(skb, skb1, len, pos); } +/* Shifting from/to a cloned skb is a no-go. + * + * Caller cannot keep skb_shinfo related pointers past calling here! + */ +static int skb_prepare_for_shift(struct sk_buff *skb) +{ + return skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC); +} + +/** + * skb_shift - Shifts paged data partially from skb to another + * @tgt: buffer into which tail data gets added + * @skb: buffer from which the paged data comes from + * @shiftlen: shift up to this many bytes + * + * Attempts to shift up to shiftlen worth of bytes, which may be less than + * the length of the skb, from tgt to skb. Returns number bytes shifted. + * It's up to caller to free skb if everything was shifted. + * + * If @tgt runs out of frags, the whole operation is aborted. + * + * Skb cannot include anything else but paged data while tgt is allowed + * to have non-paged data as well. + * + * TODO: full sized shift could be optimized but that would need + * specialized skb free'er to handle frags without up-to-date nr_frags. + */ +int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) +{ + int from, to, merge, todo; + struct skb_frag_struct *fragfrom, *fragto; + + BUG_ON(shiftlen > skb->len); + BUG_ON(skb_headlen(skb)); /* Would corrupt stream */ + + todo = shiftlen; + from = 0; + to = skb_shinfo(tgt)->nr_frags; + fragfrom = &skb_shinfo(skb)->frags[from]; + + /* Actual merge is delayed until the point when we know we can + * commit all, so that we don't have to undo partial changes + */ + if (!to || + !skb_can_coalesce(tgt, to, fragfrom->page, fragfrom->page_offset)) { + merge = -1; + } else { + merge = to - 1; + + todo -= fragfrom->size; + if (todo < 0) { + if (skb_prepare_for_shift(skb) || + skb_prepare_for_shift(tgt)) + return 0; + + /* All previous frag pointers might be stale! */ + fragfrom = &skb_shinfo(skb)->frags[from]; + fragto = &skb_shinfo(tgt)->frags[merge]; + + fragto->size += shiftlen; + fragfrom->size -= shiftlen; + fragfrom->page_offset += shiftlen; + + goto onlymerged; + } + + from++; + } + + /* Skip full, not-fitting skb to avoid expensive operations */ + if ((shiftlen == skb->len) && + (skb_shinfo(skb)->nr_frags - from) > (MAX_SKB_FRAGS - to)) + return 0; + + if (skb_prepare_for_shift(skb) || skb_prepare_for_shift(tgt)) + return 0; + + while ((todo > 0) && (from < skb_shinfo(skb)->nr_frags)) { + if (to == MAX_SKB_FRAGS) + return 0; + + fragfrom = &skb_shinfo(skb)->frags[from]; + fragto = &skb_shinfo(tgt)->frags[to]; + + if (todo >= fragfrom->size) { + *fragto = *fragfrom; + todo -= fragfrom->size; + from++; + to++; + + } else { + get_page(fragfrom->page); + fragto->page = fragfrom->page; + fragto->page_offset = fragfrom->page_offset; + fragto->size = todo; + + fragfrom->page_offset += todo; + fragfrom->size -= todo; + todo = 0; + + to++; + break; + } + } + + /* Ready to "commit" this state change to tgt */ + skb_shinfo(tgt)->nr_frags = to; + + if (merge >= 0) { + fragfrom = &skb_shinfo(skb)->frags[0]; + fragto = &skb_shinfo(tgt)->frags[merge]; + + fragto->size += fragfrom->size; + put_page(fragfrom->page); + } + + /* Reposition in the original skb */ + to = 0; + while (from < skb_shinfo(skb)->nr_frags) + skb_shinfo(skb)->frags[to++] = skb_shinfo(skb)->frags[from++]; + skb_shinfo(skb)->nr_frags = to; + + BUG_ON(todo > 0 && !skb_shinfo(skb)->nr_frags); + +onlymerged: + /* Most likely the tgt won't ever need its checksum anymore, skb on + * the other hand might need it if it needs to be resent + */ + tgt->ip_summed = CHECKSUM_PARTIAL; + skb->ip_summed = CHECKSUM_PARTIAL; + + /* Yak, is it really working this way? Some helper please? */ + skb->len -= shiftlen; + skb->data_len -= shiftlen; + skb->truesize -= shiftlen; + tgt->len += shiftlen; + tgt->data_len += shiftlen; + tgt->truesize += shiftlen; + + return shiftlen; +} + /** * skb_prepare_seq_read - Prepare a sequential read of skb data * @skb: the buffer to read @@ -2285,6 +2428,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) { struct sk_buff *segs = NULL; struct sk_buff *tail = NULL; + struct sk_buff *fskb = skb_shinfo(skb)->frag_list; unsigned int mss = skb_shinfo(skb)->gso_size; unsigned int doffset = skb->data - skb_mac_header(skb); unsigned int offset = doffset; @@ -2304,7 +2448,6 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) struct sk_buff *nskb; skb_frag_t *frag; int hsize; - int k; int size; len = skb->len - offset; @@ -2317,9 +2460,36 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) if (hsize > len || !sg) hsize = len; - nskb = alloc_skb(hsize + doffset + headroom, GFP_ATOMIC); - if (unlikely(!nskb)) - goto err; + if (!hsize && i >= nfrags) { + BUG_ON(fskb->len != len); + + pos += len; + nskb = skb_clone(fskb, GFP_ATOMIC); + fskb = fskb->next; + + if (unlikely(!nskb)) + goto err; + + hsize = skb_end_pointer(nskb) - nskb->head; + if (skb_cow_head(nskb, doffset + headroom)) { + kfree_skb(nskb); + goto err; + } + + nskb->truesize += skb_end_pointer(nskb) - nskb->head - + hsize; + skb_release_head_state(nskb); + __skb_push(nskb, doffset); + } else { + nskb = alloc_skb(hsize + doffset + headroom, + GFP_ATOMIC); + + if (unlikely(!nskb)) + goto err; + + skb_reserve(nskb, headroom); + __skb_put(nskb, doffset); + } if (segs) tail->next = nskb; @@ -2330,13 +2500,15 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) __copy_skb_header(nskb, skb); nskb->mac_len = skb->mac_len; - skb_reserve(nskb, headroom); skb_reset_mac_header(nskb); skb_set_network_header(nskb, skb->mac_len); nskb->transport_header = (nskb->network_header + skb_network_header_len(skb)); - skb_copy_from_linear_data(skb, skb_put(nskb, doffset), - doffset); + skb_copy_from_linear_data(skb, nskb->data, doffset); + + if (pos >= offset + len) + continue; + if (!sg) { nskb->ip_summed = CHECKSUM_NONE; nskb->csum = skb_copy_and_csum_bits(skb, offset, @@ -2346,14 +2518,11 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) } frag = skb_shinfo(nskb)->frags; - k = 0; skb_copy_from_linear_data_offset(skb, offset, skb_put(nskb, hsize), hsize); - while (pos < offset + len) { - BUG_ON(i >= nfrags); - + while (pos < offset + len && i < nfrags) { *frag = skb_shinfo(skb)->frags[i]; get_page(frag->page); size = frag->size; @@ -2363,20 +2532,39 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) frag->size -= offset - pos; } - k++; + skb_shinfo(nskb)->nr_frags++; if (pos + size <= offset + len) { i++; pos += size; } else { frag->size -= pos + size - (offset + len); - break; + goto skip_fraglist; } frag++; } - skb_shinfo(nskb)->nr_frags = k; + if (pos < offset + len) { + struct sk_buff *fskb2 = fskb; + + BUG_ON(pos + fskb->len != offset + len); + + pos += fskb->len; + fskb = fskb->next; + + if (fskb2->next) { + fskb2 = skb_clone(fskb2, GFP_ATOMIC); + if (!fskb2) + goto err; + } else + skb_get(fskb2); + + BUG_ON(skb_shinfo(nskb)->frag_list); + skb_shinfo(nskb)->frag_list = fskb2; + } + +skip_fraglist: nskb->data_len = len - hsize; nskb->len += nskb->data_len; nskb->truesize += nskb->data_len; @@ -2394,6 +2582,65 @@ err: EXPORT_SYMBOL_GPL(skb_segment); +int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) +{ + struct sk_buff *p = *head; + struct sk_buff *nskb; + unsigned int headroom; + unsigned int hlen = p->data - skb_mac_header(p); + + if (hlen + p->len + skb->len >= 65536) + return -E2BIG; + + if (skb_shinfo(p)->frag_list) + goto merge; + + headroom = skb_headroom(p); + nskb = netdev_alloc_skb(p->dev, headroom); + if (unlikely(!nskb)) + return -ENOMEM; + + __copy_skb_header(nskb, p); + nskb->mac_len = p->mac_len; + + skb_reserve(nskb, headroom); + + skb_set_mac_header(nskb, -hlen); + skb_set_network_header(nskb, skb_network_offset(p)); + skb_set_transport_header(nskb, skb_transport_offset(p)); + + memcpy(skb_mac_header(nskb), skb_mac_header(p), hlen); + + *NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p); + skb_shinfo(nskb)->frag_list = p; + skb_header_release(p); + nskb->prev = p; + + nskb->data_len += p->len; + nskb->truesize += p->len; + nskb->len += p->len; + + *head = nskb; + nskb->next = p->next; + p->next = NULL; + + p = nskb; + +merge: + NAPI_GRO_CB(p)->count++; + p->prev->next = skb; + p->prev = skb; + skb_header_release(skb); + + p->data_len += skb->len; + p->truesize += skb->len; + p->len += skb->len; + + NAPI_GRO_CB(skb)->same_flow = 1; + return 0; +} +EXPORT_SYMBOL_GPL(skb_gro_receive); + void __init skb_init(void) { skbuff_head_cache = kmem_cache_create("skbuff_head_cache", diff --git a/net/core/sock.c b/net/core/sock.c index edf7220889a..f3a0d08cbb4 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1071,7 +1071,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) newsk->sk_sleep = NULL; if (newsk->sk_prot->sockets_allocated) - atomic_inc(newsk->sk_prot->sockets_allocated); + percpu_counter_inc(newsk->sk_prot->sockets_allocated); } out: return newsk; @@ -1463,8 +1463,12 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind) } if (prot->memory_pressure) { - if (!*prot->memory_pressure || - prot->sysctl_mem[2] > atomic_read(prot->sockets_allocated) * + int alloc; + + if (!*prot->memory_pressure) + return 1; + alloc = percpu_counter_read_positive(prot->sockets_allocated); + if (prot->sysctl_mem[2] > alloc * sk_mem_pages(sk->sk_wmem_queued + atomic_read(&sk->sk_rmem_alloc) + sk->sk_forward_alloc)) @@ -2037,7 +2041,8 @@ int proto_register(struct proto *prot, int alloc_slab) { if (alloc_slab) { prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0, - SLAB_HWCACHE_ALIGN, NULL); + SLAB_HWCACHE_ALIGN | prot->slab_flags, + NULL); if (prot->slab == NULL) { printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n", @@ -2076,7 +2081,9 @@ int proto_register(struct proto *prot, int alloc_slab) prot->twsk_prot->twsk_slab = kmem_cache_create(prot->twsk_prot->twsk_slab_name, prot->twsk_prot->twsk_obj_size, - 0, SLAB_HWCACHE_ALIGN, + 0, + SLAB_HWCACHE_ALIGN | + prot->slab_flags, NULL); if (prot->twsk_prot->twsk_slab == NULL) goto out_free_timewait_sock_slab_name; @@ -2164,7 +2171,7 @@ static void proto_seq_printf(struct seq_file *seq, struct proto *proto) "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n", proto->name, proto->obj_size, - proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1, + sock_prot_inuse_get(seq_file_net(seq), proto), proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1, proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI", proto->max_header, @@ -2218,7 +2225,8 @@ static const struct seq_operations proto_seq_ops = { static int proto_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &proto_seq_ops); + return seq_open_net(inode, file, &proto_seq_ops, + sizeof(struct seq_net_private)); } static const struct file_operations proto_seq_fops = { @@ -2226,13 +2234,31 @@ static const struct file_operations proto_seq_fops = { .open = proto_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_net, +}; + +static __net_init int proto_init_net(struct net *net) +{ + if (!proc_net_fops_create(net, "protocols", S_IRUGO, &proto_seq_fops)) + return -ENOMEM; + + return 0; +} + +static __net_exit void proto_exit_net(struct net *net) +{ + proc_net_remove(net, "protocols"); +} + + +static __net_initdata struct pernet_operations proto_net_ops = { + .init = proto_init_net, + .exit = proto_exit_net, }; static int __init proto_init(void) { - /* register /proc/net/protocols */ - return proc_net_fops_create(&init_net, "protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0; + return register_pernet_subsys(&proto_net_ops); } subsys_initcall(proto_init); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index f686467ff12..83d3398559e 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -12,7 +12,6 @@ #include <linux/netdevice.h> #include <linux/init.h> #include <net/sock.h> -#include <net/xfrm.h> static struct ctl_table net_core_table[] = { #ifdef CONFIG_NET @@ -22,7 +21,7 @@ static struct ctl_table net_core_table[] = { .data = &sysctl_wmem_max, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_CORE_RMEM_MAX, @@ -30,7 +29,7 @@ static struct ctl_table net_core_table[] = { .data = &sysctl_rmem_max, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_CORE_WMEM_DEFAULT, @@ -38,7 +37,7 @@ static struct ctl_table net_core_table[] = { .data = &sysctl_wmem_default, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_CORE_RMEM_DEFAULT, @@ -46,7 +45,7 @@ static struct ctl_table net_core_table[] = { .data = &sysctl_rmem_default, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_CORE_DEV_WEIGHT, @@ -54,7 +53,7 @@ static struct ctl_table net_core_table[] = { .data = &weight_p, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_CORE_MAX_BACKLOG, @@ -62,7 +61,7 @@ static struct ctl_table net_core_table[] = { .data = &netdev_max_backlog, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_CORE_MSG_COST, @@ -70,8 +69,8 @@ static struct ctl_table net_core_table[] = { .data = &net_ratelimit_state.interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_CORE_MSG_BURST, @@ -79,7 +78,7 @@ static struct ctl_table net_core_table[] = { .data = &net_ratelimit_state.burst, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_CORE_OPTMEM_MAX, @@ -87,42 +86,8 @@ static struct ctl_table net_core_table[] = { .data = &sysctl_optmem_max, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, -#ifdef CONFIG_XFRM - { - .ctl_name = NET_CORE_AEVENT_ETIME, - .procname = "xfrm_aevent_etime", - .data = &sysctl_xfrm_aevent_etime, - .maxlen = sizeof(u32), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = NET_CORE_AEVENT_RSEQTH, - .procname = "xfrm_aevent_rseqth", - .data = &sysctl_xfrm_aevent_rseqth, - .maxlen = sizeof(u32), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = CTL_UNNUMBERED, - .procname = "xfrm_larval_drop", - .data = &sysctl_xfrm_larval_drop, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = CTL_UNNUMBERED, - .procname = "xfrm_acq_expires", - .data = &sysctl_xfrm_acq_expires, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, -#endif /* CONFIG_XFRM */ #endif /* CONFIG_NET */ { .ctl_name = NET_CORE_BUDGET, @@ -130,7 +95,7 @@ static struct ctl_table net_core_table[] = { .data = &netdev_budget, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_CORE_WARNINGS, @@ -138,7 +103,7 @@ static struct ctl_table net_core_table[] = { .data = &net_msg_warn, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = 0 } }; @@ -150,12 +115,12 @@ static struct ctl_table netns_core_table[] = { .data = &init_net.core.sysctl_somaxconn, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = 0 } }; -static __net_initdata struct ctl_path net_core_path[] = { +__net_initdata struct ctl_path net_core_path[] = { { .procname = "net", .ctl_name = CTL_NET, }, { .procname = "core", .ctl_name = NET_CORE, }, { }, @@ -207,8 +172,11 @@ static __net_initdata struct pernet_operations sysctl_core_ops = { static __init int sysctl_core_init(void) { + static struct ctl_table empty[1]; + + register_sysctl_paths(net_core_path, empty); register_net_sysctl_rotable(net_core_path, net_core_table); return register_pernet_subsys(&sysctl_core_ops); } -__initcall(sysctl_core_init); +fs_initcall(sysctl_core_init); diff --git a/net/dcb/Kconfig b/net/dcb/Kconfig new file mode 100644 index 00000000000..4066d59c8de --- /dev/null +++ b/net/dcb/Kconfig @@ -0,0 +1,22 @@ +config DCB + bool "Data Center Bridging support" + default n + ---help--- + This enables support for configuring Data Center Bridging (DCB) + features on DCB capable Ethernet adapters via rtnetlink. Say 'Y' + if you have a DCB capable Ethernet adapter which supports this + interface and you are connected to a DCB capable switch. + + DCB is a collection of Ethernet enhancements which allow DCB capable + NICs and switches to support network traffic with differing + requirements (highly reliable, no drops vs. best effort vs. low + latency) to co-exist on Ethernet. + + DCB features include: + Enhanced Transmission Selection (aka Priority Grouping) - provides a + framework for assigning bandwidth guarantees to traffic classes. + Priority-based Flow Control (PFC) - a MAC control pause frame which + works at the granularity of the 802.1p priority instead of the + link (802.3x). + + If unsure, say N. diff --git a/net/dcb/Makefile b/net/dcb/Makefile new file mode 100644 index 00000000000..9930f4cde81 --- /dev/null +++ b/net/dcb/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_DCB) += dcbnl.o diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c new file mode 100644 index 00000000000..5dbfe5fdc0d --- /dev/null +++ b/net/dcb/dcbnl.c @@ -0,0 +1,1122 @@ +/* + * Copyright (c) 2008, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Author: Lucy Liu <lucy.liu@intel.com> + */ + +#include <linux/netdevice.h> +#include <linux/netlink.h> +#include <net/netlink.h> +#include <net/rtnetlink.h> +#include <linux/dcbnl.h> +#include <linux/rtnetlink.h> +#include <net/sock.h> + +/** + * Data Center Bridging (DCB) is a collection of Ethernet enhancements + * intended to allow network traffic with differing requirements + * (highly reliable, no drops vs. best effort vs. low latency) to operate + * and co-exist on Ethernet. Current DCB features are: + * + * Enhanced Transmission Selection (aka Priority Grouping [PG]) - provides a + * framework for assigning bandwidth guarantees to traffic classes. + * + * Priority-based Flow Control (PFC) - provides a flow control mechanism which + * can work independently for each 802.1p priority. + * + * Congestion Notification - provides a mechanism for end-to-end congestion + * control for protocols which do not have built-in congestion management. + * + * More information about the emerging standards for these Ethernet features + * can be found at: http://www.ieee802.org/1/pages/dcbridges.html + * + * This file implements an rtnetlink interface to allow configuration of DCB + * features for capable devices. + */ + +MODULE_AUTHOR("Lucy Liu, <lucy.liu@intel.com>"); +MODULE_DESCRIPTION("Data Center Bridging netlink interface"); +MODULE_LICENSE("GPL"); + +/**************** DCB attribute policies *************************************/ + +/* DCB netlink attributes policy */ +static struct nla_policy dcbnl_rtnl_policy[DCB_ATTR_MAX + 1] = { + [DCB_ATTR_IFNAME] = {.type = NLA_NUL_STRING, .len = IFNAMSIZ - 1}, + [DCB_ATTR_STATE] = {.type = NLA_U8}, + [DCB_ATTR_PFC_CFG] = {.type = NLA_NESTED}, + [DCB_ATTR_PG_CFG] = {.type = NLA_NESTED}, + [DCB_ATTR_SET_ALL] = {.type = NLA_U8}, + [DCB_ATTR_PERM_HWADDR] = {.type = NLA_FLAG}, + [DCB_ATTR_CAP] = {.type = NLA_NESTED}, + [DCB_ATTR_PFC_STATE] = {.type = NLA_U8}, + [DCB_ATTR_BCN] = {.type = NLA_NESTED}, +}; + +/* DCB priority flow control to User Priority nested attributes */ +static struct nla_policy dcbnl_pfc_up_nest[DCB_PFC_UP_ATTR_MAX + 1] = { + [DCB_PFC_UP_ATTR_0] = {.type = NLA_U8}, + [DCB_PFC_UP_ATTR_1] = {.type = NLA_U8}, + [DCB_PFC_UP_ATTR_2] = {.type = NLA_U8}, + [DCB_PFC_UP_ATTR_3] = {.type = NLA_U8}, + [DCB_PFC_UP_ATTR_4] = {.type = NLA_U8}, + [DCB_PFC_UP_ATTR_5] = {.type = NLA_U8}, + [DCB_PFC_UP_ATTR_6] = {.type = NLA_U8}, + [DCB_PFC_UP_ATTR_7] = {.type = NLA_U8}, + [DCB_PFC_UP_ATTR_ALL] = {.type = NLA_FLAG}, +}; + +/* DCB priority grouping nested attributes */ +static struct nla_policy dcbnl_pg_nest[DCB_PG_ATTR_MAX + 1] = { + [DCB_PG_ATTR_TC_0] = {.type = NLA_NESTED}, + [DCB_PG_ATTR_TC_1] = {.type = NLA_NESTED}, + [DCB_PG_ATTR_TC_2] = {.type = NLA_NESTED}, + [DCB_PG_ATTR_TC_3] = {.type = NLA_NESTED}, + [DCB_PG_ATTR_TC_4] = {.type = NLA_NESTED}, + [DCB_PG_ATTR_TC_5] = {.type = NLA_NESTED}, + [DCB_PG_ATTR_TC_6] = {.type = NLA_NESTED}, + [DCB_PG_ATTR_TC_7] = {.type = NLA_NESTED}, + [DCB_PG_ATTR_TC_ALL] = {.type = NLA_NESTED}, + [DCB_PG_ATTR_BW_ID_0] = {.type = NLA_U8}, + [DCB_PG_ATTR_BW_ID_1] = {.type = NLA_U8}, + [DCB_PG_ATTR_BW_ID_2] = {.type = NLA_U8}, + [DCB_PG_ATTR_BW_ID_3] = {.type = NLA_U8}, + [DCB_PG_ATTR_BW_ID_4] = {.type = NLA_U8}, + [DCB_PG_ATTR_BW_ID_5] = {.type = NLA_U8}, + [DCB_PG_ATTR_BW_ID_6] = {.type = NLA_U8}, + [DCB_PG_ATTR_BW_ID_7] = {.type = NLA_U8}, + [DCB_PG_ATTR_BW_ID_ALL] = {.type = NLA_FLAG}, +}; + +/* DCB traffic class nested attributes. */ +static struct nla_policy dcbnl_tc_param_nest[DCB_TC_ATTR_PARAM_MAX + 1] = { + [DCB_TC_ATTR_PARAM_PGID] = {.type = NLA_U8}, + [DCB_TC_ATTR_PARAM_UP_MAPPING] = {.type = NLA_U8}, + [DCB_TC_ATTR_PARAM_STRICT_PRIO] = {.type = NLA_U8}, + [DCB_TC_ATTR_PARAM_BW_PCT] = {.type = NLA_U8}, + [DCB_TC_ATTR_PARAM_ALL] = {.type = NLA_FLAG}, +}; + +/* DCB capabilities nested attributes. */ +static struct nla_policy dcbnl_cap_nest[DCB_CAP_ATTR_MAX + 1] = { + [DCB_CAP_ATTR_ALL] = {.type = NLA_FLAG}, + [DCB_CAP_ATTR_PG] = {.type = NLA_U8}, + [DCB_CAP_ATTR_PFC] = {.type = NLA_U8}, + [DCB_CAP_ATTR_UP2TC] = {.type = NLA_U8}, + [DCB_CAP_ATTR_PG_TCS] = {.type = NLA_U8}, + [DCB_CAP_ATTR_PFC_TCS] = {.type = NLA_U8}, + [DCB_CAP_ATTR_GSP] = {.type = NLA_U8}, + [DCB_CAP_ATTR_BCN] = {.type = NLA_U8}, +}; + +/* DCB capabilities nested attributes. */ +static struct nla_policy dcbnl_numtcs_nest[DCB_NUMTCS_ATTR_MAX + 1] = { + [DCB_NUMTCS_ATTR_ALL] = {.type = NLA_FLAG}, + [DCB_NUMTCS_ATTR_PG] = {.type = NLA_U8}, + [DCB_NUMTCS_ATTR_PFC] = {.type = NLA_U8}, +}; + +/* DCB BCN nested attributes. */ +static struct nla_policy dcbnl_bcn_nest[DCB_BCN_ATTR_MAX + 1] = { + [DCB_BCN_ATTR_RP_0] = {.type = NLA_U8}, + [DCB_BCN_ATTR_RP_1] = {.type = NLA_U8}, + [DCB_BCN_ATTR_RP_2] = {.type = NLA_U8}, + [DCB_BCN_ATTR_RP_3] = {.type = NLA_U8}, + [DCB_BCN_ATTR_RP_4] = {.type = NLA_U8}, + [DCB_BCN_ATTR_RP_5] = {.type = NLA_U8}, + [DCB_BCN_ATTR_RP_6] = {.type = NLA_U8}, + [DCB_BCN_ATTR_RP_7] = {.type = NLA_U8}, + [DCB_BCN_ATTR_RP_ALL] = {.type = NLA_FLAG}, + [DCB_BCN_ATTR_BCNA_0] = {.type = NLA_U32}, + [DCB_BCN_ATTR_BCNA_1] = {.type = NLA_U32}, + [DCB_BCN_ATTR_ALPHA] = {.type = NLA_U32}, + [DCB_BCN_ATTR_BETA] = {.type = NLA_U32}, + [DCB_BCN_ATTR_GD] = {.type = NLA_U32}, + [DCB_BCN_ATTR_GI] = {.type = NLA_U32}, + [DCB_BCN_ATTR_TMAX] = {.type = NLA_U32}, + [DCB_BCN_ATTR_TD] = {.type = NLA_U32}, + [DCB_BCN_ATTR_RMIN] = {.type = NLA_U32}, + [DCB_BCN_ATTR_W] = {.type = NLA_U32}, + [DCB_BCN_ATTR_RD] = {.type = NLA_U32}, + [DCB_BCN_ATTR_RU] = {.type = NLA_U32}, + [DCB_BCN_ATTR_WRTT] = {.type = NLA_U32}, + [DCB_BCN_ATTR_RI] = {.type = NLA_U32}, + [DCB_BCN_ATTR_C] = {.type = NLA_U32}, + [DCB_BCN_ATTR_ALL] = {.type = NLA_FLAG}, +}; + +/* standard netlink reply call */ +static int dcbnl_reply(u8 value, u8 event, u8 cmd, u8 attr, u32 pid, + u32 seq, u16 flags) +{ + struct sk_buff *dcbnl_skb; + struct dcbmsg *dcb; + struct nlmsghdr *nlh; + int ret = -EINVAL; + + dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!dcbnl_skb) + return ret; + + nlh = NLMSG_NEW(dcbnl_skb, pid, seq, event, sizeof(*dcb), flags); + + dcb = NLMSG_DATA(nlh); + dcb->dcb_family = AF_UNSPEC; + dcb->cmd = cmd; + dcb->dcb_pad = 0; + + ret = nla_put_u8(dcbnl_skb, attr, value); + if (ret) + goto err; + + /* end the message, assign the nlmsg_len. */ + nlmsg_end(dcbnl_skb, nlh); + ret = rtnl_unicast(dcbnl_skb, &init_net, pid); + if (ret) + goto err; + + return 0; +nlmsg_failure: +err: + kfree(dcbnl_skb); + return ret; +} + +static int dcbnl_getstate(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + int ret = -EINVAL; + + /* if (!tb[DCB_ATTR_STATE] || !netdev->dcbnl_ops->getstate) */ + if (!netdev->dcbnl_ops->getstate) + return ret; + + ret = dcbnl_reply(netdev->dcbnl_ops->getstate(netdev), RTM_GETDCB, + DCB_CMD_GSTATE, DCB_ATTR_STATE, pid, seq, flags); + + return ret; +} + +static int dcbnl_getpfccfg(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + struct sk_buff *dcbnl_skb; + struct nlmsghdr *nlh; + struct dcbmsg *dcb; + struct nlattr *data[DCB_PFC_UP_ATTR_MAX + 1], *nest; + u8 value; + int ret = -EINVAL; + int i; + int getall = 0; + + if (!tb[DCB_ATTR_PFC_CFG] || !netdev->dcbnl_ops->getpfccfg) + return ret; + + ret = nla_parse_nested(data, DCB_PFC_UP_ATTR_MAX, + tb[DCB_ATTR_PFC_CFG], + dcbnl_pfc_up_nest); + if (ret) + goto err_out; + + dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!dcbnl_skb) + goto err_out; + + nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags); + + dcb = NLMSG_DATA(nlh); + dcb->dcb_family = AF_UNSPEC; + dcb->cmd = DCB_CMD_PFC_GCFG; + + nest = nla_nest_start(dcbnl_skb, DCB_ATTR_PFC_CFG); + if (!nest) + goto err; + + if (data[DCB_PFC_UP_ATTR_ALL]) + getall = 1; + + for (i = DCB_PFC_UP_ATTR_0; i <= DCB_PFC_UP_ATTR_7; i++) { + if (!getall && !data[i]) + continue; + + netdev->dcbnl_ops->getpfccfg(netdev, i - DCB_PFC_UP_ATTR_0, + &value); + ret = nla_put_u8(dcbnl_skb, i, value); + + if (ret) { + nla_nest_cancel(dcbnl_skb, nest); + goto err; + } + } + nla_nest_end(dcbnl_skb, nest); + + nlmsg_end(dcbnl_skb, nlh); + + ret = rtnl_unicast(dcbnl_skb, &init_net, pid); + if (ret) + goto err; + + return 0; +nlmsg_failure: +err: + kfree(dcbnl_skb); +err_out: + return -EINVAL; +} + +static int dcbnl_getperm_hwaddr(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + struct sk_buff *dcbnl_skb; + struct nlmsghdr *nlh; + struct dcbmsg *dcb; + u8 perm_addr[MAX_ADDR_LEN]; + int ret = -EINVAL; + + if (!netdev->dcbnl_ops->getpermhwaddr) + return ret; + + dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!dcbnl_skb) + goto err_out; + + nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags); + + dcb = NLMSG_DATA(nlh); + dcb->dcb_family = AF_UNSPEC; + dcb->cmd = DCB_CMD_GPERM_HWADDR; + + netdev->dcbnl_ops->getpermhwaddr(netdev, perm_addr); + + ret = nla_put(dcbnl_skb, DCB_ATTR_PERM_HWADDR, sizeof(perm_addr), + perm_addr); + + nlmsg_end(dcbnl_skb, nlh); + + ret = rtnl_unicast(dcbnl_skb, &init_net, pid); + if (ret) + goto err; + + return 0; + +nlmsg_failure: +err: + kfree(dcbnl_skb); +err_out: + return -EINVAL; +} + +static int dcbnl_getcap(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + struct sk_buff *dcbnl_skb; + struct nlmsghdr *nlh; + struct dcbmsg *dcb; + struct nlattr *data[DCB_CAP_ATTR_MAX + 1], *nest; + u8 value; + int ret = -EINVAL; + int i; + int getall = 0; + + if (!tb[DCB_ATTR_CAP] || !netdev->dcbnl_ops->getcap) + return ret; + + ret = nla_parse_nested(data, DCB_CAP_ATTR_MAX, tb[DCB_ATTR_CAP], + dcbnl_cap_nest); + if (ret) + goto err_out; + + dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!dcbnl_skb) + goto err_out; + + nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags); + + dcb = NLMSG_DATA(nlh); + dcb->dcb_family = AF_UNSPEC; + dcb->cmd = DCB_CMD_GCAP; + + nest = nla_nest_start(dcbnl_skb, DCB_ATTR_CAP); + if (!nest) + goto err; + + if (data[DCB_CAP_ATTR_ALL]) + getall = 1; + + for (i = DCB_CAP_ATTR_ALL+1; i <= DCB_CAP_ATTR_MAX; i++) { + if (!getall && !data[i]) + continue; + + if (!netdev->dcbnl_ops->getcap(netdev, i, &value)) { + ret = nla_put_u8(dcbnl_skb, i, value); + + if (ret) { + nla_nest_cancel(dcbnl_skb, nest); + goto err; + } + } + } + nla_nest_end(dcbnl_skb, nest); + + nlmsg_end(dcbnl_skb, nlh); + + ret = rtnl_unicast(dcbnl_skb, &init_net, pid); + if (ret) + goto err; + + return 0; +nlmsg_failure: +err: + kfree(dcbnl_skb); +err_out: + return -EINVAL; +} + +static int dcbnl_getnumtcs(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + struct sk_buff *dcbnl_skb; + struct nlmsghdr *nlh; + struct dcbmsg *dcb; + struct nlattr *data[DCB_NUMTCS_ATTR_MAX + 1], *nest; + u8 value; + int ret = -EINVAL; + int i; + int getall = 0; + + if (!tb[DCB_ATTR_NUMTCS] || !netdev->dcbnl_ops->getnumtcs) + return ret; + + ret = nla_parse_nested(data, DCB_NUMTCS_ATTR_MAX, tb[DCB_ATTR_NUMTCS], + dcbnl_numtcs_nest); + if (ret) { + ret = -EINVAL; + goto err_out; + } + + dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!dcbnl_skb) { + ret = -EINVAL; + goto err_out; + } + + nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags); + + dcb = NLMSG_DATA(nlh); + dcb->dcb_family = AF_UNSPEC; + dcb->cmd = DCB_CMD_GNUMTCS; + + nest = nla_nest_start(dcbnl_skb, DCB_ATTR_NUMTCS); + if (!nest) { + ret = -EINVAL; + goto err; + } + + if (data[DCB_NUMTCS_ATTR_ALL]) + getall = 1; + + for (i = DCB_NUMTCS_ATTR_ALL+1; i <= DCB_NUMTCS_ATTR_MAX; i++) { + if (!getall && !data[i]) + continue; + + ret = netdev->dcbnl_ops->getnumtcs(netdev, i, &value); + if (!ret) { + ret = nla_put_u8(dcbnl_skb, i, value); + + if (ret) { + nla_nest_cancel(dcbnl_skb, nest); + ret = -EINVAL; + goto err; + } + } else { + goto err; + } + } + nla_nest_end(dcbnl_skb, nest); + + nlmsg_end(dcbnl_skb, nlh); + + ret = rtnl_unicast(dcbnl_skb, &init_net, pid); + if (ret) { + ret = -EINVAL; + goto err; + } + + return 0; +nlmsg_failure: +err: + kfree(dcbnl_skb); +err_out: + return ret; +} + +static int dcbnl_setnumtcs(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + struct nlattr *data[DCB_NUMTCS_ATTR_MAX + 1]; + int ret = -EINVAL; + u8 value; + int i; + + if (!tb[DCB_ATTR_NUMTCS] || !netdev->dcbnl_ops->setnumtcs) + return ret; + + ret = nla_parse_nested(data, DCB_NUMTCS_ATTR_MAX, tb[DCB_ATTR_NUMTCS], + dcbnl_numtcs_nest); + + if (ret) { + ret = -EINVAL; + goto err; + } + + for (i = DCB_NUMTCS_ATTR_ALL+1; i <= DCB_NUMTCS_ATTR_MAX; i++) { + if (data[i] == NULL) + continue; + + value = nla_get_u8(data[i]); + + ret = netdev->dcbnl_ops->setnumtcs(netdev, i, value); + + if (ret) + goto operr; + } + +operr: + ret = dcbnl_reply(!!ret, RTM_SETDCB, DCB_CMD_SNUMTCS, + DCB_ATTR_NUMTCS, pid, seq, flags); + +err: + return ret; +} + +static int dcbnl_getpfcstate(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + int ret = -EINVAL; + + if (!netdev->dcbnl_ops->getpfcstate) + return ret; + + ret = dcbnl_reply(netdev->dcbnl_ops->getpfcstate(netdev), RTM_GETDCB, + DCB_CMD_PFC_GSTATE, DCB_ATTR_PFC_STATE, + pid, seq, flags); + + return ret; +} + +static int dcbnl_setpfcstate(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + int ret = -EINVAL; + u8 value; + + if (!tb[DCB_ATTR_PFC_STATE] || !netdev->dcbnl_ops->setpfcstate) + return ret; + + value = nla_get_u8(tb[DCB_ATTR_PFC_STATE]); + + netdev->dcbnl_ops->setpfcstate(netdev, value); + + ret = dcbnl_reply(0, RTM_SETDCB, DCB_CMD_PFC_SSTATE, DCB_ATTR_PFC_STATE, + pid, seq, flags); + + return ret; +} + +static int __dcbnl_pg_getcfg(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags, int dir) +{ + struct sk_buff *dcbnl_skb; + struct nlmsghdr *nlh; + struct dcbmsg *dcb; + struct nlattr *pg_nest, *param_nest, *data; + struct nlattr *pg_tb[DCB_PG_ATTR_MAX + 1]; + struct nlattr *param_tb[DCB_TC_ATTR_PARAM_MAX + 1]; + u8 prio, pgid, tc_pct, up_map; + int ret = -EINVAL; + int getall = 0; + int i; + + if (!tb[DCB_ATTR_PG_CFG] || + !netdev->dcbnl_ops->getpgtccfgtx || + !netdev->dcbnl_ops->getpgtccfgrx || + !netdev->dcbnl_ops->getpgbwgcfgtx || + !netdev->dcbnl_ops->getpgbwgcfgrx) + return ret; + + ret = nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX, + tb[DCB_ATTR_PG_CFG], dcbnl_pg_nest); + + if (ret) + goto err_out; + + dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!dcbnl_skb) + goto err_out; + + nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags); + + dcb = NLMSG_DATA(nlh); + dcb->dcb_family = AF_UNSPEC; + dcb->cmd = (dir) ? DCB_CMD_PGRX_GCFG : DCB_CMD_PGTX_GCFG; + + pg_nest = nla_nest_start(dcbnl_skb, DCB_ATTR_PG_CFG); + if (!pg_nest) + goto err; + + if (pg_tb[DCB_PG_ATTR_TC_ALL]) + getall = 1; + + for (i = DCB_PG_ATTR_TC_0; i <= DCB_PG_ATTR_TC_7; i++) { + if (!getall && !pg_tb[i]) + continue; + + if (pg_tb[DCB_PG_ATTR_TC_ALL]) + data = pg_tb[DCB_PG_ATTR_TC_ALL]; + else + data = pg_tb[i]; + ret = nla_parse_nested(param_tb, DCB_TC_ATTR_PARAM_MAX, + data, dcbnl_tc_param_nest); + if (ret) + goto err_pg; + + param_nest = nla_nest_start(dcbnl_skb, i); + if (!param_nest) + goto err_pg; + + pgid = DCB_ATTR_VALUE_UNDEFINED; + prio = DCB_ATTR_VALUE_UNDEFINED; + tc_pct = DCB_ATTR_VALUE_UNDEFINED; + up_map = DCB_ATTR_VALUE_UNDEFINED; + + if (dir) { + /* Rx */ + netdev->dcbnl_ops->getpgtccfgrx(netdev, + i - DCB_PG_ATTR_TC_0, &prio, + &pgid, &tc_pct, &up_map); + } else { + /* Tx */ + netdev->dcbnl_ops->getpgtccfgtx(netdev, + i - DCB_PG_ATTR_TC_0, &prio, + &pgid, &tc_pct, &up_map); + } + + if (param_tb[DCB_TC_ATTR_PARAM_PGID] || + param_tb[DCB_TC_ATTR_PARAM_ALL]) { + ret = nla_put_u8(dcbnl_skb, + DCB_TC_ATTR_PARAM_PGID, pgid); + if (ret) + goto err_param; + } + if (param_tb[DCB_TC_ATTR_PARAM_UP_MAPPING] || + param_tb[DCB_TC_ATTR_PARAM_ALL]) { + ret = nla_put_u8(dcbnl_skb, + DCB_TC_ATTR_PARAM_UP_MAPPING, up_map); + if (ret) + goto err_param; + } + if (param_tb[DCB_TC_ATTR_PARAM_STRICT_PRIO] || + param_tb[DCB_TC_ATTR_PARAM_ALL]) { + ret = nla_put_u8(dcbnl_skb, + DCB_TC_ATTR_PARAM_STRICT_PRIO, prio); + if (ret) + goto err_param; + } + if (param_tb[DCB_TC_ATTR_PARAM_BW_PCT] || + param_tb[DCB_TC_ATTR_PARAM_ALL]) { + ret = nla_put_u8(dcbnl_skb, DCB_TC_ATTR_PARAM_BW_PCT, + tc_pct); + if (ret) + goto err_param; + } + nla_nest_end(dcbnl_skb, param_nest); + } + + if (pg_tb[DCB_PG_ATTR_BW_ID_ALL]) + getall = 1; + else + getall = 0; + + for (i = DCB_PG_ATTR_BW_ID_0; i <= DCB_PG_ATTR_BW_ID_7; i++) { + if (!getall && !pg_tb[i]) + continue; + + tc_pct = DCB_ATTR_VALUE_UNDEFINED; + + if (dir) { + /* Rx */ + netdev->dcbnl_ops->getpgbwgcfgrx(netdev, + i - DCB_PG_ATTR_BW_ID_0, &tc_pct); + } else { + /* Tx */ + netdev->dcbnl_ops->getpgbwgcfgtx(netdev, + i - DCB_PG_ATTR_BW_ID_0, &tc_pct); + } + ret = nla_put_u8(dcbnl_skb, i, tc_pct); + + if (ret) + goto err_pg; + } + + nla_nest_end(dcbnl_skb, pg_nest); + + nlmsg_end(dcbnl_skb, nlh); + + ret = rtnl_unicast(dcbnl_skb, &init_net, pid); + if (ret) + goto err; + + return 0; + +err_param: + nla_nest_cancel(dcbnl_skb, param_nest); +err_pg: + nla_nest_cancel(dcbnl_skb, pg_nest); +nlmsg_failure: +err: + kfree(dcbnl_skb); +err_out: + ret = -EINVAL; + return ret; +} + +static int dcbnl_pgtx_getcfg(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + return __dcbnl_pg_getcfg(netdev, tb, pid, seq, flags, 0); +} + +static int dcbnl_pgrx_getcfg(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + return __dcbnl_pg_getcfg(netdev, tb, pid, seq, flags, 1); +} + +static int dcbnl_setstate(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + int ret = -EINVAL; + u8 value; + + if (!tb[DCB_ATTR_STATE] || !netdev->dcbnl_ops->setstate) + return ret; + + value = nla_get_u8(tb[DCB_ATTR_STATE]); + + ret = dcbnl_reply(netdev->dcbnl_ops->setstate(netdev, value), + RTM_SETDCB, DCB_CMD_SSTATE, DCB_ATTR_STATE, + pid, seq, flags); + + return ret; +} + +static int dcbnl_setpfccfg(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + struct nlattr *data[DCB_PFC_UP_ATTR_MAX + 1]; + int i; + int ret = -EINVAL; + u8 value; + + if (!tb[DCB_ATTR_PFC_CFG] || !netdev->dcbnl_ops->setpfccfg) + return ret; + + ret = nla_parse_nested(data, DCB_PFC_UP_ATTR_MAX, + tb[DCB_ATTR_PFC_CFG], + dcbnl_pfc_up_nest); + if (ret) + goto err; + + for (i = DCB_PFC_UP_ATTR_0; i <= DCB_PFC_UP_ATTR_7; i++) { + if (data[i] == NULL) + continue; + value = nla_get_u8(data[i]); + netdev->dcbnl_ops->setpfccfg(netdev, + data[i]->nla_type - DCB_PFC_UP_ATTR_0, value); + } + + ret = dcbnl_reply(0, RTM_SETDCB, DCB_CMD_PFC_SCFG, DCB_ATTR_PFC_CFG, + pid, seq, flags); +err: + return ret; +} + +static int dcbnl_setall(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + int ret = -EINVAL; + + if (!tb[DCB_ATTR_SET_ALL] || !netdev->dcbnl_ops->setall) + return ret; + + ret = dcbnl_reply(netdev->dcbnl_ops->setall(netdev), RTM_SETDCB, + DCB_CMD_SET_ALL, DCB_ATTR_SET_ALL, pid, seq, flags); + + return ret; +} + +static int __dcbnl_pg_setcfg(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags, int dir) +{ + struct nlattr *pg_tb[DCB_PG_ATTR_MAX + 1]; + struct nlattr *param_tb[DCB_TC_ATTR_PARAM_MAX + 1]; + int ret = -EINVAL; + int i; + u8 pgid; + u8 up_map; + u8 prio; + u8 tc_pct; + + if (!tb[DCB_ATTR_PG_CFG] || + !netdev->dcbnl_ops->setpgtccfgtx || + !netdev->dcbnl_ops->setpgtccfgrx || + !netdev->dcbnl_ops->setpgbwgcfgtx || + !netdev->dcbnl_ops->setpgbwgcfgrx) + return ret; + + ret = nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX, + tb[DCB_ATTR_PG_CFG], dcbnl_pg_nest); + if (ret) + goto err; + + for (i = DCB_PG_ATTR_TC_0; i <= DCB_PG_ATTR_TC_7; i++) { + if (!pg_tb[i]) + continue; + + ret = nla_parse_nested(param_tb, DCB_TC_ATTR_PARAM_MAX, + pg_tb[i], dcbnl_tc_param_nest); + if (ret) + goto err; + + pgid = DCB_ATTR_VALUE_UNDEFINED; + prio = DCB_ATTR_VALUE_UNDEFINED; + tc_pct = DCB_ATTR_VALUE_UNDEFINED; + up_map = DCB_ATTR_VALUE_UNDEFINED; + + if (param_tb[DCB_TC_ATTR_PARAM_STRICT_PRIO]) + prio = + nla_get_u8(param_tb[DCB_TC_ATTR_PARAM_STRICT_PRIO]); + + if (param_tb[DCB_TC_ATTR_PARAM_PGID]) + pgid = nla_get_u8(param_tb[DCB_TC_ATTR_PARAM_PGID]); + + if (param_tb[DCB_TC_ATTR_PARAM_BW_PCT]) + tc_pct = nla_get_u8(param_tb[DCB_TC_ATTR_PARAM_BW_PCT]); + + if (param_tb[DCB_TC_ATTR_PARAM_UP_MAPPING]) + up_map = + nla_get_u8(param_tb[DCB_TC_ATTR_PARAM_UP_MAPPING]); + + /* dir: Tx = 0, Rx = 1 */ + if (dir) { + /* Rx */ + netdev->dcbnl_ops->setpgtccfgrx(netdev, + i - DCB_PG_ATTR_TC_0, + prio, pgid, tc_pct, up_map); + } else { + /* Tx */ + netdev->dcbnl_ops->setpgtccfgtx(netdev, + i - DCB_PG_ATTR_TC_0, + prio, pgid, tc_pct, up_map); + } + } + + for (i = DCB_PG_ATTR_BW_ID_0; i <= DCB_PG_ATTR_BW_ID_7; i++) { + if (!pg_tb[i]) + continue; + + tc_pct = nla_get_u8(pg_tb[i]); + + /* dir: Tx = 0, Rx = 1 */ + if (dir) { + /* Rx */ + netdev->dcbnl_ops->setpgbwgcfgrx(netdev, + i - DCB_PG_ATTR_BW_ID_0, tc_pct); + } else { + /* Tx */ + netdev->dcbnl_ops->setpgbwgcfgtx(netdev, + i - DCB_PG_ATTR_BW_ID_0, tc_pct); + } + } + + ret = dcbnl_reply(0, RTM_SETDCB, + (dir ? DCB_CMD_PGRX_SCFG : DCB_CMD_PGTX_SCFG), + DCB_ATTR_PG_CFG, pid, seq, flags); + +err: + return ret; +} + +static int dcbnl_pgtx_setcfg(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + return __dcbnl_pg_setcfg(netdev, tb, pid, seq, flags, 0); +} + +static int dcbnl_pgrx_setcfg(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + return __dcbnl_pg_setcfg(netdev, tb, pid, seq, flags, 1); +} + +static int dcbnl_bcn_getcfg(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + struct sk_buff *dcbnl_skb; + struct nlmsghdr *nlh; + struct dcbmsg *dcb; + struct nlattr *bcn_nest; + struct nlattr *bcn_tb[DCB_BCN_ATTR_MAX + 1]; + u8 value_byte; + u32 value_integer; + int ret = -EINVAL; + bool getall = false; + int i; + + if (!tb[DCB_ATTR_BCN] || !netdev->dcbnl_ops->getbcnrp || + !netdev->dcbnl_ops->getbcncfg) + return ret; + + ret = nla_parse_nested(bcn_tb, DCB_BCN_ATTR_MAX, + tb[DCB_ATTR_BCN], dcbnl_bcn_nest); + + if (ret) + goto err_out; + + dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!dcbnl_skb) + goto err_out; + + nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags); + + dcb = NLMSG_DATA(nlh); + dcb->dcb_family = AF_UNSPEC; + dcb->cmd = DCB_CMD_BCN_GCFG; + + bcn_nest = nla_nest_start(dcbnl_skb, DCB_ATTR_BCN); + if (!bcn_nest) + goto err; + + if (bcn_tb[DCB_BCN_ATTR_ALL]) + getall = true; + + for (i = DCB_BCN_ATTR_RP_0; i <= DCB_BCN_ATTR_RP_7; i++) { + if (!getall && !bcn_tb[i]) + continue; + + netdev->dcbnl_ops->getbcnrp(netdev, i - DCB_BCN_ATTR_RP_0, + &value_byte); + ret = nla_put_u8(dcbnl_skb, i, value_byte); + if (ret) + goto err_bcn; + } + + for (i = DCB_BCN_ATTR_BCNA_0; i <= DCB_BCN_ATTR_RI; i++) { + if (!getall && !bcn_tb[i]) + continue; + + netdev->dcbnl_ops->getbcncfg(netdev, i, + &value_integer); + ret = nla_put_u32(dcbnl_skb, i, value_integer); + if (ret) + goto err_bcn; + } + + nla_nest_end(dcbnl_skb, bcn_nest); + + nlmsg_end(dcbnl_skb, nlh); + + ret = rtnl_unicast(dcbnl_skb, &init_net, pid); + if (ret) + goto err; + + return 0; + +err_bcn: + nla_nest_cancel(dcbnl_skb, bcn_nest); +nlmsg_failure: +err: + kfree(dcbnl_skb); +err_out: + ret = -EINVAL; + return ret; +} + +static int dcbnl_bcn_setcfg(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + struct nlattr *data[DCB_BCN_ATTR_MAX + 1]; + int i; + int ret = -EINVAL; + u8 value_byte; + u32 value_int; + + if (!tb[DCB_ATTR_BCN] || !netdev->dcbnl_ops->setbcncfg + || !netdev->dcbnl_ops->setbcnrp) + return ret; + + ret = nla_parse_nested(data, DCB_BCN_ATTR_MAX, + tb[DCB_ATTR_BCN], + dcbnl_pfc_up_nest); + if (ret) + goto err; + + for (i = DCB_BCN_ATTR_RP_0; i <= DCB_BCN_ATTR_RP_7; i++) { + if (data[i] == NULL) + continue; + value_byte = nla_get_u8(data[i]); + netdev->dcbnl_ops->setbcnrp(netdev, + data[i]->nla_type - DCB_BCN_ATTR_RP_0, value_byte); + } + + for (i = DCB_BCN_ATTR_BCNA_0; i <= DCB_BCN_ATTR_RI; i++) { + if (data[i] == NULL) + continue; + value_int = nla_get_u32(data[i]); + netdev->dcbnl_ops->setbcncfg(netdev, + i, value_int); + } + + ret = dcbnl_reply(0, RTM_SETDCB, DCB_CMD_BCN_SCFG, DCB_ATTR_BCN, + pid, seq, flags); +err: + return ret; +} + +static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + struct net *net = sock_net(skb->sk); + struct net_device *netdev; + struct dcbmsg *dcb = (struct dcbmsg *)NLMSG_DATA(nlh); + struct nlattr *tb[DCB_ATTR_MAX + 1]; + u32 pid = skb ? NETLINK_CB(skb).pid : 0; + int ret = -EINVAL; + + if (net != &init_net) + return -EINVAL; + + ret = nlmsg_parse(nlh, sizeof(*dcb), tb, DCB_ATTR_MAX, + dcbnl_rtnl_policy); + if (ret < 0) + return ret; + + if (!tb[DCB_ATTR_IFNAME]) + return -EINVAL; + + netdev = dev_get_by_name(&init_net, nla_data(tb[DCB_ATTR_IFNAME])); + if (!netdev) + return -EINVAL; + + if (!netdev->dcbnl_ops) + goto errout; + + switch (dcb->cmd) { + case DCB_CMD_GSTATE: + ret = dcbnl_getstate(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_PFC_GCFG: + ret = dcbnl_getpfccfg(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_GPERM_HWADDR: + ret = dcbnl_getperm_hwaddr(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_PGTX_GCFG: + ret = dcbnl_pgtx_getcfg(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_PGRX_GCFG: + ret = dcbnl_pgrx_getcfg(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_BCN_GCFG: + ret = dcbnl_bcn_getcfg(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_SSTATE: + ret = dcbnl_setstate(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_PFC_SCFG: + ret = dcbnl_setpfccfg(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + + case DCB_CMD_SET_ALL: + ret = dcbnl_setall(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_PGTX_SCFG: + ret = dcbnl_pgtx_setcfg(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_PGRX_SCFG: + ret = dcbnl_pgrx_setcfg(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_GCAP: + ret = dcbnl_getcap(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_GNUMTCS: + ret = dcbnl_getnumtcs(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_SNUMTCS: + ret = dcbnl_setnumtcs(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_PFC_GSTATE: + ret = dcbnl_getpfcstate(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_PFC_SSTATE: + ret = dcbnl_setpfcstate(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_BCN_SCFG: + ret = dcbnl_bcn_setcfg(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + default: + goto errout; + } +errout: + ret = -EINVAL; +out: + dev_put(netdev); + return ret; +} + +static int __init dcbnl_init(void) +{ + rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL); + rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL); + + return 0; +} +module_init(dcbnl_init); + +static void __exit dcbnl_exit(void) +{ + rtnl_unregister(PF_UNSPEC, RTM_GETDCB); + rtnl_unregister(PF_UNSPEC, RTM_SETDCB); +} +module_exit(dcbnl_exit); + + diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index 1e8be246ad1..01e4d39fa23 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -12,7 +12,6 @@ #include "ackvec.h" #include "dccp.h" -#include <linux/dccp.h> #include <linux/init.h> #include <linux/errno.h> #include <linux/kernel.h> @@ -68,7 +67,7 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) struct dccp_sock *dp = dccp_sk(sk); struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec; /* Figure out how many options do we need to represent the ackvec */ - const u16 nr_opts = DIV_ROUND_UP(av->av_vec_len, DCCP_MAX_ACKVEC_OPT_LEN); + const u8 nr_opts = DIV_ROUND_UP(av->av_vec_len, DCCP_SINGLE_OPT_MAXLEN); u16 len = av->av_vec_len + 2 * nr_opts, i; u32 elapsed_time; const unsigned char *tail, *from; @@ -100,8 +99,8 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) for (i = 0; i < nr_opts; ++i) { int copylen = len; - if (len > DCCP_MAX_ACKVEC_OPT_LEN) - copylen = DCCP_MAX_ACKVEC_OPT_LEN; + if (len > DCCP_SINGLE_OPT_MAXLEN) + copylen = DCCP_SINGLE_OPT_MAXLEN; *to++ = DCCPO_ACK_VECTOR_0; *to++ = copylen + 2; @@ -432,7 +431,7 @@ found: int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, u64 *ackno, const u8 opt, const u8 *value, const u8 len) { - if (len > DCCP_MAX_ACKVEC_OPT_LEN) + if (len > DCCP_SINGLE_OPT_MAXLEN) return -1; /* dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, value, len); */ diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h index bcb64fb4ace..4ccee030524 100644 --- a/net/dccp/ackvec.h +++ b/net/dccp/ackvec.h @@ -11,15 +11,14 @@ * published by the Free Software Foundation. */ +#include <linux/dccp.h> #include <linux/compiler.h> #include <linux/ktime.h> #include <linux/list.h> #include <linux/types.h> -/* Read about the ECN nonce to see why it is 253 */ -#define DCCP_MAX_ACKVEC_OPT_LEN 253 /* We can spread an ack vector across multiple options */ -#define DCCP_MAX_ACKVEC_LEN (DCCP_MAX_ACKVEC_OPT_LEN * 2) +#define DCCP_MAX_ACKVEC_LEN (DCCP_SINGLE_OPT_MAXLEN * 2) #define DCCP_ACKVEC_STATE_RECEIVED 0 #define DCCP_ACKVEC_STATE_ECN_MARKED (1 << 6) diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c index 8fe931a3d7a..bcc643f992a 100644 --- a/net/dccp/ccid.c +++ b/net/dccp/ccid.c @@ -13,6 +13,13 @@ #include "ccid.h" +static u8 builtin_ccids[] = { + DCCPC_CCID2, /* CCID2 is supported by default */ +#if defined(CONFIG_IP_DCCP_CCID3) || defined(CONFIG_IP_DCCP_CCID3_MODULE) + DCCPC_CCID3, +#endif +}; + static struct ccid_operations *ccids[CCID_MAX]; #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT) static atomic_t ccids_lockct = ATOMIC_INIT(0); @@ -86,6 +93,47 @@ static void ccid_kmem_cache_destroy(struct kmem_cache *slab) } } +/* check that up to @array_len members in @ccid_array are supported */ +bool ccid_support_check(u8 const *ccid_array, u8 array_len) +{ + u8 i, j, found; + + for (i = 0, found = 0; i < array_len; i++, found = 0) { + for (j = 0; !found && j < ARRAY_SIZE(builtin_ccids); j++) + found = (ccid_array[i] == builtin_ccids[j]); + if (!found) + return false; + } + return true; +} + +/** + * ccid_get_builtin_ccids - Provide copy of `builtin' CCID array + * @ccid_array: pointer to copy into + * @array_len: value to return length into + * This function allocates memory - caller must see that it is freed after use. + */ +int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len) +{ + *ccid_array = kmemdup(builtin_ccids, sizeof(builtin_ccids), gfp_any()); + if (*ccid_array == NULL) + return -ENOBUFS; + *array_len = ARRAY_SIZE(builtin_ccids); + return 0; +} + +int ccid_getsockopt_builtin_ccids(struct sock *sk, int len, + char __user *optval, int __user *optlen) +{ + if (len < sizeof(builtin_ccids)) + return -EINVAL; + + if (put_user(sizeof(builtin_ccids), optlen) || + copy_to_user(optval, builtin_ccids, sizeof(builtin_ccids))) + return -EFAULT; + return 0; +} + int ccid_register(struct ccid_operations *ccid_ops) { int err = -ENOBUFS; @@ -205,20 +253,6 @@ out_module_put: EXPORT_SYMBOL_GPL(ccid_new); -struct ccid *ccid_hc_rx_new(unsigned char id, struct sock *sk, gfp_t gfp) -{ - return ccid_new(id, sk, 1, gfp); -} - -EXPORT_SYMBOL_GPL(ccid_hc_rx_new); - -struct ccid *ccid_hc_tx_new(unsigned char id,struct sock *sk, gfp_t gfp) -{ - return ccid_new(id, sk, 0, gfp); -} - -EXPORT_SYMBOL_GPL(ccid_hc_tx_new); - static void ccid_delete(struct ccid *ccid, struct sock *sk, int rx) { struct ccid_operations *ccid_ops; diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index fdeae7b5731..18f69423a70 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -103,13 +103,31 @@ static inline void *ccid_priv(const struct ccid *ccid) return (void *)ccid->ccid_priv; } +extern bool ccid_support_check(u8 const *ccid_array, u8 array_len); +extern int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len); +extern int ccid_getsockopt_builtin_ccids(struct sock *sk, int len, + char __user *, int __user *); + extern struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx, gfp_t gfp); -extern struct ccid *ccid_hc_rx_new(unsigned char id, struct sock *sk, - gfp_t gfp); -extern struct ccid *ccid_hc_tx_new(unsigned char id, struct sock *sk, - gfp_t gfp); +static inline int ccid_get_current_rx_ccid(struct dccp_sock *dp) +{ + struct ccid *ccid = dp->dccps_hc_rx_ccid; + + if (ccid == NULL || ccid->ccid_ops == NULL) + return -1; + return ccid->ccid_ops->ccid_id; +} + +static inline int ccid_get_current_tx_ccid(struct dccp_sock *dp) +{ + struct ccid *ccid = dp->dccps_hc_tx_ccid; + + if (ccid == NULL || ccid->ccid_ops == NULL) + return -1; + return ccid->ccid_ops->ccid_id; +} extern void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk); extern void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk); diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 9a430734530..c9ea19a4d85 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -25,7 +25,7 @@ /* * This implementation should follow RFC 4341 */ - +#include "../feat.h" #include "../ccid.h" #include "../dccp.h" #include "ccid2.h" @@ -147,8 +147,8 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) DCCP_WARN("Limiting Ack Ratio (%u) to %u\n", val, max_ratio); val = max_ratio; } - if (val > 0xFFFF) /* RFC 4340, 11.3 */ - val = 0xFFFF; + if (val > DCCPF_ACK_RATIO_MAX) + val = DCCPF_ACK_RATIO_MAX; if (val == dp->dccps_l_ack_ratio) return; diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index b4bc6e095a0..0bc4c9a02e1 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -49,7 +49,7 @@ extern int dccp_debug; extern struct inet_hashinfo dccp_hashinfo; -extern atomic_t dccp_orphan_count; +extern struct percpu_counter dccp_orphan_count; extern void dccp_time_wait(struct sock *sk, int state, int timeo); @@ -98,9 +98,6 @@ extern int sysctl_dccp_retries2; extern int sysctl_dccp_feat_sequence_window; extern int sysctl_dccp_feat_rx_ccid; extern int sysctl_dccp_feat_tx_ccid; -extern int sysctl_dccp_feat_ack_ratio; -extern int sysctl_dccp_feat_send_ack_vector; -extern int sysctl_dccp_feat_send_ndp_count; extern int sysctl_dccp_tx_qlen; extern int sysctl_dccp_sync_ratelimit; @@ -252,7 +249,8 @@ extern const char *dccp_state_name(const int state); extern void dccp_set_state(struct sock *sk, const int state); extern void dccp_done(struct sock *sk); -extern void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb); +extern int dccp_reqsk_init(struct request_sock *rq, struct dccp_sock const *dp, + struct sk_buff const *skb); extern int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb); @@ -435,12 +433,19 @@ static inline int dccp_ack_pending(const struct sock *sk) const struct dccp_sock *dp = dccp_sk(sk); return dp->dccps_timestamp_echo != 0 || #ifdef CONFIG_IP_DCCP_ACKVEC - (dccp_msk(sk)->dccpms_send_ack_vector && + (dp->dccps_hc_rx_ackvec != NULL && dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) || #endif inet_csk_ack_scheduled(sk); } +extern int dccp_feat_finalise_settings(struct dccp_sock *dp); +extern int dccp_feat_server_ccid_dependencies(struct dccp_request_sock *dreq); +extern int dccp_feat_insert_opts(struct dccp_sock*, struct dccp_request_sock*, + struct sk_buff *skb); +extern int dccp_feat_activate_values(struct sock *sk, struct list_head *fn); +extern void dccp_feat_list_purge(struct list_head *fn_list); + extern int dccp_insert_options(struct sock *sk, struct sk_buff *skb); extern int dccp_insert_options_rsk(struct dccp_request_sock*, struct sk_buff*); extern int dccp_insert_option_elapsed_time(struct sock *sk, diff --git a/net/dccp/diag.c b/net/dccp/diag.c index d8a3509b26f..b21f261da75 100644 --- a/net/dccp/diag.c +++ b/net/dccp/diag.c @@ -29,11 +29,14 @@ static void dccp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_backoff = icsk->icsk_backoff; info->tcpi_pmtu = icsk->icsk_pmtu_cookie; - if (dccp_msk(sk)->dccpms_send_ack_vector) + if (dp->dccps_hc_rx_ackvec != NULL) info->tcpi_options |= TCPI_OPT_SACK; - ccid_hc_rx_get_info(dp->dccps_hc_rx_ccid, sk, info); - ccid_hc_tx_get_info(dp->dccps_hc_tx_ccid, sk, info); + if (dp->dccps_hc_rx_ccid != NULL) + ccid_hc_rx_get_info(dp->dccps_hc_rx_ccid, sk, info); + + if (dp->dccps_hc_tx_ccid != NULL) + ccid_hc_tx_get_info(dp->dccps_hc_tx_ccid, sk, info); } static void dccp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, @@ -45,7 +48,7 @@ static void dccp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, dccp_get_info(sk, _info); } -static struct inet_diag_handler dccp_diag_handler = { +static const struct inet_diag_handler dccp_diag_handler = { .idiag_hashinfo = &dccp_hashinfo, .idiag_get_info = dccp_diag_get_info, .idiag_type = DCCPDIAG_GETSOCK, diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 933a0ecf8d4..30f9fb76b92 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -1,11 +1,17 @@ /* * net/dccp/feat.c * - * An implementation of the DCCP protocol - * Andrea Bittau <a.bittau@cs.ucl.ac.uk> + * Feature negotiation for the DCCP protocol (RFC 4340, section 6) + * + * Copyright (c) 2008 Gerrit Renker <gerrit@erg.abdn.ac.uk> + * Rewrote from scratch, some bits from earlier code by + * Copyright (c) 2005 Andrea Bittau <a.bittau@cs.ucl.ac.uk> + * * * ASSUMPTIONS * ----------- + * o Feature negotiation is coordinated with connection setup (as in TCP), wild + * changes of parameters of an established connection are not supported. * o All currently known SP features have 1-byte quantities. If in the future * extensions of RFCs 4340..42 define features with item lengths larger than * one byte, a feature-specific extension of the code will be required. @@ -15,597 +21,1185 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ - #include <linux/module.h> - #include "ccid.h" #include "feat.h" -#define DCCP_FEAT_SP_NOAGREE (-123) - -int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature, - u8 *val, u8 len, gfp_t gfp) -{ - struct dccp_opt_pend *opt; - - dccp_feat_debug(type, feature, *val); - - if (len > 3) { - DCCP_WARN("invalid length %d\n", len); - return -EINVAL; - } - /* XXX add further sanity checks */ - - /* check if that feature is already being negotiated */ - list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) { - /* ok we found a negotiation for this option already */ - if (opt->dccpop_feat == feature && opt->dccpop_type == type) { - dccp_pr_debug("Replacing old\n"); - /* replace */ - BUG_ON(opt->dccpop_val == NULL); - kfree(opt->dccpop_val); - opt->dccpop_val = val; - opt->dccpop_len = len; - opt->dccpop_conf = 0; - return 0; - } - } - - /* negotiation for a new feature */ - opt = kmalloc(sizeof(*opt), gfp); - if (opt == NULL) - return -ENOMEM; - - opt->dccpop_type = type; - opt->dccpop_feat = feature; - opt->dccpop_len = len; - opt->dccpop_val = val; - opt->dccpop_conf = 0; - opt->dccpop_sc = NULL; - - BUG_ON(opt->dccpop_val == NULL); - - list_add_tail(&opt->dccpop_node, &dmsk->dccpms_pending); - return 0; -} - -EXPORT_SYMBOL_GPL(dccp_feat_change); - -static int dccp_feat_update_ccid(struct sock *sk, u8 type, u8 new_ccid_nr) +/* + * Feature activation handlers. + * + * These all use an u64 argument, to provide enough room for NN/SP features. At + * this stage the negotiated values have been checked to be within their range. + */ +static int dccp_hdlr_ccid(struct sock *sk, u64 ccid, bool rx) { struct dccp_sock *dp = dccp_sk(sk); - struct dccp_minisock *dmsk = dccp_msk(sk); - /* figure out if we are changing our CCID or the peer's */ - const int rx = type == DCCPO_CHANGE_R; - const u8 ccid_nr = rx ? dmsk->dccpms_rx_ccid : dmsk->dccpms_tx_ccid; - struct ccid *new_ccid; + struct ccid *new_ccid = ccid_new(ccid, sk, rx, gfp_any()); - /* Check if nothing is being changed. */ - if (ccid_nr == new_ccid_nr) - return 0; - - new_ccid = ccid_new(new_ccid_nr, sk, rx, GFP_ATOMIC); if (new_ccid == NULL) return -ENOMEM; if (rx) { ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk); dp->dccps_hc_rx_ccid = new_ccid; - dmsk->dccpms_rx_ccid = new_ccid_nr; } else { ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); dp->dccps_hc_tx_ccid = new_ccid; - dmsk->dccpms_tx_ccid = new_ccid_nr; } + return 0; +} +static int dccp_hdlr_seq_win(struct sock *sk, u64 seq_win, bool rx) +{ + if (!rx) + dccp_msk(sk)->dccpms_sequence_window = seq_win; return 0; } -static int dccp_feat_update(struct sock *sk, u8 type, u8 feat, u8 val) +static int dccp_hdlr_ack_ratio(struct sock *sk, u64 ratio, bool rx) { - dccp_feat_debug(type, feat, val); + if (rx) + dccp_sk(sk)->dccps_r_ack_ratio = ratio; + else + dccp_sk(sk)->dccps_l_ack_ratio = ratio; + return 0; +} - switch (feat) { - case DCCPF_CCID: - return dccp_feat_update_ccid(sk, type, val); - default: - dccp_pr_debug("UNIMPLEMENTED: %s(%d, ...)\n", - dccp_feat_typename(type), feat); - break; +static int dccp_hdlr_ackvec(struct sock *sk, u64 enable, bool rx) +{ + struct dccp_sock *dp = dccp_sk(sk); + + if (rx) { + if (enable && dp->dccps_hc_rx_ackvec == NULL) { + dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(gfp_any()); + if (dp->dccps_hc_rx_ackvec == NULL) + return -ENOMEM; + } else if (!enable) { + dccp_ackvec_free(dp->dccps_hc_rx_ackvec); + dp->dccps_hc_rx_ackvec = NULL; + } } return 0; } -static int dccp_feat_reconcile(struct sock *sk, struct dccp_opt_pend *opt, - u8 *rpref, u8 rlen) +static int dccp_hdlr_ndp(struct sock *sk, u64 enable, bool rx) { - struct dccp_sock *dp = dccp_sk(sk); - u8 *spref, slen, *res = NULL; - int i, j, rc, agree = 1; + if (!rx) + dccp_sk(sk)->dccps_send_ndp_count = (enable > 0); + return 0; +} - BUG_ON(rpref == NULL); +/* + * Minimum Checksum Coverage is located at the RX side (9.2.1). This means that + * `rx' holds when the sending peer informs about his partial coverage via a + * ChangeR() option. In the other case, we are the sender and the receiver + * announces its coverage via ChangeL() options. The policy here is to honour + * such communication by enabling the corresponding partial coverage - but only + * if it has not been set manually before; the warning here means that all + * packets will be dropped. + */ +static int dccp_hdlr_min_cscov(struct sock *sk, u64 cscov, bool rx) +{ + struct dccp_sock *dp = dccp_sk(sk); - /* check if we are the black sheep */ - if (dp->dccps_role == DCCP_ROLE_CLIENT) { - spref = rpref; - slen = rlen; - rpref = opt->dccpop_val; - rlen = opt->dccpop_len; - } else { - spref = opt->dccpop_val; - slen = opt->dccpop_len; + if (rx) + dp->dccps_pcrlen = cscov; + else { + if (dp->dccps_pcslen == 0) + dp->dccps_pcslen = cscov; + else if (cscov > dp->dccps_pcslen) + DCCP_WARN("CsCov %u too small, peer requires >= %u\n", + dp->dccps_pcslen, (u8)cscov); } - /* - * Now we have server preference list in spref and client preference in - * rpref - */ - BUG_ON(spref == NULL); - BUG_ON(rpref == NULL); + return 0; +} - /* FIXME sanity check vals */ +static const struct { + u8 feat_num; /* DCCPF_xxx */ + enum dccp_feat_type rxtx; /* RX or TX */ + enum dccp_feat_type reconciliation; /* SP or NN */ + u8 default_value; /* as in 6.4 */ + int (*activation_hdlr)(struct sock *sk, u64 val, bool rx); +/* + * Lookup table for location and type of features (from RFC 4340/4342) + * +--------------------------+----+-----+----+----+---------+-----------+ + * | Feature | Location | Reconc. | Initial | Section | + * | | RX | TX | SP | NN | Value | Reference | + * +--------------------------+----+-----+----+----+---------+-----------+ + * | DCCPF_CCID | | X | X | | 2 | 10 | + * | DCCPF_SHORT_SEQNOS | | X | X | | 0 | 7.6.1 | + * | DCCPF_SEQUENCE_WINDOW | | X | | X | 100 | 7.5.2 | + * | DCCPF_ECN_INCAPABLE | X | | X | | 0 | 12.1 | + * | DCCPF_ACK_RATIO | | X | | X | 2 | 11.3 | + * | DCCPF_SEND_ACK_VECTOR | X | | X | | 0 | 11.5 | + * | DCCPF_SEND_NDP_COUNT | | X | X | | 0 | 7.7.2 | + * | DCCPF_MIN_CSUM_COVER | X | | X | | 0 | 9.2.1 | + * | DCCPF_DATA_CHECKSUM | X | | X | | 0 | 9.3.1 | + * | DCCPF_SEND_LEV_RATE | X | | X | | 0 | 4342/8.4 | + * +--------------------------+----+-----+----+----+---------+-----------+ + */ +} dccp_feat_table[] = { + { DCCPF_CCID, FEAT_AT_TX, FEAT_SP, 2, dccp_hdlr_ccid }, + { DCCPF_SHORT_SEQNOS, FEAT_AT_TX, FEAT_SP, 0, NULL }, + { DCCPF_SEQUENCE_WINDOW, FEAT_AT_TX, FEAT_NN, 100, dccp_hdlr_seq_win }, + { DCCPF_ECN_INCAPABLE, FEAT_AT_RX, FEAT_SP, 0, NULL }, + { DCCPF_ACK_RATIO, FEAT_AT_TX, FEAT_NN, 2, dccp_hdlr_ack_ratio}, + { DCCPF_SEND_ACK_VECTOR, FEAT_AT_RX, FEAT_SP, 0, dccp_hdlr_ackvec }, + { DCCPF_SEND_NDP_COUNT, FEAT_AT_TX, FEAT_SP, 0, dccp_hdlr_ndp }, + { DCCPF_MIN_CSUM_COVER, FEAT_AT_RX, FEAT_SP, 0, dccp_hdlr_min_cscov}, + { DCCPF_DATA_CHECKSUM, FEAT_AT_RX, FEAT_SP, 0, NULL }, + { DCCPF_SEND_LEV_RATE, FEAT_AT_RX, FEAT_SP, 0, NULL }, +}; +#define DCCP_FEAT_SUPPORTED_MAX ARRAY_SIZE(dccp_feat_table) + +/** + * dccp_feat_index - Hash function to map feature number into array position + * Returns consecutive array index or -1 if the feature is not understood. + */ +static int dccp_feat_index(u8 feat_num) +{ + /* The first 9 entries are occupied by the types from RFC 4340, 6.4 */ + if (feat_num > DCCPF_RESERVED && feat_num <= DCCPF_DATA_CHECKSUM) + return feat_num - 1; - /* Are values in any order? XXX Lame "algorithm" here */ - for (i = 0; i < slen; i++) { - for (j = 0; j < rlen; j++) { - if (spref[i] == rpref[j]) { - res = &spref[i]; - break; - } - } - if (res) - break; + /* + * Other features: add cases for new feature types here after adding + * them to the above table. + */ + switch (feat_num) { + case DCCPF_SEND_LEV_RATE: + return DCCP_FEAT_SUPPORTED_MAX - 1; } + return -1; +} - /* we didn't agree on anything */ - if (res == NULL) { - /* confirm previous value */ - switch (opt->dccpop_feat) { - case DCCPF_CCID: - /* XXX did i get this right? =P */ - if (opt->dccpop_type == DCCPO_CHANGE_L) - res = &dccp_msk(sk)->dccpms_tx_ccid; - else - res = &dccp_msk(sk)->dccpms_rx_ccid; - break; +static u8 dccp_feat_type(u8 feat_num) +{ + int idx = dccp_feat_index(feat_num); - default: - DCCP_BUG("Fell through, feat=%d", opt->dccpop_feat); - /* XXX implement res */ - return -EFAULT; - } + if (idx < 0) + return FEAT_UNKNOWN; + return dccp_feat_table[idx].reconciliation; +} - dccp_pr_debug("Don't agree... reconfirming %d\n", *res); - agree = 0; /* this is used for mandatory options... */ - } +static int dccp_feat_default_value(u8 feat_num) +{ + int idx = dccp_feat_index(feat_num); + /* + * There are no default values for unknown features, so encountering a + * negative index here indicates a serious problem somewhere else. + */ + DCCP_BUG_ON(idx < 0); - /* need to put result and our preference list */ - rlen = 1 + opt->dccpop_len; - rpref = kmalloc(rlen, GFP_ATOMIC); - if (rpref == NULL) - return -ENOMEM; + return idx < 0 ? 0 : dccp_feat_table[idx].default_value; +} - *rpref = *res; - memcpy(&rpref[1], opt->dccpop_val, opt->dccpop_len); +static int __dccp_feat_activate(struct sock *sk, const int idx, + const bool is_local, dccp_feat_val const *fval) +{ + bool rx; + u64 val; - /* put it in the "confirm queue" */ - if (opt->dccpop_sc == NULL) { - opt->dccpop_sc = kmalloc(sizeof(*opt->dccpop_sc), GFP_ATOMIC); - if (opt->dccpop_sc == NULL) { - kfree(rpref); - return -ENOMEM; + if (idx < 0 || idx >= DCCP_FEAT_SUPPORTED_MAX) + return -1; + if (dccp_feat_table[idx].activation_hdlr == NULL) + return 0; + + if (fval == NULL) { + val = dccp_feat_table[idx].default_value; + } else if (dccp_feat_table[idx].reconciliation == FEAT_SP) { + if (fval->sp.vec == NULL) { + /* + * This can happen when an empty Confirm is sent + * for an SP (i.e. known) feature. In this case + * we would be using the default anyway. + */ + DCCP_CRIT("Feature #%d undefined: using default", idx); + val = dccp_feat_table[idx].default_value; + } else { + val = fval->sp.vec[0]; } } else { - /* recycle the confirm slot */ - BUG_ON(opt->dccpop_sc->dccpoc_val == NULL); - kfree(opt->dccpop_sc->dccpoc_val); - dccp_pr_debug("recycling confirm slot\n"); + val = fval->nn; } - memset(opt->dccpop_sc, 0, sizeof(*opt->dccpop_sc)); - opt->dccpop_sc->dccpoc_val = rpref; - opt->dccpop_sc->dccpoc_len = rlen; + /* Location is RX if this is a local-RX or remote-TX feature */ + rx = (is_local == (dccp_feat_table[idx].rxtx == FEAT_AT_RX)); - /* update the option on our side [we are about to send the confirm] */ - rc = dccp_feat_update(sk, opt->dccpop_type, opt->dccpop_feat, *res); - if (rc) { - kfree(opt->dccpop_sc->dccpoc_val); - kfree(opt->dccpop_sc); - opt->dccpop_sc = NULL; - return rc; - } + return dccp_feat_table[idx].activation_hdlr(sk, val, rx); +} - dccp_pr_debug("Will confirm %d\n", *rpref); +/* Test for "Req'd" feature (RFC 4340, 6.4) */ +static inline int dccp_feat_must_be_understood(u8 feat_num) +{ + return feat_num == DCCPF_CCID || feat_num == DCCPF_SHORT_SEQNOS || + feat_num == DCCPF_SEQUENCE_WINDOW; +} - /* say we want to change to X but we just got a confirm X, suppress our - * change - */ - if (!opt->dccpop_conf) { - if (*opt->dccpop_val == *res) - opt->dccpop_conf = 1; - dccp_pr_debug("won't ask for change of same feature\n"); +/* copy constructor, fval must not already contain allocated memory */ +static int dccp_feat_clone_sp_val(dccp_feat_val *fval, u8 const *val, u8 len) +{ + fval->sp.len = len; + if (fval->sp.len > 0) { + fval->sp.vec = kmemdup(val, len, gfp_any()); + if (fval->sp.vec == NULL) { + fval->sp.len = 0; + return -ENOBUFS; + } } + return 0; +} - return agree ? 0 : DCCP_FEAT_SP_NOAGREE; /* used for mandatory opts */ +static void dccp_feat_val_destructor(u8 feat_num, dccp_feat_val *val) +{ + if (unlikely(val == NULL)) + return; + if (dccp_feat_type(feat_num) == FEAT_SP) + kfree(val->sp.vec); + memset(val, 0, sizeof(*val)); } -static int dccp_feat_sp(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len) +static struct dccp_feat_entry * + dccp_feat_clone_entry(struct dccp_feat_entry const *original) { - struct dccp_minisock *dmsk = dccp_msk(sk); - struct dccp_opt_pend *opt; - int rc = 1; - u8 t; + struct dccp_feat_entry *new; + u8 type = dccp_feat_type(original->feat_num); - /* - * We received a CHANGE. We gotta match it against our own preference - * list. If we got a CHANGE_R it means it's a change for us, so we need - * to compare our CHANGE_L list. - */ - if (type == DCCPO_CHANGE_L) - t = DCCPO_CHANGE_R; - else - t = DCCPO_CHANGE_L; + if (type == FEAT_UNKNOWN) + return NULL; - /* find our preference list for this feature */ - list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) { - if (opt->dccpop_type != t || opt->dccpop_feat != feature) - continue; + new = kmemdup(original, sizeof(struct dccp_feat_entry), gfp_any()); + if (new == NULL) + return NULL; - /* find the winner from the two preference lists */ - rc = dccp_feat_reconcile(sk, opt, val, len); - break; + if (type == FEAT_SP && dccp_feat_clone_sp_val(&new->val, + original->val.sp.vec, + original->val.sp.len)) { + kfree(new); + return NULL; } + return new; +} - /* We didn't deal with the change. This can happen if we have no - * preference list for the feature. In fact, it just shouldn't - * happen---if we understand a feature, we should have a preference list - * with at least the default value. - */ - BUG_ON(rc == 1); +static void dccp_feat_entry_destructor(struct dccp_feat_entry *entry) +{ + if (entry != NULL) { + dccp_feat_val_destructor(entry->feat_num, &entry->val); + kfree(entry); + } +} - return rc; +/* + * List management functions + * + * Feature negotiation lists rely on and maintain the following invariants: + * - each feat_num in the list is known, i.e. we know its type and default value + * - each feat_num/is_local combination is unique (old entries are overwritten) + * - SP values are always freshly allocated + * - list is sorted in increasing order of feature number (faster lookup) + */ +static struct dccp_feat_entry *dccp_feat_list_lookup(struct list_head *fn_list, + u8 feat_num, bool is_local) +{ + struct dccp_feat_entry *entry; + + list_for_each_entry(entry, fn_list, node) { + if (entry->feat_num == feat_num && entry->is_local == is_local) + return entry; + else if (entry->feat_num > feat_num) + break; + } + return NULL; } -static int dccp_feat_nn(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len) +/** + * dccp_feat_entry_new - Central list update routine (called by all others) + * @head: list to add to + * @feat: feature number + * @local: whether the local (1) or remote feature with number @feat is meant + * This is the only constructor and serves to ensure the above invariants. + */ +static struct dccp_feat_entry * + dccp_feat_entry_new(struct list_head *head, u8 feat, bool local) { - struct dccp_opt_pend *opt; - struct dccp_minisock *dmsk = dccp_msk(sk); - u8 *copy; - int rc; + struct dccp_feat_entry *entry; + + list_for_each_entry(entry, head, node) + if (entry->feat_num == feat && entry->is_local == local) { + dccp_feat_val_destructor(entry->feat_num, &entry->val); + return entry; + } else if (entry->feat_num > feat) { + head = &entry->node; + break; + } - /* NN features must be Change L (sec. 6.3.2) */ - if (type != DCCPO_CHANGE_L) { - dccp_pr_debug("received %s for NN feature %d\n", - dccp_feat_typename(type), feature); - return -EFAULT; + entry = kmalloc(sizeof(*entry), gfp_any()); + if (entry != NULL) { + entry->feat_num = feat; + entry->is_local = local; + list_add_tail(&entry->node, head); } + return entry; +} - /* XXX sanity check opt val */ +/** + * dccp_feat_push_change - Add/overwrite a Change option in the list + * @fn_list: feature-negotiation list to update + * @feat: one of %dccp_feature_numbers + * @local: whether local (1) or remote (0) @feat_num is meant + * @needs_mandatory: whether to use Mandatory feature negotiation options + * @fval: pointer to NN/SP value to be inserted (will be copied) + */ +static int dccp_feat_push_change(struct list_head *fn_list, u8 feat, u8 local, + u8 mandatory, dccp_feat_val *fval) +{ + struct dccp_feat_entry *new = dccp_feat_entry_new(fn_list, feat, local); - /* copy option so we can confirm it */ - opt = kzalloc(sizeof(*opt), GFP_ATOMIC); - if (opt == NULL) + if (new == NULL) return -ENOMEM; - copy = kmemdup(val, len, GFP_ATOMIC); - if (copy == NULL) { - kfree(opt); - return -ENOMEM; - } + new->feat_num = feat; + new->is_local = local; + new->state = FEAT_INITIALISING; + new->needs_confirm = 0; + new->empty_confirm = 0; + new->val = *fval; + new->needs_mandatory = mandatory; - opt->dccpop_type = DCCPO_CONFIRM_R; /* NN can only confirm R */ - opt->dccpop_feat = feature; - opt->dccpop_val = copy; - opt->dccpop_len = len; + return 0; +} - /* change feature */ - rc = dccp_feat_update(sk, type, feature, *val); - if (rc) { - kfree(opt->dccpop_val); - kfree(opt); - return rc; - } +/** + * dccp_feat_push_confirm - Add a Confirm entry to the FN list + * @fn_list: feature-negotiation list to add to + * @feat: one of %dccp_feature_numbers + * @local: whether local (1) or remote (0) @feat_num is being confirmed + * @fval: pointer to NN/SP value to be inserted or NULL + * Returns 0 on success, a Reset code for further processing otherwise. + */ +static int dccp_feat_push_confirm(struct list_head *fn_list, u8 feat, u8 local, + dccp_feat_val *fval) +{ + struct dccp_feat_entry *new = dccp_feat_entry_new(fn_list, feat, local); - dccp_feat_debug(type, feature, *copy); + if (new == NULL) + return DCCP_RESET_CODE_TOO_BUSY; - list_add_tail(&opt->dccpop_node, &dmsk->dccpms_conf); + new->feat_num = feat; + new->is_local = local; + new->state = FEAT_STABLE; /* transition in 6.6.2 */ + new->needs_confirm = 1; + new->empty_confirm = (fval == NULL); + new->val.nn = 0; /* zeroes the whole structure */ + if (!new->empty_confirm) + new->val = *fval; + new->needs_mandatory = 0; return 0; } -static void dccp_feat_empty_confirm(struct dccp_minisock *dmsk, - u8 type, u8 feature) +static int dccp_push_empty_confirm(struct list_head *fn_list, u8 feat, u8 local) { - /* XXX check if other confirms for that are queued and recycle slot */ - struct dccp_opt_pend *opt = kzalloc(sizeof(*opt), GFP_ATOMIC); + return dccp_feat_push_confirm(fn_list, feat, local, NULL); +} - if (opt == NULL) { - /* XXX what do we do? Ignoring should be fine. It's a change - * after all =P - */ - return; - } +static inline void dccp_feat_list_pop(struct dccp_feat_entry *entry) +{ + list_del(&entry->node); + dccp_feat_entry_destructor(entry); +} - switch (type) { - case DCCPO_CHANGE_L: - opt->dccpop_type = DCCPO_CONFIRM_R; - break; - case DCCPO_CHANGE_R: - opt->dccpop_type = DCCPO_CONFIRM_L; - break; - default: - DCCP_WARN("invalid type %d\n", type); - kfree(opt); - return; +void dccp_feat_list_purge(struct list_head *fn_list) +{ + struct dccp_feat_entry *entry, *next; + + list_for_each_entry_safe(entry, next, fn_list, node) + dccp_feat_entry_destructor(entry); + INIT_LIST_HEAD(fn_list); +} +EXPORT_SYMBOL_GPL(dccp_feat_list_purge); + +/* generate @to as full clone of @from - @to must not contain any nodes */ +int dccp_feat_clone_list(struct list_head const *from, struct list_head *to) +{ + struct dccp_feat_entry *entry, *new; + + INIT_LIST_HEAD(to); + list_for_each_entry(entry, from, node) { + new = dccp_feat_clone_entry(entry); + if (new == NULL) + goto cloning_failed; + list_add_tail(&new->node, to); } - opt->dccpop_feat = feature; - opt->dccpop_val = NULL; - opt->dccpop_len = 0; + return 0; + +cloning_failed: + dccp_feat_list_purge(to); + return -ENOMEM; +} - /* change feature */ - dccp_pr_debug("Empty %s(%d)\n", dccp_feat_typename(type), feature); +/** + * dccp_feat_valid_nn_length - Enforce length constraints on NN options + * Length is between 0 and %DCCP_OPTVAL_MAXLEN. Used for outgoing packets only, + * incoming options are accepted as long as their values are valid. + */ +static u8 dccp_feat_valid_nn_length(u8 feat_num) +{ + if (feat_num == DCCPF_ACK_RATIO) /* RFC 4340, 11.3 and 6.6.8 */ + return 2; + if (feat_num == DCCPF_SEQUENCE_WINDOW) /* RFC 4340, 7.5.2 and 6.5 */ + return 6; + return 0; +} - list_add_tail(&opt->dccpop_node, &dmsk->dccpms_conf); +static u8 dccp_feat_is_valid_nn_val(u8 feat_num, u64 val) +{ + switch (feat_num) { + case DCCPF_ACK_RATIO: + return val <= DCCPF_ACK_RATIO_MAX; + case DCCPF_SEQUENCE_WINDOW: + return val >= DCCPF_SEQ_WMIN && val <= DCCPF_SEQ_WMAX; + } + return 0; /* feature unknown - so we can't tell */ } -static void dccp_feat_flush_confirm(struct sock *sk) +/* check that SP values are within the ranges defined in RFC 4340 */ +static u8 dccp_feat_is_valid_sp_val(u8 feat_num, u8 val) { - struct dccp_minisock *dmsk = dccp_msk(sk); - /* Check if there is anything to confirm in the first place */ - int yes = !list_empty(&dmsk->dccpms_conf); + switch (feat_num) { + case DCCPF_CCID: + return val == DCCPC_CCID2 || val == DCCPC_CCID3; + /* Type-check Boolean feature values: */ + case DCCPF_SHORT_SEQNOS: + case DCCPF_ECN_INCAPABLE: + case DCCPF_SEND_ACK_VECTOR: + case DCCPF_SEND_NDP_COUNT: + case DCCPF_DATA_CHECKSUM: + case DCCPF_SEND_LEV_RATE: + return val < 2; + case DCCPF_MIN_CSUM_COVER: + return val < 16; + } + return 0; /* feature unknown */ +} - if (!yes) { - struct dccp_opt_pend *opt; +static u8 dccp_feat_sp_list_ok(u8 feat_num, u8 const *sp_list, u8 sp_len) +{ + if (sp_list == NULL || sp_len < 1) + return 0; + while (sp_len--) + if (!dccp_feat_is_valid_sp_val(feat_num, *sp_list++)) + return 0; + return 1; +} - list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) { - if (opt->dccpop_conf) { - yes = 1; - break; +/** + * dccp_feat_insert_opts - Generate FN options from current list state + * @skb: next sk_buff to be sent to the peer + * @dp: for client during handshake and general negotiation + * @dreq: used by the server only (all Changes/Confirms in LISTEN/RESPOND) + */ +int dccp_feat_insert_opts(struct dccp_sock *dp, struct dccp_request_sock *dreq, + struct sk_buff *skb) +{ + struct list_head *fn = dreq ? &dreq->dreq_featneg : &dp->dccps_featneg; + struct dccp_feat_entry *pos, *next; + u8 opt, type, len, *ptr, nn_in_nbo[DCCP_OPTVAL_MAXLEN]; + bool rpt; + + /* put entries into @skb in the order they appear in the list */ + list_for_each_entry_safe_reverse(pos, next, fn, node) { + opt = dccp_feat_genopt(pos); + type = dccp_feat_type(pos->feat_num); + rpt = false; + + if (pos->empty_confirm) { + len = 0; + ptr = NULL; + } else { + if (type == FEAT_SP) { + len = pos->val.sp.len; + ptr = pos->val.sp.vec; + rpt = pos->needs_confirm; + } else if (type == FEAT_NN) { + len = dccp_feat_valid_nn_length(pos->feat_num); + ptr = nn_in_nbo; + dccp_encode_value_var(pos->val.nn, ptr, len); + } else { + DCCP_BUG("unknown feature %u", pos->feat_num); + return -1; } } + + if (dccp_insert_fn_opt(skb, opt, pos->feat_num, ptr, len, rpt)) + return -1; + if (pos->needs_mandatory && dccp_insert_option_mandatory(skb)) + return -1; + /* + * Enter CHANGING after transmitting the Change option (6.6.2). + */ + if (pos->state == FEAT_INITIALISING) + pos->state = FEAT_CHANGING; } + return 0; +} - if (!yes) - return; +/** + * __feat_register_nn - Register new NN value on socket + * @fn: feature-negotiation list to register with + * @feat: an NN feature from %dccp_feature_numbers + * @mandatory: use Mandatory option if 1 + * @nn_val: value to register (restricted to 4 bytes) + * Note that NN features are local by definition (RFC 4340, 6.3.2). + */ +static int __feat_register_nn(struct list_head *fn, u8 feat, + u8 mandatory, u64 nn_val) +{ + dccp_feat_val fval = { .nn = nn_val }; - /* OK there is something to confirm... */ - /* XXX check if packet is in flight? Send delayed ack?? */ - if (sk->sk_state == DCCP_OPEN) - dccp_send_ack(sk); + if (dccp_feat_type(feat) != FEAT_NN || + !dccp_feat_is_valid_nn_val(feat, nn_val)) + return -EINVAL; + + /* Don't bother with default values, they will be activated anyway. */ + if (nn_val - (u64)dccp_feat_default_value(feat) == 0) + return 0; + + return dccp_feat_push_change(fn, feat, 1, mandatory, &fval); } -int dccp_feat_change_recv(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len) +/** + * __feat_register_sp - Register new SP value/list on socket + * @fn: feature-negotiation list to register with + * @feat: an SP feature from %dccp_feature_numbers + * @is_local: whether the local (1) or the remote (0) @feat is meant + * @mandatory: use Mandatory option if 1 + * @sp_val: SP value followed by optional preference list + * @sp_len: length of @sp_val in bytes + */ +static int __feat_register_sp(struct list_head *fn, u8 feat, u8 is_local, + u8 mandatory, u8 const *sp_val, u8 sp_len) { - int rc; + dccp_feat_val fval; - dccp_feat_debug(type, feature, *val); + if (dccp_feat_type(feat) != FEAT_SP || + !dccp_feat_sp_list_ok(feat, sp_val, sp_len)) + return -EINVAL; - /* figure out if it's SP or NN feature */ - switch (feature) { - /* deal with SP features */ - case DCCPF_CCID: - rc = dccp_feat_sp(sk, type, feature, val, len); - break; + /* Avoid negotiating alien CCIDs by only advertising supported ones */ + if (feat == DCCPF_CCID && !ccid_support_check(sp_val, sp_len)) + return -EOPNOTSUPP; - /* deal with NN features */ - case DCCPF_ACK_RATIO: - rc = dccp_feat_nn(sk, type, feature, val, len); - break; + if (dccp_feat_clone_sp_val(&fval, sp_val, sp_len)) + return -ENOMEM; - /* XXX implement other features */ - default: - dccp_pr_debug("UNIMPLEMENTED: not handling %s(%d, ...)\n", - dccp_feat_typename(type), feature); - rc = -EFAULT; - break; - } + return dccp_feat_push_change(fn, feat, is_local, mandatory, &fval); +} - /* check if there were problems changing features */ - if (rc) { - /* If we don't agree on SP, we sent a confirm for old value. - * However we propagate rc to caller in case option was - * mandatory +/** + * dccp_feat_register_sp - Register requests to change SP feature values + * @sk: client or listening socket + * @feat: one of %dccp_feature_numbers + * @is_local: whether the local (1) or remote (0) @feat is meant + * @list: array of preferred values, in descending order of preference + * @len: length of @list in bytes + */ +int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local, + u8 const *list, u8 len) +{ /* any changes must be registered before establishing the connection */ + if (sk->sk_state != DCCP_CLOSED) + return -EISCONN; + if (dccp_feat_type(feat) != FEAT_SP) + return -EINVAL; + return __feat_register_sp(&dccp_sk(sk)->dccps_featneg, feat, is_local, + 0, list, len); +} + +/* Analogous to dccp_feat_register_sp(), but for non-negotiable values */ +int dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val) +{ + /* any changes must be registered before establishing the connection */ + if (sk->sk_state != DCCP_CLOSED) + return -EISCONN; + if (dccp_feat_type(feat) != FEAT_NN) + return -EINVAL; + return __feat_register_nn(&dccp_sk(sk)->dccps_featneg, feat, 0, val); +} + +/* + * Tracking features whose value depend on the choice of CCID + * + * This is designed with an extension in mind so that a list walk could be done + * before activating any features. However, the existing framework was found to + * work satisfactorily up until now, the automatic verification is left open. + * When adding new CCIDs, add a corresponding dependency table here. + */ +static const struct ccid_dependency *dccp_feat_ccid_deps(u8 ccid, bool is_local) +{ + static const struct ccid_dependency ccid2_dependencies[2][2] = { + /* + * CCID2 mandates Ack Vectors (RFC 4341, 4.): as CCID is a TX + * feature and Send Ack Vector is an RX feature, `is_local' + * needs to be reversed. */ - if (rc != DCCP_FEAT_SP_NOAGREE) - dccp_feat_empty_confirm(dccp_msk(sk), type, feature); + { /* Dependencies of the receiver-side (remote) CCID2 */ + { + .dependent_feat = DCCPF_SEND_ACK_VECTOR, + .is_local = true, + .is_mandatory = true, + .val = 1 + }, + { 0, 0, 0, 0 } + }, + { /* Dependencies of the sender-side (local) CCID2 */ + { + .dependent_feat = DCCPF_SEND_ACK_VECTOR, + .is_local = false, + .is_mandatory = true, + .val = 1 + }, + { 0, 0, 0, 0 } + } + }; + static const struct ccid_dependency ccid3_dependencies[2][5] = { + { /* + * Dependencies of the receiver-side CCID3 + */ + { /* locally disable Ack Vectors */ + .dependent_feat = DCCPF_SEND_ACK_VECTOR, + .is_local = true, + .is_mandatory = false, + .val = 0 + }, + { /* see below why Send Loss Event Rate is on */ + .dependent_feat = DCCPF_SEND_LEV_RATE, + .is_local = true, + .is_mandatory = true, + .val = 1 + }, + { /* NDP Count is needed as per RFC 4342, 6.1.1 */ + .dependent_feat = DCCPF_SEND_NDP_COUNT, + .is_local = false, + .is_mandatory = true, + .val = 1 + }, + { 0, 0, 0, 0 }, + }, + { /* + * CCID3 at the TX side: we request that the HC-receiver + * will not send Ack Vectors (they will be ignored, so + * Mandatory is not set); we enable Send Loss Event Rate + * (Mandatory since the implementation does not support + * the Loss Intervals option of RFC 4342, 8.6). + * The last two options are for peer's information only. + */ + { + .dependent_feat = DCCPF_SEND_ACK_VECTOR, + .is_local = false, + .is_mandatory = false, + .val = 0 + }, + { + .dependent_feat = DCCPF_SEND_LEV_RATE, + .is_local = false, + .is_mandatory = true, + .val = 1 + }, + { /* this CCID does not support Ack Ratio */ + .dependent_feat = DCCPF_ACK_RATIO, + .is_local = true, + .is_mandatory = false, + .val = 0 + }, + { /* tell receiver we are sending NDP counts */ + .dependent_feat = DCCPF_SEND_NDP_COUNT, + .is_local = true, + .is_mandatory = false, + .val = 1 + }, + { 0, 0, 0, 0 } + } + }; + switch (ccid) { + case DCCPC_CCID2: + return ccid2_dependencies[is_local]; + case DCCPC_CCID3: + return ccid3_dependencies[is_local]; + default: + return NULL; } +} - /* generate the confirm [if required] */ - dccp_feat_flush_confirm(sk); - +/** + * dccp_feat_propagate_ccid - Resolve dependencies of features on choice of CCID + * @fn: feature-negotiation list to update + * @id: CCID number to track + * @is_local: whether TX CCID (1) or RX CCID (0) is meant + * This function needs to be called after registering all other features. + */ +static int dccp_feat_propagate_ccid(struct list_head *fn, u8 id, bool is_local) +{ + const struct ccid_dependency *table = dccp_feat_ccid_deps(id, is_local); + int i, rc = (table == NULL); + + for (i = 0; rc == 0 && table[i].dependent_feat != DCCPF_RESERVED; i++) + if (dccp_feat_type(table[i].dependent_feat) == FEAT_SP) + rc = __feat_register_sp(fn, table[i].dependent_feat, + table[i].is_local, + table[i].is_mandatory, + &table[i].val, 1); + else + rc = __feat_register_nn(fn, table[i].dependent_feat, + table[i].is_mandatory, + table[i].val); return rc; } -EXPORT_SYMBOL_GPL(dccp_feat_change_recv); +/** + * dccp_feat_finalise_settings - Finalise settings before starting negotiation + * @dp: client or listening socket (settings will be inherited) + * This is called after all registrations (socket initialisation, sysctls, and + * sockopt calls), and before sending the first packet containing Change options + * (ie. client-Request or server-Response), to ensure internal consistency. + */ +int dccp_feat_finalise_settings(struct dccp_sock *dp) +{ + struct list_head *fn = &dp->dccps_featneg; + struct dccp_feat_entry *entry; + int i = 2, ccids[2] = { -1, -1 }; -int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature, - u8 *val, u8 len) + /* + * Propagating CCIDs: + * 1) not useful to propagate CCID settings if this host advertises more + * than one CCID: the choice of CCID may still change - if this is + * the client, or if this is the server and the client sends + * singleton CCID values. + * 2) since is that propagate_ccid changes the list, we defer changing + * the sorted list until after the traversal. + */ + list_for_each_entry(entry, fn, node) + if (entry->feat_num == DCCPF_CCID && entry->val.sp.len == 1) + ccids[entry->is_local] = entry->val.sp.vec[0]; + while (i--) + if (ccids[i] > 0 && dccp_feat_propagate_ccid(fn, ccids[i], i)) + return -1; + return 0; +} + +/** + * dccp_feat_server_ccid_dependencies - Resolve CCID-dependent features + * It is the server which resolves the dependencies once the CCID has been + * fully negotiated. If no CCID has been negotiated, it uses the default CCID. + */ +int dccp_feat_server_ccid_dependencies(struct dccp_request_sock *dreq) { - u8 t; - struct dccp_opt_pend *opt; - struct dccp_minisock *dmsk = dccp_msk(sk); - int found = 0; - int all_confirmed = 1; + struct list_head *fn = &dreq->dreq_featneg; + struct dccp_feat_entry *entry; + u8 is_local, ccid; - dccp_feat_debug(type, feature, *val); + for (is_local = 0; is_local <= 1; is_local++) { + entry = dccp_feat_list_lookup(fn, DCCPF_CCID, is_local); - /* locate our change request */ - switch (type) { - case DCCPO_CONFIRM_L: t = DCCPO_CHANGE_R; break; - case DCCPO_CONFIRM_R: t = DCCPO_CHANGE_L; break; - default: DCCP_WARN("invalid type %d\n", type); - return 1; + if (entry != NULL && !entry->empty_confirm) + ccid = entry->val.sp.vec[0]; + else + ccid = dccp_feat_default_value(DCCPF_CCID); + if (dccp_feat_propagate_ccid(fn, ccid, is_local)) + return -1; } - /* XXX sanity check feature value */ + return 0; +} - list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) { - if (!opt->dccpop_conf && opt->dccpop_type == t && - opt->dccpop_feat == feature) { - found = 1; - dccp_pr_debug("feature %d found\n", opt->dccpop_feat); +/* Select the first entry in @servlist that also occurs in @clilist (6.3.1) */ +static int dccp_feat_preflist_match(u8 *servlist, u8 slen, u8 *clilist, u8 clen) +{ + u8 c, s; - /* XXX do sanity check */ + for (s = 0; s < slen; s++) + for (c = 0; c < clen; c++) + if (servlist[s] == clilist[c]) + return servlist[s]; + return -1; +} - opt->dccpop_conf = 1; +/** + * dccp_feat_prefer - Move preferred entry to the start of array + * Reorder the @array_len elements in @array so that @preferred_value comes + * first. Returns >0 to indicate that @preferred_value does occur in @array. + */ +static u8 dccp_feat_prefer(u8 preferred_value, u8 *array, u8 array_len) +{ + u8 i, does_occur = 0; - /* We got a confirmation---change the option */ - dccp_feat_update(sk, opt->dccpop_type, - opt->dccpop_feat, *val); + if (array != NULL) { + for (i = 0; i < array_len; i++) + if (array[i] == preferred_value) { + array[i] = array[0]; + does_occur++; + } + if (does_occur) + array[0] = preferred_value; + } + return does_occur; +} - /* XXX check the return value of dccp_feat_update */ - break; - } +/** + * dccp_feat_reconcile - Reconcile SP preference lists + * @fval: SP list to reconcile into + * @arr: received SP preference list + * @len: length of @arr in bytes + * @is_server: whether this side is the server (and @fv is the server's list) + * @reorder: whether to reorder the list in @fv after reconciling with @arr + * When successful, > 0 is returned and the reconciled list is in @fval. + * A value of 0 means that negotiation failed (no shared entry). + */ +static int dccp_feat_reconcile(dccp_feat_val *fv, u8 *arr, u8 len, + bool is_server, bool reorder) +{ + int rc; - if (!opt->dccpop_conf) - all_confirmed = 0; + if (!fv->sp.vec || !arr) { + DCCP_CRIT("NULL feature value or array"); + return 0; } - /* fix re-transmit timer */ - /* XXX gotta make sure that no option negotiation occurs during - * connection shutdown. Consider that the CLOSEREQ is sent and timer is - * on. if all options are confirmed it might kill timer which should - * remain alive until close is received. - */ - if (all_confirmed) { - dccp_pr_debug("clear feat negotiation timer %p\n", sk); - inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); - } + if (is_server) + rc = dccp_feat_preflist_match(fv->sp.vec, fv->sp.len, arr, len); + else + rc = dccp_feat_preflist_match(arr, len, fv->sp.vec, fv->sp.len); - if (!found) - dccp_pr_debug("%s(%d, ...) never requested\n", - dccp_feat_typename(type), feature); - return 0; -} + if (!reorder) + return rc; + if (rc < 0) + return 0; -EXPORT_SYMBOL_GPL(dccp_feat_confirm_recv); + /* + * Reorder list: used for activating features and in dccp_insert_fn_opt. + */ + return dccp_feat_prefer(rc, fv->sp.vec, fv->sp.len); +} -void dccp_feat_clean(struct dccp_minisock *dmsk) +/** + * dccp_feat_change_recv - Process incoming ChangeL/R options + * @fn: feature-negotiation list to update + * @is_mandatory: whether the Change was preceded by a Mandatory option + * @opt: %DCCPO_CHANGE_L or %DCCPO_CHANGE_R + * @feat: one of %dccp_feature_numbers + * @val: NN value or SP value/preference list + * @len: length of @val in bytes + * @server: whether this node is the server (1) or the client (0) + */ +static u8 dccp_feat_change_recv(struct list_head *fn, u8 is_mandatory, u8 opt, + u8 feat, u8 *val, u8 len, const bool server) { - struct dccp_opt_pend *opt, *next; + u8 defval, type = dccp_feat_type(feat); + const bool local = (opt == DCCPO_CHANGE_R); + struct dccp_feat_entry *entry; + dccp_feat_val fval; - list_for_each_entry_safe(opt, next, &dmsk->dccpms_pending, - dccpop_node) { - BUG_ON(opt->dccpop_val == NULL); - kfree(opt->dccpop_val); + if (len == 0 || type == FEAT_UNKNOWN) /* 6.1 and 6.6.8 */ + goto unknown_feature_or_value; + + /* + * Negotiation of NN features: Change R is invalid, so there is no + * simultaneous negotiation; hence we do not look up in the list. + */ + if (type == FEAT_NN) { + if (local || len > sizeof(fval.nn)) + goto unknown_feature_or_value; + + /* 6.3.2: "The feature remote MUST accept any valid value..." */ + fval.nn = dccp_decode_value_var(val, len); + if (!dccp_feat_is_valid_nn_val(feat, fval.nn)) + goto unknown_feature_or_value; + + return dccp_feat_push_confirm(fn, feat, local, &fval); + } + + /* + * Unidirectional/simultaneous negotiation of SP features (6.3.1) + */ + entry = dccp_feat_list_lookup(fn, feat, local); + if (entry == NULL) { + /* + * No particular preferences have been registered. We deal with + * this situation by assuming that all valid values are equally + * acceptable, and apply the following checks: + * - if the peer's list is a singleton, we accept a valid value; + * - if we are the server, we first try to see if the peer (the + * client) advertises the default value. If yes, we use it, + * otherwise we accept the preferred value; + * - else if we are the client, we use the first list element. + */ + if (dccp_feat_clone_sp_val(&fval, val, 1)) + return DCCP_RESET_CODE_TOO_BUSY; + + if (len > 1 && server) { + defval = dccp_feat_default_value(feat); + if (dccp_feat_preflist_match(&defval, 1, val, len) > -1) + fval.sp.vec[0] = defval; + } else if (!dccp_feat_is_valid_sp_val(feat, fval.sp.vec[0])) { + kfree(fval.sp.vec); + goto unknown_feature_or_value; + } - if (opt->dccpop_sc != NULL) { - BUG_ON(opt->dccpop_sc->dccpoc_val == NULL); - kfree(opt->dccpop_sc->dccpoc_val); - kfree(opt->dccpop_sc); + /* Treat unsupported CCIDs like invalid values */ + if (feat == DCCPF_CCID && !ccid_support_check(fval.sp.vec, 1)) { + kfree(fval.sp.vec); + goto not_valid_or_not_known; } - kfree(opt); + return dccp_feat_push_confirm(fn, feat, local, &fval); + + } else if (entry->state == FEAT_UNSTABLE) { /* 6.6.2 */ + return 0; } - INIT_LIST_HEAD(&dmsk->dccpms_pending); - list_for_each_entry_safe(opt, next, &dmsk->dccpms_conf, dccpop_node) { - BUG_ON(opt == NULL); - if (opt->dccpop_val != NULL) - kfree(opt->dccpop_val); - kfree(opt); + if (dccp_feat_reconcile(&entry->val, val, len, server, true)) { + entry->empty_confirm = 0; + } else if (is_mandatory) { + return DCCP_RESET_CODE_MANDATORY_ERROR; + } else if (entry->state == FEAT_INITIALISING) { + /* + * Failed simultaneous negotiation (server only): try to `save' + * the connection by checking whether entry contains the default + * value for @feat. If yes, send an empty Confirm to signal that + * the received Change was not understood - which implies using + * the default value. + * If this also fails, we use Reset as the last resort. + */ + WARN_ON(!server); + defval = dccp_feat_default_value(feat); + if (!dccp_feat_reconcile(&entry->val, &defval, 1, server, true)) + return DCCP_RESET_CODE_OPTION_ERROR; + entry->empty_confirm = 1; } - INIT_LIST_HEAD(&dmsk->dccpms_conf); -} + entry->needs_confirm = 1; + entry->needs_mandatory = 0; + entry->state = FEAT_STABLE; + return 0; -EXPORT_SYMBOL_GPL(dccp_feat_clean); +unknown_feature_or_value: + if (!is_mandatory) + return dccp_push_empty_confirm(fn, feat, local); -/* this is to be called only when a listening sock creates its child. It is - * assumed by the function---the confirm is not duplicated, but rather it is - * "passed on". +not_valid_or_not_known: + return is_mandatory ? DCCP_RESET_CODE_MANDATORY_ERROR + : DCCP_RESET_CODE_OPTION_ERROR; +} + +/** + * dccp_feat_confirm_recv - Process received Confirm options + * @fn: feature-negotiation list to update + * @is_mandatory: whether @opt was preceded by a Mandatory option + * @opt: %DCCPO_CONFIRM_L or %DCCPO_CONFIRM_R + * @feat: one of %dccp_feature_numbers + * @val: NN value or SP value/preference list + * @len: length of @val in bytes + * @server: whether this node is server (1) or client (0) */ -int dccp_feat_clone(struct sock *oldsk, struct sock *newsk) +static u8 dccp_feat_confirm_recv(struct list_head *fn, u8 is_mandatory, u8 opt, + u8 feat, u8 *val, u8 len, const bool server) { - struct dccp_minisock *olddmsk = dccp_msk(oldsk); - struct dccp_minisock *newdmsk = dccp_msk(newsk); - struct dccp_opt_pend *opt; - int rc = 0; + u8 *plist, plen, type = dccp_feat_type(feat); + const bool local = (opt == DCCPO_CONFIRM_R); + struct dccp_feat_entry *entry = dccp_feat_list_lookup(fn, feat, local); - INIT_LIST_HEAD(&newdmsk->dccpms_pending); - INIT_LIST_HEAD(&newdmsk->dccpms_conf); + if (entry == NULL) { /* nothing queued: ignore or handle error */ + if (is_mandatory && type == FEAT_UNKNOWN) + return DCCP_RESET_CODE_MANDATORY_ERROR; + + if (!local && type == FEAT_NN) /* 6.3.2 */ + goto confirmation_failed; + return 0; + } - list_for_each_entry(opt, &olddmsk->dccpms_pending, dccpop_node) { - struct dccp_opt_pend *newopt; - /* copy the value of the option */ - u8 *val = kmemdup(opt->dccpop_val, opt->dccpop_len, GFP_ATOMIC); + if (entry->state != FEAT_CHANGING) /* 6.6.2 */ + return 0; - if (val == NULL) - goto out_clean; + if (len == 0) { + if (dccp_feat_must_be_understood(feat)) /* 6.6.7 */ + goto confirmation_failed; + /* + * Empty Confirm during connection setup: this means reverting + * to the `old' value, which in this case is the default. Since + * we handle default values automatically when no other values + * have been set, we revert to the old value by removing this + * entry from the list. + */ + dccp_feat_list_pop(entry); + return 0; + } - newopt = kmemdup(opt, sizeof(*newopt), GFP_ATOMIC); - if (newopt == NULL) { - kfree(val); - goto out_clean; - } + if (type == FEAT_NN) { + if (len > sizeof(entry->val.nn)) + goto confirmation_failed; - /* insert the option */ - newopt->dccpop_val = val; - list_add_tail(&newopt->dccpop_node, &newdmsk->dccpms_pending); + if (entry->val.nn == dccp_decode_value_var(val, len)) + goto confirmation_succeeded; - /* XXX what happens with backlogs and multiple connections at - * once... - */ - /* the master socket no longer needs to worry about confirms */ - opt->dccpop_sc = NULL; /* it's not a memleak---new socket has it */ + DCCP_WARN("Bogus Confirm for non-existing value\n"); + goto confirmation_failed; + } - /* reset state for a new socket */ - opt->dccpop_conf = 0; + /* + * Parsing SP Confirms: the first element of @val is the preferred + * SP value which the peer confirms, the remainder depends on @len. + * Note that only the confirmed value need to be a valid SP value. + */ + if (!dccp_feat_is_valid_sp_val(feat, *val)) + goto confirmation_failed; + + if (len == 1) { /* peer didn't supply a preference list */ + plist = val; + plen = len; + } else { /* preferred value + preference list */ + plist = val + 1; + plen = len - 1; } - /* XXX not doing anything about the conf queue */ + /* Check whether the peer got the reconciliation right (6.6.8) */ + if (dccp_feat_reconcile(&entry->val, plist, plen, server, 0) != *val) { + DCCP_WARN("Confirm selected the wrong value %u\n", *val); + return DCCP_RESET_CODE_OPTION_ERROR; + } + entry->val.sp.vec[0] = *val; -out: - return rc; +confirmation_succeeded: + entry->state = FEAT_STABLE; + return 0; -out_clean: - dccp_feat_clean(newdmsk); - rc = -ENOMEM; - goto out; +confirmation_failed: + DCCP_WARN("Confirmation failed\n"); + return is_mandatory ? DCCP_RESET_CODE_MANDATORY_ERROR + : DCCP_RESET_CODE_OPTION_ERROR; } -EXPORT_SYMBOL_GPL(dccp_feat_clone); - -static int __dccp_feat_init(struct dccp_minisock *dmsk, u8 type, u8 feat, - u8 *val, u8 len) +/** + * dccp_feat_parse_options - Process Feature-Negotiation Options + * @sk: for general use and used by the client during connection setup + * @dreq: used by the server during connection setup + * @mandatory: whether @opt was preceded by a Mandatory option + * @opt: %DCCPO_CHANGE_L | %DCCPO_CHANGE_R | %DCCPO_CONFIRM_L | %DCCPO_CONFIRM_R + * @feat: one of %dccp_feature_numbers + * @val: value contents of @opt + * @len: length of @val in bytes + * Returns 0 on success, a Reset code for ending the connection otherwise. + */ +int dccp_feat_parse_options(struct sock *sk, struct dccp_request_sock *dreq, + u8 mandatory, u8 opt, u8 feat, u8 *val, u8 len) { - int rc = -ENOMEM; - u8 *copy = kmemdup(val, len, GFP_KERNEL); + struct dccp_sock *dp = dccp_sk(sk); + struct list_head *fn = dreq ? &dreq->dreq_featneg : &dp->dccps_featneg; + bool server = false; - if (copy != NULL) { - rc = dccp_feat_change(dmsk, type, feat, copy, len, GFP_KERNEL); - if (rc) - kfree(copy); + switch (sk->sk_state) { + /* + * Negotiation during connection setup + */ + case DCCP_LISTEN: + server = true; /* fall through */ + case DCCP_REQUESTING: + switch (opt) { + case DCCPO_CHANGE_L: + case DCCPO_CHANGE_R: + return dccp_feat_change_recv(fn, mandatory, opt, feat, + val, len, server); + case DCCPO_CONFIRM_R: + case DCCPO_CONFIRM_L: + return dccp_feat_confirm_recv(fn, mandatory, opt, feat, + val, len, server); + } } - return rc; + return 0; /* ignore FN options in all other states */ } -int dccp_feat_init(struct dccp_minisock *dmsk) +int dccp_feat_init(struct sock *sk) { + struct dccp_sock *dp = dccp_sk(sk); + struct dccp_minisock *dmsk = dccp_msk(sk); int rc; - INIT_LIST_HEAD(&dmsk->dccpms_pending); - INIT_LIST_HEAD(&dmsk->dccpms_conf); - - /* CCID L */ - rc = __dccp_feat_init(dmsk, DCCPO_CHANGE_L, DCCPF_CCID, - &dmsk->dccpms_tx_ccid, 1); - if (rc) - goto out; - - /* CCID R */ - rc = __dccp_feat_init(dmsk, DCCPO_CHANGE_R, DCCPF_CCID, - &dmsk->dccpms_rx_ccid, 1); - if (rc) - goto out; + INIT_LIST_HEAD(&dmsk->dccpms_pending); /* XXX no longer used */ + INIT_LIST_HEAD(&dmsk->dccpms_conf); /* XXX no longer used */ /* Ack ratio */ - rc = __dccp_feat_init(dmsk, DCCPO_CHANGE_L, DCCPF_ACK_RATIO, - &dmsk->dccpms_ack_ratio, 1); -out: + rc = __feat_register_nn(&dp->dccps_featneg, DCCPF_ACK_RATIO, 0, + dp->dccps_l_ack_ratio); return rc; } EXPORT_SYMBOL_GPL(dccp_feat_init); +int dccp_feat_activate_values(struct sock *sk, struct list_head *fn_list) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct dccp_feat_entry *cur, *next; + int idx; + dccp_feat_val *fvals[DCCP_FEAT_SUPPORTED_MAX][2] = { + [0 ... DCCP_FEAT_SUPPORTED_MAX-1] = { NULL, NULL } + }; + + list_for_each_entry(cur, fn_list, node) { + /* + * An empty Confirm means that either an unknown feature type + * or an invalid value was present. In the first case there is + * nothing to activate, in the other the default value is used. + */ + if (cur->empty_confirm) + continue; + + idx = dccp_feat_index(cur->feat_num); + if (idx < 0) { + DCCP_BUG("Unknown feature %u", cur->feat_num); + goto activation_failed; + } + if (cur->state != FEAT_STABLE) { + DCCP_CRIT("Negotiation of %s %u failed in state %u", + cur->is_local ? "local" : "remote", + cur->feat_num, cur->state); + goto activation_failed; + } + fvals[idx][cur->is_local] = &cur->val; + } + + /* + * Activate in decreasing order of index, so that the CCIDs are always + * activated as the last feature. This avoids the case where a CCID + * relies on the initialisation of one or more features that it depends + * on (e.g. Send NDP Count, Send Ack Vector, and Ack Ratio features). + */ + for (idx = DCCP_FEAT_SUPPORTED_MAX; --idx >= 0;) + if (__dccp_feat_activate(sk, idx, 0, fvals[idx][0]) || + __dccp_feat_activate(sk, idx, 1, fvals[idx][1])) { + DCCP_CRIT("Could not activate %d", idx); + goto activation_failed; + } + + /* Clean up Change options which have been confirmed already */ + list_for_each_entry_safe(cur, next, fn_list, node) + if (!cur->needs_confirm) + dccp_feat_list_pop(cur); + + dccp_pr_debug("Activation OK\n"); + return 0; + +activation_failed: + /* + * We clean up everything that may have been allocated, since + * it is difficult to track at which stage negotiation failed. + * This is ok, since all allocation functions below are robust + * against NULL arguments. + */ + ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk); + ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); + dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; + dccp_ackvec_free(dp->dccps_hc_rx_ackvec); + dp->dccps_hc_rx_ackvec = NULL; + return -1; +} + #ifdef CONFIG_IP_DCCP_DEBUG const char *dccp_feat_typename(const u8 type) { @@ -639,6 +1233,8 @@ const char *dccp_feat_name(const u8 feat) if (feat > DCCPF_DATA_CHECKSUM && feat < DCCPF_MIN_CCID_SPECIFIC) return feature_names[DCCPF_RESERVED]; + if (feat == DCCPF_SEND_LEV_RATE) + return "Send Loss Event Rate"; if (feat >= DCCPF_MIN_CCID_SPECIFIC) return "CCID-specific"; diff --git a/net/dccp/feat.h b/net/dccp/feat.h index e272222c7ac..9b46e2a7866 100644 --- a/net/dccp/feat.h +++ b/net/dccp/feat.h @@ -3,17 +3,103 @@ /* * net/dccp/feat.h * - * An implementation of the DCCP protocol + * Feature negotiation for the DCCP protocol (RFC 4340, section 6) + * Copyright (c) 2008 Gerrit Renker <gerrit@erg.abdn.ac.uk> * Copyright (c) 2005 Andrea Bittau <a.bittau@cs.ucl.ac.uk> * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. */ - #include <linux/types.h> #include "dccp.h" +/* + * Known limit values + */ +/* Ack Ratio takes 2-byte integer values (11.3) */ +#define DCCPF_ACK_RATIO_MAX 0xFFFF +/* Wmin=32 and Wmax=2^46-1 from 7.5.2 */ +#define DCCPF_SEQ_WMIN 32 +#define DCCPF_SEQ_WMAX 0x3FFFFFFFFFFFull +/* Maximum number of SP values that fit in a single (Confirm) option */ +#define DCCP_FEAT_MAX_SP_VALS (DCCP_SINGLE_OPT_MAXLEN - 2) + +enum dccp_feat_type { + FEAT_AT_RX = 1, /* located at RX side of half-connection */ + FEAT_AT_TX = 2, /* located at TX side of half-connection */ + FEAT_SP = 4, /* server-priority reconciliation (6.3.1) */ + FEAT_NN = 8, /* non-negotiable reconciliation (6.3.2) */ + FEAT_UNKNOWN = 0xFF /* not understood or invalid feature */ +}; + +enum dccp_feat_state { + FEAT_DEFAULT = 0, /* using default values from 6.4 */ + FEAT_INITIALISING, /* feature is being initialised */ + FEAT_CHANGING, /* Change sent but not confirmed yet */ + FEAT_UNSTABLE, /* local modification in state CHANGING */ + FEAT_STABLE /* both ends (think they) agree */ +}; + +/** + * dccp_feat_val - Container for SP or NN feature values + * @nn: single NN value + * @sp.vec: single SP value plus optional preference list + * @sp.len: length of @sp.vec in bytes + */ +typedef union { + u64 nn; + struct { + u8 *vec; + u8 len; + } sp; +} dccp_feat_val; + +/** + * struct feat_entry - Data structure to perform feature negotiation + * @val: feature's current value (SP features may have preference list) + * @state: feature's current state + * @feat_num: one of %dccp_feature_numbers + * @needs_mandatory: whether Mandatory options should be sent + * @needs_confirm: whether to send a Confirm instead of a Change + * @empty_confirm: whether to send an empty Confirm (depends on @needs_confirm) + * @is_local: feature location (1) or feature-remote (0) + * @node: list pointers, entries arranged in FIFO order + */ +struct dccp_feat_entry { + dccp_feat_val val; + enum dccp_feat_state state:8; + u8 feat_num; + + bool needs_mandatory, + needs_confirm, + empty_confirm, + is_local; + + struct list_head node; +}; + +static inline u8 dccp_feat_genopt(struct dccp_feat_entry *entry) +{ + if (entry->needs_confirm) + return entry->is_local ? DCCPO_CONFIRM_L : DCCPO_CONFIRM_R; + return entry->is_local ? DCCPO_CHANGE_L : DCCPO_CHANGE_R; +} + +/** + * struct ccid_dependency - Track changes resulting from choosing a CCID + * @dependent_feat: one of %dccp_feature_numbers + * @is_local: local (1) or remote (0) @dependent_feat + * @is_mandatory: whether presence of @dependent_feat is mission-critical or not + * @val: corresponding default value for @dependent_feat (u8 is sufficient here) + */ +struct ccid_dependency { + u8 dependent_feat; + bool is_local:1, + is_mandatory:1; + u8 val; +}; + #ifdef CONFIG_IP_DCCP_DEBUG extern const char *dccp_feat_typename(const u8 type); extern const char *dccp_feat_name(const u8 feat); @@ -27,14 +113,30 @@ static inline void dccp_feat_debug(const u8 type, const u8 feat, const u8 val) #define dccp_feat_debug(type, feat, val) #endif /* CONFIG_IP_DCCP_DEBUG */ -extern int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature, - u8 *val, u8 len, gfp_t gfp); -extern int dccp_feat_change_recv(struct sock *sk, u8 type, u8 feature, - u8 *val, u8 len); -extern int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature, - u8 *val, u8 len); -extern void dccp_feat_clean(struct dccp_minisock *dmsk); -extern int dccp_feat_clone(struct sock *oldsk, struct sock *newsk); -extern int dccp_feat_init(struct dccp_minisock *dmsk); +extern int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local, + u8 const *list, u8 len); +extern int dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val); +extern int dccp_feat_parse_options(struct sock *, struct dccp_request_sock *, + u8 mand, u8 opt, u8 feat, u8 *val, u8 len); +extern int dccp_feat_clone_list(struct list_head const *, struct list_head *); +extern int dccp_feat_init(struct sock *sk); + +/* + * Encoding variable-length options and their maximum length. + * + * This affects NN options (SP options are all u8) and other variable-length + * options (see table 3 in RFC 4340). The limit is currently given the Sequence + * Window NN value (sec. 7.5.2) and the NDP count (sec. 7.7) option, all other + * options consume less than 6 bytes (timestamps are 4 bytes). + * When updating this constant (e.g. due to new internet drafts / RFCs), make + * sure that you also update all code which refers to it. + */ +#define DCCP_OPTVAL_MAXLEN 6 + +extern void dccp_encode_value_var(const u64 value, u8 *to, const u8 len); +extern u64 dccp_decode_value_var(const u8 *bf, const u8 len); +extern int dccp_insert_option_mandatory(struct sk_buff *skb); +extern int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat, + u8 *val, u8 len, bool repeat_first); #endif /* _DCCP_FEAT_H */ diff --git a/net/dccp/input.c b/net/dccp/input.c index 779d0ed9ae9..5eb443f656c 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -163,7 +163,7 @@ static void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); - if (dccp_msk(sk)->dccpms_send_ack_vector) + if (dp->dccps_hc_rx_ackvec != NULL) dccp_ackvec_check_rcv_ackno(dp->dccps_hc_rx_ackvec, sk, DCCP_SKB_CB(skb)->dccpd_ack_seq); } @@ -375,7 +375,7 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) dccp_event_ack_recv(sk, skb); - if (dccp_msk(sk)->dccpms_send_ack_vector && + if (dp->dccps_hc_rx_ackvec != NULL && dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_ACKVEC_STATE_RECEIVED)) @@ -421,20 +421,19 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, goto out_invalid_packet; } + /* + * If option processing (Step 8) failed, return 1 here so that + * dccp_v4_do_rcv() sends a Reset. The Reset code depends on + * the option type and is set in dccp_parse_options(). + */ if (dccp_parse_options(sk, NULL, skb)) - goto out_invalid_packet; + return 1; /* Obtain usec RTT sample from SYN exchange (used by CCID 3) */ if (likely(dp->dccps_options_received.dccpor_timestamp_echo)) dp->dccps_syn_rtt = dccp_sample_rtt(sk, 10 * (tstamp - dp->dccps_options_received.dccpor_timestamp_echo)); - if (dccp_msk(sk)->dccpms_send_ack_vector && - dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, - DCCP_SKB_CB(skb)->dccpd_seq, - DCCP_ACKVEC_STATE_RECEIVED)) - goto out_invalid_packet; /* FIXME: change error code */ - /* Stop the REQUEST timer */ inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); WARN_ON(sk->sk_send_head == NULL); @@ -475,6 +474,15 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, */ dccp_set_state(sk, DCCP_PARTOPEN); + /* + * If feature negotiation was successful, activate features now; + * an activation failure means that this host could not activate + * one ore more features (e.g. insufficient memory), which would + * leave at least one feature in an undefined state. + */ + if (dccp_feat_activate_values(sk, &dp->dccps_featneg)) + goto unable_to_proceed; + /* Make sure socket is routed, for correct metrics. */ icsk->icsk_af_ops->rebuild_header(sk); @@ -509,6 +517,16 @@ out_invalid_packet: /* dccp_v4_do_rcv will send a reset */ DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR; return 1; + +unable_to_proceed: + DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_ABORTED; + /* + * We mark this socket as no longer usable, so that the loop in + * dccp_sendmsg() terminates and the application gets notified. + */ + dccp_set_state(sk, DCCP_CLOSED); + sk->sk_err = ECOMM; + return 1; } static int dccp_rcv_respond_partopen_state_process(struct sock *sk, @@ -590,8 +608,6 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (inet_csk(sk)->icsk_af_ops->conn_request(sk, skb) < 0) return 1; - - /* FIXME: do congestion control initialization */ goto discard; } if (dh->dccph_type == DCCP_PKT_RESET) @@ -602,7 +618,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, return 1; } - if (sk->sk_state != DCCP_REQUESTING) { + if (sk->sk_state != DCCP_REQUESTING && sk->sk_state != DCCP_RESPOND) { if (dccp_check_seqno(sk, skb)) goto discard; @@ -615,7 +631,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) dccp_event_ack_recv(sk, skb); - if (dccp_msk(sk)->dccpms_send_ack_vector && + if (dp->dccps_hc_rx_ackvec != NULL && dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_ACKVEC_STATE_RECEIVED)) @@ -667,8 +683,6 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, return 1; case DCCP_REQUESTING: - /* FIXME: do congestion control initialization */ - queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len); if (queued >= 0) return queued; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index e3dfddab21c..d1dd95289b8 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -545,6 +545,7 @@ out: static void dccp_v4_reqsk_destructor(struct request_sock *req) { + dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg); kfree(inet_rsk(req)->opt); } @@ -595,7 +596,8 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (req == NULL) goto drop; - dccp_reqsk_init(req, skb); + if (dccp_reqsk_init(req, dccp_sk(sk), skb)) + goto drop_and_free; dreq = dccp_rsk(req); if (dccp_parse_options(sk, dreq, skb)) @@ -792,12 +794,10 @@ static int dccp_v4_rcv(struct sk_buff *skb) DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(dh); DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type; - dccp_pr_debug("%8.8s " - "src=%u.%u.%u.%u@%-5d " - "dst=%u.%u.%u.%u@%-5d seq=%llu", + dccp_pr_debug("%8.8s src=%pI4@%-5d dst=%pI4@%-5d seq=%llu", dccp_packet_name(dh->dccph_type), - NIPQUAD(iph->saddr), ntohs(dh->dccph_sport), - NIPQUAD(iph->daddr), ntohs(dh->dccph_dport), + &iph->saddr, ntohs(dh->dccph_sport), + &iph->daddr, ntohs(dh->dccph_dport), (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq); if (dccp_packet_without_ack(skb)) { @@ -938,6 +938,7 @@ static struct proto dccp_v4_prot = { .orphan_count = &dccp_orphan_count, .max_header = MAX_DCCP_HEADER, .obj_size = sizeof(struct dccp_sock), + .slab_flags = SLAB_DESTROY_BY_RCU, .rsk_prot = &dccp_request_sock_ops, .twsk_prot = &dccp_timewait_sock_ops, .h.hashinfo = &dccp_hashinfo, diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index d4ce1224e00..b963f35c65f 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -168,7 +168,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; } - err = xfrm_lookup(&dst, &fl, sk, 0); + err = xfrm_lookup(net, &dst, &fl, sk, 0); if (err < 0) { sk->sk_err_soft = -err; goto out; @@ -279,7 +279,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req) if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - err = xfrm_lookup(&dst, &fl, sk, 0); + err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0); if (err < 0) goto done; @@ -304,6 +304,7 @@ done: static void dccp_v6_reqsk_destructor(struct request_sock *req) { + dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg); if (inet6_rsk(req)->pktopts != NULL) kfree_skb(inet6_rsk(req)->pktopts); } @@ -342,7 +343,7 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) /* sk = NULL, but it is safe for now. RST socket required. */ if (!ip6_dst_lookup(ctl_sk, &skb->dst, &fl)) { - if (xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) { + if (xfrm_lookup(net, &skb->dst, &fl, NULL, 0) >= 0) { ip6_xmit(ctl_sk, skb, &fl, NULL, 0); DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); @@ -426,7 +427,8 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (req == NULL) goto drop; - dccp_reqsk_init(req, skb); + if (dccp_reqsk_init(req, dccp_sk(sk), skb)) + goto drop_and_free; dreq = dccp_rsk(req); if (dccp_parse_options(sk, dreq, skb)) @@ -567,7 +569,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0) + if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) goto out; } @@ -1002,7 +1004,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT); + err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); if (err < 0) { if (err == -EREMOTE) err = ip6_dst_blackhole(sk, &dst, &fl); @@ -1138,6 +1140,7 @@ static struct proto dccp_v6_prot = { .orphan_count = &dccp_orphan_count, .max_header = MAX_DCCP_HEADER, .obj_size = sizeof(struct dccp6_sock), + .slab_flags = SLAB_DESTROY_BY_RCU, .rsk_prot = &dccp6_request_sock_ops, .twsk_prot = &dccp6_timewait_sock_ops, .h.hashinfo = &dccp_hashinfo, diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index e6bf99e3e41..6821ae33dd3 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -45,11 +45,6 @@ EXPORT_SYMBOL_GPL(dccp_death_row); void dccp_minisock_init(struct dccp_minisock *dmsk) { dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window; - dmsk->dccpms_rx_ccid = sysctl_dccp_feat_rx_ccid; - dmsk->dccpms_tx_ccid = sysctl_dccp_feat_tx_ccid; - dmsk->dccpms_ack_ratio = sysctl_dccp_feat_ack_ratio; - dmsk->dccpms_send_ack_vector = sysctl_dccp_feat_send_ack_vector; - dmsk->dccpms_send_ndp_count = sysctl_dccp_feat_send_ndp_count; } void dccp_time_wait(struct sock *sk, int state, int timeo) @@ -112,7 +107,7 @@ struct sock *dccp_create_openreq_child(struct sock *sk, struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC); if (newsk != NULL) { - const struct dccp_request_sock *dreq = dccp_rsk(req); + struct dccp_request_sock *dreq = dccp_rsk(req); struct inet_connection_sock *newicsk = inet_csk(newsk); struct dccp_sock *newdp = dccp_sk(newsk); struct dccp_minisock *newdmsk = dccp_msk(newsk); @@ -125,35 +120,7 @@ struct sock *dccp_create_openreq_child(struct sock *sk, newdp->dccps_timestamp_time = dreq->dreq_timestamp_time; newicsk->icsk_rto = DCCP_TIMEOUT_INIT; - if (dccp_feat_clone(sk, newsk)) - goto out_free; - - if (newdmsk->dccpms_send_ack_vector) { - newdp->dccps_hc_rx_ackvec = - dccp_ackvec_alloc(GFP_ATOMIC); - if (unlikely(newdp->dccps_hc_rx_ackvec == NULL)) - goto out_free; - } - - newdp->dccps_hc_rx_ccid = - ccid_hc_rx_new(newdmsk->dccpms_rx_ccid, - newsk, GFP_ATOMIC); - newdp->dccps_hc_tx_ccid = - ccid_hc_tx_new(newdmsk->dccpms_tx_ccid, - newsk, GFP_ATOMIC); - if (unlikely(newdp->dccps_hc_rx_ccid == NULL || - newdp->dccps_hc_tx_ccid == NULL)) { - dccp_ackvec_free(newdp->dccps_hc_rx_ackvec); - ccid_hc_rx_delete(newdp->dccps_hc_rx_ccid, newsk); - ccid_hc_tx_delete(newdp->dccps_hc_tx_ccid, newsk); -out_free: - /* It is still raw copy of parent, so invalidate - * destructor and make plain sk_free() */ - newsk->sk_destruct = NULL; - sk_free(newsk); - return NULL; - } - + INIT_LIST_HEAD(&newdp->dccps_featneg); /* * Step 3: Process LISTEN state * @@ -184,6 +151,17 @@ out_free: dccp_set_seqno(&newdp->dccps_awl, max48(newdp->dccps_awl, newdp->dccps_iss)); + /* + * Activate features after initialising the sequence numbers, + * since CCID initialisation may depend on GSS, ISR, ISS etc. + */ + if (dccp_feat_activate_values(newsk, &dreq->dreq_featneg)) { + /* It is still raw copy of parent, so invalidate + * destructor and make plain sk_free() */ + newsk->sk_destruct = NULL; + sk_free(newsk); + return NULL; + } dccp_init_xmit_timers(newsk); DCCP_INC_STATS_BH(DCCP_MIB_PASSIVEOPENS); @@ -304,7 +282,8 @@ void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, EXPORT_SYMBOL_GPL(dccp_reqsk_send_ack); -void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb) +int dccp_reqsk_init(struct request_sock *req, + struct dccp_sock const *dp, struct sk_buff const *skb) { struct dccp_request_sock *dreq = dccp_rsk(req); @@ -313,6 +292,9 @@ void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb) inet_rsk(req)->acked = 0; req->rcv_wnd = sysctl_dccp_feat_sequence_window; dreq->dreq_timestamp_echo = 0; + + /* inherit feature negotiation options from listening socket */ + return dccp_feat_clone_list(&dp->dccps_featneg, &dreq->dreq_featneg); } EXPORT_SYMBOL_GPL(dccp_reqsk_init); diff --git a/net/dccp/options.c b/net/dccp/options.c index 0809b63cb05..7b1165c21f5 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -26,20 +26,21 @@ int sysctl_dccp_feat_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW; int sysctl_dccp_feat_rx_ccid = DCCPF_INITIAL_CCID; int sysctl_dccp_feat_tx_ccid = DCCPF_INITIAL_CCID; -int sysctl_dccp_feat_ack_ratio = DCCPF_INITIAL_ACK_RATIO; -int sysctl_dccp_feat_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR; -int sysctl_dccp_feat_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT; -static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len) +u64 dccp_decode_value_var(const u8 *bf, const u8 len) { - u32 value = 0; + u64 value = 0; + if (len >= DCCP_OPTVAL_MAXLEN) + value += ((u64)*bf++) << 40; + if (len > 4) + value += ((u64)*bf++) << 32; if (len > 3) - value += *bf++ << 24; + value += ((u64)*bf++) << 24; if (len > 2) - value += *bf++ << 16; + value += ((u64)*bf++) << 16; if (len > 1) - value += *bf++ << 8; + value += ((u64)*bf++) << 8; if (len > 0) value += *bf; @@ -64,7 +65,7 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, (dh->dccph_doff * 4); struct dccp_options_received *opt_recv = &dp->dccps_options_received; unsigned char opt, len; - unsigned char *value; + unsigned char *uninitialized_var(value); u32 elapsed_time; __be32 opt_val; int rc; @@ -131,41 +132,19 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, dccp_pr_debug("%s opt: NDP count=%llu\n", dccp_role(sk), (unsigned long long)opt_recv->dccpor_ndp); break; - case DCCPO_CHANGE_L: - /* fall through */ - case DCCPO_CHANGE_R: - if (pkt_type == DCCP_PKT_DATA) + case DCCPO_CHANGE_L ... DCCPO_CONFIRM_R: + if (pkt_type == DCCP_PKT_DATA) /* RFC 4340, 6 */ break; - if (len < 2) - goto out_invalid_option; - rc = dccp_feat_change_recv(sk, opt, *value, value + 1, - len - 1); - /* - * When there is a change error, change_recv is - * responsible for dealing with it. i.e. reply with an - * empty confirm. - * If the change was mandatory, then we need to die. - */ - if (rc && mandatory) - goto out_invalid_option; - break; - case DCCPO_CONFIRM_L: - /* fall through */ - case DCCPO_CONFIRM_R: - if (pkt_type == DCCP_PKT_DATA) - break; - if (len < 2) /* FIXME this disallows empty confirm */ - goto out_invalid_option; - if (dccp_feat_confirm_recv(sk, opt, *value, - value + 1, len - 1)) - goto out_invalid_option; + rc = dccp_feat_parse_options(sk, dreq, mandatory, opt, + *value, value + 1, len - 1); + if (rc) + goto out_featneg_failed; break; case DCCPO_ACK_VECTOR_0: case DCCPO_ACK_VECTOR_1: if (dccp_packet_without_ack(skb)) /* RFC 4340, 11.4 */ break; - - if (dccp_msk(sk)->dccpms_send_ack_vector && + if (dp->dccps_hc_rx_ackvec != NULL && dccp_ackvec_parse(sk, skb, &ackno, opt, value, len)) goto out_invalid_option; break; @@ -289,8 +268,10 @@ out_nonsensical_length: out_invalid_option: DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT); - DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_OPTION_ERROR; - DCCP_WARN("DCCP(%p): invalid option %d, len=%d", sk, opt, len); + rc = DCCP_RESET_CODE_OPTION_ERROR; +out_featneg_failed: + DCCP_WARN("DCCP(%p): Option %d (len=%d) error=%u\n", sk, opt, len, rc); + DCCP_SKB_CB(skb)->dccpd_reset_code = rc; DCCP_SKB_CB(skb)->dccpd_reset_data[0] = opt; DCCP_SKB_CB(skb)->dccpd_reset_data[1] = len > 0 ? value[0] : 0; DCCP_SKB_CB(skb)->dccpd_reset_data[2] = len > 1 ? value[1] : 0; @@ -299,9 +280,12 @@ out_invalid_option: EXPORT_SYMBOL_GPL(dccp_parse_options); -static void dccp_encode_value_var(const u32 value, unsigned char *to, - const unsigned int len) +void dccp_encode_value_var(const u64 value, u8 *to, const u8 len) { + if (len >= DCCP_OPTVAL_MAXLEN) + *to++ = (value & 0xFF0000000000ull) >> 40; + if (len > 4) + *to++ = (value & 0xFF00000000ull) >> 32; if (len > 3) *to++ = (value & 0xFF000000) >> 24; if (len > 2) @@ -461,23 +445,61 @@ static int dccp_insert_option_timestamp_echo(struct dccp_sock *dp, return 0; } -static int dccp_insert_feat_opt(struct sk_buff *skb, u8 type, u8 feat, - u8 *val, u8 len) +/** + * dccp_insert_option_mandatory - Mandatory option (5.8.2) + * Note that since we are using skb_push, this function needs to be called + * _after_ inserting the option it is supposed to influence (stack order). + */ +int dccp_insert_option_mandatory(struct sk_buff *skb) +{ + if (DCCP_SKB_CB(skb)->dccpd_opt_len >= DCCP_MAX_OPT_LEN) + return -1; + + DCCP_SKB_CB(skb)->dccpd_opt_len++; + *skb_push(skb, 1) = DCCPO_MANDATORY; + return 0; +} + +/** + * dccp_insert_fn_opt - Insert single Feature-Negotiation option into @skb + * @type: %DCCPO_CHANGE_L, %DCCPO_CHANGE_R, %DCCPO_CONFIRM_L, %DCCPO_CONFIRM_R + * @feat: one out of %dccp_feature_numbers + * @val: NN value or SP array (preferred element first) to copy + * @len: true length of @val in bytes (excluding first element repetition) + * @repeat_first: whether to copy the first element of @val twice + * The last argument is used to construct Confirm options, where the preferred + * value and the preference list appear separately (RFC 4340, 6.3.1). Preference + * lists are kept such that the preferred entry is always first, so we only need + * to copy twice, and avoid the overhead of cloning into a bigger array. + */ +int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat, + u8 *val, u8 len, bool repeat_first) { - u8 *to; + u8 tot_len, *to; - if (DCCP_SKB_CB(skb)->dccpd_opt_len + len + 3 > DCCP_MAX_OPT_LEN) { - DCCP_WARN("packet too small for feature %d option!\n", feat); + /* take the `Feature' field and possible repetition into account */ + if (len > (DCCP_SINGLE_OPT_MAXLEN - 2)) { + DCCP_WARN("length %u for feature %u too large\n", len, feat); return -1; } - DCCP_SKB_CB(skb)->dccpd_opt_len += len + 3; + if (unlikely(val == NULL || len == 0)) + len = repeat_first = 0; + tot_len = 3 + repeat_first + len; + + if (DCCP_SKB_CB(skb)->dccpd_opt_len + tot_len > DCCP_MAX_OPT_LEN) { + DCCP_WARN("packet too small for feature %d option!\n", feat); + return -1; + } + DCCP_SKB_CB(skb)->dccpd_opt_len += tot_len; - to = skb_push(skb, len + 3); + to = skb_push(skb, tot_len); *to++ = type; - *to++ = len + 3; + *to++ = tot_len; *to++ = feat; + if (repeat_first) + *to++ = *val; if (len) memcpy(to, val, len); @@ -487,69 +509,6 @@ static int dccp_insert_feat_opt(struct sk_buff *skb, u8 type, u8 feat, return 0; } -static int dccp_insert_options_feat(struct sock *sk, struct sk_buff *skb) -{ - struct dccp_sock *dp = dccp_sk(sk); - struct dccp_minisock *dmsk = dccp_msk(sk); - struct dccp_opt_pend *opt, *next; - int change = 0; - - /* confirm any options [NN opts] */ - list_for_each_entry_safe(opt, next, &dmsk->dccpms_conf, dccpop_node) { - dccp_insert_feat_opt(skb, opt->dccpop_type, - opt->dccpop_feat, opt->dccpop_val, - opt->dccpop_len); - /* fear empty confirms */ - if (opt->dccpop_val) - kfree(opt->dccpop_val); - kfree(opt); - } - INIT_LIST_HEAD(&dmsk->dccpms_conf); - - /* see which features we need to send */ - list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) { - /* see if we need to send any confirm */ - if (opt->dccpop_sc) { - dccp_insert_feat_opt(skb, opt->dccpop_type + 1, - opt->dccpop_feat, - opt->dccpop_sc->dccpoc_val, - opt->dccpop_sc->dccpoc_len); - - BUG_ON(!opt->dccpop_sc->dccpoc_val); - kfree(opt->dccpop_sc->dccpoc_val); - kfree(opt->dccpop_sc); - opt->dccpop_sc = NULL; - } - - /* any option not confirmed, re-send it */ - if (!opt->dccpop_conf) { - dccp_insert_feat_opt(skb, opt->dccpop_type, - opt->dccpop_feat, opt->dccpop_val, - opt->dccpop_len); - change++; - } - } - - /* Retransmit timer. - * If this is the master listening sock, we don't set a timer on it. It - * should be fine because if the dude doesn't receive our RESPONSE - * [which will contain the CHANGE] he will send another REQUEST which - * will "retrnasmit" the change. - */ - if (change && dp->dccps_role != DCCP_ROLE_LISTEN) { - dccp_pr_debug("reset feat negotiation timer %p\n", sk); - - /* XXX don't reset the timer on re-transmissions. I.e. reset it - * only when sending new stuff i guess. Currently the timer - * never backs off because on re-transmission it just resets it! - */ - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - inet_csk(sk)->icsk_rto, DCCP_RTO_MAX); - } - - return 0; -} - /* The length of all options needs to be a multiple of 4 (5.8) */ static void dccp_insert_option_padding(struct sk_buff *skb) { @@ -565,19 +524,31 @@ static void dccp_insert_option_padding(struct sk_buff *skb) int dccp_insert_options(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); - struct dccp_minisock *dmsk = dccp_msk(sk); DCCP_SKB_CB(skb)->dccpd_opt_len = 0; - if (dmsk->dccpms_send_ndp_count && - dccp_insert_option_ndp(sk, skb)) + if (dp->dccps_send_ndp_count && dccp_insert_option_ndp(sk, skb)) return -1; - if (!dccp_packet_without_ack(skb)) { - if (dmsk->dccpms_send_ack_vector && - dccp_ackvec_pending(dp->dccps_hc_rx_ackvec) && - dccp_insert_option_ackvec(sk, skb)) + if (DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATA) { + + /* Feature Negotiation */ + if (dccp_feat_insert_opts(dp, NULL, skb)) return -1; + + if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_REQUEST) { + /* + * Obtain RTT sample from Request/Response exchange. + * This is currently used in CCID 3 initialisation. + */ + if (dccp_insert_option_timestamp(sk, skb)) + return -1; + + } else if (dp->dccps_hc_rx_ackvec != NULL && + dccp_ackvec_pending(dp->dccps_hc_rx_ackvec) && + dccp_insert_option_ackvec(sk, skb)) { + return -1; + } } if (dp->dccps_hc_rx_insert_options) { @@ -586,21 +557,6 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb) dp->dccps_hc_rx_insert_options = 0; } - /* Feature negotiation */ - /* Data packets can't do feat negotiation */ - if (DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATA && - DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATAACK && - dccp_insert_options_feat(sk, skb)) - return -1; - - /* - * Obtain RTT sample from Request/Response exchange. - * This is currently used in CCID 3 initialisation. - */ - if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_REQUEST && - dccp_insert_option_timestamp(sk, skb)) - return -1; - if (dp->dccps_timestamp_echo != 0 && dccp_insert_option_timestamp_echo(dp, NULL, skb)) return -1; @@ -613,6 +569,9 @@ int dccp_insert_options_rsk(struct dccp_request_sock *dreq, struct sk_buff *skb) { DCCP_SKB_CB(skb)->dccpd_opt_len = 0; + if (dccp_feat_insert_opts(NULL, dreq, skb)) + return -1; + if (dreq->dreq_timestamp_echo != 0 && dccp_insert_option_timestamp_echo(NULL, dreq, skb)) return -1; diff --git a/net/dccp/output.c b/net/dccp/output.c index 809d803d500..22a618af489 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -175,7 +175,7 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu) * make it a multiple of 4 */ - cur_mps -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4; + cur_mps -= roundup(5 + 6 + 10 + 6 + 6 + 6, 4); /* And store cached results */ icsk->icsk_pmtu_cookie = pmtu; @@ -339,10 +339,12 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE; DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss; - if (dccp_insert_options_rsk(dreq, skb)) { - kfree_skb(skb); - return NULL; - } + /* Resolve feature dependencies resulting from choice of CCID */ + if (dccp_feat_server_ccid_dependencies(dreq)) + goto response_failed; + + if (dccp_insert_options_rsk(dreq, skb)) + goto response_failed; /* Build and checksum header */ dh = dccp_zeroed_hdr(skb, dccp_header_size); @@ -363,6 +365,9 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, inet_rsk(req)->acked = 1; DCCP_INC_STATS(DCCP_MIB_OUTSEGS); return skb; +response_failed: + kfree_skb(skb); + return NULL; } EXPORT_SYMBOL_GPL(dccp_make_response); @@ -469,6 +474,10 @@ int dccp_connect(struct sock *sk) struct sk_buff *skb; struct inet_connection_sock *icsk = inet_csk(sk); + /* do not connect if feature negotiation setup fails */ + if (dccp_feat_finalise_settings(dccp_sk(sk))) + return -EPROTO; + dccp_connect_init(sk); skb = alloc_skb(sk->sk_prot->max_header, sk->sk_allocation); diff --git a/net/dccp/probe.c b/net/dccp/probe.c index 81368a7f537..37731da4148 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -74,30 +74,27 @@ static void printl(const char *fmt, ...) static int jdccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t size) { - const struct dccp_minisock *dmsk = dccp_msk(sk); const struct inet_sock *inet = inet_sk(sk); - const struct ccid3_hc_tx_sock *hctx; + struct ccid3_hc_tx_sock *hctx = NULL; - if (dmsk->dccpms_tx_ccid == DCCPC_CCID3) + if (ccid_get_current_tx_ccid(dccp_sk(sk)) == DCCPC_CCID3) hctx = ccid3_hc_tx_sk(sk); - else - hctx = NULL; if (port == 0 || ntohs(inet->dport) == port || ntohs(inet->sport) == port) { if (hctx) - printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %d %d %d %u " + printl("%pI4:%u %pI4:%u %d %d %d %d %u " "%llu %llu %d\n", - NIPQUAD(inet->saddr), ntohs(inet->sport), - NIPQUAD(inet->daddr), ntohs(inet->dport), size, + &inet->saddr, ntohs(inet->sport), + &inet->daddr, ntohs(inet->dport), size, hctx->ccid3hctx_s, hctx->ccid3hctx_rtt, hctx->ccid3hctx_p, hctx->ccid3hctx_x_calc, hctx->ccid3hctx_x_recv >> 6, hctx->ccid3hctx_x >> 6, hctx->ccid3hctx_t_ipi); else - printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d\n", - NIPQUAD(inet->saddr), ntohs(inet->sport), - NIPQUAD(inet->daddr), ntohs(inet->dport), size); + printl("%pI4:%u %pI4:%u %d\n", + &inet->saddr, ntohs(inet->sport), + &inet->daddr, ntohs(inet->dport), size); } jprobe_return(); diff --git a/net/dccp/proto.c b/net/dccp/proto.c index d0bd3481976..d5c2bacb713 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -40,16 +40,10 @@ DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly; EXPORT_SYMBOL_GPL(dccp_statistics); -atomic_t dccp_orphan_count = ATOMIC_INIT(0); - +struct percpu_counter dccp_orphan_count; EXPORT_SYMBOL_GPL(dccp_orphan_count); -struct inet_hashinfo __cacheline_aligned dccp_hashinfo = { - .lhash_lock = RW_LOCK_UNLOCKED, - .lhash_users = ATOMIC_INIT(0), - .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait), -}; - +struct inet_hashinfo dccp_hashinfo; EXPORT_SYMBOL_GPL(dccp_hashinfo); /* the maximum queue length for tx in packets. 0 is no limit */ @@ -67,6 +61,9 @@ void dccp_set_state(struct sock *sk, const int state) case DCCP_OPEN: if (oldstate != DCCP_OPEN) DCCP_INC_STATS(DCCP_MIB_CURRESTAB); + /* Client retransmits all Confirm options until entering OPEN */ + if (oldstate == DCCP_PARTOPEN) + dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg); break; case DCCP_CLOSED: @@ -175,7 +172,6 @@ EXPORT_SYMBOL_GPL(dccp_state_name); int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) { struct dccp_sock *dp = dccp_sk(sk); - struct dccp_minisock *dmsk = dccp_msk(sk); struct inet_connection_sock *icsk = inet_csk(sk); dccp_minisock_init(&dp->dccps_minisock); @@ -193,45 +189,10 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) dccp_init_xmit_timers(sk); - /* - * FIXME: We're hardcoding the CCID, and doing this at this point makes - * the listening (master) sock get CCID control blocks, which is not - * necessary, but for now, to not mess with the test userspace apps, - * lets leave it here, later the real solution is to do this in a - * setsockopt(CCIDs-I-want/accept). -acme - */ - if (likely(ctl_sock_initialized)) { - int rc = dccp_feat_init(dmsk); - - if (rc) - return rc; - - if (dmsk->dccpms_send_ack_vector) { - dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL); - if (dp->dccps_hc_rx_ackvec == NULL) - return -ENOMEM; - } - dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid, - sk, GFP_KERNEL); - dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid, - sk, GFP_KERNEL); - if (unlikely(dp->dccps_hc_rx_ccid == NULL || - dp->dccps_hc_tx_ccid == NULL)) { - ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk); - ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); - if (dmsk->dccpms_send_ack_vector) { - dccp_ackvec_free(dp->dccps_hc_rx_ackvec); - dp->dccps_hc_rx_ackvec = NULL; - } - dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; - return -ENOMEM; - } - } else { - /* control socket doesn't need feat nego */ - INIT_LIST_HEAD(&dmsk->dccpms_pending); - INIT_LIST_HEAD(&dmsk->dccpms_conf); - } - + INIT_LIST_HEAD(&dp->dccps_featneg); + /* control socket doesn't need feat nego */ + if (likely(ctl_sock_initialized)) + return dccp_feat_init(sk); return 0; } @@ -240,7 +201,6 @@ EXPORT_SYMBOL_GPL(dccp_init_sock); void dccp_destroy_sock(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); - struct dccp_minisock *dmsk = dccp_msk(sk); /* * DCCP doesn't use sk_write_queue, just sk_send_head @@ -258,7 +218,7 @@ void dccp_destroy_sock(struct sock *sk) kfree(dp->dccps_service_list); dp->dccps_service_list = NULL; - if (dmsk->dccpms_send_ack_vector) { + if (dp->dccps_hc_rx_ackvec != NULL) { dccp_ackvec_free(dp->dccps_hc_rx_ackvec); dp->dccps_hc_rx_ackvec = NULL; } @@ -267,7 +227,7 @@ void dccp_destroy_sock(struct sock *sk) dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; /* clean up feature negotiation state */ - dccp_feat_clean(dmsk); + dccp_feat_list_purge(&dp->dccps_featneg); } EXPORT_SYMBOL_GPL(dccp_destroy_sock); @@ -277,6 +237,9 @@ static inline int dccp_listen_start(struct sock *sk, int backlog) struct dccp_sock *dp = dccp_sk(sk); dp->dccps_role = DCCP_ROLE_LISTEN; + /* do not start to listen if feature negotiation setup fails */ + if (dccp_feat_finalise_settings(dp)) + return -EPROTO; return inet_csk_listen_start(sk, backlog); } @@ -466,42 +429,70 @@ static int dccp_setsockopt_service(struct sock *sk, const __be32 service, return 0; } -/* byte 1 is feature. the rest is the preference list */ -static int dccp_setsockopt_change(struct sock *sk, int type, - struct dccp_so_feat __user *optval) +static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx) { - struct dccp_so_feat opt; - u8 *val; - int rc; + u8 *list, len; + int i, rc; - if (copy_from_user(&opt, optval, sizeof(opt))) - return -EFAULT; + if (cscov < 0 || cscov > 15) + return -EINVAL; /* - * rfc4340: 6.1. Change Options + * Populate a list of permissible values, in the range cscov...15. This + * is necessary since feature negotiation of single values only works if + * both sides incidentally choose the same value. Since the list starts + * lowest-value first, negotiation will pick the smallest shared value. */ - if (opt.dccpsf_len < 1) + if (cscov == 0) + return 0; + len = 16 - cscov; + + list = kmalloc(len, GFP_KERNEL); + if (list == NULL) + return -ENOBUFS; + + for (i = 0; i < len; i++) + list[i] = cscov++; + + rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len); + + if (rc == 0) { + if (rx) + dccp_sk(sk)->dccps_pcrlen = cscov; + else + dccp_sk(sk)->dccps_pcslen = cscov; + } + kfree(list); + return rc; +} + +static int dccp_setsockopt_ccid(struct sock *sk, int type, + char __user *optval, int optlen) +{ + u8 *val; + int rc = 0; + + if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS) return -EINVAL; - val = kmalloc(opt.dccpsf_len, GFP_KERNEL); - if (!val) + val = kmalloc(optlen, GFP_KERNEL); + if (val == NULL) return -ENOMEM; - if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) { - rc = -EFAULT; - goto out_free_val; + if (copy_from_user(val, optval, optlen)) { + kfree(val); + return -EFAULT; } - rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat, - val, opt.dccpsf_len, GFP_KERNEL); - if (rc) - goto out_free_val; + lock_sock(sk); + if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID) + rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen); -out: - return rc; + if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID)) + rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen); + release_sock(sk); -out_free_val: kfree(val); - goto out; + return rc; } static int do_dccp_setsockopt(struct sock *sk, int level, int optname, @@ -510,7 +501,21 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname, struct dccp_sock *dp = dccp_sk(sk); int val, err = 0; - if (optlen < sizeof(int)) + switch (optname) { + case DCCP_SOCKOPT_PACKET_SIZE: + DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n"); + return 0; + case DCCP_SOCKOPT_CHANGE_L: + case DCCP_SOCKOPT_CHANGE_R: + DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n"); + return 0; + case DCCP_SOCKOPT_CCID: + case DCCP_SOCKOPT_RX_CCID: + case DCCP_SOCKOPT_TX_CCID: + return dccp_setsockopt_ccid(sk, optname, optval, optlen); + } + + if (optlen < (int)sizeof(int)) return -EINVAL; if (get_user(val, (int __user *)optval)) @@ -521,53 +526,24 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname, lock_sock(sk); switch (optname) { - case DCCP_SOCKOPT_PACKET_SIZE: - DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n"); - err = 0; - break; - case DCCP_SOCKOPT_CHANGE_L: - if (optlen != sizeof(struct dccp_so_feat)) - err = -EINVAL; - else - err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L, - (struct dccp_so_feat __user *) - optval); - break; - case DCCP_SOCKOPT_CHANGE_R: - if (optlen != sizeof(struct dccp_so_feat)) - err = -EINVAL; - else - err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R, - (struct dccp_so_feat __user *) - optval); - break; case DCCP_SOCKOPT_SERVER_TIMEWAIT: if (dp->dccps_role != DCCP_ROLE_SERVER) err = -EOPNOTSUPP; else dp->dccps_server_timewait = (val != 0); break; - case DCCP_SOCKOPT_SEND_CSCOV: /* sender side, RFC 4340, sec. 9.2 */ - if (val < 0 || val > 15) - err = -EINVAL; - else - dp->dccps_pcslen = val; + case DCCP_SOCKOPT_SEND_CSCOV: + err = dccp_setsockopt_cscov(sk, val, false); break; - case DCCP_SOCKOPT_RECV_CSCOV: /* receiver side, RFC 4340 sec. 9.2.1 */ - if (val < 0 || val > 15) - err = -EINVAL; - else { - dp->dccps_pcrlen = val; - /* FIXME: add feature negotiation, - * ChangeL(MinimumChecksumCoverage, val) */ - } + case DCCP_SOCKOPT_RECV_CSCOV: + err = dccp_setsockopt_cscov(sk, val, true); break; default: err = -ENOPROTOOPT; break; } - release_sock(sk); + return err; } @@ -648,6 +624,18 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname, case DCCP_SOCKOPT_GET_CUR_MPS: val = dp->dccps_mss_cache; break; + case DCCP_SOCKOPT_AVAILABLE_CCIDS: + return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen); + case DCCP_SOCKOPT_TX_CCID: + val = ccid_get_current_tx_ccid(dp); + if (val < 0) + return -ENOPROTOOPT; + break; + case DCCP_SOCKOPT_RX_CCID: + val = ccid_get_current_rx_ccid(dp); + if (val < 0) + return -ENOPROTOOPT; + break; case DCCP_SOCKOPT_SERVER_TIMEWAIT: val = dp->dccps_server_timewait; break; @@ -976,7 +964,7 @@ adjudge_to_death: state = sk->sk_state; sock_hold(sk); sock_orphan(sk); - atomic_inc(sk->sk_prot->orphan_count); + percpu_counter_inc(sk->sk_prot->orphan_count); /* * It is the last release_sock in its life. It will remove backlog. @@ -1040,17 +1028,21 @@ static int __init dccp_init(void) { unsigned long goal; int ehash_order, bhash_order, i; - int rc = -ENOBUFS; + int rc; BUILD_BUG_ON(sizeof(struct dccp_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb)); - + rc = percpu_counter_init(&dccp_orphan_count, 0); + if (rc) + goto out; + rc = -ENOBUFS; + inet_hashinfo_init(&dccp_hashinfo); dccp_hashinfo.bind_bucket_cachep = kmem_cache_create("dccp_bind_bucket", sizeof(struct inet_bind_bucket), 0, SLAB_HWCACHE_ALIGN, NULL); if (!dccp_hashinfo.bind_bucket_cachep) - goto out; + goto out_free_percpu; /* * Size and allocate the main established and bind bucket @@ -1084,8 +1076,8 @@ static int __init dccp_init(void) } for (i = 0; i < dccp_hashinfo.ehash_size; i++) { - INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain); - INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain); + INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i); + INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].twchain, i); } if (inet_ehash_locks_alloc(&dccp_hashinfo)) @@ -1143,6 +1135,8 @@ out_free_dccp_ehash: out_free_bind_bucket_cachep: kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); dccp_hashinfo.bind_bucket_cachep = NULL; +out_free_percpu: + percpu_counter_destroy(&dccp_orphan_count); goto out; } diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c index 21295993fdb..018e210875e 100644 --- a/net/dccp/sysctl.c +++ b/net/dccp/sysctl.c @@ -41,27 +41,6 @@ static struct ctl_table dccp_default_table[] = { .proc_handler = proc_dointvec, }, { - .procname = "ack_ratio", - .data = &sysctl_dccp_feat_ack_ratio, - .maxlen = sizeof(sysctl_dccp_feat_ack_ratio), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "send_ackvec", - .data = &sysctl_dccp_feat_send_ack_vector, - .maxlen = sizeof(sysctl_dccp_feat_send_ack_vector), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "send_ndp", - .data = &sysctl_dccp_feat_send_ndp_count, - .maxlen = sizeof(sysctl_dccp_feat_send_ndp_count), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { .procname = "request_retries", .data = &sysctl_dccp_request_retries, .maxlen = sizeof(sysctl_dccp_request_retries), diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 54b3c7e9e01..162d1e683c3 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -87,17 +87,6 @@ static void dccp_retransmit_timer(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); - /* retransmit timer is used for feature negotiation throughout - * connection. In this case, no packet is re-transmitted, but rather an - * ack is generated and pending changes are placed into its options. - */ - if (sk->sk_send_head == NULL) { - dccp_pr_debug("feat negotiation retransmit timeout %p\n", sk); - if (sk->sk_state == DCCP_OPEN) - dccp_send_ack(sk); - goto backoff; - } - /* * More than than 4MSL (8 minutes) has passed, a RESET(aborted) was * sent, no need to retransmit, this sock is dead. @@ -126,7 +115,6 @@ static void dccp_retransmit_timer(struct sock *sk) return; } -backoff: icsk->icsk_backoff++; icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX); diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 3c23ab33dbc..cf0e1849929 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -167,7 +167,7 @@ static struct hlist_head *dn_find_list(struct sock *sk) if (scp->addr.sdn_flags & SDF_WILD) return hlist_empty(&dn_wild_sk) ? &dn_wild_sk : NULL; - return &dn_sk_hash[dn_ntohs(scp->addrloc) & DN_SK_HASH_MASK]; + return &dn_sk_hash[le16_to_cpu(scp->addrloc) & DN_SK_HASH_MASK]; } /* @@ -181,7 +181,7 @@ static int check_port(__le16 port) if (port == 0) return -1; - sk_for_each(sk, node, &dn_sk_hash[dn_ntohs(port) & DN_SK_HASH_MASK]) { + sk_for_each(sk, node, &dn_sk_hash[le16_to_cpu(port) & DN_SK_HASH_MASK]) { struct dn_scp *scp = DN_SK(sk); if (scp->addrloc == port) return -1; @@ -195,12 +195,12 @@ static unsigned short port_alloc(struct sock *sk) static unsigned short port = 0x2000; unsigned short i_port = port; - while(check_port(dn_htons(++port)) != 0) { + while(check_port(cpu_to_le16(++port)) != 0) { if (port == i_port) return 0; } - scp->addrloc = dn_htons(port); + scp->addrloc = cpu_to_le16(port); return 1; } @@ -255,7 +255,7 @@ static struct hlist_head *listen_hash(struct sockaddr_dn *addr) if (hash == 0) { hash = addr->sdn_objnamel; - for(i = 0; i < dn_ntohs(addr->sdn_objnamel); i++) { + for(i = 0; i < le16_to_cpu(addr->sdn_objnamel); i++) { hash ^= addr->sdn_objname[i]; hash ^= (hash << 3); } @@ -297,16 +297,16 @@ int dn_sockaddr2username(struct sockaddr_dn *sdn, unsigned char *buf, unsigned c break; case 1: *buf++ = 0; - *buf++ = dn_ntohs(sdn->sdn_objnamel); - memcpy(buf, sdn->sdn_objname, dn_ntohs(sdn->sdn_objnamel)); - len = 3 + dn_ntohs(sdn->sdn_objnamel); + *buf++ = le16_to_cpu(sdn->sdn_objnamel); + memcpy(buf, sdn->sdn_objname, le16_to_cpu(sdn->sdn_objnamel)); + len = 3 + le16_to_cpu(sdn->sdn_objnamel); break; case 2: memset(buf, 0, 5); buf += 5; - *buf++ = dn_ntohs(sdn->sdn_objnamel); - memcpy(buf, sdn->sdn_objname, dn_ntohs(sdn->sdn_objnamel)); - len = 7 + dn_ntohs(sdn->sdn_objnamel); + *buf++ = le16_to_cpu(sdn->sdn_objnamel); + memcpy(buf, sdn->sdn_objname, le16_to_cpu(sdn->sdn_objnamel)); + len = 7 + le16_to_cpu(sdn->sdn_objnamel); break; } @@ -327,7 +327,7 @@ int dn_username2sockaddr(unsigned char *data, int len, struct sockaddr_dn *sdn, int namel = 12; sdn->sdn_objnum = 0; - sdn->sdn_objnamel = dn_htons(0); + sdn->sdn_objnamel = cpu_to_le16(0); memset(sdn->sdn_objname, 0, DN_MAXOBJL); if (len < 2) @@ -361,13 +361,13 @@ int dn_username2sockaddr(unsigned char *data, int len, struct sockaddr_dn *sdn, if (len < 0) return -1; - sdn->sdn_objnamel = dn_htons(*data++); - len -= dn_ntohs(sdn->sdn_objnamel); + sdn->sdn_objnamel = cpu_to_le16(*data++); + len -= le16_to_cpu(sdn->sdn_objnamel); - if ((len < 0) || (dn_ntohs(sdn->sdn_objnamel) > namel)) + if ((len < 0) || (le16_to_cpu(sdn->sdn_objnamel) > namel)) return -1; - memcpy(sdn->sdn_objname, data, dn_ntohs(sdn->sdn_objnamel)); + memcpy(sdn->sdn_objname, data, le16_to_cpu(sdn->sdn_objnamel)); return size - len; } @@ -391,7 +391,7 @@ struct sock *dn_sklist_find_listener(struct sockaddr_dn *addr) continue; if (scp->addr.sdn_objnamel != addr->sdn_objnamel) continue; - if (memcmp(scp->addr.sdn_objname, addr->sdn_objname, dn_ntohs(addr->sdn_objnamel)) != 0) + if (memcmp(scp->addr.sdn_objname, addr->sdn_objname, le16_to_cpu(addr->sdn_objnamel)) != 0) continue; } sock_hold(sk); @@ -419,7 +419,7 @@ struct sock *dn_find_by_skb(struct sk_buff *skb) struct dn_scp *scp; read_lock(&dn_hash_lock); - sk_for_each(sk, node, &dn_sk_hash[dn_ntohs(cb->dst_port) & DN_SK_HASH_MASK]) { + sk_for_each(sk, node, &dn_sk_hash[le16_to_cpu(cb->dst_port) & DN_SK_HASH_MASK]) { scp = DN_SK(sk); if (cb->src != dn_saddr2dn(&scp->peer)) continue; @@ -734,10 +734,10 @@ static int dn_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (saddr->sdn_family != AF_DECnet) return -EINVAL; - if (dn_ntohs(saddr->sdn_nodeaddrl) && (dn_ntohs(saddr->sdn_nodeaddrl) != 2)) + if (le16_to_cpu(saddr->sdn_nodeaddrl) && (le16_to_cpu(saddr->sdn_nodeaddrl) != 2)) return -EINVAL; - if (dn_ntohs(saddr->sdn_objnamel) > DN_MAXOBJL) + if (le16_to_cpu(saddr->sdn_objnamel) > DN_MAXOBJL) return -EINVAL; if (saddr->sdn_flags & ~SDF_WILD) @@ -748,7 +748,7 @@ static int dn_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) return -EACCES; if (!(saddr->sdn_flags & SDF_WILD)) { - if (dn_ntohs(saddr->sdn_nodeaddrl)) { + if (le16_to_cpu(saddr->sdn_nodeaddrl)) { read_lock(&dev_base_lock); ldev = NULL; for_each_netdev(&init_net, dev) { @@ -799,15 +799,15 @@ static int dn_auto_bind(struct socket *sock) if ((scp->accessdata.acc_accl != 0) && (scp->accessdata.acc_accl <= 12)) { - scp->addr.sdn_objnamel = dn_htons(scp->accessdata.acc_accl); - memcpy(scp->addr.sdn_objname, scp->accessdata.acc_acc, dn_ntohs(scp->addr.sdn_objnamel)); + scp->addr.sdn_objnamel = cpu_to_le16(scp->accessdata.acc_accl); + memcpy(scp->addr.sdn_objname, scp->accessdata.acc_acc, le16_to_cpu(scp->addr.sdn_objnamel)); scp->accessdata.acc_accl = 0; memset(scp->accessdata.acc_acc, 0, 40); } /* End of compatibility stuff */ - scp->addr.sdn_add.a_len = dn_htons(2); + scp->addr.sdn_add.a_len = cpu_to_le16(2); rv = dn_dev_bind_default((__le16 *)scp->addr.sdn_add.a_addr); if (rv == 0) { rv = dn_hash_sock(sk); @@ -1027,7 +1027,7 @@ static void dn_user_copy(struct sk_buff *skb, struct optdata_dn *opt) u16 len = *ptr++; /* yes, it's 8bit on the wire */ BUG_ON(len > 16); /* we've checked the contents earlier */ - opt->opt_optl = dn_htons(len); + opt->opt_optl = cpu_to_le16(len); opt->opt_status = 0; memcpy(opt->opt_data, ptr, len); skb_pull(skb, len + 1); @@ -1375,7 +1375,7 @@ static int __dn_setsockopt(struct socket *sock, int level,int optname, char __us if (optlen != sizeof(struct optdata_dn)) return -EINVAL; - if (dn_ntohs(u.opt.opt_optl) > 16) + if (le16_to_cpu(u.opt.opt_optl) > 16) return -EINVAL; memcpy(&scp->conndata_out, &u.opt, optlen); @@ -1388,7 +1388,7 @@ static int __dn_setsockopt(struct socket *sock, int level,int optname, char __us if (optlen != sizeof(struct optdata_dn)) return -EINVAL; - if (dn_ntohs(u.opt.opt_optl) > 16) + if (le16_to_cpu(u.opt.opt_optl) > 16) return -EINVAL; memcpy(&scp->discdata_out, &u.opt, optlen); @@ -2213,12 +2213,12 @@ static void dn_printable_object(struct sockaddr_dn *dn, unsigned char *buf) { int i; - switch (dn_ntohs(dn->sdn_objnamel)) { + switch (le16_to_cpu(dn->sdn_objnamel)) { case 0: sprintf(buf, "%d", dn->sdn_objnum); break; default: - for (i = 0; i < dn_ntohs(dn->sdn_objnamel); i++) { + for (i = 0; i < le16_to_cpu(dn->sdn_objnamel); i++) { buf[i] = dn->sdn_objname[i]; if (IS_NOT_PRINTABLE(buf[i])) buf[i] = '.'; @@ -2281,7 +2281,7 @@ static inline void dn_socket_format_entry(struct seq_file *seq, struct sock *sk) seq_printf(seq, "%6s/%04X %04d:%04d %04d:%04d %01d %-16s " "%6s/%04X %04d:%04d %04d:%04d %01d %-16s %4s %s\n", - dn_addr2asc(dn_ntohs(dn_saddr2dn(&scp->addr)), buf1), + dn_addr2asc(le16_to_cpu(dn_saddr2dn(&scp->addr)), buf1), scp->addrloc, scp->numdat, scp->numoth, @@ -2289,7 +2289,7 @@ static inline void dn_socket_format_entry(struct seq_file *seq, struct sock *sk) scp->ackxmt_oth, scp->flowloc_sw, local_object, - dn_addr2asc(dn_ntohs(dn_saddr2dn(&scp->peer)), buf2), + dn_addr2asc(le16_to_cpu(dn_saddr2dn(&scp->peer)), buf2), scp->addrrem, scp->numdat_rcv, scp->numoth_rcv, diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 28e26bd08e2..daf2b98b15f 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -885,7 +885,7 @@ static void dn_send_endnode_hello(struct net_device *dev, struct dn_ifaddr *ifa) memcpy(msg->tiver, dn_eco_version, 3); dn_dn2eth(msg->id, ifa->ifa_local); msg->iinfo = DN_RT_INFO_ENDN; - msg->blksize = dn_htons(mtu2blksize(dev)); + msg->blksize = cpu_to_le16(mtu2blksize(dev)); msg->area = 0x00; memset(msg->seed, 0, 8); memcpy(msg->neighbor, dn_hiord, ETH_ALEN); @@ -895,13 +895,13 @@ static void dn_send_endnode_hello(struct net_device *dev, struct dn_ifaddr *ifa) dn_dn2eth(msg->neighbor, dn->addr); } - msg->timer = dn_htons((unsigned short)dn_db->parms.t3); + msg->timer = cpu_to_le16((unsigned short)dn_db->parms.t3); msg->mpd = 0x00; msg->datalen = 0x02; memset(msg->data, 0xAA, 2); pktlen = (__le16 *)skb_push(skb,2); - *pktlen = dn_htons(skb->len - 2); + *pktlen = cpu_to_le16(skb->len - 2); skb_reset_network_header(skb); @@ -929,7 +929,7 @@ static int dn_am_i_a_router(struct dn_neigh *dn, struct dn_dev *dn_db, struct dn if (dn->priority != dn_db->parms.priority) return 0; - if (dn_ntohs(dn->addr) < dn_ntohs(ifa->ifa_local)) + if (le16_to_cpu(dn->addr) < le16_to_cpu(ifa->ifa_local)) return 1; return 0; @@ -973,11 +973,11 @@ static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa) ptr += ETH_ALEN; *ptr++ = dn_db->parms.forwarding == 1 ? DN_RT_INFO_L1RT : DN_RT_INFO_L2RT; - *((__le16 *)ptr) = dn_htons(mtu2blksize(dev)); + *((__le16 *)ptr) = cpu_to_le16(mtu2blksize(dev)); ptr += 2; *ptr++ = dn_db->parms.priority; /* Priority */ *ptr++ = 0; /* Area: Reserved */ - *((__le16 *)ptr) = dn_htons((unsigned short)dn_db->parms.t3); + *((__le16 *)ptr) = cpu_to_le16((unsigned short)dn_db->parms.t3); ptr += 2; *ptr++ = 0; /* MPD: Reserved */ i1 = ptr++; @@ -993,7 +993,7 @@ static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa) skb_trim(skb, (27 + *i2)); pktlen = (__le16 *)skb_push(skb, 2); - *pktlen = dn_htons(skb->len - 2); + *pktlen = cpu_to_le16(skb->len - 2); skb_reset_network_header(skb); @@ -1106,7 +1106,7 @@ static void dn_dev_set_timer(struct net_device *dev) add_timer(&dn_db->timer); } -struct dn_dev *dn_dev_create(struct net_device *dev, int *err) +static struct dn_dev *dn_dev_create(struct net_device *dev, int *err) { int i; struct dn_dev_parms *p = dn_dev_list; @@ -1401,8 +1401,8 @@ static int dn_dev_seq_show(struct seq_file *seq, void *v) mtu2blksize(dev), dn_db->parms.priority, dn_db->parms.state, dn_db->parms.name, - dn_db->router ? dn_addr2asc(dn_ntohs(*(__le16 *)dn_db->router->primary_key), router_buf) : "", - dn_db->peer ? dn_addr2asc(dn_ntohs(*(__le16 *)dn_db->peer->primary_key), peer_buf) : ""); + dn_db->router ? dn_addr2asc(le16_to_cpu(*(__le16 *)dn_db->router->primary_key), router_buf) : "", + dn_db->peer ? dn_addr2asc(le16_to_cpu(*(__le16 *)dn_db->peer->primary_key), peer_buf) : ""); } return 0; } @@ -1445,7 +1445,7 @@ void __init dn_dev_init(void) return; } - decnet_address = dn_htons((addr[0] << 10) | addr[1]); + decnet_address = cpu_to_le16((addr[0] << 10) | addr[1]); dn_dev_devices_on(); diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 1ca13b17974..05b5aa05e50 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -250,7 +250,7 @@ static int dn_long_output(struct sk_buff *skb) data = skb_push(skb, sizeof(struct dn_long_packet) + 3); lp = (struct dn_long_packet *)(data+3); - *((__le16 *)data) = dn_htons(skb->len - 2); + *((__le16 *)data) = cpu_to_le16(skb->len - 2); *(data + 2) = 1 | DN_RT_F_PF; /* Padding */ lp->msgflg = DN_RT_PKT_LONG|(cb->rt_flags&(DN_RT_F_IE|DN_RT_F_RQR|DN_RT_F_RTS)); @@ -294,7 +294,7 @@ static int dn_short_output(struct sk_buff *skb) } data = skb_push(skb, sizeof(struct dn_short_packet) + 2); - *((__le16 *)data) = dn_htons(skb->len - 2); + *((__le16 *)data) = cpu_to_le16(skb->len - 2); sp = (struct dn_short_packet *)(data+2); sp->msgflg = DN_RT_PKT_SHORT|(cb->rt_flags&(DN_RT_F_RQR|DN_RT_F_RTS)); @@ -336,12 +336,12 @@ static int dn_phase3_output(struct sk_buff *skb) } data = skb_push(skb, sizeof(struct dn_short_packet) + 2); - *((__le16 *)data) = dn_htons(skb->len - 2); + *((__le16 *)data) = cpu_to_le16(skb->len - 2); sp = (struct dn_short_packet *)(data + 2); sp->msgflg = DN_RT_PKT_SHORT|(cb->rt_flags&(DN_RT_F_RQR|DN_RT_F_RTS)); - sp->dstnode = cb->dst & dn_htons(0x03ff); - sp->srcnode = cb->src & dn_htons(0x03ff); + sp->dstnode = cb->dst & cpu_to_le16(0x03ff); + sp->srcnode = cb->src & cpu_to_le16(0x03ff); sp->forward = cb->hops & 0x3f; skb_reset_network_header(skb); @@ -394,7 +394,7 @@ int dn_neigh_router_hello(struct sk_buff *skb) if (neigh->dev->type == ARPHRD_ETHER) memcpy(neigh->ha, ð_hdr(skb)->h_source, ETH_ALEN); - dn->blksize = dn_ntohs(msg->blksize); + dn->blksize = le16_to_cpu(msg->blksize); dn->priority = msg->priority; dn->flags &= ~DN_NDFLAG_P3; @@ -410,7 +410,7 @@ int dn_neigh_router_hello(struct sk_buff *skb) } /* Only use routers in our area */ - if ((dn_ntohs(src)>>10) == (dn_ntohs((decnet_address))>>10)) { + if ((le16_to_cpu(src)>>10) == (le16_to_cpu((decnet_address))>>10)) { if (!dn_db->router) { dn_db->router = neigh_clone(neigh); } else { @@ -453,7 +453,7 @@ int dn_neigh_endnode_hello(struct sk_buff *skb) if (neigh->dev->type == ARPHRD_ETHER) memcpy(neigh->ha, ð_hdr(skb)->h_source, ETH_ALEN); dn->flags &= ~(DN_NDFLAG_R1 | DN_NDFLAG_R2); - dn->blksize = dn_ntohs(msg->blksize); + dn->blksize = le16_to_cpu(msg->blksize); dn->priority = 0; } @@ -543,7 +543,7 @@ static inline void dn_neigh_format_entry(struct seq_file *seq, read_lock(&n->lock); seq_printf(seq, "%-7s %s%s%s %02x %02d %07ld %-8s\n", - dn_addr2asc(dn_ntohs(dn->addr), buf), + dn_addr2asc(le16_to_cpu(dn->addr), buf), (dn->flags&DN_NDFLAG_R1) ? "1" : "-", (dn->flags&DN_NDFLAG_R2) ? "2" : "-", (dn->flags&DN_NDFLAG_P3) ? "3" : "-", diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c index 4074a6e5d0d..5d8a2a56fd3 100644 --- a/net/decnet/dn_nsp_in.c +++ b/net/decnet/dn_nsp_in.c @@ -83,7 +83,9 @@ static void dn_log_martian(struct sk_buff *skb, const char *msg) if (decnet_log_martians && net_ratelimit()) { char *devname = skb->dev ? skb->dev->name : "???"; struct dn_skb_cb *cb = DN_SKB_CB(skb); - printk(KERN_INFO "DECnet: Martian packet (%s) dev=%s src=0x%04hx dst=0x%04hx srcport=0x%04hx dstport=0x%04hx\n", msg, devname, dn_ntohs(cb->src), dn_ntohs(cb->dst), dn_ntohs(cb->src_port), dn_ntohs(cb->dst_port)); + printk(KERN_INFO "DECnet: Martian packet (%s) dev=%s src=0x%04hx dst=0x%04hx srcport=0x%04hx dstport=0x%04hx\n", + msg, devname, le16_to_cpu(cb->src), le16_to_cpu(cb->dst), + le16_to_cpu(cb->src_port), le16_to_cpu(cb->dst_port)); } } @@ -133,7 +135,7 @@ static int dn_process_ack(struct sock *sk, struct sk_buff *skb, int oth) if (skb->len < 2) return len; - if ((ack = dn_ntohs(*ptr)) & 0x8000) { + if ((ack = le16_to_cpu(*ptr)) & 0x8000) { skb_pull(skb, 2); ptr++; len += 2; @@ -147,7 +149,7 @@ static int dn_process_ack(struct sock *sk, struct sk_buff *skb, int oth) if (skb->len < 2) return len; - if ((ack = dn_ntohs(*ptr)) & 0x8000) { + if ((ack = le16_to_cpu(*ptr)) & 0x8000) { skb_pull(skb, 2); len += 2; if ((ack & 0x4000) == 0) { @@ -237,7 +239,7 @@ static struct sock *dn_find_listener(struct sk_buff *skb, unsigned short *reason cb->dst_port = msg->dstaddr; cb->services = msg->services; cb->info = msg->info; - cb->segsize = dn_ntohs(msg->segsize); + cb->segsize = le16_to_cpu(msg->segsize); if (!pskb_may_pull(skb, sizeof(*msg))) goto err_out; @@ -344,7 +346,7 @@ static void dn_nsp_conn_conf(struct sock *sk, struct sk_buff *skb) ptr = skb->data; cb->services = *ptr++; cb->info = *ptr++; - cb->segsize = dn_ntohs(*(__le16 *)ptr); + cb->segsize = le16_to_cpu(*(__le16 *)ptr); if ((scp->state == DN_CI) || (scp->state == DN_CD)) { scp->persist = 0; @@ -361,7 +363,7 @@ static void dn_nsp_conn_conf(struct sock *sk, struct sk_buff *skb) if (skb->len > 0) { u16 dlen = *skb->data; if ((dlen <= 16) && (dlen <= skb->len)) { - scp->conndata_in.opt_optl = dn_htons(dlen); + scp->conndata_in.opt_optl = cpu_to_le16(dlen); skb_copy_from_linear_data_offset(skb, 1, scp->conndata_in.opt_data, dlen); } @@ -396,17 +398,17 @@ static void dn_nsp_disc_init(struct sock *sk, struct sk_buff *skb) if (skb->len < 2) goto out; - reason = dn_ntohs(*(__le16 *)skb->data); + reason = le16_to_cpu(*(__le16 *)skb->data); skb_pull(skb, 2); - scp->discdata_in.opt_status = dn_htons(reason); + scp->discdata_in.opt_status = cpu_to_le16(reason); scp->discdata_in.opt_optl = 0; memset(scp->discdata_in.opt_data, 0, 16); if (skb->len > 0) { u16 dlen = *skb->data; if ((dlen <= 16) && (dlen <= skb->len)) { - scp->discdata_in.opt_optl = dn_htons(dlen); + scp->discdata_in.opt_optl = cpu_to_le16(dlen); skb_copy_from_linear_data_offset(skb, 1, scp->discdata_in.opt_data, dlen); } } @@ -463,7 +465,7 @@ static void dn_nsp_disc_conf(struct sock *sk, struct sk_buff *skb) if (skb->len != 2) goto out; - reason = dn_ntohs(*(__le16 *)skb->data); + reason = le16_to_cpu(*(__le16 *)skb->data); sk->sk_state = TCP_CLOSE; @@ -512,7 +514,7 @@ static void dn_nsp_linkservice(struct sock *sk, struct sk_buff *skb) if (skb->len != 4) goto out; - segnum = dn_ntohs(*(__le16 *)ptr); + segnum = le16_to_cpu(*(__le16 *)ptr); ptr += 2; lsflags = *(unsigned char *)ptr++; fcval = *ptr; @@ -620,7 +622,7 @@ static void dn_nsp_otherdata(struct sock *sk, struct sk_buff *skb) if (skb->len < 2) goto out; - cb->segnum = segnum = dn_ntohs(*(__le16 *)skb->data); + cb->segnum = segnum = le16_to_cpu(*(__le16 *)skb->data); skb_pull(skb, 2); if (seq_next(scp->numoth_rcv, segnum)) { @@ -648,7 +650,7 @@ static void dn_nsp_data(struct sock *sk, struct sk_buff *skb) if (skb->len < 2) goto out; - cb->segnum = segnum = dn_ntohs(*(__le16 *)skb->data); + cb->segnum = segnum = le16_to_cpu(*(__le16 *)skb->data); skb_pull(skb, 2); if (seq_next(scp->numdat_rcv, segnum)) { diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c index 1964faf203e..2013c25b7f5 100644 --- a/net/decnet/dn_nsp_out.c +++ b/net/decnet/dn_nsp_out.c @@ -230,7 +230,6 @@ static inline unsigned dn_nsp_clone_and_send(struct sk_buff *skb, /** * dn_nsp_output - Try and send something from socket queues * @sk: The socket whose queues are to be investigated - * @gfp: The memory allocation flags * * Try and send the packet on the end of the data and other data queues. * Other data gets priority over data, and if we retransmit a packet we @@ -326,8 +325,8 @@ static __le16 *dn_mk_ack_header(struct sock *sk, struct sk_buff *skb, unsigned c ptr = (__le16 *)dn_mk_common_header(scp, skb, msgflag, hlen); - *ptr++ = dn_htons(acknum); - *ptr++ = dn_htons(ackcrs); + *ptr++ = cpu_to_le16(acknum); + *ptr++ = cpu_to_le16(ackcrs); return ptr; } @@ -345,7 +344,7 @@ static __le16 *dn_nsp_mk_data_header(struct sock *sk, struct sk_buff *skb, int o cb->segnum = scp->numdat; seq_add(&scp->numdat, 1); } - *(ptr++) = dn_htons(cb->segnum); + *(ptr++) = cpu_to_le16(cb->segnum); return ptr; } @@ -523,7 +522,7 @@ void dn_send_conn_conf(struct sock *sk, gfp_t gfp) struct dn_scp *scp = DN_SK(sk); struct sk_buff *skb = NULL; struct nsp_conn_init_msg *msg; - __u8 len = (__u8)dn_ntohs(scp->conndata_out.opt_optl); + __u8 len = (__u8)le16_to_cpu(scp->conndata_out.opt_optl); if ((skb = dn_alloc_skb(sk, 50 + len, gfp)) == NULL) return; @@ -534,7 +533,7 @@ void dn_send_conn_conf(struct sock *sk, gfp_t gfp) msg->srcaddr = scp->addrloc; msg->services = scp->services_loc; msg->info = scp->info_loc; - msg->segsize = dn_htons(scp->segsize_loc); + msg->segsize = cpu_to_le16(scp->segsize_loc); *skb_put(skb,1) = len; @@ -560,7 +559,7 @@ static __inline__ void dn_nsp_do_disc(struct sock *sk, unsigned char msgflg, if ((dst == NULL) || (rem == 0)) { if (net_ratelimit()) - printk(KERN_DEBUG "DECnet: dn_nsp_do_disc: BUG! Please report this to SteveW@ACM.org rem=%u dst=%p\n", dn_ntohs(rem), dst); + printk(KERN_DEBUG "DECnet: dn_nsp_do_disc: BUG! Please report this to SteveW@ACM.org rem=%u dst=%p\n", le16_to_cpu(rem), dst); return; } @@ -573,7 +572,7 @@ static __inline__ void dn_nsp_do_disc(struct sock *sk, unsigned char msgflg, msg += 2; *(__le16 *)msg = loc; msg += 2; - *(__le16 *)msg = dn_htons(reason); + *(__le16 *)msg = cpu_to_le16(reason); msg += 2; if (msgflg == NSP_DISCINIT) *msg++ = ddl; @@ -599,10 +598,10 @@ void dn_nsp_send_disc(struct sock *sk, unsigned char msgflg, int ddl = 0; if (msgflg == NSP_DISCINIT) - ddl = dn_ntohs(scp->discdata_out.opt_optl); + ddl = le16_to_cpu(scp->discdata_out.opt_optl); if (reason == 0) - reason = dn_ntohs(scp->discdata_out.opt_status); + reason = le16_to_cpu(scp->discdata_out.opt_status); dn_nsp_do_disc(sk, msgflg, reason, gfp, sk->sk_dst_cache, ddl, scp->discdata_out.opt_data, scp->addrrem, scp->addrloc); @@ -676,7 +675,7 @@ void dn_nsp_send_conninit(struct sock *sk, unsigned char msgflg) msg->srcaddr = scp->addrloc; msg->services = scp->services_loc; /* Requested flow control */ msg->info = scp->info_loc; /* Version Number */ - msg->segsize = dn_htons(scp->segsize_loc); /* Max segment size */ + msg->segsize = cpu_to_le16(scp->segsize_loc); /* Max segment size */ if (scp->peer.sdn_objnum) type = 0; @@ -709,7 +708,7 @@ void dn_nsp_send_conninit(struct sock *sk, unsigned char msgflg) if (aux > 0) memcpy(skb_put(skb, aux), scp->accessdata.acc_acc, aux); - aux = (__u8)dn_ntohs(scp->conndata_out.opt_optl); + aux = (__u8)le16_to_cpu(scp->conndata_out.opt_optl); *skb_put(skb, 1) = aux; if (aux > 0) memcpy(skb_put(skb,aux), scp->conndata_out.opt_data, aux); diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 821bd1cdec0..c754670b7fc 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -131,7 +131,6 @@ static struct dst_ops dn_dst_ops = { .negative_advice = dn_dst_negative_advice, .link_failure = dn_dst_link_failure, .update_pmtu = dn_dst_update_pmtu, - .entry_size = sizeof(struct dn_route), .entries = ATOMIC_INIT(0), }; @@ -312,7 +311,7 @@ static int dn_insert_route(struct dn_route *rt, unsigned hash, struct dn_route * return 0; } -void dn_run_flush(unsigned long dummy) +static void dn_run_flush(unsigned long dummy) { int i; struct dn_route *rt, *next; @@ -476,7 +475,7 @@ static int dn_route_rx_packet(struct sk_buff *skb) printk(KERN_DEBUG "DECnet: dn_route_rx_packet: rt_flags=0x%02x dev=%s len=%d src=0x%04hx dst=0x%04hx err=%d type=%d\n", (int)cb->rt_flags, devname, skb->len, - dn_ntohs(cb->src), dn_ntohs(cb->dst), + le16_to_cpu(cb->src), le16_to_cpu(cb->dst), err, skb->pkt_type); } @@ -576,7 +575,7 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type { struct dn_skb_cb *cb; unsigned char flags = 0; - __u16 len = dn_ntohs(*(__le16 *)skb->data); + __u16 len = le16_to_cpu(*(__le16 *)skb->data); struct dn_dev *dn = (struct dn_dev *)dev->dn_ptr; unsigned char padlen = 0; @@ -774,7 +773,7 @@ static int dn_rt_bug(struct sk_buff *skb) struct dn_skb_cb *cb = DN_SKB_CB(skb); printk(KERN_DEBUG "dn_rt_bug: skb from:%04x to:%04x\n", - dn_ntohs(cb->src), dn_ntohs(cb->dst)); + le16_to_cpu(cb->src), le16_to_cpu(cb->dst)); } kfree_skb(skb); @@ -817,7 +816,7 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) static inline int dn_match_addr(__le16 addr1, __le16 addr2) { - __u16 tmp = dn_ntohs(addr1) ^ dn_ntohs(addr2); + __u16 tmp = le16_to_cpu(addr1) ^ le16_to_cpu(addr2); int match = 16; while(tmp) { tmp >>= 1; @@ -887,8 +886,8 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old if (decnet_debug_level & 16) printk(KERN_DEBUG "dn_route_output_slow: dst=%04x src=%04x mark=%d" - " iif=%d oif=%d\n", dn_ntohs(oldflp->fld_dst), - dn_ntohs(oldflp->fld_src), + " iif=%d oif=%d\n", le16_to_cpu(oldflp->fld_dst), + le16_to_cpu(oldflp->fld_src), oldflp->mark, init_net.loopback_dev->ifindex, oldflp->oif); /* If we have an output interface, verify its a DECnet device */ @@ -960,7 +959,7 @@ source_ok: printk(KERN_DEBUG "dn_route_output_slow: initial checks complete." " dst=%o4x src=%04x oif=%d try_hard=%d\n", - dn_ntohs(fl.fld_dst), dn_ntohs(fl.fld_src), + le16_to_cpu(fl.fld_dst), le16_to_cpu(fl.fld_src), fl.oif, try_hard); /* @@ -1185,7 +1184,7 @@ static int dn_route_output_key(struct dst_entry **pprt, struct flowi *flp, int f err = __dn_route_output_key(pprt, flp, flags); if (err == 0 && flp->proto) { - err = xfrm_lookup(pprt, flp, NULL, 0); + err = xfrm_lookup(&init_net, pprt, flp, NULL, 0); } return err; } @@ -1196,8 +1195,8 @@ int dn_route_output_sock(struct dst_entry **pprt, struct flowi *fl, struct sock err = __dn_route_output_key(pprt, fl, flags & MSG_TRYHARD); if (err == 0 && fl->proto) { - err = xfrm_lookup(pprt, fl, sk, (flags & MSG_DONTWAIT) ? - 0 : XFRM_LOOKUP_WAIT); + err = xfrm_lookup(&init_net, pprt, fl, sk, + (flags & MSG_DONTWAIT) ? 0 : XFRM_LOOKUP_WAIT); } return err; } @@ -1423,7 +1422,7 @@ e_neighbour: goto done; } -int dn_route_input(struct sk_buff *skb) +static int dn_route_input(struct sk_buff *skb) { struct dn_route *rt; struct dn_skb_cb *cb = DN_SKB_CB(skb); @@ -1712,8 +1711,8 @@ static int dn_rt_cache_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%-8s %-7s %-7s %04d %04d %04d\n", rt->u.dst.dev ? rt->u.dst.dev->name : "*", - dn_addr2asc(dn_ntohs(rt->rt_daddr), buf1), - dn_addr2asc(dn_ntohs(rt->rt_saddr), buf2), + dn_addr2asc(le16_to_cpu(rt->rt_daddr), buf1), + dn_addr2asc(le16_to_cpu(rt->rt_saddr), buf2), atomic_read(&rt->u.dst.__refcnt), rt->u.dst.__use, (int) dst_metric(&rt->u.dst, RTAX_RTT)); diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 3a2830ac89c..69ad9280c69 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -85,7 +85,7 @@ static int dn_fib_hash_zombies; static inline dn_fib_idx_t dn_hash(dn_fib_key_t key, struct dn_zone *dz) { - u16 h = dn_ntohs(key.datum)>>(16 - dz->dz_order); + u16 h = le16_to_cpu(key.datum)>>(16 - dz->dz_order); h ^= (h >> 10); h ^= (h >> 6); h &= DZ_HASHMASK(dz); diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c index 36400b26689..965397af9a8 100644 --- a/net/decnet/sysctl_net_decnet.c +++ b/net/decnet/sysctl_net_decnet.c @@ -126,7 +126,7 @@ static int parse_addr(__le16 *addr, char *str) if (INVALID_END_CHAR(*str)) return -1; - *addr = dn_htons((area << 10) | node); + *addr = cpu_to_le16((area << 10) | node); return 0; } @@ -201,7 +201,7 @@ static int dn_node_address_handler(ctl_table *table, int write, return 0; } - dn_addr2asc(dn_ntohs(decnet_address), addr); + dn_addr2asc(le16_to_cpu(decnet_address), addr); len = strlen(addr); addr[len++] = '\n'; @@ -354,8 +354,8 @@ static ctl_table dn_table[] = { .data = node_name, .maxlen = 7, .mode = 0644, - .proc_handler = &proc_dostring, - .strategy = &sysctl_string, + .proc_handler = proc_dostring, + .strategy = sysctl_string, }, { .ctl_name = NET_DECNET_DEFAULT_DEVICE, @@ -371,8 +371,8 @@ static ctl_table dn_table[] = { .data = &decnet_time_wait, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_decnet_time_wait, .extra2 = &max_decnet_time_wait }, @@ -382,8 +382,8 @@ static ctl_table dn_table[] = { .data = &decnet_dn_count, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_state_count, .extra2 = &max_state_count }, @@ -393,8 +393,8 @@ static ctl_table dn_table[] = { .data = &decnet_di_count, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_state_count, .extra2 = &max_state_count }, @@ -404,8 +404,8 @@ static ctl_table dn_table[] = { .data = &decnet_dr_count, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_state_count, .extra2 = &max_state_count }, @@ -415,8 +415,8 @@ static ctl_table dn_table[] = { .data = &decnet_dst_gc_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_decnet_dst_gc_interval, .extra2 = &max_decnet_dst_gc_interval }, @@ -426,8 +426,8 @@ static ctl_table dn_table[] = { .data = &decnet_no_fc_max_cwnd, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_decnet_no_fc_max_cwnd, .extra2 = &max_decnet_no_fc_max_cwnd }, @@ -437,8 +437,8 @@ static ctl_table dn_table[] = { .data = &sysctl_decnet_mem, .maxlen = sizeof(sysctl_decnet_mem), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec, + .strategy = sysctl_intvec, }, { .ctl_name = NET_DECNET_RMEM, @@ -446,8 +446,8 @@ static ctl_table dn_table[] = { .data = &sysctl_decnet_rmem, .maxlen = sizeof(sysctl_decnet_rmem), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec, + .strategy = sysctl_intvec, }, { .ctl_name = NET_DECNET_WMEM, @@ -455,8 +455,8 @@ static ctl_table dn_table[] = { .data = &sysctl_decnet_wmem, .maxlen = sizeof(sysctl_decnet_wmem), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec, + .strategy = sysctl_intvec, }, { .ctl_name = NET_DECNET_DEBUG_LEVEL, @@ -464,8 +464,8 @@ static ctl_table dn_table[] = { .data = &decnet_debug_level, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec, + .strategy = sysctl_intvec, }, {0} }; diff --git a/net/dsa/mv88e6060.c b/net/dsa/mv88e6060.c index 54068ef251e..85081ae9fe8 100644 --- a/net/dsa/mv88e6060.c +++ b/net/dsa/mv88e6060.c @@ -222,7 +222,7 @@ static void mv88e6060_poll_link(struct dsa_switch *ds) for (i = 0; i < DSA_MAX_PORTS; i++) { struct net_device *dev; - int port_status; + int uninitialized_var(port_status); int link; int speed; int duplex; @@ -273,14 +273,14 @@ static struct dsa_switch_driver mv88e6060_switch_driver = { .poll_link = mv88e6060_poll_link, }; -int __init mv88e6060_init(void) +static int __init mv88e6060_init(void) { register_switch_driver(&mv88e6060_switch_driver); return 0; } module_init(mv88e6060_init); -void __exit mv88e6060_cleanup(void) +static void __exit mv88e6060_cleanup(void) { unregister_switch_driver(&mv88e6060_switch_driver); } diff --git a/net/dsa/mv88e6123_61_65.c b/net/dsa/mv88e6123_61_65.c index 555b164082f..ec8c6a0482d 100644 --- a/net/dsa/mv88e6123_61_65.c +++ b/net/dsa/mv88e6123_61_65.c @@ -407,14 +407,14 @@ static struct dsa_switch_driver mv88e6123_61_65_switch_driver = { .get_sset_count = mv88e6123_61_65_get_sset_count, }; -int __init mv88e6123_61_65_init(void) +static int __init mv88e6123_61_65_init(void) { register_switch_driver(&mv88e6123_61_65_switch_driver); return 0; } module_init(mv88e6123_61_65_init); -void __exit mv88e6123_61_65_cleanup(void) +static void __exit mv88e6123_61_65_cleanup(void) { unregister_switch_driver(&mv88e6123_61_65_switch_driver); } diff --git a/net/dsa/mv88e6131.c b/net/dsa/mv88e6131.c index 36e01eb863a..374d46a0126 100644 --- a/net/dsa/mv88e6131.c +++ b/net/dsa/mv88e6131.c @@ -366,14 +366,14 @@ static struct dsa_switch_driver mv88e6131_switch_driver = { .get_sset_count = mv88e6131_get_sset_count, }; -int __init mv88e6131_init(void) +static int __init mv88e6131_init(void) { register_switch_driver(&mv88e6131_switch_driver); return 0; } module_init(mv88e6131_init); -void __exit mv88e6131_cleanup(void) +static void __exit mv88e6131_cleanup(void) { unregister_switch_driver(&mv88e6131_switch_driver); } diff --git a/net/dsa/mv88e6xxx.c b/net/dsa/mv88e6xxx.c index aa6c609c59f..4e4d8b5ad03 100644 --- a/net/dsa/mv88e6xxx.c +++ b/net/dsa/mv88e6xxx.c @@ -358,7 +358,7 @@ void mv88e6xxx_poll_link(struct dsa_switch *ds) for (i = 0; i < DSA_MAX_PORTS; i++) { struct net_device *dev; - int port_status; + int uninitialized_var(port_status); int link; int speed; int duplex; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 1af5a79309e..a3a410d20da 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -352,7 +352,7 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent, netif_carrier_off(slave_dev); if (p->phy != NULL) { - phy_attach(slave_dev, p->phy->dev.bus_id, + phy_attach(slave_dev, dev_name(&p->phy->dev), 0, PHY_INTERFACE_MODE_GMII); p->phy->autoneg = AUTONEG_ENABLE; diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index 31866543332..f99a019b939 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -162,7 +162,6 @@ static int dsa_rcv(struct sk_buff *skb, struct net_device *dev, skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, skb->dev); - skb->dev->last_rx = jiffies; skb->dev->stats.rx_packets++; skb->dev->stats.rx_bytes += skb->len; diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c index 9f4ce55eae5..328ec957f78 100644 --- a/net/dsa/tag_edsa.c +++ b/net/dsa/tag_edsa.c @@ -181,7 +181,6 @@ static int edsa_rcv(struct sk_buff *skb, struct net_device *dev, skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, skb->dev); - skb->dev->last_rx = jiffies; skb->dev->stats.rx_packets++; skb->dev->stats.rx_bytes += skb->len; diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c index efd26697e71..b59132878ad 100644 --- a/net/dsa/tag_trailer.c +++ b/net/dsa/tag_trailer.c @@ -98,7 +98,6 @@ static int trailer_rcv(struct sk_buff *skb, struct net_device *dev, skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, skb->dev); - skb->dev->last_rx = jiffies; skb->dev->stats.rx_packets++; skb->dev->stats.rx_bytes += skb->len; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index b9d85af2dd3..280352aba40 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -165,8 +165,8 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) skb_pull(skb, ETH_HLEN); eth = eth_hdr(skb); - if (is_multicast_ether_addr(eth->h_dest)) { - if (!compare_ether_addr(eth->h_dest, dev->broadcast)) + if (unlikely(is_multicast_ether_addr(eth->h_dest))) { + if (!compare_ether_addr_64bits(eth->h_dest, dev->broadcast)) skb->pkt_type = PACKET_BROADCAST; else skb->pkt_type = PACKET_MULTICAST; @@ -181,7 +181,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) */ else if (1 /*dev->flags&IFF_PROMISC */ ) { - if (unlikely(compare_ether_addr(eth->h_dest, dev->dev_addr))) + if (unlikely(compare_ether_addr_64bits(eth->h_dest, dev->dev_addr))) skb->pkt_type = PACKET_OTHERHOST; } @@ -282,7 +282,7 @@ EXPORT_SYMBOL(eth_header_cache_update); * This doesn't change hardware matching, so needs to be overridden * for most real devices. */ -static int eth_mac_addr(struct net_device *dev, void *p) +int eth_mac_addr(struct net_device *dev, void *p) { struct sockaddr *addr = p; @@ -293,6 +293,7 @@ static int eth_mac_addr(struct net_device *dev, void *p) memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); return 0; } +EXPORT_SYMBOL(eth_mac_addr); /** * eth_change_mtu - set new MTU size @@ -302,21 +303,23 @@ static int eth_mac_addr(struct net_device *dev, void *p) * Allow changing MTU size. Needs to be overridden for devices * supporting jumbo frames. */ -static int eth_change_mtu(struct net_device *dev, int new_mtu) +int eth_change_mtu(struct net_device *dev, int new_mtu) { if (new_mtu < 68 || new_mtu > ETH_DATA_LEN) return -EINVAL; dev->mtu = new_mtu; return 0; } +EXPORT_SYMBOL(eth_change_mtu); -static int eth_validate_addr(struct net_device *dev) +int eth_validate_addr(struct net_device *dev) { if (!is_valid_ether_addr(dev->dev_addr)) return -EADDRNOTAVAIL; return 0; } +EXPORT_SYMBOL(eth_validate_addr); const struct header_ops eth_header_ops ____cacheline_aligned = { .create = eth_header, @@ -334,11 +337,11 @@ const struct header_ops eth_header_ops ____cacheline_aligned = { void ether_setup(struct net_device *dev) { dev->header_ops = ð_header_ops; - +#ifdef CONFIG_COMPAT_NET_DEV_OPS dev->change_mtu = eth_change_mtu; dev->set_mac_address = eth_mac_addr; dev->validate_addr = eth_validate_addr; - +#endif dev->type = ARPHRD_ETHER; dev->hard_header_len = ETH_HLEN; dev->mtu = ETH_DATA_LEN; diff --git a/net/ieee80211/Kconfig b/net/ieee80211/Kconfig deleted file mode 100644 index 94ed7d3cd9d..00000000000 --- a/net/ieee80211/Kconfig +++ /dev/null @@ -1,73 +0,0 @@ -config IEEE80211 - tristate "Generic IEEE 802.11 Networking Stack (DEPRECATED)" - ---help--- - This option enables the hardware independent IEEE 802.11 - networking stack. This component is deprecated in favor of the - mac80211 component. - -config IEEE80211_DEBUG - bool "Enable full debugging output" - depends on IEEE80211 - ---help--- - This option will enable debug tracing output for the - ieee80211 network stack. - - This will result in the kernel module being ~70k larger. You - can control which debug output is sent to the kernel log by - setting the value in - - /proc/net/ieee80211/debug_level - - For example: - - % echo 0x00000FFO > /proc/net/ieee80211/debug_level - - For a list of values you can assign to debug_level, you - can look at the bit mask values in <net/ieee80211.h> - - If you are not trying to debug or develop the ieee80211 - subsystem, you most likely want to say N here. - -config IEEE80211_CRYPT_WEP - tristate "IEEE 802.11 WEP encryption (802.1x)" - depends on IEEE80211 - select CRYPTO - select CRYPTO_ARC4 - select CRYPTO_ECB - select CRC32 - ---help--- - Include software based cipher suites in support of IEEE - 802.11's WEP. This is needed for WEP as well as 802.1x. - - This can be compiled as a module and it will be called - "ieee80211_crypt_wep". - -config IEEE80211_CRYPT_CCMP - tristate "IEEE 802.11i CCMP support" - depends on IEEE80211 - select CRYPTO - select CRYPTO_AES - ---help--- - Include software based cipher suites in support of IEEE 802.11i - (aka TGi, WPA, WPA2, WPA-PSK, etc.) for use with CCMP enabled - networks. - - This can be compiled as a module and it will be called - "ieee80211_crypt_ccmp". - -config IEEE80211_CRYPT_TKIP - tristate "IEEE 802.11i TKIP encryption" - depends on IEEE80211 - select WIRELESS_EXT - select CRYPTO - select CRYPTO_MICHAEL_MIC - select CRYPTO_ECB - select CRC32 - ---help--- - Include software based cipher suites in support of IEEE 802.11i - (aka TGi, WPA, WPA2, WPA-PSK, etc.) for use with TKIP enabled - networks. - - This can be compiled as a module and it will be called - "ieee80211_crypt_tkip". - diff --git a/net/ieee80211/Makefile b/net/ieee80211/Makefile deleted file mode 100644 index f988417121d..00000000000 --- a/net/ieee80211/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -obj-$(CONFIG_IEEE80211) += ieee80211.o -obj-$(CONFIG_IEEE80211) += ieee80211_crypt.o -obj-$(CONFIG_IEEE80211_CRYPT_WEP) += ieee80211_crypt_wep.o -obj-$(CONFIG_IEEE80211_CRYPT_CCMP) += ieee80211_crypt_ccmp.o -obj-$(CONFIG_IEEE80211_CRYPT_TKIP) += ieee80211_crypt_tkip.o -ieee80211-objs := \ - ieee80211_module.o \ - ieee80211_tx.o \ - ieee80211_rx.o \ - ieee80211_wx.o \ - ieee80211_geo.o - diff --git a/net/ieee80211/ieee80211_crypt.c b/net/ieee80211/ieee80211_crypt.c deleted file mode 100644 index df5592c9339..00000000000 --- a/net/ieee80211/ieee80211_crypt.c +++ /dev/null @@ -1,206 +0,0 @@ -/* - * Host AP crypto routines - * - * Copyright (c) 2002-2003, Jouni Malinen <j@w1.fi> - * Portions Copyright (C) 2004, Intel Corporation <jketreno@linux.intel.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. See README and COPYING for - * more details. - * - */ - -#include <linux/errno.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/slab.h> -#include <linux/string.h> -#include <net/ieee80211.h> - -MODULE_AUTHOR("Jouni Malinen"); -MODULE_DESCRIPTION("HostAP crypto"); -MODULE_LICENSE("GPL"); - -struct ieee80211_crypto_alg { - struct list_head list; - struct ieee80211_crypto_ops *ops; -}; - -static LIST_HEAD(ieee80211_crypto_algs); -static DEFINE_SPINLOCK(ieee80211_crypto_lock); - -void ieee80211_crypt_deinit_entries(struct ieee80211_device *ieee, int force) -{ - struct ieee80211_crypt_data *entry, *next; - unsigned long flags; - - spin_lock_irqsave(&ieee->lock, flags); - list_for_each_entry_safe(entry, next, &ieee->crypt_deinit_list, list) { - if (atomic_read(&entry->refcnt) != 0 && !force) - continue; - - list_del(&entry->list); - - if (entry->ops) { - entry->ops->deinit(entry->priv); - module_put(entry->ops->owner); - } - kfree(entry); - } - spin_unlock_irqrestore(&ieee->lock, flags); -} - -/* After this, crypt_deinit_list won't accept new members */ -void ieee80211_crypt_quiescing(struct ieee80211_device *ieee) -{ - unsigned long flags; - - spin_lock_irqsave(&ieee->lock, flags); - ieee->crypt_quiesced = 1; - spin_unlock_irqrestore(&ieee->lock, flags); -} - -void ieee80211_crypt_deinit_handler(unsigned long data) -{ - struct ieee80211_device *ieee = (struct ieee80211_device *)data; - unsigned long flags; - - ieee80211_crypt_deinit_entries(ieee, 0); - - spin_lock_irqsave(&ieee->lock, flags); - if (!list_empty(&ieee->crypt_deinit_list) && !ieee->crypt_quiesced) { - printk(KERN_DEBUG "%s: entries remaining in delayed crypt " - "deletion list\n", ieee->dev->name); - ieee->crypt_deinit_timer.expires = jiffies + HZ; - add_timer(&ieee->crypt_deinit_timer); - } - spin_unlock_irqrestore(&ieee->lock, flags); -} - -void ieee80211_crypt_delayed_deinit(struct ieee80211_device *ieee, - struct ieee80211_crypt_data **crypt) -{ - struct ieee80211_crypt_data *tmp; - unsigned long flags; - - if (*crypt == NULL) - return; - - tmp = *crypt; - *crypt = NULL; - - /* must not run ops->deinit() while there may be pending encrypt or - * decrypt operations. Use a list of delayed deinits to avoid needing - * locking. */ - - spin_lock_irqsave(&ieee->lock, flags); - if (!ieee->crypt_quiesced) { - list_add(&tmp->list, &ieee->crypt_deinit_list); - if (!timer_pending(&ieee->crypt_deinit_timer)) { - ieee->crypt_deinit_timer.expires = jiffies + HZ; - add_timer(&ieee->crypt_deinit_timer); - } - } - spin_unlock_irqrestore(&ieee->lock, flags); -} - -int ieee80211_register_crypto_ops(struct ieee80211_crypto_ops *ops) -{ - unsigned long flags; - struct ieee80211_crypto_alg *alg; - - alg = kzalloc(sizeof(*alg), GFP_KERNEL); - if (alg == NULL) - return -ENOMEM; - - alg->ops = ops; - - spin_lock_irqsave(&ieee80211_crypto_lock, flags); - list_add(&alg->list, &ieee80211_crypto_algs); - spin_unlock_irqrestore(&ieee80211_crypto_lock, flags); - - printk(KERN_DEBUG "ieee80211_crypt: registered algorithm '%s'\n", - ops->name); - - return 0; -} - -int ieee80211_unregister_crypto_ops(struct ieee80211_crypto_ops *ops) -{ - struct ieee80211_crypto_alg *alg; - unsigned long flags; - - spin_lock_irqsave(&ieee80211_crypto_lock, flags); - list_for_each_entry(alg, &ieee80211_crypto_algs, list) { - if (alg->ops == ops) - goto found; - } - spin_unlock_irqrestore(&ieee80211_crypto_lock, flags); - return -EINVAL; - - found: - printk(KERN_DEBUG "ieee80211_crypt: unregistered algorithm " - "'%s'\n", ops->name); - list_del(&alg->list); - spin_unlock_irqrestore(&ieee80211_crypto_lock, flags); - kfree(alg); - return 0; -} - -struct ieee80211_crypto_ops *ieee80211_get_crypto_ops(const char *name) -{ - struct ieee80211_crypto_alg *alg; - unsigned long flags; - - spin_lock_irqsave(&ieee80211_crypto_lock, flags); - list_for_each_entry(alg, &ieee80211_crypto_algs, list) { - if (strcmp(alg->ops->name, name) == 0) - goto found; - } - spin_unlock_irqrestore(&ieee80211_crypto_lock, flags); - return NULL; - - found: - spin_unlock_irqrestore(&ieee80211_crypto_lock, flags); - return alg->ops; -} - -static void *ieee80211_crypt_null_init(int keyidx) -{ - return (void *)1; -} - -static void ieee80211_crypt_null_deinit(void *priv) -{ -} - -static struct ieee80211_crypto_ops ieee80211_crypt_null = { - .name = "NULL", - .init = ieee80211_crypt_null_init, - .deinit = ieee80211_crypt_null_deinit, - .owner = THIS_MODULE, -}; - -static int __init ieee80211_crypto_init(void) -{ - return ieee80211_register_crypto_ops(&ieee80211_crypt_null); -} - -static void __exit ieee80211_crypto_deinit(void) -{ - ieee80211_unregister_crypto_ops(&ieee80211_crypt_null); - BUG_ON(!list_empty(&ieee80211_crypto_algs)); -} - -EXPORT_SYMBOL(ieee80211_crypt_deinit_entries); -EXPORT_SYMBOL(ieee80211_crypt_deinit_handler); -EXPORT_SYMBOL(ieee80211_crypt_delayed_deinit); -EXPORT_SYMBOL(ieee80211_crypt_quiescing); - -EXPORT_SYMBOL(ieee80211_register_crypto_ops); -EXPORT_SYMBOL(ieee80211_unregister_crypto_ops); -EXPORT_SYMBOL(ieee80211_get_crypto_ops); - -module_init(ieee80211_crypto_init); -module_exit(ieee80211_crypto_deinit); diff --git a/net/ieee80211/ieee80211_geo.c b/net/ieee80211/ieee80211_geo.c deleted file mode 100644 index 960ad13f5e9..00000000000 --- a/net/ieee80211/ieee80211_geo.c +++ /dev/null @@ -1,195 +0,0 @@ -/****************************************************************************** - - Copyright(c) 2005 Intel Corporation. All rights reserved. - - This program is free software; you can redistribute it and/or modify it - under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., 59 - Temple Place - Suite 330, Boston, MA 02111-1307, USA. - - The full GNU General Public License is included in this distribution in the - file called LICENSE. - - Contact Information: - James P. Ketrenos <ipw2100-admin@linux.intel.com> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -******************************************************************************/ -#include <linux/compiler.h> -#include <linux/errno.h> -#include <linux/if_arp.h> -#include <linux/in6.h> -#include <linux/in.h> -#include <linux/ip.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/netdevice.h> -#include <linux/proc_fs.h> -#include <linux/skbuff.h> -#include <linux/slab.h> -#include <linux/tcp.h> -#include <linux/types.h> -#include <linux/wireless.h> -#include <linux/etherdevice.h> -#include <asm/uaccess.h> - -#include <net/ieee80211.h> - -int ieee80211_is_valid_channel(struct ieee80211_device *ieee, u8 channel) -{ - int i; - - /* Driver needs to initialize the geography map before using - * these helper functions */ - if (ieee->geo.bg_channels == 0 && ieee->geo.a_channels == 0) - return 0; - - if (ieee->freq_band & IEEE80211_24GHZ_BAND) - for (i = 0; i < ieee->geo.bg_channels; i++) - /* NOTE: If G mode is currently supported but - * this is a B only channel, we don't see it - * as valid. */ - if ((ieee->geo.bg[i].channel == channel) && - !(ieee->geo.bg[i].flags & IEEE80211_CH_INVALID) && - (!(ieee->mode & IEEE_G) || - !(ieee->geo.bg[i].flags & IEEE80211_CH_B_ONLY))) - return IEEE80211_24GHZ_BAND; - - if (ieee->freq_band & IEEE80211_52GHZ_BAND) - for (i = 0; i < ieee->geo.a_channels; i++) - if ((ieee->geo.a[i].channel == channel) && - !(ieee->geo.a[i].flags & IEEE80211_CH_INVALID)) - return IEEE80211_52GHZ_BAND; - - return 0; -} - -int ieee80211_channel_to_index(struct ieee80211_device *ieee, u8 channel) -{ - int i; - - /* Driver needs to initialize the geography map before using - * these helper functions */ - if (ieee->geo.bg_channels == 0 && ieee->geo.a_channels == 0) - return -1; - - if (ieee->freq_band & IEEE80211_24GHZ_BAND) - for (i = 0; i < ieee->geo.bg_channels; i++) - if (ieee->geo.bg[i].channel == channel) - return i; - - if (ieee->freq_band & IEEE80211_52GHZ_BAND) - for (i = 0; i < ieee->geo.a_channels; i++) - if (ieee->geo.a[i].channel == channel) - return i; - - return -1; -} - -u32 ieee80211_channel_to_freq(struct ieee80211_device * ieee, u8 channel) -{ - const struct ieee80211_channel * ch; - - /* Driver needs to initialize the geography map before using - * these helper functions */ - if (ieee->geo.bg_channels == 0 && ieee->geo.a_channels == 0) - return 0; - - ch = ieee80211_get_channel(ieee, channel); - if (!ch->channel) - return 0; - return ch->freq; -} - -u8 ieee80211_freq_to_channel(struct ieee80211_device * ieee, u32 freq) -{ - int i; - - /* Driver needs to initialize the geography map before using - * these helper functions */ - if (ieee->geo.bg_channels == 0 && ieee->geo.a_channels == 0) - return 0; - - freq /= 100000; - - if (ieee->freq_band & IEEE80211_24GHZ_BAND) - for (i = 0; i < ieee->geo.bg_channels; i++) - if (ieee->geo.bg[i].freq == freq) - return ieee->geo.bg[i].channel; - - if (ieee->freq_band & IEEE80211_52GHZ_BAND) - for (i = 0; i < ieee->geo.a_channels; i++) - if (ieee->geo.a[i].freq == freq) - return ieee->geo.a[i].channel; - - return 0; -} - -int ieee80211_set_geo(struct ieee80211_device *ieee, - const struct ieee80211_geo *geo) -{ - memcpy(ieee->geo.name, geo->name, 3); - ieee->geo.name[3] = '\0'; - ieee->geo.bg_channels = geo->bg_channels; - ieee->geo.a_channels = geo->a_channels; - memcpy(ieee->geo.bg, geo->bg, geo->bg_channels * - sizeof(struct ieee80211_channel)); - memcpy(ieee->geo.a, geo->a, ieee->geo.a_channels * - sizeof(struct ieee80211_channel)); - return 0; -} - -const struct ieee80211_geo *ieee80211_get_geo(struct ieee80211_device *ieee) -{ - return &ieee->geo; -} - -u8 ieee80211_get_channel_flags(struct ieee80211_device * ieee, u8 channel) -{ - int index = ieee80211_channel_to_index(ieee, channel); - - if (index == -1) - return IEEE80211_CH_INVALID; - - if (channel <= IEEE80211_24GHZ_CHANNELS) - return ieee->geo.bg[index].flags; - - return ieee->geo.a[index].flags; -} - -static const struct ieee80211_channel bad_channel = { - .channel = 0, - .flags = IEEE80211_CH_INVALID, - .max_power = 0, -}; - -const struct ieee80211_channel *ieee80211_get_channel(struct ieee80211_device - *ieee, u8 channel) -{ - int index = ieee80211_channel_to_index(ieee, channel); - - if (index == -1) - return &bad_channel; - - if (channel <= IEEE80211_24GHZ_CHANNELS) - return &ieee->geo.bg[index]; - - return &ieee->geo.a[index]; -} - -EXPORT_SYMBOL(ieee80211_get_channel); -EXPORT_SYMBOL(ieee80211_get_channel_flags); -EXPORT_SYMBOL(ieee80211_is_valid_channel); -EXPORT_SYMBOL(ieee80211_freq_to_channel); -EXPORT_SYMBOL(ieee80211_channel_to_freq); -EXPORT_SYMBOL(ieee80211_channel_to_index); -EXPORT_SYMBOL(ieee80211_set_geo); -EXPORT_SYMBOL(ieee80211_get_geo); diff --git a/net/ieee80211/ieee80211_module.c b/net/ieee80211/ieee80211_module.c deleted file mode 100644 index 949772a5a7d..00000000000 --- a/net/ieee80211/ieee80211_module.c +++ /dev/null @@ -1,338 +0,0 @@ -/******************************************************************************* - - Copyright(c) 2004-2005 Intel Corporation. All rights reserved. - - Portions of this file are based on the WEP enablement code provided by the - Host AP project hostap-drivers v0.1.3 - Copyright (c) 2001-2002, SSH Communications Security Corp and Jouni Malinen - <j@w1.fi> - Copyright (c) 2002-2003, Jouni Malinen <j@w1.fi> - - This program is free software; you can redistribute it and/or modify it - under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., 59 - Temple Place - Suite 330, Boston, MA 02111-1307, USA. - - The full GNU General Public License is included in this distribution in the - file called LICENSE. - - Contact Information: - James P. Ketrenos <ipw2100-admin@linux.intel.com> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#include <linux/compiler.h> -#include <linux/errno.h> -#include <linux/if_arp.h> -#include <linux/in6.h> -#include <linux/in.h> -#include <linux/ip.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/netdevice.h> -#include <linux/proc_fs.h> -#include <linux/skbuff.h> -#include <linux/slab.h> -#include <linux/tcp.h> -#include <linux/types.h> -#include <linux/wireless.h> -#include <linux/etherdevice.h> -#include <asm/uaccess.h> -#include <net/net_namespace.h> -#include <net/arp.h> - -#include <net/ieee80211.h> - -#define DRV_DESCRIPTION "802.11 data/management/control stack" -#define DRV_NAME "ieee80211" -#define DRV_VERSION IEEE80211_VERSION -#define DRV_COPYRIGHT "Copyright (C) 2004-2005 Intel Corporation <jketreno@linux.intel.com>" - -MODULE_VERSION(DRV_VERSION); -MODULE_DESCRIPTION(DRV_DESCRIPTION); -MODULE_AUTHOR(DRV_COPYRIGHT); -MODULE_LICENSE("GPL"); - -static int ieee80211_networks_allocate(struct ieee80211_device *ieee) -{ - if (ieee->networks) - return 0; - - ieee->networks = - kzalloc(MAX_NETWORK_COUNT * sizeof(struct ieee80211_network), - GFP_KERNEL); - if (!ieee->networks) { - printk(KERN_WARNING "%s: Out of memory allocating beacons\n", - ieee->dev->name); - return -ENOMEM; - } - - return 0; -} - -void ieee80211_network_reset(struct ieee80211_network *network) -{ - if (!network) - return; - - if (network->ibss_dfs) { - kfree(network->ibss_dfs); - network->ibss_dfs = NULL; - } -} - -static inline void ieee80211_networks_free(struct ieee80211_device *ieee) -{ - int i; - - if (!ieee->networks) - return; - - for (i = 0; i < MAX_NETWORK_COUNT; i++) - if (ieee->networks[i].ibss_dfs) - kfree(ieee->networks[i].ibss_dfs); - - kfree(ieee->networks); - ieee->networks = NULL; -} - -static void ieee80211_networks_initialize(struct ieee80211_device *ieee) -{ - int i; - - INIT_LIST_HEAD(&ieee->network_free_list); - INIT_LIST_HEAD(&ieee->network_list); - for (i = 0; i < MAX_NETWORK_COUNT; i++) - list_add_tail(&ieee->networks[i].list, - &ieee->network_free_list); -} - -static int ieee80211_change_mtu(struct net_device *dev, int new_mtu) -{ - if ((new_mtu < 68) || (new_mtu > IEEE80211_DATA_LEN)) - return -EINVAL; - dev->mtu = new_mtu; - return 0; -} - -static struct net_device_stats *ieee80211_generic_get_stats( - struct net_device *dev) -{ - struct ieee80211_device *ieee = netdev_priv(dev); - return &ieee->stats; -} - -struct net_device *alloc_ieee80211(int sizeof_priv) -{ - struct ieee80211_device *ieee; - struct net_device *dev; - int err; - - IEEE80211_DEBUG_INFO("Initializing...\n"); - - dev = alloc_etherdev(sizeof(struct ieee80211_device) + sizeof_priv); - if (!dev) { - IEEE80211_ERROR("Unable to allocate network device.\n"); - goto failed; - } - ieee = netdev_priv(dev); - dev->hard_start_xmit = ieee80211_xmit; - dev->change_mtu = ieee80211_change_mtu; - - /* Drivers are free to override this if the generic implementation - * does not meet their needs. */ - dev->get_stats = ieee80211_generic_get_stats; - - ieee->dev = dev; - - err = ieee80211_networks_allocate(ieee); - if (err) { - IEEE80211_ERROR("Unable to allocate beacon storage: %d\n", err); - goto failed_free_netdev; - } - ieee80211_networks_initialize(ieee); - - /* Default fragmentation threshold is maximum payload size */ - ieee->fts = DEFAULT_FTS; - ieee->rts = DEFAULT_FTS; - ieee->scan_age = DEFAULT_MAX_SCAN_AGE; - ieee->open_wep = 1; - - /* Default to enabling full open WEP with host based encrypt/decrypt */ - ieee->host_encrypt = 1; - ieee->host_decrypt = 1; - ieee->host_mc_decrypt = 1; - - /* Host fragementation in Open mode. Default is enabled. - * Note: host fragmentation is always enabled if host encryption - * is enabled. For cards can do hardware encryption, they must do - * hardware fragmentation as well. So we don't need a variable - * like host_enc_frag. */ - ieee->host_open_frag = 1; - ieee->ieee802_1x = 1; /* Default to supporting 802.1x */ - - INIT_LIST_HEAD(&ieee->crypt_deinit_list); - setup_timer(&ieee->crypt_deinit_timer, ieee80211_crypt_deinit_handler, - (unsigned long)ieee); - ieee->crypt_quiesced = 0; - - spin_lock_init(&ieee->lock); - - ieee->wpa_enabled = 0; - ieee->drop_unencrypted = 0; - ieee->privacy_invoked = 0; - - return dev; - -failed_free_netdev: - free_netdev(dev); -failed: - return NULL; -} - -void free_ieee80211(struct net_device *dev) -{ - struct ieee80211_device *ieee = netdev_priv(dev); - - int i; - - ieee80211_crypt_quiescing(ieee); - del_timer_sync(&ieee->crypt_deinit_timer); - ieee80211_crypt_deinit_entries(ieee, 1); - - for (i = 0; i < WEP_KEYS; i++) { - struct ieee80211_crypt_data *crypt = ieee->crypt[i]; - if (crypt) { - if (crypt->ops) { - crypt->ops->deinit(crypt->priv); - module_put(crypt->ops->owner); - } - kfree(crypt); - ieee->crypt[i] = NULL; - } - } - - ieee80211_networks_free(ieee); - free_netdev(dev); -} - -#ifdef CONFIG_IEEE80211_DEBUG - -static int debug = 0; -u32 ieee80211_debug_level = 0; -EXPORT_SYMBOL_GPL(ieee80211_debug_level); -static struct proc_dir_entry *ieee80211_proc = NULL; - -static int show_debug_level(char *page, char **start, off_t offset, - int count, int *eof, void *data) -{ - return snprintf(page, count, "0x%08X\n", ieee80211_debug_level); -} - -static int store_debug_level(struct file *file, const char __user * buffer, - unsigned long count, void *data) -{ - char buf[] = "0x00000000\n"; - unsigned long len = min((unsigned long)sizeof(buf) - 1, count); - unsigned long val; - - if (copy_from_user(buf, buffer, len)) - return count; - buf[len] = 0; - if (sscanf(buf, "%li", &val) != 1) - printk(KERN_INFO DRV_NAME - ": %s is not in hex or decimal form.\n", buf); - else - ieee80211_debug_level = val; - - return strnlen(buf, len); -} -#endif /* CONFIG_IEEE80211_DEBUG */ - -static int __init ieee80211_init(void) -{ -#ifdef CONFIG_IEEE80211_DEBUG - struct proc_dir_entry *e; - - ieee80211_debug_level = debug; - ieee80211_proc = proc_mkdir(DRV_NAME, init_net.proc_net); - if (ieee80211_proc == NULL) { - IEEE80211_ERROR("Unable to create " DRV_NAME - " proc directory\n"); - return -EIO; - } - e = create_proc_entry("debug_level", S_IFREG | S_IRUGO | S_IWUSR, - ieee80211_proc); - if (!e) { - remove_proc_entry(DRV_NAME, init_net.proc_net); - ieee80211_proc = NULL; - return -EIO; - } - e->read_proc = show_debug_level; - e->write_proc = store_debug_level; - e->data = NULL; -#endif /* CONFIG_IEEE80211_DEBUG */ - - printk(KERN_INFO DRV_NAME ": " DRV_DESCRIPTION ", " DRV_VERSION "\n"); - printk(KERN_INFO DRV_NAME ": " DRV_COPYRIGHT "\n"); - - return 0; -} - -static void __exit ieee80211_exit(void) -{ -#ifdef CONFIG_IEEE80211_DEBUG - if (ieee80211_proc) { - remove_proc_entry("debug_level", ieee80211_proc); - remove_proc_entry(DRV_NAME, init_net.proc_net); - ieee80211_proc = NULL; - } -#endif /* CONFIG_IEEE80211_DEBUG */ -} - -#ifdef CONFIG_IEEE80211_DEBUG -#include <linux/moduleparam.h> -module_param(debug, int, 0444); -MODULE_PARM_DESC(debug, "debug output mask"); -#endif /* CONFIG_IEEE80211_DEBUG */ - -module_exit(ieee80211_exit); -module_init(ieee80211_init); - -const char *escape_essid(const char *essid, u8 essid_len) -{ - static char escaped[IW_ESSID_MAX_SIZE * 2 + 1]; - const char *s = essid; - char *d = escaped; - - if (ieee80211_is_empty_essid(essid, essid_len)) { - memcpy(escaped, "<hidden>", sizeof("<hidden>")); - return escaped; - } - - essid_len = min(essid_len, (u8) IW_ESSID_MAX_SIZE); - while (essid_len--) { - if (*s == '\0') { - *d++ = '\\'; - *d++ = '0'; - s++; - } else { - *d++ = *s++; - } - } - *d = '\0'; - return escaped; -} - -EXPORT_SYMBOL(alloc_ieee80211); -EXPORT_SYMBOL(free_ieee80211); -EXPORT_SYMBOL(escape_essid); diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c deleted file mode 100644 index 69dbc342a46..00000000000 --- a/net/ieee80211/ieee80211_rx.c +++ /dev/null @@ -1,1831 +0,0 @@ -/* - * Original code based Host AP (software wireless LAN access point) driver - * for Intersil Prism2/2.5/3 - hostap.o module, common routines - * - * Copyright (c) 2001-2002, SSH Communications Security Corp and Jouni Malinen - * <j@w1.fi> - * Copyright (c) 2002-2003, Jouni Malinen <j@w1.fi> - * Copyright (c) 2004-2005, Intel Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. See README and COPYING for - * more details. - */ - -#include <linux/compiler.h> -#include <linux/errno.h> -#include <linux/if_arp.h> -#include <linux/in6.h> -#include <linux/in.h> -#include <linux/ip.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/netdevice.h> -#include <linux/proc_fs.h> -#include <linux/skbuff.h> -#include <linux/slab.h> -#include <linux/tcp.h> -#include <linux/types.h> -#include <linux/wireless.h> -#include <linux/etherdevice.h> -#include <asm/uaccess.h> -#include <linux/ctype.h> - -#include <net/ieee80211.h> - -static void ieee80211_monitor_rx(struct ieee80211_device *ieee, - struct sk_buff *skb, - struct ieee80211_rx_stats *rx_stats) -{ - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; - u16 fc = le16_to_cpu(hdr->frame_ctl); - - skb->dev = ieee->dev; - skb_reset_mac_header(skb); - skb_pull(skb, ieee80211_get_hdrlen(fc)); - skb->pkt_type = PACKET_OTHERHOST; - skb->protocol = htons(ETH_P_80211_RAW); - memset(skb->cb, 0, sizeof(skb->cb)); - netif_rx(skb); -} - -/* Called only as a tasklet (software IRQ) */ -static struct ieee80211_frag_entry *ieee80211_frag_cache_find(struct - ieee80211_device - *ieee, - unsigned int seq, - unsigned int frag, - u8 * src, - u8 * dst) -{ - struct ieee80211_frag_entry *entry; - int i; - - for (i = 0; i < IEEE80211_FRAG_CACHE_LEN; i++) { - entry = &ieee->frag_cache[i]; - if (entry->skb != NULL && - time_after(jiffies, entry->first_frag_time + 2 * HZ)) { - IEEE80211_DEBUG_FRAG("expiring fragment cache entry " - "seq=%u last_frag=%u\n", - entry->seq, entry->last_frag); - dev_kfree_skb_any(entry->skb); - entry->skb = NULL; - } - - if (entry->skb != NULL && entry->seq == seq && - (entry->last_frag + 1 == frag || frag == -1) && - !compare_ether_addr(entry->src_addr, src) && - !compare_ether_addr(entry->dst_addr, dst)) - return entry; - } - - return NULL; -} - -/* Called only as a tasklet (software IRQ) */ -static struct sk_buff *ieee80211_frag_cache_get(struct ieee80211_device *ieee, - struct ieee80211_hdr_4addr *hdr) -{ - struct sk_buff *skb = NULL; - u16 sc; - unsigned int frag, seq; - struct ieee80211_frag_entry *entry; - - sc = le16_to_cpu(hdr->seq_ctl); - frag = WLAN_GET_SEQ_FRAG(sc); - seq = WLAN_GET_SEQ_SEQ(sc); - - if (frag == 0) { - /* Reserve enough space to fit maximum frame length */ - skb = dev_alloc_skb(ieee->dev->mtu + - sizeof(struct ieee80211_hdr_4addr) + - 8 /* LLC */ + - 2 /* alignment */ + - 8 /* WEP */ + ETH_ALEN /* WDS */ ); - if (skb == NULL) - return NULL; - - entry = &ieee->frag_cache[ieee->frag_next_idx]; - ieee->frag_next_idx++; - if (ieee->frag_next_idx >= IEEE80211_FRAG_CACHE_LEN) - ieee->frag_next_idx = 0; - - if (entry->skb != NULL) - dev_kfree_skb_any(entry->skb); - - entry->first_frag_time = jiffies; - entry->seq = seq; - entry->last_frag = frag; - entry->skb = skb; - memcpy(entry->src_addr, hdr->addr2, ETH_ALEN); - memcpy(entry->dst_addr, hdr->addr1, ETH_ALEN); - } else { - /* received a fragment of a frame for which the head fragment - * should have already been received */ - entry = ieee80211_frag_cache_find(ieee, seq, frag, hdr->addr2, - hdr->addr1); - if (entry != NULL) { - entry->last_frag = frag; - skb = entry->skb; - } - } - - return skb; -} - -/* Called only as a tasklet (software IRQ) */ -static int ieee80211_frag_cache_invalidate(struct ieee80211_device *ieee, - struct ieee80211_hdr_4addr *hdr) -{ - u16 sc; - unsigned int seq; - struct ieee80211_frag_entry *entry; - - sc = le16_to_cpu(hdr->seq_ctl); - seq = WLAN_GET_SEQ_SEQ(sc); - - entry = ieee80211_frag_cache_find(ieee, seq, -1, hdr->addr2, - hdr->addr1); - - if (entry == NULL) { - IEEE80211_DEBUG_FRAG("could not invalidate fragment cache " - "entry (seq=%u)\n", seq); - return -1; - } - - entry->skb = NULL; - return 0; -} - -#ifdef NOT_YET -/* ieee80211_rx_frame_mgtmt - * - * Responsible for handling management control frames - * - * Called by ieee80211_rx */ -static int -ieee80211_rx_frame_mgmt(struct ieee80211_device *ieee, struct sk_buff *skb, - struct ieee80211_rx_stats *rx_stats, u16 type, - u16 stype) -{ - if (ieee->iw_mode == IW_MODE_MASTER) { - printk(KERN_DEBUG "%s: Master mode not yet suppported.\n", - ieee->dev->name); - return 0; -/* - hostap_update_sta_ps(ieee, (struct hostap_ieee80211_hdr_4addr *) - skb->data);*/ - } - - if (ieee->hostapd && type == WLAN_FC_TYPE_MGMT) { - if (stype == WLAN_FC_STYPE_BEACON && - ieee->iw_mode == IW_MODE_MASTER) { - struct sk_buff *skb2; - /* Process beacon frames also in kernel driver to - * update STA(AP) table statistics */ - skb2 = skb_clone(skb, GFP_ATOMIC); - if (skb2) - hostap_rx(skb2->dev, skb2, rx_stats); - } - - /* send management frames to the user space daemon for - * processing */ - ieee->apdevstats.rx_packets++; - ieee->apdevstats.rx_bytes += skb->len; - prism2_rx_80211(ieee->apdev, skb, rx_stats, PRISM2_RX_MGMT); - return 0; - } - - if (ieee->iw_mode == IW_MODE_MASTER) { - if (type != WLAN_FC_TYPE_MGMT && type != WLAN_FC_TYPE_CTRL) { - printk(KERN_DEBUG "%s: unknown management frame " - "(type=0x%02x, stype=0x%02x) dropped\n", - skb->dev->name, type, stype); - return -1; - } - - hostap_rx(skb->dev, skb, rx_stats); - return 0; - } - - printk(KERN_DEBUG "%s: hostap_rx_frame_mgmt: management frame " - "received in non-Host AP mode\n", skb->dev->name); - return -1; -} -#endif - -/* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */ -/* Ethernet-II snap header (RFC1042 for most EtherTypes) */ -static unsigned char rfc1042_header[] = { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00 }; - -/* Bridge-Tunnel header (for EtherTypes ETH_P_AARP and ETH_P_IPX) */ -static unsigned char bridge_tunnel_header[] = - { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 }; -/* No encapsulation header if EtherType < 0x600 (=length) */ - -/* Called by ieee80211_rx_frame_decrypt */ -static int ieee80211_is_eapol_frame(struct ieee80211_device *ieee, - struct sk_buff *skb) -{ - struct net_device *dev = ieee->dev; - u16 fc, ethertype; - struct ieee80211_hdr_3addr *hdr; - u8 *pos; - - if (skb->len < 24) - return 0; - - hdr = (struct ieee80211_hdr_3addr *)skb->data; - fc = le16_to_cpu(hdr->frame_ctl); - - /* check that the frame is unicast frame to us */ - if ((fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) == - IEEE80211_FCTL_TODS && - !compare_ether_addr(hdr->addr1, dev->dev_addr) && - !compare_ether_addr(hdr->addr3, dev->dev_addr)) { - /* ToDS frame with own addr BSSID and DA */ - } else if ((fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) == - IEEE80211_FCTL_FROMDS && - !compare_ether_addr(hdr->addr1, dev->dev_addr)) { - /* FromDS frame with own addr as DA */ - } else - return 0; - - if (skb->len < 24 + 8) - return 0; - - /* check for port access entity Ethernet type */ - pos = skb->data + 24; - ethertype = (pos[6] << 8) | pos[7]; - if (ethertype == ETH_P_PAE) - return 1; - - return 0; -} - -/* Called only as a tasklet (software IRQ), by ieee80211_rx */ -static int -ieee80211_rx_frame_decrypt(struct ieee80211_device *ieee, struct sk_buff *skb, - struct ieee80211_crypt_data *crypt) -{ - struct ieee80211_hdr_3addr *hdr; - int res, hdrlen; - - if (crypt == NULL || crypt->ops->decrypt_mpdu == NULL) - return 0; - - hdr = (struct ieee80211_hdr_3addr *)skb->data; - hdrlen = ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_ctl)); - - atomic_inc(&crypt->refcnt); - res = crypt->ops->decrypt_mpdu(skb, hdrlen, crypt->priv); - atomic_dec(&crypt->refcnt); - if (res < 0) { - IEEE80211_DEBUG_DROP("decryption failed (SA=" MAC_FMT - ") res=%d\n", - hdr->addr2[0], hdr->addr2[1], - hdr->addr2[2], hdr->addr2[3], - hdr->addr2[4], hdr->addr2[5], - res); - if (res == -2) - IEEE80211_DEBUG_DROP("Decryption failed ICV " - "mismatch (key %d)\n", - skb->data[hdrlen + 3] >> 6); - ieee->ieee_stats.rx_discards_undecryptable++; - return -1; - } - - return res; -} - -/* Called only as a tasklet (software IRQ), by ieee80211_rx */ -static int -ieee80211_rx_frame_decrypt_msdu(struct ieee80211_device *ieee, - struct sk_buff *skb, int keyidx, - struct ieee80211_crypt_data *crypt) -{ - struct ieee80211_hdr_3addr *hdr; - int res, hdrlen; - - if (crypt == NULL || crypt->ops->decrypt_msdu == NULL) - return 0; - - hdr = (struct ieee80211_hdr_3addr *)skb->data; - hdrlen = ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_ctl)); - - atomic_inc(&crypt->refcnt); - res = crypt->ops->decrypt_msdu(skb, keyidx, hdrlen, crypt->priv); - atomic_dec(&crypt->refcnt); - if (res < 0) { - printk(KERN_DEBUG "%s: MSDU decryption/MIC verification failed" - " (SA=" MAC_FMT " keyidx=%d)\n", - ieee->dev->name, - hdr->addr2[0], hdr->addr2[1], - hdr->addr2[2], hdr->addr2[3], - hdr->addr2[4], hdr->addr2[5], - keyidx); - return -1; - } - - return 0; -} - -/* All received frames are sent to this function. @skb contains the frame in - * IEEE 802.11 format, i.e., in the format it was sent over air. - * This function is called only as a tasklet (software IRQ). */ -int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, - struct ieee80211_rx_stats *rx_stats) -{ - struct net_device *dev = ieee->dev; - struct ieee80211_hdr_4addr *hdr; - size_t hdrlen; - u16 fc, type, stype, sc; - struct net_device_stats *stats; - unsigned int frag; - u8 *payload; - u16 ethertype; -#ifdef NOT_YET - struct net_device *wds = NULL; - struct sk_buff *skb2 = NULL; - struct net_device *wds = NULL; - int frame_authorized = 0; - int from_assoc_ap = 0; - void *sta = NULL; -#endif - u8 dst[ETH_ALEN]; - u8 src[ETH_ALEN]; - struct ieee80211_crypt_data *crypt = NULL; - int keyidx = 0; - int can_be_decrypted = 0; - DECLARE_MAC_BUF(mac); - - hdr = (struct ieee80211_hdr_4addr *)skb->data; - stats = &ieee->stats; - - if (skb->len < 10) { - printk(KERN_INFO "%s: SKB length < 10\n", dev->name); - goto rx_dropped; - } - - fc = le16_to_cpu(hdr->frame_ctl); - type = WLAN_FC_GET_TYPE(fc); - stype = WLAN_FC_GET_STYPE(fc); - sc = le16_to_cpu(hdr->seq_ctl); - frag = WLAN_GET_SEQ_FRAG(sc); - hdrlen = ieee80211_get_hdrlen(fc); - - if (skb->len < hdrlen) { - printk(KERN_INFO "%s: invalid SKB length %d\n", - dev->name, skb->len); - goto rx_dropped; - } - - /* Put this code here so that we avoid duplicating it in all - * Rx paths. - Jean II */ -#ifdef CONFIG_WIRELESS_EXT -#ifdef IW_WIRELESS_SPY /* defined in iw_handler.h */ - /* If spy monitoring on */ - if (ieee->spy_data.spy_number > 0) { - struct iw_quality wstats; - - wstats.updated = 0; - if (rx_stats->mask & IEEE80211_STATMASK_RSSI) { - wstats.level = rx_stats->signal; - wstats.updated |= IW_QUAL_LEVEL_UPDATED; - } else - wstats.updated |= IW_QUAL_LEVEL_INVALID; - - if (rx_stats->mask & IEEE80211_STATMASK_NOISE) { - wstats.noise = rx_stats->noise; - wstats.updated |= IW_QUAL_NOISE_UPDATED; - } else - wstats.updated |= IW_QUAL_NOISE_INVALID; - - if (rx_stats->mask & IEEE80211_STATMASK_SIGNAL) { - wstats.qual = rx_stats->signal; - wstats.updated |= IW_QUAL_QUAL_UPDATED; - } else - wstats.updated |= IW_QUAL_QUAL_INVALID; - - /* Update spy records */ - wireless_spy_update(ieee->dev, hdr->addr2, &wstats); - } -#endif /* IW_WIRELESS_SPY */ -#endif /* CONFIG_WIRELESS_EXT */ - -#ifdef NOT_YET - hostap_update_rx_stats(local->ap, hdr, rx_stats); -#endif - - if (ieee->iw_mode == IW_MODE_MONITOR) { - stats->rx_packets++; - stats->rx_bytes += skb->len; - ieee80211_monitor_rx(ieee, skb, rx_stats); - return 1; - } - - can_be_decrypted = (is_multicast_ether_addr(hdr->addr1) || - is_broadcast_ether_addr(hdr->addr2)) ? - ieee->host_mc_decrypt : ieee->host_decrypt; - - if (can_be_decrypted) { - if (skb->len >= hdrlen + 3) { - /* Top two-bits of byte 3 are the key index */ - keyidx = skb->data[hdrlen + 3] >> 6; - } - - /* ieee->crypt[] is WEP_KEY (4) in length. Given that keyidx - * is only allowed 2-bits of storage, no value of keyidx can - * be provided via above code that would result in keyidx - * being out of range */ - crypt = ieee->crypt[keyidx]; - -#ifdef NOT_YET - sta = NULL; - - /* Use station specific key to override default keys if the - * receiver address is a unicast address ("individual RA"). If - * bcrx_sta_key parameter is set, station specific key is used - * even with broad/multicast targets (this is against IEEE - * 802.11, but makes it easier to use different keys with - * stations that do not support WEP key mapping). */ - - if (!(hdr->addr1[0] & 0x01) || local->bcrx_sta_key) - (void)hostap_handle_sta_crypto(local, hdr, &crypt, - &sta); -#endif - - /* allow NULL decrypt to indicate an station specific override - * for default encryption */ - if (crypt && (crypt->ops == NULL || - crypt->ops->decrypt_mpdu == NULL)) - crypt = NULL; - - if (!crypt && (fc & IEEE80211_FCTL_PROTECTED)) { - /* This seems to be triggered by some (multicast?) - * frames from other than current BSS, so just drop the - * frames silently instead of filling system log with - * these reports. */ - IEEE80211_DEBUG_DROP("Decryption failed (not set)" - " (SA=" MAC_FMT ")\n", - hdr->addr2[0], hdr->addr2[1], - hdr->addr2[2], hdr->addr2[3], - hdr->addr2[4], hdr->addr2[5]); - ieee->ieee_stats.rx_discards_undecryptable++; - goto rx_dropped; - } - } -#ifdef NOT_YET - if (type != WLAN_FC_TYPE_DATA) { - if (type == WLAN_FC_TYPE_MGMT && stype == WLAN_FC_STYPE_AUTH && - fc & IEEE80211_FCTL_PROTECTED && ieee->host_decrypt && - (keyidx = hostap_rx_frame_decrypt(ieee, skb, crypt)) < 0) { - printk(KERN_DEBUG "%s: failed to decrypt mgmt::auth " - "from " MAC_FMT "\n", dev->name, - hdr->addr2[0], hdr->addr2[1], - hdr->addr2[2], hdr->addr2[3], - hdr->addr2[4], hdr->addr2[5]); - /* TODO: could inform hostapd about this so that it - * could send auth failure report */ - goto rx_dropped; - } - - if (ieee80211_rx_frame_mgmt(ieee, skb, rx_stats, type, stype)) - goto rx_dropped; - else - goto rx_exit; - } -#endif - /* drop duplicate 802.11 retransmissions (IEEE 802.11 Chap. 9.29) */ - if (sc == ieee->prev_seq_ctl) - goto rx_dropped; - else - ieee->prev_seq_ctl = sc; - - /* Data frame - extract src/dst addresses */ - if (skb->len < IEEE80211_3ADDR_LEN) - goto rx_dropped; - - switch (fc & (IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS)) { - case IEEE80211_FCTL_FROMDS: - memcpy(dst, hdr->addr1, ETH_ALEN); - memcpy(src, hdr->addr3, ETH_ALEN); - break; - case IEEE80211_FCTL_TODS: - memcpy(dst, hdr->addr3, ETH_ALEN); - memcpy(src, hdr->addr2, ETH_ALEN); - break; - case IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS: - if (skb->len < IEEE80211_4ADDR_LEN) - goto rx_dropped; - memcpy(dst, hdr->addr3, ETH_ALEN); - memcpy(src, hdr->addr4, ETH_ALEN); - break; - case 0: - memcpy(dst, hdr->addr1, ETH_ALEN); - memcpy(src, hdr->addr2, ETH_ALEN); - break; - } - -#ifdef NOT_YET - if (hostap_rx_frame_wds(ieee, hdr, fc, &wds)) - goto rx_dropped; - if (wds) { - skb->dev = dev = wds; - stats = hostap_get_stats(dev); - } - - if (ieee->iw_mode == IW_MODE_MASTER && !wds && - (fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) == - IEEE80211_FCTL_FROMDS && ieee->stadev - && !compare_ether_addr(hdr->addr2, ieee->assoc_ap_addr)) { - /* Frame from BSSID of the AP for which we are a client */ - skb->dev = dev = ieee->stadev; - stats = hostap_get_stats(dev); - from_assoc_ap = 1; - } -#endif - - dev->last_rx = jiffies; - -#ifdef NOT_YET - if ((ieee->iw_mode == IW_MODE_MASTER || - ieee->iw_mode == IW_MODE_REPEAT) && !from_assoc_ap) { - switch (hostap_handle_sta_rx(ieee, dev, skb, rx_stats, - wds != NULL)) { - case AP_RX_CONTINUE_NOT_AUTHORIZED: - frame_authorized = 0; - break; - case AP_RX_CONTINUE: - frame_authorized = 1; - break; - case AP_RX_DROP: - goto rx_dropped; - case AP_RX_EXIT: - goto rx_exit; - } - } -#endif - - /* Nullfunc frames may have PS-bit set, so they must be passed to - * hostap_handle_sta_rx() before being dropped here. */ - - stype &= ~IEEE80211_STYPE_QOS_DATA; - - if (stype != IEEE80211_STYPE_DATA && - stype != IEEE80211_STYPE_DATA_CFACK && - stype != IEEE80211_STYPE_DATA_CFPOLL && - stype != IEEE80211_STYPE_DATA_CFACKPOLL) { - if (stype != IEEE80211_STYPE_NULLFUNC) - IEEE80211_DEBUG_DROP("RX: dropped data frame " - "with no data (type=0x%02x, " - "subtype=0x%02x, len=%d)\n", - type, stype, skb->len); - goto rx_dropped; - } - - /* skb: hdr + (possibly fragmented, possibly encrypted) payload */ - - if ((fc & IEEE80211_FCTL_PROTECTED) && can_be_decrypted && - (keyidx = ieee80211_rx_frame_decrypt(ieee, skb, crypt)) < 0) - goto rx_dropped; - - hdr = (struct ieee80211_hdr_4addr *)skb->data; - - /* skb: hdr + (possibly fragmented) plaintext payload */ - // PR: FIXME: hostap has additional conditions in the "if" below: - // ieee->host_decrypt && (fc & IEEE80211_FCTL_PROTECTED) && - if ((frag != 0) || (fc & IEEE80211_FCTL_MOREFRAGS)) { - int flen; - struct sk_buff *frag_skb = ieee80211_frag_cache_get(ieee, hdr); - IEEE80211_DEBUG_FRAG("Rx Fragment received (%u)\n", frag); - - if (!frag_skb) { - IEEE80211_DEBUG(IEEE80211_DL_RX | IEEE80211_DL_FRAG, - "Rx cannot get skb from fragment " - "cache (morefrag=%d seq=%u frag=%u)\n", - (fc & IEEE80211_FCTL_MOREFRAGS) != 0, - WLAN_GET_SEQ_SEQ(sc), frag); - goto rx_dropped; - } - - flen = skb->len; - if (frag != 0) - flen -= hdrlen; - - if (frag_skb->tail + flen > frag_skb->end) { - printk(KERN_WARNING "%s: host decrypted and " - "reassembled frame did not fit skb\n", - dev->name); - ieee80211_frag_cache_invalidate(ieee, hdr); - goto rx_dropped; - } - - if (frag == 0) { - /* copy first fragment (including full headers) into - * beginning of the fragment cache skb */ - skb_copy_from_linear_data(skb, skb_put(frag_skb, flen), flen); - } else { - /* append frame payload to the end of the fragment - * cache skb */ - skb_copy_from_linear_data_offset(skb, hdrlen, - skb_put(frag_skb, flen), flen); - } - dev_kfree_skb_any(skb); - skb = NULL; - - if (fc & IEEE80211_FCTL_MOREFRAGS) { - /* more fragments expected - leave the skb in fragment - * cache for now; it will be delivered to upper layers - * after all fragments have been received */ - goto rx_exit; - } - - /* this was the last fragment and the frame will be - * delivered, so remove skb from fragment cache */ - skb = frag_skb; - hdr = (struct ieee80211_hdr_4addr *)skb->data; - ieee80211_frag_cache_invalidate(ieee, hdr); - } - - /* skb: hdr + (possible reassembled) full MSDU payload; possibly still - * encrypted/authenticated */ - if ((fc & IEEE80211_FCTL_PROTECTED) && can_be_decrypted && - ieee80211_rx_frame_decrypt_msdu(ieee, skb, keyidx, crypt)) - goto rx_dropped; - - hdr = (struct ieee80211_hdr_4addr *)skb->data; - if (crypt && !(fc & IEEE80211_FCTL_PROTECTED) && !ieee->open_wep) { - if ( /*ieee->ieee802_1x && */ - ieee80211_is_eapol_frame(ieee, skb)) { - /* pass unencrypted EAPOL frames even if encryption is - * configured */ - } else { - IEEE80211_DEBUG_DROP("encryption configured, but RX " - "frame not encrypted (SA=" - MAC_FMT ")\n", - hdr->addr2[0], hdr->addr2[1], - hdr->addr2[2], hdr->addr2[3], - hdr->addr2[4], hdr->addr2[5]); - goto rx_dropped; - } - } - - if (crypt && !(fc & IEEE80211_FCTL_PROTECTED) && !ieee->open_wep && - !ieee80211_is_eapol_frame(ieee, skb)) { - IEEE80211_DEBUG_DROP("dropped unencrypted RX data " - "frame from " MAC_FMT - " (drop_unencrypted=1)\n", - hdr->addr2[0], hdr->addr2[1], - hdr->addr2[2], hdr->addr2[3], - hdr->addr2[4], hdr->addr2[5]); - goto rx_dropped; - } - - /* If the frame was decrypted in hardware, we may need to strip off - * any security data (IV, ICV, etc) that was left behind */ - if (!can_be_decrypted && (fc & IEEE80211_FCTL_PROTECTED) && - ieee->host_strip_iv_icv) { - int trimlen = 0; - - /* Top two-bits of byte 3 are the key index */ - if (skb->len >= hdrlen + 3) - keyidx = skb->data[hdrlen + 3] >> 6; - - /* To strip off any security data which appears before the - * payload, we simply increase hdrlen (as the header gets - * chopped off immediately below). For the security data which - * appears after the payload, we use skb_trim. */ - - switch (ieee->sec.encode_alg[keyidx]) { - case SEC_ALG_WEP: - /* 4 byte IV */ - hdrlen += 4; - /* 4 byte ICV */ - trimlen = 4; - break; - case SEC_ALG_TKIP: - /* 4 byte IV, 4 byte ExtIV */ - hdrlen += 8; - /* 8 byte MIC, 4 byte ICV */ - trimlen = 12; - break; - case SEC_ALG_CCMP: - /* 8 byte CCMP header */ - hdrlen += 8; - /* 8 byte MIC */ - trimlen = 8; - break; - } - - if (skb->len < trimlen) - goto rx_dropped; - - __skb_trim(skb, skb->len - trimlen); - - if (skb->len < hdrlen) - goto rx_dropped; - } - - /* skb: hdr + (possible reassembled) full plaintext payload */ - - payload = skb->data + hdrlen; - ethertype = (payload[6] << 8) | payload[7]; - -#ifdef NOT_YET - /* If IEEE 802.1X is used, check whether the port is authorized to send - * the received frame. */ - if (ieee->ieee802_1x && ieee->iw_mode == IW_MODE_MASTER) { - if (ethertype == ETH_P_PAE) { - printk(KERN_DEBUG "%s: RX: IEEE 802.1X frame\n", - dev->name); - if (ieee->hostapd && ieee->apdev) { - /* Send IEEE 802.1X frames to the user - * space daemon for processing */ - prism2_rx_80211(ieee->apdev, skb, rx_stats, - PRISM2_RX_MGMT); - ieee->apdevstats.rx_packets++; - ieee->apdevstats.rx_bytes += skb->len; - goto rx_exit; - } - } else if (!frame_authorized) { - printk(KERN_DEBUG "%s: dropped frame from " - "unauthorized port (IEEE 802.1X): " - "ethertype=0x%04x\n", dev->name, ethertype); - goto rx_dropped; - } - } -#endif - - /* convert hdr + possible LLC headers into Ethernet header */ - if (skb->len - hdrlen >= 8 && - ((memcmp(payload, rfc1042_header, SNAP_SIZE) == 0 && - ethertype != ETH_P_AARP && ethertype != ETH_P_IPX) || - memcmp(payload, bridge_tunnel_header, SNAP_SIZE) == 0)) { - /* remove RFC1042 or Bridge-Tunnel encapsulation and - * replace EtherType */ - skb_pull(skb, hdrlen + SNAP_SIZE); - memcpy(skb_push(skb, ETH_ALEN), src, ETH_ALEN); - memcpy(skb_push(skb, ETH_ALEN), dst, ETH_ALEN); - } else { - __be16 len; - /* Leave Ethernet header part of hdr and full payload */ - skb_pull(skb, hdrlen); - len = htons(skb->len); - memcpy(skb_push(skb, 2), &len, 2); - memcpy(skb_push(skb, ETH_ALEN), src, ETH_ALEN); - memcpy(skb_push(skb, ETH_ALEN), dst, ETH_ALEN); - } - -#ifdef NOT_YET - if (wds && ((fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) == - IEEE80211_FCTL_TODS) && skb->len >= ETH_HLEN + ETH_ALEN) { - /* Non-standard frame: get addr4 from its bogus location after - * the payload */ - skb_copy_to_linear_data_offset(skb, ETH_ALEN, - skb->data + skb->len - ETH_ALEN, - ETH_ALEN); - skb_trim(skb, skb->len - ETH_ALEN); - } -#endif - - stats->rx_packets++; - stats->rx_bytes += skb->len; - -#ifdef NOT_YET - if (ieee->iw_mode == IW_MODE_MASTER && !wds && ieee->ap->bridge_packets) { - if (dst[0] & 0x01) { - /* copy multicast frame both to the higher layers and - * to the wireless media */ - ieee->ap->bridged_multicast++; - skb2 = skb_clone(skb, GFP_ATOMIC); - if (skb2 == NULL) - printk(KERN_DEBUG "%s: skb_clone failed for " - "multicast frame\n", dev->name); - } else if (hostap_is_sta_assoc(ieee->ap, dst)) { - /* send frame directly to the associated STA using - * wireless media and not passing to higher layers */ - ieee->ap->bridged_unicast++; - skb2 = skb; - skb = NULL; - } - } - - if (skb2 != NULL) { - /* send to wireless media */ - skb2->dev = dev; - skb2->protocol = htons(ETH_P_802_3); - skb_reset_mac_header(skb2); - skb_reset_network_header(skb2); - /* skb2->network_header += ETH_HLEN; */ - dev_queue_xmit(skb2); - } -#endif - - if (skb) { - skb->protocol = eth_type_trans(skb, dev); - memset(skb->cb, 0, sizeof(skb->cb)); - skb->ip_summed = CHECKSUM_NONE; /* 802.11 crc not sufficient */ - if (netif_rx(skb) == NET_RX_DROP) { - /* netif_rx always succeeds, but it might drop - * the packet. If it drops the packet, we log that - * in our stats. */ - IEEE80211_DEBUG_DROP - ("RX: netif_rx dropped the packet\n"); - stats->rx_dropped++; - } - } - - rx_exit: -#ifdef NOT_YET - if (sta) - hostap_handle_sta_release(sta); -#endif - return 1; - - rx_dropped: - stats->rx_dropped++; - - /* Returning 0 indicates to caller that we have not handled the SKB-- - * so it is still allocated and can be used again by underlying - * hardware as a DMA target */ - return 0; -} - -/* Filter out unrelated packets, call ieee80211_rx[_mgt] - * This function takes over the skb, it should not be used again after calling - * this function. */ -void ieee80211_rx_any(struct ieee80211_device *ieee, - struct sk_buff *skb, struct ieee80211_rx_stats *stats) -{ - struct ieee80211_hdr_4addr *hdr; - int is_packet_for_us; - u16 fc; - - if (ieee->iw_mode == IW_MODE_MONITOR) { - if (!ieee80211_rx(ieee, skb, stats)) - dev_kfree_skb_irq(skb); - return; - } - - if (skb->len < sizeof(struct ieee80211_hdr)) - goto drop_free; - - hdr = (struct ieee80211_hdr_4addr *)skb->data; - fc = le16_to_cpu(hdr->frame_ctl); - - if ((fc & IEEE80211_FCTL_VERS) != 0) - goto drop_free; - - switch (fc & IEEE80211_FCTL_FTYPE) { - case IEEE80211_FTYPE_MGMT: - if (skb->len < sizeof(struct ieee80211_hdr_3addr)) - goto drop_free; - ieee80211_rx_mgt(ieee, hdr, stats); - dev_kfree_skb_irq(skb); - return; - case IEEE80211_FTYPE_DATA: - break; - case IEEE80211_FTYPE_CTL: - return; - default: - return; - } - - is_packet_for_us = 0; - switch (ieee->iw_mode) { - case IW_MODE_ADHOC: - /* our BSS and not from/to DS */ - if (memcmp(hdr->addr3, ieee->bssid, ETH_ALEN) == 0) - if ((fc & (IEEE80211_FCTL_TODS+IEEE80211_FCTL_FROMDS)) == 0) { - /* promisc: get all */ - if (ieee->dev->flags & IFF_PROMISC) - is_packet_for_us = 1; - /* to us */ - else if (memcmp(hdr->addr1, ieee->dev->dev_addr, ETH_ALEN) == 0) - is_packet_for_us = 1; - /* mcast */ - else if (is_multicast_ether_addr(hdr->addr1)) - is_packet_for_us = 1; - } - break; - case IW_MODE_INFRA: - /* our BSS (== from our AP) and from DS */ - if (memcmp(hdr->addr2, ieee->bssid, ETH_ALEN) == 0) - if ((fc & (IEEE80211_FCTL_TODS+IEEE80211_FCTL_FROMDS)) == IEEE80211_FCTL_FROMDS) { - /* promisc: get all */ - if (ieee->dev->flags & IFF_PROMISC) - is_packet_for_us = 1; - /* to us */ - else if (memcmp(hdr->addr1, ieee->dev->dev_addr, ETH_ALEN) == 0) - is_packet_for_us = 1; - /* mcast */ - else if (is_multicast_ether_addr(hdr->addr1)) { - /* not our own packet bcasted from AP */ - if (memcmp(hdr->addr3, ieee->dev->dev_addr, ETH_ALEN)) - is_packet_for_us = 1; - } - } - break; - default: - /* ? */ - break; - } - - if (is_packet_for_us) - if (!ieee80211_rx(ieee, skb, stats)) - dev_kfree_skb_irq(skb); - return; - -drop_free: - dev_kfree_skb_irq(skb); - ieee->stats.rx_dropped++; - return; -} - -#define MGMT_FRAME_FIXED_PART_LENGTH 0x24 - -static u8 qos_oui[QOS_OUI_LEN] = { 0x00, 0x50, 0xF2 }; - -/* -* Make ther structure we read from the beacon packet has -* the right values -*/ -static int ieee80211_verify_qos_info(struct ieee80211_qos_information_element - *info_element, int sub_type) -{ - - if (info_element->qui_subtype != sub_type) - return -1; - if (memcmp(info_element->qui, qos_oui, QOS_OUI_LEN)) - return -1; - if (info_element->qui_type != QOS_OUI_TYPE) - return -1; - if (info_element->version != QOS_VERSION_1) - return -1; - - return 0; -} - -/* - * Parse a QoS parameter element - */ -static int ieee80211_read_qos_param_element(struct ieee80211_qos_parameter_info - *element_param, struct ieee80211_info_element - *info_element) -{ - int ret = 0; - u16 size = sizeof(struct ieee80211_qos_parameter_info) - 2; - - if ((info_element == NULL) || (element_param == NULL)) - return -1; - - if (info_element->id == QOS_ELEMENT_ID && info_element->len == size) { - memcpy(element_param->info_element.qui, info_element->data, - info_element->len); - element_param->info_element.elementID = info_element->id; - element_param->info_element.length = info_element->len; - } else - ret = -1; - if (ret == 0) - ret = ieee80211_verify_qos_info(&element_param->info_element, - QOS_OUI_PARAM_SUB_TYPE); - return ret; -} - -/* - * Parse a QoS information element - */ -static int ieee80211_read_qos_info_element(struct - ieee80211_qos_information_element - *element_info, struct ieee80211_info_element - *info_element) -{ - int ret = 0; - u16 size = sizeof(struct ieee80211_qos_information_element) - 2; - - if (element_info == NULL) - return -1; - if (info_element == NULL) - return -1; - - if ((info_element->id == QOS_ELEMENT_ID) && (info_element->len == size)) { - memcpy(element_info->qui, info_element->data, - info_element->len); - element_info->elementID = info_element->id; - element_info->length = info_element->len; - } else - ret = -1; - - if (ret == 0) - ret = ieee80211_verify_qos_info(element_info, - QOS_OUI_INFO_SUB_TYPE); - return ret; -} - -/* - * Write QoS parameters from the ac parameters. - */ -static int ieee80211_qos_convert_ac_to_parameters(struct - ieee80211_qos_parameter_info - *param_elm, struct - ieee80211_qos_parameters - *qos_param) -{ - int rc = 0; - int i; - struct ieee80211_qos_ac_parameter *ac_params; - u32 txop; - u8 cw_min; - u8 cw_max; - - for (i = 0; i < QOS_QUEUE_NUM; i++) { - ac_params = &(param_elm->ac_params_record[i]); - - qos_param->aifs[i] = (ac_params->aci_aifsn) & 0x0F; - qos_param->aifs[i] -= (qos_param->aifs[i] < 2) ? 0 : 2; - - cw_min = ac_params->ecw_min_max & 0x0F; - qos_param->cw_min[i] = cpu_to_le16((1 << cw_min) - 1); - - cw_max = (ac_params->ecw_min_max & 0xF0) >> 4; - qos_param->cw_max[i] = cpu_to_le16((1 << cw_max) - 1); - - qos_param->flag[i] = - (ac_params->aci_aifsn & 0x10) ? 0x01 : 0x00; - - txop = le16_to_cpu(ac_params->tx_op_limit) * 32; - qos_param->tx_op_limit[i] = cpu_to_le16(txop); - } - return rc; -} - -/* - * we have a generic data element which it may contain QoS information or - * parameters element. check the information element length to decide - * which type to read - */ -static int ieee80211_parse_qos_info_param_IE(struct ieee80211_info_element - *info_element, - struct ieee80211_network *network) -{ - int rc = 0; - struct ieee80211_qos_parameters *qos_param = NULL; - struct ieee80211_qos_information_element qos_info_element; - - rc = ieee80211_read_qos_info_element(&qos_info_element, info_element); - - if (rc == 0) { - network->qos_data.param_count = qos_info_element.ac_info & 0x0F; - network->flags |= NETWORK_HAS_QOS_INFORMATION; - } else { - struct ieee80211_qos_parameter_info param_element; - - rc = ieee80211_read_qos_param_element(¶m_element, - info_element); - if (rc == 0) { - qos_param = &(network->qos_data.parameters); - ieee80211_qos_convert_ac_to_parameters(¶m_element, - qos_param); - network->flags |= NETWORK_HAS_QOS_PARAMETERS; - network->qos_data.param_count = - param_element.info_element.ac_info & 0x0F; - } - } - - if (rc == 0) { - IEEE80211_DEBUG_QOS("QoS is supported\n"); - network->qos_data.supported = 1; - } - return rc; -} - -#ifdef CONFIG_IEEE80211_DEBUG -#define MFIE_STRING(x) case MFIE_TYPE_ ##x: return #x - -static const char *get_info_element_string(u16 id) -{ - switch (id) { - MFIE_STRING(SSID); - MFIE_STRING(RATES); - MFIE_STRING(FH_SET); - MFIE_STRING(DS_SET); - MFIE_STRING(CF_SET); - MFIE_STRING(TIM); - MFIE_STRING(IBSS_SET); - MFIE_STRING(COUNTRY); - MFIE_STRING(HOP_PARAMS); - MFIE_STRING(HOP_TABLE); - MFIE_STRING(REQUEST); - MFIE_STRING(CHALLENGE); - MFIE_STRING(POWER_CONSTRAINT); - MFIE_STRING(POWER_CAPABILITY); - MFIE_STRING(TPC_REQUEST); - MFIE_STRING(TPC_REPORT); - MFIE_STRING(SUPP_CHANNELS); - MFIE_STRING(CSA); - MFIE_STRING(MEASURE_REQUEST); - MFIE_STRING(MEASURE_REPORT); - MFIE_STRING(QUIET); - MFIE_STRING(IBSS_DFS); - MFIE_STRING(ERP_INFO); - MFIE_STRING(RSN); - MFIE_STRING(RATES_EX); - MFIE_STRING(GENERIC); - MFIE_STRING(QOS_PARAMETER); - default: - return "UNKNOWN"; - } -} -#endif - -static int ieee80211_parse_info_param(struct ieee80211_info_element - *info_element, u16 length, - struct ieee80211_network *network) -{ - u8 i; -#ifdef CONFIG_IEEE80211_DEBUG - char rates_str[64]; - char *p; -#endif - - while (length >= sizeof(*info_element)) { - if (sizeof(*info_element) + info_element->len > length) { - IEEE80211_DEBUG_MGMT("Info elem: parse failed: " - "info_element->len + 2 > left : " - "info_element->len+2=%zd left=%d, id=%d.\n", - info_element->len + - sizeof(*info_element), - length, info_element->id); - /* We stop processing but don't return an error here - * because some misbehaviour APs break this rule. ie. - * Orinoco AP1000. */ - break; - } - - switch (info_element->id) { - case MFIE_TYPE_SSID: - if (ieee80211_is_empty_essid(info_element->data, - info_element->len)) { - network->flags |= NETWORK_EMPTY_ESSID; - break; - } - - network->ssid_len = min(info_element->len, - (u8) IW_ESSID_MAX_SIZE); - memcpy(network->ssid, info_element->data, - network->ssid_len); - if (network->ssid_len < IW_ESSID_MAX_SIZE) - memset(network->ssid + network->ssid_len, 0, - IW_ESSID_MAX_SIZE - network->ssid_len); - - IEEE80211_DEBUG_MGMT("MFIE_TYPE_SSID: '%s' len=%d.\n", - network->ssid, network->ssid_len); - break; - - case MFIE_TYPE_RATES: -#ifdef CONFIG_IEEE80211_DEBUG - p = rates_str; -#endif - network->rates_len = min(info_element->len, - MAX_RATES_LENGTH); - for (i = 0; i < network->rates_len; i++) { - network->rates[i] = info_element->data[i]; -#ifdef CONFIG_IEEE80211_DEBUG - p += snprintf(p, sizeof(rates_str) - - (p - rates_str), "%02X ", - network->rates[i]); -#endif - if (ieee80211_is_ofdm_rate - (info_element->data[i])) { - network->flags |= NETWORK_HAS_OFDM; - if (info_element->data[i] & - IEEE80211_BASIC_RATE_MASK) - network->flags &= - ~NETWORK_HAS_CCK; - } - } - - IEEE80211_DEBUG_MGMT("MFIE_TYPE_RATES: '%s' (%d)\n", - rates_str, network->rates_len); - break; - - case MFIE_TYPE_RATES_EX: -#ifdef CONFIG_IEEE80211_DEBUG - p = rates_str; -#endif - network->rates_ex_len = min(info_element->len, - MAX_RATES_EX_LENGTH); - for (i = 0; i < network->rates_ex_len; i++) { - network->rates_ex[i] = info_element->data[i]; -#ifdef CONFIG_IEEE80211_DEBUG - p += snprintf(p, sizeof(rates_str) - - (p - rates_str), "%02X ", - network->rates[i]); -#endif - if (ieee80211_is_ofdm_rate - (info_element->data[i])) { - network->flags |= NETWORK_HAS_OFDM; - if (info_element->data[i] & - IEEE80211_BASIC_RATE_MASK) - network->flags &= - ~NETWORK_HAS_CCK; - } - } - - IEEE80211_DEBUG_MGMT("MFIE_TYPE_RATES_EX: '%s' (%d)\n", - rates_str, network->rates_ex_len); - break; - - case MFIE_TYPE_DS_SET: - IEEE80211_DEBUG_MGMT("MFIE_TYPE_DS_SET: %d\n", - info_element->data[0]); - network->channel = info_element->data[0]; - break; - - case MFIE_TYPE_FH_SET: - IEEE80211_DEBUG_MGMT("MFIE_TYPE_FH_SET: ignored\n"); - break; - - case MFIE_TYPE_CF_SET: - IEEE80211_DEBUG_MGMT("MFIE_TYPE_CF_SET: ignored\n"); - break; - - case MFIE_TYPE_TIM: - network->tim.tim_count = info_element->data[0]; - network->tim.tim_period = info_element->data[1]; - IEEE80211_DEBUG_MGMT("MFIE_TYPE_TIM: partially ignored\n"); - break; - - case MFIE_TYPE_ERP_INFO: - network->erp_value = info_element->data[0]; - network->flags |= NETWORK_HAS_ERP_VALUE; - IEEE80211_DEBUG_MGMT("MFIE_TYPE_ERP_SET: %d\n", - network->erp_value); - break; - - case MFIE_TYPE_IBSS_SET: - network->atim_window = info_element->data[0]; - IEEE80211_DEBUG_MGMT("MFIE_TYPE_IBSS_SET: %d\n", - network->atim_window); - break; - - case MFIE_TYPE_CHALLENGE: - IEEE80211_DEBUG_MGMT("MFIE_TYPE_CHALLENGE: ignored\n"); - break; - - case MFIE_TYPE_GENERIC: - IEEE80211_DEBUG_MGMT("MFIE_TYPE_GENERIC: %d bytes\n", - info_element->len); - if (!ieee80211_parse_qos_info_param_IE(info_element, - network)) - break; - - if (info_element->len >= 4 && - info_element->data[0] == 0x00 && - info_element->data[1] == 0x50 && - info_element->data[2] == 0xf2 && - info_element->data[3] == 0x01) { - network->wpa_ie_len = min(info_element->len + 2, - MAX_WPA_IE_LEN); - memcpy(network->wpa_ie, info_element, - network->wpa_ie_len); - } - break; - - case MFIE_TYPE_RSN: - IEEE80211_DEBUG_MGMT("MFIE_TYPE_RSN: %d bytes\n", - info_element->len); - network->rsn_ie_len = min(info_element->len + 2, - MAX_WPA_IE_LEN); - memcpy(network->rsn_ie, info_element, - network->rsn_ie_len); - break; - - case MFIE_TYPE_QOS_PARAMETER: - printk(KERN_ERR - "QoS Error need to parse QOS_PARAMETER IE\n"); - break; - /* 802.11h */ - case MFIE_TYPE_POWER_CONSTRAINT: - network->power_constraint = info_element->data[0]; - network->flags |= NETWORK_HAS_POWER_CONSTRAINT; - break; - - case MFIE_TYPE_CSA: - network->power_constraint = info_element->data[0]; - network->flags |= NETWORK_HAS_CSA; - break; - - case MFIE_TYPE_QUIET: - network->quiet.count = info_element->data[0]; - network->quiet.period = info_element->data[1]; - network->quiet.duration = info_element->data[2]; - network->quiet.offset = info_element->data[3]; - network->flags |= NETWORK_HAS_QUIET; - break; - - case MFIE_TYPE_IBSS_DFS: - if (network->ibss_dfs) - break; - network->ibss_dfs = kmemdup(info_element->data, - info_element->len, - GFP_ATOMIC); - if (!network->ibss_dfs) - return 1; - network->flags |= NETWORK_HAS_IBSS_DFS; - break; - - case MFIE_TYPE_TPC_REPORT: - network->tpc_report.transmit_power = - info_element->data[0]; - network->tpc_report.link_margin = info_element->data[1]; - network->flags |= NETWORK_HAS_TPC_REPORT; - break; - - default: - IEEE80211_DEBUG_MGMT - ("Unsupported info element: %s (%d)\n", - get_info_element_string(info_element->id), - info_element->id); - break; - } - - length -= sizeof(*info_element) + info_element->len; - info_element = - (struct ieee80211_info_element *)&info_element-> - data[info_element->len]; - } - - return 0; -} - -static int ieee80211_handle_assoc_resp(struct ieee80211_device *ieee, struct ieee80211_assoc_response - *frame, struct ieee80211_rx_stats *stats) -{ - struct ieee80211_network network_resp = { - .ibss_dfs = NULL, - }; - struct ieee80211_network *network = &network_resp; - struct net_device *dev = ieee->dev; - - network->flags = 0; - network->qos_data.active = 0; - network->qos_data.supported = 0; - network->qos_data.param_count = 0; - network->qos_data.old_param_count = 0; - - //network->atim_window = le16_to_cpu(frame->aid) & (0x3FFF); - network->atim_window = le16_to_cpu(frame->aid); - network->listen_interval = le16_to_cpu(frame->status); - memcpy(network->bssid, frame->header.addr3, ETH_ALEN); - network->capability = le16_to_cpu(frame->capability); - network->last_scanned = jiffies; - network->rates_len = network->rates_ex_len = 0; - network->last_associate = 0; - network->ssid_len = 0; - network->erp_value = - (network->capability & WLAN_CAPABILITY_IBSS) ? 0x3 : 0x0; - - if (stats->freq == IEEE80211_52GHZ_BAND) { - /* for A band (No DS info) */ - network->channel = stats->received_channel; - } else - network->flags |= NETWORK_HAS_CCK; - - network->wpa_ie_len = 0; - network->rsn_ie_len = 0; - - if (ieee80211_parse_info_param - (frame->info_element, stats->len - sizeof(*frame), network)) - return 1; - - network->mode = 0; - if (stats->freq == IEEE80211_52GHZ_BAND) - network->mode = IEEE_A; - else { - if (network->flags & NETWORK_HAS_OFDM) - network->mode |= IEEE_G; - if (network->flags & NETWORK_HAS_CCK) - network->mode |= IEEE_B; - } - - if (ieee80211_is_empty_essid(network->ssid, network->ssid_len)) - network->flags |= NETWORK_EMPTY_ESSID; - - memcpy(&network->stats, stats, sizeof(network->stats)); - - if (ieee->handle_assoc_response != NULL) - ieee->handle_assoc_response(dev, frame, network); - - return 0; -} - -/***************************************************/ - -static int ieee80211_network_init(struct ieee80211_device *ieee, struct ieee80211_probe_response - *beacon, - struct ieee80211_network *network, - struct ieee80211_rx_stats *stats) -{ - DECLARE_MAC_BUF(mac); - - network->qos_data.active = 0; - network->qos_data.supported = 0; - network->qos_data.param_count = 0; - network->qos_data.old_param_count = 0; - - /* Pull out fixed field data */ - memcpy(network->bssid, beacon->header.addr3, ETH_ALEN); - network->capability = le16_to_cpu(beacon->capability); - network->last_scanned = jiffies; - network->time_stamp[0] = le32_to_cpu(beacon->time_stamp[0]); - network->time_stamp[1] = le32_to_cpu(beacon->time_stamp[1]); - network->beacon_interval = le16_to_cpu(beacon->beacon_interval); - /* Where to pull this? beacon->listen_interval; */ - network->listen_interval = 0x0A; - network->rates_len = network->rates_ex_len = 0; - network->last_associate = 0; - network->ssid_len = 0; - network->flags = 0; - network->atim_window = 0; - network->erp_value = (network->capability & WLAN_CAPABILITY_IBSS) ? - 0x3 : 0x0; - - if (stats->freq == IEEE80211_52GHZ_BAND) { - /* for A band (No DS info) */ - network->channel = stats->received_channel; - } else - network->flags |= NETWORK_HAS_CCK; - - network->wpa_ie_len = 0; - network->rsn_ie_len = 0; - - if (ieee80211_parse_info_param - (beacon->info_element, stats->len - sizeof(*beacon), network)) - return 1; - - network->mode = 0; - if (stats->freq == IEEE80211_52GHZ_BAND) - network->mode = IEEE_A; - else { - if (network->flags & NETWORK_HAS_OFDM) - network->mode |= IEEE_G; - if (network->flags & NETWORK_HAS_CCK) - network->mode |= IEEE_B; - } - - if (network->mode == 0) { - IEEE80211_DEBUG_SCAN("Filtered out '%s (%s)' " - "network.\n", - escape_essid(network->ssid, - network->ssid_len), - print_mac(mac, network->bssid)); - return 1; - } - - if (ieee80211_is_empty_essid(network->ssid, network->ssid_len)) - network->flags |= NETWORK_EMPTY_ESSID; - - memcpy(&network->stats, stats, sizeof(network->stats)); - - return 0; -} - -static inline int is_same_network(struct ieee80211_network *src, - struct ieee80211_network *dst) -{ - /* A network is only a duplicate if the channel, BSSID, and ESSID - * all match. We treat all <hidden> with the same BSSID and channel - * as one network */ - return ((src->ssid_len == dst->ssid_len) && - (src->channel == dst->channel) && - !compare_ether_addr(src->bssid, dst->bssid) && - !memcmp(src->ssid, dst->ssid, src->ssid_len)); -} - -static void update_network(struct ieee80211_network *dst, - struct ieee80211_network *src) -{ - int qos_active; - u8 old_param; - DECLARE_MAC_BUF(mac); - - ieee80211_network_reset(dst); - dst->ibss_dfs = src->ibss_dfs; - - /* We only update the statistics if they were created by receiving - * the network information on the actual channel the network is on. - * - * This keeps beacons received on neighbor channels from bringing - * down the signal level of an AP. */ - if (dst->channel == src->stats.received_channel) - memcpy(&dst->stats, &src->stats, - sizeof(struct ieee80211_rx_stats)); - else - IEEE80211_DEBUG_SCAN("Network %s info received " - "off channel (%d vs. %d)\n", print_mac(mac, src->bssid), - dst->channel, src->stats.received_channel); - - dst->capability = src->capability; - memcpy(dst->rates, src->rates, src->rates_len); - dst->rates_len = src->rates_len; - memcpy(dst->rates_ex, src->rates_ex, src->rates_ex_len); - dst->rates_ex_len = src->rates_ex_len; - - dst->mode = src->mode; - dst->flags = src->flags; - dst->time_stamp[0] = src->time_stamp[0]; - dst->time_stamp[1] = src->time_stamp[1]; - - dst->beacon_interval = src->beacon_interval; - dst->listen_interval = src->listen_interval; - dst->atim_window = src->atim_window; - dst->erp_value = src->erp_value; - dst->tim = src->tim; - - memcpy(dst->wpa_ie, src->wpa_ie, src->wpa_ie_len); - dst->wpa_ie_len = src->wpa_ie_len; - memcpy(dst->rsn_ie, src->rsn_ie, src->rsn_ie_len); - dst->rsn_ie_len = src->rsn_ie_len; - - dst->last_scanned = jiffies; - qos_active = src->qos_data.active; - old_param = dst->qos_data.old_param_count; - if (dst->flags & NETWORK_HAS_QOS_MASK) - memcpy(&dst->qos_data, &src->qos_data, - sizeof(struct ieee80211_qos_data)); - else { - dst->qos_data.supported = src->qos_data.supported; - dst->qos_data.param_count = src->qos_data.param_count; - } - - if (dst->qos_data.supported == 1) { - if (dst->ssid_len) - IEEE80211_DEBUG_QOS - ("QoS the network %s is QoS supported\n", - dst->ssid); - else - IEEE80211_DEBUG_QOS - ("QoS the network is QoS supported\n"); - } - dst->qos_data.active = qos_active; - dst->qos_data.old_param_count = old_param; - - /* dst->last_associate is not overwritten */ -} - -static inline int is_beacon(__le16 fc) -{ - return (WLAN_FC_GET_STYPE(le16_to_cpu(fc)) == IEEE80211_STYPE_BEACON); -} - -static void ieee80211_process_probe_response(struct ieee80211_device - *ieee, struct - ieee80211_probe_response - *beacon, struct ieee80211_rx_stats - *stats) -{ - struct net_device *dev = ieee->dev; - struct ieee80211_network network = { - .ibss_dfs = NULL, - }; - struct ieee80211_network *target; - struct ieee80211_network *oldest = NULL; -#ifdef CONFIG_IEEE80211_DEBUG - struct ieee80211_info_element *info_element = beacon->info_element; -#endif - unsigned long flags; - DECLARE_MAC_BUF(mac); - - IEEE80211_DEBUG_SCAN("'%s' (%s" - "): %c%c%c%c %c%c%c%c-%c%c%c%c %c%c%c%c\n", - escape_essid(info_element->data, info_element->len), - print_mac(mac, beacon->header.addr3), - (beacon->capability & cpu_to_le16(1 << 0xf)) ? '1' : '0', - (beacon->capability & cpu_to_le16(1 << 0xe)) ? '1' : '0', - (beacon->capability & cpu_to_le16(1 << 0xd)) ? '1' : '0', - (beacon->capability & cpu_to_le16(1 << 0xc)) ? '1' : '0', - (beacon->capability & cpu_to_le16(1 << 0xb)) ? '1' : '0', - (beacon->capability & cpu_to_le16(1 << 0xa)) ? '1' : '0', - (beacon->capability & cpu_to_le16(1 << 0x9)) ? '1' : '0', - (beacon->capability & cpu_to_le16(1 << 0x8)) ? '1' : '0', - (beacon->capability & cpu_to_le16(1 << 0x7)) ? '1' : '0', - (beacon->capability & cpu_to_le16(1 << 0x6)) ? '1' : '0', - (beacon->capability & cpu_to_le16(1 << 0x5)) ? '1' : '0', - (beacon->capability & cpu_to_le16(1 << 0x4)) ? '1' : '0', - (beacon->capability & cpu_to_le16(1 << 0x3)) ? '1' : '0', - (beacon->capability & cpu_to_le16(1 << 0x2)) ? '1' : '0', - (beacon->capability & cpu_to_le16(1 << 0x1)) ? '1' : '0', - (beacon->capability & cpu_to_le16(1 << 0x0)) ? '1' : '0'); - - if (ieee80211_network_init(ieee, beacon, &network, stats)) { - IEEE80211_DEBUG_SCAN("Dropped '%s' (%s) via %s.\n", - escape_essid(info_element->data, - info_element->len), - print_mac(mac, beacon->header.addr3), - is_beacon(beacon->header.frame_ctl) ? - "BEACON" : "PROBE RESPONSE"); - return; - } - - /* The network parsed correctly -- so now we scan our known networks - * to see if we can find it in our list. - * - * NOTE: This search is definitely not optimized. Once its doing - * the "right thing" we'll optimize it for efficiency if - * necessary */ - - /* Search for this entry in the list and update it if it is - * already there. */ - - spin_lock_irqsave(&ieee->lock, flags); - - list_for_each_entry(target, &ieee->network_list, list) { - if (is_same_network(target, &network)) - break; - - if ((oldest == NULL) || - (target->last_scanned < oldest->last_scanned)) - oldest = target; - } - - /* If we didn't find a match, then get a new network slot to initialize - * with this beacon's information */ - if (&target->list == &ieee->network_list) { - if (list_empty(&ieee->network_free_list)) { - /* If there are no more slots, expire the oldest */ - list_del(&oldest->list); - target = oldest; - IEEE80211_DEBUG_SCAN("Expired '%s' (%s) from " - "network list.\n", - escape_essid(target->ssid, - target->ssid_len), - print_mac(mac, target->bssid)); - ieee80211_network_reset(target); - } else { - /* Otherwise just pull from the free list */ - target = list_entry(ieee->network_free_list.next, - struct ieee80211_network, list); - list_del(ieee->network_free_list.next); - } - -#ifdef CONFIG_IEEE80211_DEBUG - IEEE80211_DEBUG_SCAN("Adding '%s' (%s) via %s.\n", - escape_essid(network.ssid, - network.ssid_len), - print_mac(mac, network.bssid), - is_beacon(beacon->header.frame_ctl) ? - "BEACON" : "PROBE RESPONSE"); -#endif - memcpy(target, &network, sizeof(*target)); - network.ibss_dfs = NULL; - list_add_tail(&target->list, &ieee->network_list); - } else { - IEEE80211_DEBUG_SCAN("Updating '%s' (%s) via %s.\n", - escape_essid(target->ssid, - target->ssid_len), - print_mac(mac, target->bssid), - is_beacon(beacon->header.frame_ctl) ? - "BEACON" : "PROBE RESPONSE"); - update_network(target, &network); - network.ibss_dfs = NULL; - } - - spin_unlock_irqrestore(&ieee->lock, flags); - - if (is_beacon(beacon->header.frame_ctl)) { - if (ieee->handle_beacon != NULL) - ieee->handle_beacon(dev, beacon, target); - } else { - if (ieee->handle_probe_response != NULL) - ieee->handle_probe_response(dev, beacon, target); - } -} - -void ieee80211_rx_mgt(struct ieee80211_device *ieee, - struct ieee80211_hdr_4addr *header, - struct ieee80211_rx_stats *stats) -{ - switch (WLAN_FC_GET_STYPE(le16_to_cpu(header->frame_ctl))) { - case IEEE80211_STYPE_ASSOC_RESP: - IEEE80211_DEBUG_MGMT("received ASSOCIATION RESPONSE (%d)\n", - WLAN_FC_GET_STYPE(le16_to_cpu - (header->frame_ctl))); - ieee80211_handle_assoc_resp(ieee, - (struct ieee80211_assoc_response *) - header, stats); - break; - - case IEEE80211_STYPE_REASSOC_RESP: - IEEE80211_DEBUG_MGMT("received REASSOCIATION RESPONSE (%d)\n", - WLAN_FC_GET_STYPE(le16_to_cpu - (header->frame_ctl))); - break; - - case IEEE80211_STYPE_PROBE_REQ: - IEEE80211_DEBUG_MGMT("received auth (%d)\n", - WLAN_FC_GET_STYPE(le16_to_cpu - (header->frame_ctl))); - - if (ieee->handle_probe_request != NULL) - ieee->handle_probe_request(ieee->dev, - (struct - ieee80211_probe_request *) - header, stats); - break; - - case IEEE80211_STYPE_PROBE_RESP: - IEEE80211_DEBUG_MGMT("received PROBE RESPONSE (%d)\n", - WLAN_FC_GET_STYPE(le16_to_cpu - (header->frame_ctl))); - IEEE80211_DEBUG_SCAN("Probe response\n"); - ieee80211_process_probe_response(ieee, - (struct - ieee80211_probe_response *) - header, stats); - break; - - case IEEE80211_STYPE_BEACON: - IEEE80211_DEBUG_MGMT("received BEACON (%d)\n", - WLAN_FC_GET_STYPE(le16_to_cpu - (header->frame_ctl))); - IEEE80211_DEBUG_SCAN("Beacon\n"); - ieee80211_process_probe_response(ieee, - (struct - ieee80211_probe_response *) - header, stats); - break; - case IEEE80211_STYPE_AUTH: - - IEEE80211_DEBUG_MGMT("received auth (%d)\n", - WLAN_FC_GET_STYPE(le16_to_cpu - (header->frame_ctl))); - - if (ieee->handle_auth != NULL) - ieee->handle_auth(ieee->dev, - (struct ieee80211_auth *)header); - break; - - case IEEE80211_STYPE_DISASSOC: - if (ieee->handle_disassoc != NULL) - ieee->handle_disassoc(ieee->dev, - (struct ieee80211_disassoc *) - header); - break; - - case IEEE80211_STYPE_ACTION: - IEEE80211_DEBUG_MGMT("ACTION\n"); - if (ieee->handle_action) - ieee->handle_action(ieee->dev, - (struct ieee80211_action *) - header, stats); - break; - - case IEEE80211_STYPE_REASSOC_REQ: - IEEE80211_DEBUG_MGMT("received reassoc (%d)\n", - WLAN_FC_GET_STYPE(le16_to_cpu - (header->frame_ctl))); - - IEEE80211_DEBUG_MGMT("%s: IEEE80211_REASSOC_REQ received\n", - ieee->dev->name); - if (ieee->handle_reassoc_request != NULL) - ieee->handle_reassoc_request(ieee->dev, - (struct ieee80211_reassoc_request *) - header); - break; - - case IEEE80211_STYPE_ASSOC_REQ: - IEEE80211_DEBUG_MGMT("received assoc (%d)\n", - WLAN_FC_GET_STYPE(le16_to_cpu - (header->frame_ctl))); - - IEEE80211_DEBUG_MGMT("%s: IEEE80211_ASSOC_REQ received\n", - ieee->dev->name); - if (ieee->handle_assoc_request != NULL) - ieee->handle_assoc_request(ieee->dev); - break; - - case IEEE80211_STYPE_DEAUTH: - IEEE80211_DEBUG_MGMT("DEAUTH\n"); - if (ieee->handle_deauth != NULL) - ieee->handle_deauth(ieee->dev, - (struct ieee80211_deauth *) - header); - break; - default: - IEEE80211_DEBUG_MGMT("received UNKNOWN (%d)\n", - WLAN_FC_GET_STYPE(le16_to_cpu - (header->frame_ctl))); - IEEE80211_DEBUG_MGMT("%s: Unknown management packet: %d\n", - ieee->dev->name, - WLAN_FC_GET_STYPE(le16_to_cpu - (header->frame_ctl))); - break; - } -} - -EXPORT_SYMBOL_GPL(ieee80211_rx_any); -EXPORT_SYMBOL(ieee80211_rx_mgt); -EXPORT_SYMBOL(ieee80211_rx); diff --git a/net/ieee80211/ieee80211_tx.c b/net/ieee80211/ieee80211_tx.c deleted file mode 100644 index d996547f7a6..00000000000 --- a/net/ieee80211/ieee80211_tx.c +++ /dev/null @@ -1,545 +0,0 @@ -/****************************************************************************** - - Copyright(c) 2003 - 2005 Intel Corporation. All rights reserved. - - This program is free software; you can redistribute it and/or modify it - under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., 59 - Temple Place - Suite 330, Boston, MA 02111-1307, USA. - - The full GNU General Public License is included in this distribution in the - file called LICENSE. - - Contact Information: - James P. Ketrenos <ipw2100-admin@linux.intel.com> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -******************************************************************************/ -#include <linux/compiler.h> -#include <linux/errno.h> -#include <linux/if_arp.h> -#include <linux/in6.h> -#include <linux/in.h> -#include <linux/ip.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/netdevice.h> -#include <linux/proc_fs.h> -#include <linux/skbuff.h> -#include <linux/slab.h> -#include <linux/tcp.h> -#include <linux/types.h> -#include <linux/wireless.h> -#include <linux/etherdevice.h> -#include <asm/uaccess.h> - -#include <net/ieee80211.h> - -/* - -802.11 Data Frame - - ,-------------------------------------------------------------------. -Bytes | 2 | 2 | 6 | 6 | 6 | 2 | 0..2312 | 4 | - |------|------|---------|---------|---------|------|---------|------| -Desc. | ctrl | dura | DA/RA | TA | SA | Sequ | Frame | fcs | - | | tion | (BSSID) | | | ence | data | | - `--------------------------------------------------| |------' -Total: 28 non-data bytes `----.----' - | - .- 'Frame data' expands, if WEP enabled, to <----------' - | - V - ,-----------------------. -Bytes | 4 | 0-2296 | 4 | - |-----|-----------|-----| -Desc. | IV | Encrypted | ICV | - | | Packet | | - `-----| |-----' - `-----.-----' - | - .- 'Encrypted Packet' expands to - | - V - ,---------------------------------------------------. -Bytes | 1 | 1 | 1 | 3 | 2 | 0-2304 | - |------|------|---------|----------|------|---------| -Desc. | SNAP | SNAP | Control |Eth Tunnel| Type | IP | - | DSAP | SSAP | | | | Packet | - | 0xAA | 0xAA |0x03 (UI)|0x00-00-F8| | | - `---------------------------------------------------- -Total: 8 non-data bytes - -802.3 Ethernet Data Frame - - ,-----------------------------------------. -Bytes | 6 | 6 | 2 | Variable | 4 | - |-------|-------|------|-----------|------| -Desc. | Dest. | Source| Type | IP Packet | fcs | - | MAC | MAC | | | | - `-----------------------------------------' -Total: 18 non-data bytes - -In the event that fragmentation is required, the incoming payload is split into -N parts of size ieee->fts. The first fragment contains the SNAP header and the -remaining packets are just data. - -If encryption is enabled, each fragment payload size is reduced by enough space -to add the prefix and postfix (IV and ICV totalling 8 bytes in the case of WEP) -So if you have 1500 bytes of payload with ieee->fts set to 500 without -encryption it will take 3 frames. With WEP it will take 4 frames as the -payload of each frame is reduced to 492 bytes. - -* SKB visualization -* -* ,- skb->data -* | -* | ETHERNET HEADER ,-<-- PAYLOAD -* | | 14 bytes from skb->data -* | 2 bytes for Type --> ,T. | (sizeof ethhdr) -* | | | | -* |,-Dest.--. ,--Src.---. | | | -* | 6 bytes| | 6 bytes | | | | -* v | | | | | | -* 0 | v 1 | v | v 2 -* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 -* ^ | ^ | ^ | -* | | | | | | -* | | | | `T' <---- 2 bytes for Type -* | | | | -* | | '---SNAP--' <-------- 6 bytes for SNAP -* | | -* `-IV--' <-------------------- 4 bytes for IV (WEP) -* -* SNAP HEADER -* -*/ - -static u8 P802_1H_OUI[P80211_OUI_LEN] = { 0x00, 0x00, 0xf8 }; -static u8 RFC1042_OUI[P80211_OUI_LEN] = { 0x00, 0x00, 0x00 }; - -static int ieee80211_copy_snap(u8 * data, __be16 h_proto) -{ - struct ieee80211_snap_hdr *snap; - u8 *oui; - - snap = (struct ieee80211_snap_hdr *)data; - snap->dsap = 0xaa; - snap->ssap = 0xaa; - snap->ctrl = 0x03; - - if (h_proto == htons(ETH_P_AARP) || h_proto == htons(ETH_P_IPX)) - oui = P802_1H_OUI; - else - oui = RFC1042_OUI; - snap->oui[0] = oui[0]; - snap->oui[1] = oui[1]; - snap->oui[2] = oui[2]; - - memcpy(data + SNAP_SIZE, &h_proto, sizeof(u16)); - - return SNAP_SIZE + sizeof(u16); -} - -static int ieee80211_encrypt_fragment(struct ieee80211_device *ieee, - struct sk_buff *frag, int hdr_len) -{ - struct ieee80211_crypt_data *crypt = ieee->crypt[ieee->tx_keyidx]; - int res; - - if (crypt == NULL) - return -1; - - /* To encrypt, frame format is: - * IV (4 bytes), clear payload (including SNAP), ICV (4 bytes) */ - atomic_inc(&crypt->refcnt); - res = 0; - if (crypt->ops && crypt->ops->encrypt_mpdu) - res = crypt->ops->encrypt_mpdu(frag, hdr_len, crypt->priv); - - atomic_dec(&crypt->refcnt); - if (res < 0) { - printk(KERN_INFO "%s: Encryption failed: len=%d.\n", - ieee->dev->name, frag->len); - ieee->ieee_stats.tx_discards++; - return -1; - } - - return 0; -} - -void ieee80211_txb_free(struct ieee80211_txb *txb) -{ - int i; - if (unlikely(!txb)) - return; - for (i = 0; i < txb->nr_frags; i++) - if (txb->fragments[i]) - dev_kfree_skb_any(txb->fragments[i]); - kfree(txb); -} - -static struct ieee80211_txb *ieee80211_alloc_txb(int nr_frags, int txb_size, - int headroom, gfp_t gfp_mask) -{ - struct ieee80211_txb *txb; - int i; - txb = kmalloc(sizeof(struct ieee80211_txb) + (sizeof(u8 *) * nr_frags), - gfp_mask); - if (!txb) - return NULL; - - memset(txb, 0, sizeof(struct ieee80211_txb)); - txb->nr_frags = nr_frags; - txb->frag_size = txb_size; - - for (i = 0; i < nr_frags; i++) { - txb->fragments[i] = __dev_alloc_skb(txb_size + headroom, - gfp_mask); - if (unlikely(!txb->fragments[i])) { - i--; - break; - } - skb_reserve(txb->fragments[i], headroom); - } - if (unlikely(i != nr_frags)) { - while (i >= 0) - dev_kfree_skb_any(txb->fragments[i--]); - kfree(txb); - return NULL; - } - return txb; -} - -static int ieee80211_classify(struct sk_buff *skb) -{ - struct ethhdr *eth; - struct iphdr *ip; - - eth = (struct ethhdr *)skb->data; - if (eth->h_proto != htons(ETH_P_IP)) - return 0; - - ip = ip_hdr(skb); - switch (ip->tos & 0xfc) { - case 0x20: - return 2; - case 0x40: - return 1; - case 0x60: - return 3; - case 0x80: - return 4; - case 0xa0: - return 5; - case 0xc0: - return 6; - case 0xe0: - return 7; - default: - return 0; - } -} - -/* Incoming skb is converted to a txb which consists of - * a block of 802.11 fragment packets (stored as skbs) */ -int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) -{ - struct ieee80211_device *ieee = netdev_priv(dev); - struct ieee80211_txb *txb = NULL; - struct ieee80211_hdr_3addrqos *frag_hdr; - int i, bytes_per_frag, nr_frags, bytes_last_frag, frag_size, - rts_required; - unsigned long flags; - struct net_device_stats *stats = &ieee->stats; - int encrypt, host_encrypt, host_encrypt_msdu, host_build_iv; - __be16 ether_type; - int bytes, fc, hdr_len; - struct sk_buff *skb_frag; - struct ieee80211_hdr_3addrqos header = {/* Ensure zero initialized */ - .duration_id = 0, - .seq_ctl = 0, - .qos_ctl = 0 - }; - u8 dest[ETH_ALEN], src[ETH_ALEN]; - struct ieee80211_crypt_data *crypt; - int priority = skb->priority; - int snapped = 0; - - if (ieee->is_queue_full && (*ieee->is_queue_full) (dev, priority)) - return NETDEV_TX_BUSY; - - spin_lock_irqsave(&ieee->lock, flags); - - /* If there is no driver handler to take the TXB, dont' bother - * creating it... */ - if (!ieee->hard_start_xmit) { - printk(KERN_WARNING "%s: No xmit handler.\n", ieee->dev->name); - goto success; - } - - if (unlikely(skb->len < SNAP_SIZE + sizeof(u16))) { - printk(KERN_WARNING "%s: skb too small (%d).\n", - ieee->dev->name, skb->len); - goto success; - } - - ether_type = ((struct ethhdr *)skb->data)->h_proto; - - crypt = ieee->crypt[ieee->tx_keyidx]; - - encrypt = !(ether_type == htons(ETH_P_PAE) && ieee->ieee802_1x) && - ieee->sec.encrypt; - - host_encrypt = ieee->host_encrypt && encrypt && crypt; - host_encrypt_msdu = ieee->host_encrypt_msdu && encrypt && crypt; - host_build_iv = ieee->host_build_iv && encrypt && crypt; - - if (!encrypt && ieee->ieee802_1x && - ieee->drop_unencrypted && ether_type != htons(ETH_P_PAE)) { - stats->tx_dropped++; - goto success; - } - - /* Save source and destination addresses */ - skb_copy_from_linear_data(skb, dest, ETH_ALEN); - skb_copy_from_linear_data_offset(skb, ETH_ALEN, src, ETH_ALEN); - - if (host_encrypt || host_build_iv) - fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA | - IEEE80211_FCTL_PROTECTED; - else - fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA; - - if (ieee->iw_mode == IW_MODE_INFRA) { - fc |= IEEE80211_FCTL_TODS; - /* To DS: Addr1 = BSSID, Addr2 = SA, Addr3 = DA */ - memcpy(header.addr1, ieee->bssid, ETH_ALEN); - memcpy(header.addr2, src, ETH_ALEN); - memcpy(header.addr3, dest, ETH_ALEN); - } else if (ieee->iw_mode == IW_MODE_ADHOC) { - /* not From/To DS: Addr1 = DA, Addr2 = SA, Addr3 = BSSID */ - memcpy(header.addr1, dest, ETH_ALEN); - memcpy(header.addr2, src, ETH_ALEN); - memcpy(header.addr3, ieee->bssid, ETH_ALEN); - } - hdr_len = IEEE80211_3ADDR_LEN; - - if (ieee->is_qos_active && ieee->is_qos_active(dev, skb)) { - fc |= IEEE80211_STYPE_QOS_DATA; - hdr_len += 2; - - skb->priority = ieee80211_classify(skb); - header.qos_ctl |= cpu_to_le16(skb->priority & IEEE80211_QCTL_TID); - } - header.frame_ctl = cpu_to_le16(fc); - - /* Advance the SKB to the start of the payload */ - skb_pull(skb, sizeof(struct ethhdr)); - - /* Determine total amount of storage required for TXB packets */ - bytes = skb->len + SNAP_SIZE + sizeof(u16); - - /* Encrypt msdu first on the whole data packet. */ - if ((host_encrypt || host_encrypt_msdu) && - crypt && crypt->ops && crypt->ops->encrypt_msdu) { - int res = 0; - int len = bytes + hdr_len + crypt->ops->extra_msdu_prefix_len + - crypt->ops->extra_msdu_postfix_len; - struct sk_buff *skb_new = dev_alloc_skb(len); - - if (unlikely(!skb_new)) - goto failed; - - skb_reserve(skb_new, crypt->ops->extra_msdu_prefix_len); - memcpy(skb_put(skb_new, hdr_len), &header, hdr_len); - snapped = 1; - ieee80211_copy_snap(skb_put(skb_new, SNAP_SIZE + sizeof(u16)), - ether_type); - skb_copy_from_linear_data(skb, skb_put(skb_new, skb->len), skb->len); - res = crypt->ops->encrypt_msdu(skb_new, hdr_len, crypt->priv); - if (res < 0) { - IEEE80211_ERROR("msdu encryption failed\n"); - dev_kfree_skb_any(skb_new); - goto failed; - } - dev_kfree_skb_any(skb); - skb = skb_new; - bytes += crypt->ops->extra_msdu_prefix_len + - crypt->ops->extra_msdu_postfix_len; - skb_pull(skb, hdr_len); - } - - if (host_encrypt || ieee->host_open_frag) { - /* Determine fragmentation size based on destination (multicast - * and broadcast are not fragmented) */ - if (is_multicast_ether_addr(dest) || - is_broadcast_ether_addr(dest)) - frag_size = MAX_FRAG_THRESHOLD; - else - frag_size = ieee->fts; - - /* Determine amount of payload per fragment. Regardless of if - * this stack is providing the full 802.11 header, one will - * eventually be affixed to this fragment -- so we must account - * for it when determining the amount of payload space. */ - bytes_per_frag = frag_size - hdr_len; - if (ieee->config & - (CFG_IEEE80211_COMPUTE_FCS | CFG_IEEE80211_RESERVE_FCS)) - bytes_per_frag -= IEEE80211_FCS_LEN; - - /* Each fragment may need to have room for encryptiong - * pre/postfix */ - if (host_encrypt) - bytes_per_frag -= crypt->ops->extra_mpdu_prefix_len + - crypt->ops->extra_mpdu_postfix_len; - - /* Number of fragments is the total - * bytes_per_frag / payload_per_fragment */ - nr_frags = bytes / bytes_per_frag; - bytes_last_frag = bytes % bytes_per_frag; - if (bytes_last_frag) - nr_frags++; - else - bytes_last_frag = bytes_per_frag; - } else { - nr_frags = 1; - bytes_per_frag = bytes_last_frag = bytes; - frag_size = bytes + hdr_len; - } - - rts_required = (frag_size > ieee->rts - && ieee->config & CFG_IEEE80211_RTS); - if (rts_required) - nr_frags++; - - /* When we allocate the TXB we allocate enough space for the reserve - * and full fragment bytes (bytes_per_frag doesn't include prefix, - * postfix, header, FCS, etc.) */ - txb = ieee80211_alloc_txb(nr_frags, frag_size, - ieee->tx_headroom, GFP_ATOMIC); - if (unlikely(!txb)) { - printk(KERN_WARNING "%s: Could not allocate TXB\n", - ieee->dev->name); - goto failed; - } - txb->encrypted = encrypt; - if (host_encrypt) - txb->payload_size = frag_size * (nr_frags - 1) + - bytes_last_frag; - else - txb->payload_size = bytes; - - if (rts_required) { - skb_frag = txb->fragments[0]; - frag_hdr = - (struct ieee80211_hdr_3addrqos *)skb_put(skb_frag, hdr_len); - - /* - * Set header frame_ctl to the RTS. - */ - header.frame_ctl = - cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_RTS); - memcpy(frag_hdr, &header, hdr_len); - - /* - * Restore header frame_ctl to the original data setting. - */ - header.frame_ctl = cpu_to_le16(fc); - - if (ieee->config & - (CFG_IEEE80211_COMPUTE_FCS | CFG_IEEE80211_RESERVE_FCS)) - skb_put(skb_frag, 4); - - txb->rts_included = 1; - i = 1; - } else - i = 0; - - for (; i < nr_frags; i++) { - skb_frag = txb->fragments[i]; - - if (host_encrypt || host_build_iv) - skb_reserve(skb_frag, - crypt->ops->extra_mpdu_prefix_len); - - frag_hdr = - (struct ieee80211_hdr_3addrqos *)skb_put(skb_frag, hdr_len); - memcpy(frag_hdr, &header, hdr_len); - - /* If this is not the last fragment, then add the MOREFRAGS - * bit to the frame control */ - if (i != nr_frags - 1) { - frag_hdr->frame_ctl = - cpu_to_le16(fc | IEEE80211_FCTL_MOREFRAGS); - bytes = bytes_per_frag; - } else { - /* The last fragment takes the remaining length */ - bytes = bytes_last_frag; - } - - if (i == 0 && !snapped) { - ieee80211_copy_snap(skb_put - (skb_frag, SNAP_SIZE + sizeof(u16)), - ether_type); - bytes -= SNAP_SIZE + sizeof(u16); - } - - skb_copy_from_linear_data(skb, skb_put(skb_frag, bytes), bytes); - - /* Advance the SKB... */ - skb_pull(skb, bytes); - - /* Encryption routine will move the header forward in order - * to insert the IV between the header and the payload */ - if (host_encrypt) - ieee80211_encrypt_fragment(ieee, skb_frag, hdr_len); - else if (host_build_iv) { - atomic_inc(&crypt->refcnt); - if (crypt->ops->build_iv) - crypt->ops->build_iv(skb_frag, hdr_len, - ieee->sec.keys[ieee->sec.active_key], - ieee->sec.key_sizes[ieee->sec.active_key], - crypt->priv); - atomic_dec(&crypt->refcnt); - } - - if (ieee->config & - (CFG_IEEE80211_COMPUTE_FCS | CFG_IEEE80211_RESERVE_FCS)) - skb_put(skb_frag, 4); - } - - success: - spin_unlock_irqrestore(&ieee->lock, flags); - - dev_kfree_skb_any(skb); - - if (txb) { - int ret = (*ieee->hard_start_xmit) (txb, dev, priority); - if (ret == 0) { - stats->tx_packets++; - stats->tx_bytes += txb->payload_size; - return 0; - } - - ieee80211_txb_free(txb); - } - - return 0; - - failed: - spin_unlock_irqrestore(&ieee->lock, flags); - netif_stop_queue(dev); - stats->tx_errors++; - return 1; -} - -EXPORT_SYMBOL(ieee80211_txb_free); diff --git a/net/ieee80211/ieee80211_wx.c b/net/ieee80211/ieee80211_wx.c deleted file mode 100644 index 973832dd7fa..00000000000 --- a/net/ieee80211/ieee80211_wx.c +++ /dev/null @@ -1,760 +0,0 @@ -/****************************************************************************** - - Copyright(c) 2004-2005 Intel Corporation. All rights reserved. - - Portions of this file are based on the WEP enablement code provided by the - Host AP project hostap-drivers v0.1.3 - Copyright (c) 2001-2002, SSH Communications Security Corp and Jouni Malinen - <j@w1.fi> - Copyright (c) 2002-2003, Jouni Malinen <j@w1.fi> - - This program is free software; you can redistribute it and/or modify it - under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., 59 - Temple Place - Suite 330, Boston, MA 02111-1307, USA. - - The full GNU General Public License is included in this distribution in the - file called LICENSE. - - Contact Information: - James P. Ketrenos <ipw2100-admin@linux.intel.com> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -******************************************************************************/ - -#include <linux/kmod.h> -#include <linux/module.h> -#include <linux/jiffies.h> - -#include <net/ieee80211.h> -#include <linux/wireless.h> - -static const char *ieee80211_modes[] = { - "?", "a", "b", "ab", "g", "ag", "bg", "abg" -}; - -#define MAX_CUSTOM_LEN 64 -static char *ieee80211_translate_scan(struct ieee80211_device *ieee, - char *start, char *stop, - struct ieee80211_network *network, - struct iw_request_info *info) -{ - char custom[MAX_CUSTOM_LEN]; - char *p; - struct iw_event iwe; - int i, j; - char *current_val; /* For rates */ - u8 rate; - - /* First entry *MUST* be the AP MAC address */ - iwe.cmd = SIOCGIWAP; - iwe.u.ap_addr.sa_family = ARPHRD_ETHER; - memcpy(iwe.u.ap_addr.sa_data, network->bssid, ETH_ALEN); - start = iwe_stream_add_event(info, start, stop, &iwe, IW_EV_ADDR_LEN); - - /* Remaining entries will be displayed in the order we provide them */ - - /* Add the ESSID */ - iwe.cmd = SIOCGIWESSID; - iwe.u.data.flags = 1; - if (network->flags & NETWORK_EMPTY_ESSID) { - iwe.u.data.length = sizeof("<hidden>"); - start = iwe_stream_add_point(info, start, stop, - &iwe, "<hidden>"); - } else { - iwe.u.data.length = min(network->ssid_len, (u8) 32); - start = iwe_stream_add_point(info, start, stop, - &iwe, network->ssid); - } - - /* Add the protocol name */ - iwe.cmd = SIOCGIWNAME; - snprintf(iwe.u.name, IFNAMSIZ, "IEEE 802.11%s", - ieee80211_modes[network->mode]); - start = iwe_stream_add_event(info, start, stop, &iwe, IW_EV_CHAR_LEN); - - /* Add mode */ - iwe.cmd = SIOCGIWMODE; - if (network->capability & (WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_IBSS)) { - if (network->capability & WLAN_CAPABILITY_ESS) - iwe.u.mode = IW_MODE_MASTER; - else - iwe.u.mode = IW_MODE_ADHOC; - - start = iwe_stream_add_event(info, start, stop, - &iwe, IW_EV_UINT_LEN); - } - - /* Add channel and frequency */ - /* Note : userspace automatically computes channel using iwrange */ - iwe.cmd = SIOCGIWFREQ; - iwe.u.freq.m = ieee80211_channel_to_freq(ieee, network->channel); - iwe.u.freq.e = 6; - iwe.u.freq.i = 0; - start = iwe_stream_add_event(info, start, stop, &iwe, IW_EV_FREQ_LEN); - - /* Add encryption capability */ - iwe.cmd = SIOCGIWENCODE; - if (network->capability & WLAN_CAPABILITY_PRIVACY) - iwe.u.data.flags = IW_ENCODE_ENABLED | IW_ENCODE_NOKEY; - else - iwe.u.data.flags = IW_ENCODE_DISABLED; - iwe.u.data.length = 0; - start = iwe_stream_add_point(info, start, stop, - &iwe, network->ssid); - - /* Add basic and extended rates */ - /* Rate : stuffing multiple values in a single event require a bit - * more of magic - Jean II */ - current_val = start + iwe_stream_lcp_len(info); - iwe.cmd = SIOCGIWRATE; - /* Those two flags are ignored... */ - iwe.u.bitrate.fixed = iwe.u.bitrate.disabled = 0; - - for (i = 0, j = 0; i < network->rates_len;) { - if (j < network->rates_ex_len && - ((network->rates_ex[j] & 0x7F) < - (network->rates[i] & 0x7F))) - rate = network->rates_ex[j++] & 0x7F; - else - rate = network->rates[i++] & 0x7F; - /* Bit rate given in 500 kb/s units (+ 0x80) */ - iwe.u.bitrate.value = ((rate & 0x7f) * 500000); - /* Add new value to event */ - current_val = iwe_stream_add_value(info, start, current_val, - stop, &iwe, IW_EV_PARAM_LEN); - } - for (; j < network->rates_ex_len; j++) { - rate = network->rates_ex[j] & 0x7F; - /* Bit rate given in 500 kb/s units (+ 0x80) */ - iwe.u.bitrate.value = ((rate & 0x7f) * 500000); - /* Add new value to event */ - current_val = iwe_stream_add_value(info, start, current_val, - stop, &iwe, IW_EV_PARAM_LEN); - } - /* Check if we added any rate */ - if ((current_val - start) > iwe_stream_lcp_len(info)) - start = current_val; - - /* Add quality statistics */ - iwe.cmd = IWEVQUAL; - iwe.u.qual.updated = IW_QUAL_QUAL_UPDATED | IW_QUAL_LEVEL_UPDATED | - IW_QUAL_NOISE_UPDATED; - - if (!(network->stats.mask & IEEE80211_STATMASK_RSSI)) { - iwe.u.qual.updated |= IW_QUAL_QUAL_INVALID | - IW_QUAL_LEVEL_INVALID; - iwe.u.qual.qual = 0; - } else { - if (ieee->perfect_rssi == ieee->worst_rssi) - iwe.u.qual.qual = 100; - else - iwe.u.qual.qual = - (100 * - (ieee->perfect_rssi - ieee->worst_rssi) * - (ieee->perfect_rssi - ieee->worst_rssi) - - (ieee->perfect_rssi - network->stats.rssi) * - (15 * (ieee->perfect_rssi - ieee->worst_rssi) + - 62 * (ieee->perfect_rssi - - network->stats.rssi))) / - ((ieee->perfect_rssi - - ieee->worst_rssi) * (ieee->perfect_rssi - - ieee->worst_rssi)); - if (iwe.u.qual.qual > 100) - iwe.u.qual.qual = 100; - else if (iwe.u.qual.qual < 1) - iwe.u.qual.qual = 0; - } - - if (!(network->stats.mask & IEEE80211_STATMASK_NOISE)) { - iwe.u.qual.updated |= IW_QUAL_NOISE_INVALID; - iwe.u.qual.noise = 0; - } else { - iwe.u.qual.noise = network->stats.noise; - } - - if (!(network->stats.mask & IEEE80211_STATMASK_SIGNAL)) { - iwe.u.qual.updated |= IW_QUAL_LEVEL_INVALID; - iwe.u.qual.level = 0; - } else { - iwe.u.qual.level = network->stats.signal; - } - - start = iwe_stream_add_event(info, start, stop, &iwe, IW_EV_QUAL_LEN); - - iwe.cmd = IWEVCUSTOM; - p = custom; - - iwe.u.data.length = p - custom; - if (iwe.u.data.length) - start = iwe_stream_add_point(info, start, stop, &iwe, custom); - - memset(&iwe, 0, sizeof(iwe)); - if (network->wpa_ie_len) { - char buf[MAX_WPA_IE_LEN]; - memcpy(buf, network->wpa_ie, network->wpa_ie_len); - iwe.cmd = IWEVGENIE; - iwe.u.data.length = network->wpa_ie_len; - start = iwe_stream_add_point(info, start, stop, &iwe, buf); - } - - memset(&iwe, 0, sizeof(iwe)); - if (network->rsn_ie_len) { - char buf[MAX_WPA_IE_LEN]; - memcpy(buf, network->rsn_ie, network->rsn_ie_len); - iwe.cmd = IWEVGENIE; - iwe.u.data.length = network->rsn_ie_len; - start = iwe_stream_add_point(info, start, stop, &iwe, buf); - } - - /* Add EXTRA: Age to display seconds since last beacon/probe response - * for given network. */ - iwe.cmd = IWEVCUSTOM; - p = custom; - p += snprintf(p, MAX_CUSTOM_LEN - (p - custom), - " Last beacon: %dms ago", - jiffies_to_msecs(jiffies - network->last_scanned)); - iwe.u.data.length = p - custom; - if (iwe.u.data.length) - start = iwe_stream_add_point(info, start, stop, &iwe, custom); - - /* Add spectrum management information */ - iwe.cmd = -1; - p = custom; - p += snprintf(p, MAX_CUSTOM_LEN - (p - custom), " Channel flags: "); - - if (ieee80211_get_channel_flags(ieee, network->channel) & - IEEE80211_CH_INVALID) { - iwe.cmd = IWEVCUSTOM; - p += snprintf(p, MAX_CUSTOM_LEN - (p - custom), "INVALID "); - } - - if (ieee80211_get_channel_flags(ieee, network->channel) & - IEEE80211_CH_RADAR_DETECT) { - iwe.cmd = IWEVCUSTOM; - p += snprintf(p, MAX_CUSTOM_LEN - (p - custom), "DFS "); - } - - if (iwe.cmd == IWEVCUSTOM) { - iwe.u.data.length = p - custom; - start = iwe_stream_add_point(info, start, stop, &iwe, custom); - } - - return start; -} - -#define SCAN_ITEM_SIZE 128 - -int ieee80211_wx_get_scan(struct ieee80211_device *ieee, - struct iw_request_info *info, - union iwreq_data *wrqu, char *extra) -{ - struct ieee80211_network *network; - unsigned long flags; - int err = 0; - - char *ev = extra; - char *stop = ev + wrqu->data.length; - int i = 0; - DECLARE_MAC_BUF(mac); - - IEEE80211_DEBUG_WX("Getting scan\n"); - - spin_lock_irqsave(&ieee->lock, flags); - - list_for_each_entry(network, &ieee->network_list, list) { - i++; - if (stop - ev < SCAN_ITEM_SIZE) { - err = -E2BIG; - break; - } - - if (ieee->scan_age == 0 || - time_after(network->last_scanned + ieee->scan_age, jiffies)) - ev = ieee80211_translate_scan(ieee, ev, stop, network, - info); - else - IEEE80211_DEBUG_SCAN("Not showing network '%s (" - "%s)' due to age (%dms).\n", - escape_essid(network->ssid, - network->ssid_len), - print_mac(mac, network->bssid), - jiffies_to_msecs(jiffies - - network-> - last_scanned)); - } - - spin_unlock_irqrestore(&ieee->lock, flags); - - wrqu->data.length = ev - extra; - wrqu->data.flags = 0; - - IEEE80211_DEBUG_WX("exit: %d networks returned.\n", i); - - return err; -} - -int ieee80211_wx_set_encode(struct ieee80211_device *ieee, - struct iw_request_info *info, - union iwreq_data *wrqu, char *keybuf) -{ - struct iw_point *erq = &(wrqu->encoding); - struct net_device *dev = ieee->dev; - struct ieee80211_security sec = { - .flags = 0 - }; - int i, key, key_provided, len; - struct ieee80211_crypt_data **crypt; - int host_crypto = ieee->host_encrypt || ieee->host_decrypt || ieee->host_build_iv; - - IEEE80211_DEBUG_WX("SET_ENCODE\n"); - - key = erq->flags & IW_ENCODE_INDEX; - if (key) { - if (key > WEP_KEYS) - return -EINVAL; - key--; - key_provided = 1; - } else { - key_provided = 0; - key = ieee->tx_keyidx; - } - - IEEE80211_DEBUG_WX("Key: %d [%s]\n", key, key_provided ? - "provided" : "default"); - - crypt = &ieee->crypt[key]; - - if (erq->flags & IW_ENCODE_DISABLED) { - if (key_provided && *crypt) { - IEEE80211_DEBUG_WX("Disabling encryption on key %d.\n", - key); - ieee80211_crypt_delayed_deinit(ieee, crypt); - } else - IEEE80211_DEBUG_WX("Disabling encryption.\n"); - - /* Check all the keys to see if any are still configured, - * and if no key index was provided, de-init them all */ - for (i = 0; i < WEP_KEYS; i++) { - if (ieee->crypt[i] != NULL) { - if (key_provided) - break; - ieee80211_crypt_delayed_deinit(ieee, - &ieee->crypt[i]); - } - } - - if (i == WEP_KEYS) { - sec.enabled = 0; - sec.encrypt = 0; - sec.level = SEC_LEVEL_0; - sec.flags |= SEC_ENABLED | SEC_LEVEL | SEC_ENCRYPT; - } - - goto done; - } - - sec.enabled = 1; - sec.encrypt = 1; - sec.flags |= SEC_ENABLED | SEC_ENCRYPT; - - if (*crypt != NULL && (*crypt)->ops != NULL && - strcmp((*crypt)->ops->name, "WEP") != 0) { - /* changing to use WEP; deinit previously used algorithm - * on this key */ - ieee80211_crypt_delayed_deinit(ieee, crypt); - } - - if (*crypt == NULL && host_crypto) { - struct ieee80211_crypt_data *new_crypt; - - /* take WEP into use */ - new_crypt = kzalloc(sizeof(struct ieee80211_crypt_data), - GFP_KERNEL); - if (new_crypt == NULL) - return -ENOMEM; - new_crypt->ops = ieee80211_get_crypto_ops("WEP"); - if (!new_crypt->ops) { - request_module("ieee80211_crypt_wep"); - new_crypt->ops = ieee80211_get_crypto_ops("WEP"); - } - - if (new_crypt->ops && try_module_get(new_crypt->ops->owner)) - new_crypt->priv = new_crypt->ops->init(key); - - if (!new_crypt->ops || !new_crypt->priv) { - kfree(new_crypt); - new_crypt = NULL; - - printk(KERN_WARNING "%s: could not initialize WEP: " - "load module ieee80211_crypt_wep\n", dev->name); - return -EOPNOTSUPP; - } - *crypt = new_crypt; - } - - /* If a new key was provided, set it up */ - if (erq->length > 0) { - len = erq->length <= 5 ? 5 : 13; - memcpy(sec.keys[key], keybuf, erq->length); - if (len > erq->length) - memset(sec.keys[key] + erq->length, 0, - len - erq->length); - IEEE80211_DEBUG_WX("Setting key %d to '%s' (%d:%d bytes)\n", - key, escape_essid(sec.keys[key], len), - erq->length, len); - sec.key_sizes[key] = len; - if (*crypt) - (*crypt)->ops->set_key(sec.keys[key], len, NULL, - (*crypt)->priv); - sec.flags |= (1 << key); - /* This ensures a key will be activated if no key is - * explicitly set */ - if (key == sec.active_key) - sec.flags |= SEC_ACTIVE_KEY; - - } else { - if (host_crypto) { - len = (*crypt)->ops->get_key(sec.keys[key], WEP_KEY_LEN, - NULL, (*crypt)->priv); - if (len == 0) { - /* Set a default key of all 0 */ - IEEE80211_DEBUG_WX("Setting key %d to all " - "zero.\n", key); - memset(sec.keys[key], 0, 13); - (*crypt)->ops->set_key(sec.keys[key], 13, NULL, - (*crypt)->priv); - sec.key_sizes[key] = 13; - sec.flags |= (1 << key); - } - } - /* No key data - just set the default TX key index */ - if (key_provided) { - IEEE80211_DEBUG_WX("Setting key %d to default Tx " - "key.\n", key); - ieee->tx_keyidx = key; - sec.active_key = key; - sec.flags |= SEC_ACTIVE_KEY; - } - } - if (erq->flags & (IW_ENCODE_OPEN | IW_ENCODE_RESTRICTED)) { - ieee->open_wep = !(erq->flags & IW_ENCODE_RESTRICTED); - sec.auth_mode = ieee->open_wep ? WLAN_AUTH_OPEN : - WLAN_AUTH_SHARED_KEY; - sec.flags |= SEC_AUTH_MODE; - IEEE80211_DEBUG_WX("Auth: %s\n", - sec.auth_mode == WLAN_AUTH_OPEN ? - "OPEN" : "SHARED KEY"); - } - - /* For now we just support WEP, so only set that security level... - * TODO: When WPA is added this is one place that needs to change */ - sec.flags |= SEC_LEVEL; - sec.level = SEC_LEVEL_1; /* 40 and 104 bit WEP */ - sec.encode_alg[key] = SEC_ALG_WEP; - - done: - if (ieee->set_security) - ieee->set_security(dev, &sec); - - /* Do not reset port if card is in Managed mode since resetting will - * generate new IEEE 802.11 authentication which may end up in looping - * with IEEE 802.1X. If your hardware requires a reset after WEP - * configuration (for example... Prism2), implement the reset_port in - * the callbacks structures used to initialize the 802.11 stack. */ - if (ieee->reset_on_keychange && - ieee->iw_mode != IW_MODE_INFRA && - ieee->reset_port && ieee->reset_port(dev)) { - printk(KERN_DEBUG "%s: reset_port failed\n", dev->name); - return -EINVAL; - } - return 0; -} - -int ieee80211_wx_get_encode(struct ieee80211_device *ieee, - struct iw_request_info *info, - union iwreq_data *wrqu, char *keybuf) -{ - struct iw_point *erq = &(wrqu->encoding); - int len, key; - struct ieee80211_crypt_data *crypt; - struct ieee80211_security *sec = &ieee->sec; - - IEEE80211_DEBUG_WX("GET_ENCODE\n"); - - key = erq->flags & IW_ENCODE_INDEX; - if (key) { - if (key > WEP_KEYS) - return -EINVAL; - key--; - } else - key = ieee->tx_keyidx; - - crypt = ieee->crypt[key]; - erq->flags = key + 1; - - if (!sec->enabled) { - erq->length = 0; - erq->flags |= IW_ENCODE_DISABLED; - return 0; - } - - len = sec->key_sizes[key]; - memcpy(keybuf, sec->keys[key], len); - - erq->length = len; - erq->flags |= IW_ENCODE_ENABLED; - - if (ieee->open_wep) - erq->flags |= IW_ENCODE_OPEN; - else - erq->flags |= IW_ENCODE_RESTRICTED; - - return 0; -} - -int ieee80211_wx_set_encodeext(struct ieee80211_device *ieee, - struct iw_request_info *info, - union iwreq_data *wrqu, char *extra) -{ - struct net_device *dev = ieee->dev; - struct iw_point *encoding = &wrqu->encoding; - struct iw_encode_ext *ext = (struct iw_encode_ext *)extra; - int i, idx, ret = 0; - int group_key = 0; - const char *alg, *module; - struct ieee80211_crypto_ops *ops; - struct ieee80211_crypt_data **crypt; - - struct ieee80211_security sec = { - .flags = 0, - }; - - idx = encoding->flags & IW_ENCODE_INDEX; - if (idx) { - if (idx < 1 || idx > WEP_KEYS) - return -EINVAL; - idx--; - } else - idx = ieee->tx_keyidx; - - if (ext->ext_flags & IW_ENCODE_EXT_GROUP_KEY) { - crypt = &ieee->crypt[idx]; - group_key = 1; - } else { - /* some Cisco APs use idx>0 for unicast in dynamic WEP */ - if (idx != 0 && ext->alg != IW_ENCODE_ALG_WEP) - return -EINVAL; - if (ieee->iw_mode == IW_MODE_INFRA) - crypt = &ieee->crypt[idx]; - else - return -EINVAL; - } - - sec.flags |= SEC_ENABLED | SEC_ENCRYPT; - if ((encoding->flags & IW_ENCODE_DISABLED) || - ext->alg == IW_ENCODE_ALG_NONE) { - if (*crypt) - ieee80211_crypt_delayed_deinit(ieee, crypt); - - for (i = 0; i < WEP_KEYS; i++) - if (ieee->crypt[i] != NULL) - break; - - if (i == WEP_KEYS) { - sec.enabled = 0; - sec.encrypt = 0; - sec.level = SEC_LEVEL_0; - sec.flags |= SEC_LEVEL; - } - goto done; - } - - sec.enabled = 1; - sec.encrypt = 1; - - if (group_key ? !ieee->host_mc_decrypt : - !(ieee->host_encrypt || ieee->host_decrypt || - ieee->host_encrypt_msdu)) - goto skip_host_crypt; - - switch (ext->alg) { - case IW_ENCODE_ALG_WEP: - alg = "WEP"; - module = "ieee80211_crypt_wep"; - break; - case IW_ENCODE_ALG_TKIP: - alg = "TKIP"; - module = "ieee80211_crypt_tkip"; - break; - case IW_ENCODE_ALG_CCMP: - alg = "CCMP"; - module = "ieee80211_crypt_ccmp"; - break; - default: - IEEE80211_DEBUG_WX("%s: unknown crypto alg %d\n", - dev->name, ext->alg); - ret = -EINVAL; - goto done; - } - - ops = ieee80211_get_crypto_ops(alg); - if (ops == NULL) { - request_module(module); - ops = ieee80211_get_crypto_ops(alg); - } - if (ops == NULL) { - IEEE80211_DEBUG_WX("%s: unknown crypto alg %d\n", - dev->name, ext->alg); - ret = -EINVAL; - goto done; - } - - if (*crypt == NULL || (*crypt)->ops != ops) { - struct ieee80211_crypt_data *new_crypt; - - ieee80211_crypt_delayed_deinit(ieee, crypt); - - new_crypt = kzalloc(sizeof(*new_crypt), GFP_KERNEL); - if (new_crypt == NULL) { - ret = -ENOMEM; - goto done; - } - new_crypt->ops = ops; - if (new_crypt->ops && try_module_get(new_crypt->ops->owner)) - new_crypt->priv = new_crypt->ops->init(idx); - if (new_crypt->priv == NULL) { - kfree(new_crypt); - ret = -EINVAL; - goto done; - } - *crypt = new_crypt; - } - - if (ext->key_len > 0 && (*crypt)->ops->set_key && - (*crypt)->ops->set_key(ext->key, ext->key_len, ext->rx_seq, - (*crypt)->priv) < 0) { - IEEE80211_DEBUG_WX("%s: key setting failed\n", dev->name); - ret = -EINVAL; - goto done; - } - - skip_host_crypt: - if (ext->ext_flags & IW_ENCODE_EXT_SET_TX_KEY) { - ieee->tx_keyidx = idx; - sec.active_key = idx; - sec.flags |= SEC_ACTIVE_KEY; - } - - if (ext->alg != IW_ENCODE_ALG_NONE) { - memcpy(sec.keys[idx], ext->key, ext->key_len); - sec.key_sizes[idx] = ext->key_len; - sec.flags |= (1 << idx); - if (ext->alg == IW_ENCODE_ALG_WEP) { - sec.encode_alg[idx] = SEC_ALG_WEP; - sec.flags |= SEC_LEVEL; - sec.level = SEC_LEVEL_1; - } else if (ext->alg == IW_ENCODE_ALG_TKIP) { - sec.encode_alg[idx] = SEC_ALG_TKIP; - sec.flags |= SEC_LEVEL; - sec.level = SEC_LEVEL_2; - } else if (ext->alg == IW_ENCODE_ALG_CCMP) { - sec.encode_alg[idx] = SEC_ALG_CCMP; - sec.flags |= SEC_LEVEL; - sec.level = SEC_LEVEL_3; - } - /* Don't set sec level for group keys. */ - if (group_key) - sec.flags &= ~SEC_LEVEL; - } - done: - if (ieee->set_security) - ieee->set_security(ieee->dev, &sec); - - /* - * Do not reset port if card is in Managed mode since resetting will - * generate new IEEE 802.11 authentication which may end up in looping - * with IEEE 802.1X. If your hardware requires a reset after WEP - * configuration (for example... Prism2), implement the reset_port in - * the callbacks structures used to initialize the 802.11 stack. - */ - if (ieee->reset_on_keychange && - ieee->iw_mode != IW_MODE_INFRA && - ieee->reset_port && ieee->reset_port(dev)) { - IEEE80211_DEBUG_WX("%s: reset_port failed\n", dev->name); - return -EINVAL; - } - - return ret; -} - -int ieee80211_wx_get_encodeext(struct ieee80211_device *ieee, - struct iw_request_info *info, - union iwreq_data *wrqu, char *extra) -{ - struct iw_point *encoding = &wrqu->encoding; - struct iw_encode_ext *ext = (struct iw_encode_ext *)extra; - struct ieee80211_security *sec = &ieee->sec; - int idx, max_key_len; - - max_key_len = encoding->length - sizeof(*ext); - if (max_key_len < 0) - return -EINVAL; - - idx = encoding->flags & IW_ENCODE_INDEX; - if (idx) { - if (idx < 1 || idx > WEP_KEYS) - return -EINVAL; - idx--; - } else - idx = ieee->tx_keyidx; - - if (!(ext->ext_flags & IW_ENCODE_EXT_GROUP_KEY) && - ext->alg != IW_ENCODE_ALG_WEP) - if (idx != 0 || ieee->iw_mode != IW_MODE_INFRA) - return -EINVAL; - - encoding->flags = idx + 1; - memset(ext, 0, sizeof(*ext)); - - if (!sec->enabled) { - ext->alg = IW_ENCODE_ALG_NONE; - ext->key_len = 0; - encoding->flags |= IW_ENCODE_DISABLED; - } else { - if (sec->encode_alg[idx] == SEC_ALG_WEP) - ext->alg = IW_ENCODE_ALG_WEP; - else if (sec->encode_alg[idx] == SEC_ALG_TKIP) - ext->alg = IW_ENCODE_ALG_TKIP; - else if (sec->encode_alg[idx] == SEC_ALG_CCMP) - ext->alg = IW_ENCODE_ALG_CCMP; - else - return -EINVAL; - - ext->key_len = sec->key_sizes[idx]; - memcpy(ext->key, sec->keys[idx], ext->key_len); - encoding->flags |= IW_ENCODE_ENABLED; - if (ext->key_len && - (ext->alg == IW_ENCODE_ALG_TKIP || - ext->alg == IW_ENCODE_ALG_CCMP)) - ext->ext_flags |= IW_ENCODE_EXT_TX_SEQ_VALID; - - } - - return 0; -} - -EXPORT_SYMBOL(ieee80211_wx_set_encodeext); -EXPORT_SYMBOL(ieee80211_wx_get_encodeext); - -EXPORT_SYMBOL(ieee80211_wx_get_scan); -EXPORT_SYMBOL(ieee80211_wx_set_encode); -EXPORT_SYMBOL(ieee80211_wx_get_encode); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 1aa2dc9e380..743f5542d65 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -94,6 +94,7 @@ #include <linux/igmp.h> #include <linux/inetdevice.h> #include <linux/netdevice.h> +#include <net/checksum.h> #include <net/ip.h> #include <net/protocol.h> #include <net/arp.h> @@ -245,7 +246,7 @@ static inline int inet_netns_ok(struct net *net, int protocol) int hash; struct net_protocol *ipprot; - if (net == &init_net) + if (net_eq(net, &init_net)) return 1; hash = protocol & (MAX_INET_PROTOS - 1); @@ -272,10 +273,9 @@ static int inet_create(struct net *net, struct socket *sock, int protocol) int try_loading_module = 0; int err; - if (sock->type != SOCK_RAW && - sock->type != SOCK_DGRAM && - !inet_ehash_secret) - build_ehash_secret(); + if (unlikely(!inet_ehash_secret)) + if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) + build_ehash_secret(); sock->state = SS_UNCONNECTED; @@ -1070,11 +1070,8 @@ static int inet_sk_reselect_saddr(struct sock *sk) return 0; if (sysctl_ip_dynaddr > 1) { - printk(KERN_INFO "%s(): shifting inet->" - "saddr from " NIPQUAD_FMT " to " NIPQUAD_FMT "\n", - __func__, - NIPQUAD(old_saddr), - NIPQUAD(new_saddr)); + printk(KERN_INFO "%s(): shifting inet->saddr from %pI4 to %pI4\n", + __func__, &old_saddr, &new_saddr); } inet->saddr = inet->rcv_saddr = new_saddr; @@ -1245,6 +1242,100 @@ out: return segs; } +static struct sk_buff **inet_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + struct net_protocol *ops; + struct sk_buff **pp = NULL; + struct sk_buff *p; + struct iphdr *iph; + int flush = 1; + int proto; + int id; + + if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) + goto out; + + iph = ip_hdr(skb); + proto = iph->protocol & (MAX_INET_PROTOS - 1); + + rcu_read_lock(); + ops = rcu_dereference(inet_protos[proto]); + if (!ops || !ops->gro_receive) + goto out_unlock; + + if (iph->version != 4 || iph->ihl != 5) + goto out_unlock; + + if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) + goto out_unlock; + + flush = ntohs(iph->tot_len) != skb->len || + iph->frag_off != htons(IP_DF); + id = ntohs(iph->id); + + for (p = *head; p; p = p->next) { + struct iphdr *iph2; + + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + iph2 = ip_hdr(p); + + if (iph->protocol != iph2->protocol || + iph->tos != iph2->tos || + memcmp(&iph->saddr, &iph2->saddr, 8)) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + + /* All fields must match except length and checksum. */ + NAPI_GRO_CB(p)->flush |= + memcmp(&iph->frag_off, &iph2->frag_off, 4) || + (u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) != id; + + NAPI_GRO_CB(p)->flush |= flush; + } + + NAPI_GRO_CB(skb)->flush |= flush; + __skb_pull(skb, sizeof(*iph)); + skb_reset_transport_header(skb); + + pp = ops->gro_receive(head, skb); + +out_unlock: + rcu_read_unlock(); + +out: + NAPI_GRO_CB(skb)->flush |= flush; + + return pp; +} + +static int inet_gro_complete(struct sk_buff *skb) +{ + struct net_protocol *ops; + struct iphdr *iph = ip_hdr(skb); + int proto = iph->protocol & (MAX_INET_PROTOS - 1); + int err = -ENOSYS; + __be16 newlen = htons(skb->len - skb_network_offset(skb)); + + csum_replace2(&iph->check, iph->tot_len, newlen); + iph->tot_len = newlen; + + rcu_read_lock(); + ops = rcu_dereference(inet_protos[proto]); + if (WARN_ON(!ops || !ops->gro_complete)) + goto out_unlock; + + err = ops->gro_complete(skb); + +out_unlock: + rcu_read_unlock(); + + return err; +} + int inet_ctl_sock_create(struct sock **sk, unsigned short family, unsigned short type, unsigned char protocol, struct net *net) @@ -1311,6 +1402,7 @@ EXPORT_SYMBOL_GPL(snmp_mib_free); #ifdef CONFIG_IP_MULTICAST static struct net_protocol igmp_protocol = { .handler = igmp_rcv, + .netns_ok = 1, }; #endif @@ -1319,6 +1411,8 @@ static struct net_protocol tcp_protocol = { .err_handler = tcp_v4_err, .gso_send_check = tcp_v4_gso_send_check, .gso_segment = tcp_tso_segment, + .gro_receive = tcp4_gro_receive, + .gro_complete = tcp4_gro_complete, .no_policy = 1, .netns_ok = 1, }; @@ -1411,6 +1505,8 @@ static struct packet_type ip_packet_type = { .func = ip_rcv, .gso_send_check = inet_gso_send_check, .gso_segment = inet_gso_segment, + .gro_receive = inet_gro_receive, + .gro_complete = inet_gro_complete, }; static int __init inet_init(void) diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 8219b7e0968..e878e494296 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -201,15 +201,16 @@ out: static void ah4_err(struct sk_buff *skb, u32 info) { - struct iphdr *iph = (struct iphdr*)skb->data; - struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+(iph->ihl<<2)); + struct net *net = dev_net(skb->dev); + struct iphdr *iph = (struct iphdr *)skb->data; + struct ip_auth_hdr *ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2)); struct xfrm_state *x; if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH || icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) return; - x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET); + x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET); if (!x) return; printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/%08x\n", @@ -293,9 +294,7 @@ static void ah_destroy(struct xfrm_state *x) return; kfree(ahp->work_icv); - ahp->work_icv = NULL; crypto_free_hash(ahp->tfm); - ahp->tfm = NULL; kfree(ahp); } @@ -316,6 +315,7 @@ static struct net_protocol ah4_protocol = { .handler = xfrm4_rcv, .err_handler = ah4_err, .no_policy = 1, + .netns_ok = 1, }; static int __init ah4_init(void) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 1a9dd66511f..29a74c01d8d 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -506,7 +506,7 @@ int arp_bind_neighbour(struct dst_entry *dst) if (dev == NULL) return -EINVAL; if (n == NULL) { - __be32 nexthop = ((struct rtable*)dst)->rt_gateway; + __be32 nexthop = ((struct rtable *)dst)->rt_gateway; if (dev->flags&(IFF_LOOPBACK|IFF_POINTOPOINT)) nexthop = 0; n = __neigh_lookup_errno( @@ -640,14 +640,14 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, arp_ptr=(unsigned char *)(arp+1); memcpy(arp_ptr, src_hw, dev->addr_len); - arp_ptr+=dev->addr_len; - memcpy(arp_ptr, &src_ip,4); - arp_ptr+=4; + arp_ptr += dev->addr_len; + memcpy(arp_ptr, &src_ip, 4); + arp_ptr += 4; if (target_hw != NULL) memcpy(arp_ptr, target_hw, dev->addr_len); else memset(arp_ptr, 0, dev->addr_len); - arp_ptr+=dev->addr_len; + arp_ptr += dev->addr_len; memcpy(arp_ptr, &dest_ip, 4); return skb; @@ -818,18 +818,18 @@ static int arp_process(struct sk_buff *skb) addr_type = rt->rt_type; if (addr_type == RTN_LOCAL) { - n = neigh_event_ns(&arp_tbl, sha, &sip, dev); - if (n) { - int dont_send = 0; - - if (!dont_send) - dont_send |= arp_ignore(in_dev,sip,tip); - if (!dont_send && IN_DEV_ARPFILTER(in_dev)) - dont_send |= arp_filter(sip,tip,dev); - if (!dont_send) - arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha); + int dont_send = 0; - neigh_release(n); + if (!dont_send) + dont_send |= arp_ignore(in_dev,sip,tip); + if (!dont_send && IN_DEV_ARPFILTER(in_dev)) + dont_send |= arp_filter(sip,tip,dev); + if (!dont_send) { + n = neigh_event_ns(&arp_tbl, sha, &sip, dev); + if (n) { + arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha); + neigh_release(n); + } } goto out; } else if (IN_DEV_FORWARD(in_dev)) { @@ -1308,7 +1308,7 @@ static void arp_format_neigh_entry(struct seq_file *seq, #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) } #endif - sprintf(tbuf, NIPQUAD_FMT, NIPQUAD(*(u32*)n->primary_key)); + sprintf(tbuf, "%pI4", n->primary_key); seq_printf(seq, "%-16s 0x%-10x0x%-10x%s * %s\n", tbuf, hatype, arp_state_to_flags(n), hbuffer, dev->name); read_unlock(&n->lock); @@ -1321,7 +1321,7 @@ static void arp_format_pneigh_entry(struct seq_file *seq, int hatype = dev ? dev->type : 0; char tbuf[16]; - sprintf(tbuf, NIPQUAD_FMT, NIPQUAD(*(u32*)n->key)); + sprintf(tbuf, "%pI4", n->key); seq_printf(seq, "%-16s 0x%-10x0x%-10x%s * %s\n", tbuf, hatype, ATF_PUBL | ATF_PERM, "00:00:00:00:00:00", dev ? dev->name : "*"); diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 2e78f6bd977..e52799047a5 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -490,7 +490,6 @@ int cipso_v4_doi_add(struct cipso_v4_doi *doi_def) } atomic_set(&doi_def->refcount, 1); - INIT_RCU_HEAD(&doi_def->rcu); spin_lock(&cipso_v4_doi_list_lock); if (cipso_v4_doi_search(doi_def->doi) != NULL) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 56fce3ab6c5..309997edc8a 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -112,13 +112,7 @@ static inline void devinet_sysctl_unregister(struct in_device *idev) static struct in_ifaddr *inet_alloc_ifa(void) { - struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL); - - if (ifa) { - INIT_RCU_HEAD(&ifa->rcu_head); - } - - return ifa; + return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL); } static void inet_rcu_free_ifa(struct rcu_head *head) @@ -161,7 +155,6 @@ static struct in_device *inetdev_init(struct net_device *dev) in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL); if (!in_dev) goto out; - INIT_RCU_HEAD(&in_dev->rcu_head); memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt, sizeof(in_dev->cnf)); in_dev->cnf.sysctl = NULL; @@ -1108,7 +1101,7 @@ out: } static struct notifier_block ip_netdev_notifier = { - .notifier_call =inetdev_event, + .notifier_call = inetdev_event, }; static inline size_t inet_nlmsg_size(void) @@ -1195,7 +1188,7 @@ done: return skb->len; } -static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh, +static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, u32 pid) { struct sk_buff *skb; @@ -1262,7 +1255,7 @@ static void inet_forward_change(struct net *net) } static int devinet_conf_proc(ctl_table *ctl, int write, - struct file* filp, void __user *buffer, + struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); @@ -1334,7 +1327,7 @@ static int devinet_conf_sysctl(ctl_table *table, } static int devinet_sysctl_forward(ctl_table *ctl, int write, - struct file* filp, void __user *buffer, + struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; @@ -1363,7 +1356,7 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write, } int ipv4_doint_and_flush(ctl_table *ctl, int write, - struct file* filp, void __user *buffer, + struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 21515d4c49e..18bb383ea39 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -413,15 +413,16 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu) static void esp4_err(struct sk_buff *skb, u32 info) { - struct iphdr *iph = (struct iphdr*)skb->data; - struct ip_esp_hdr *esph = (struct ip_esp_hdr*)(skb->data+(iph->ihl<<2)); + struct net *net = dev_net(skb->dev); + struct iphdr *iph = (struct iphdr *)skb->data; + struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2)); struct xfrm_state *x; if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH || icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) return; - x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET); + x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET); if (!x) return; NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n", @@ -618,6 +619,7 @@ static struct net_protocol esp4_protocol = { .handler = xfrm4_rcv, .err_handler = esp4_err, .no_policy = 1, + .netns_ok = 1, }; static int __init esp4_init(void) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 65c1503f8cc..741e4fa3e47 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -578,7 +578,7 @@ errout: return err; } -static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(skb->sk); struct fib_config cfg; @@ -600,7 +600,7 @@ errout: return err; } -static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(skb->sk); struct fib_config cfg; @@ -903,7 +903,7 @@ static void fib_disable_ip(struct net_device *dev, int force) static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct in_ifaddr *ifa = (struct in_ifaddr*)ptr; + struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; struct net_device *dev = ifa->ifa_dev->dev; switch (event) { @@ -964,11 +964,11 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo } static struct notifier_block fib_inetaddr_notifier = { - .notifier_call =fib_inetaddr_event, + .notifier_call = fib_inetaddr_event, }; static struct notifier_block fib_netdev_notifier = { - .notifier_call =fib_netdev_event, + .notifier_call = fib_netdev_event, }; static int __net_init ip_fib_net_init(struct net *net) diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index c8cac6c7f88..ded8c44fb84 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -247,7 +247,7 @@ fn_hash_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result { int err; struct fn_zone *fz; - struct fn_hash *t = (struct fn_hash*)tb->tb_data; + struct fn_hash *t = (struct fn_hash *)tb->tb_data; read_lock(&fib_hash_lock); for (fz = t->fn_zone_list; fz; fz = fz->fz_next) { @@ -283,7 +283,7 @@ fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib struct fib_node *f; struct fib_info *fi = NULL; struct fib_info *last_resort; - struct fn_hash *t = (struct fn_hash*)tb->tb_data; + struct fn_hash *t = (struct fn_hash *)tb->tb_data; struct fn_zone *fz = t->fn_zones[0]; if (fz == NULL) @@ -548,7 +548,7 @@ out: static int fn_hash_delete(struct fib_table *tb, struct fib_config *cfg) { - struct fn_hash *table = (struct fn_hash*)tb->tb_data; + struct fn_hash *table = (struct fn_hash *)tb->tb_data; struct fib_node *f; struct fib_alias *fa, *fa_to_delete; struct fn_zone *fz; @@ -748,7 +748,7 @@ static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin { int m, s_m; struct fn_zone *fz; - struct fn_hash *table = (struct fn_hash*)tb->tb_data; + struct fn_hash *table = (struct fn_hash *)tb->tb_data; s_m = cb->args[2]; read_lock(&fib_hash_lock); @@ -845,10 +845,10 @@ static struct fib_alias *fib_get_first(struct seq_file *seq) struct hlist_node *node; struct fib_node *fn; - hlist_for_each_entry(fn,node,iter->hash_head,fn_hash) { + hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) { struct fib_alias *fa; - list_for_each_entry(fa,&fn->fn_alias,fa_list) { + list_for_each_entry(fa, &fn->fn_alias, fa_list) { iter->fn = fn; iter->fa = fa; goto out; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index ded2ae34eab..4817dea3bc7 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -63,16 +63,16 @@ static DEFINE_SPINLOCK(fib_multipath_lock); for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++) #define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \ -for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++) +for (nhsel=0, nh = (struct fib_nh *)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++) #else /* CONFIG_IP_ROUTE_MULTIPATH */ /* Hope, that gcc will optimize it to get rid of dummy loop */ -#define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \ +#define for_nexthops(fi) { int nhsel = 0; const struct fib_nh * nh = (fi)->fib_nh; \ for (nhsel=0; nhsel < 1; nhsel++) -#define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \ +#define change_nexthops(fi) { int nhsel = 0; struct fib_nh * nh = (struct fib_nh *)((fi)->fib_nh); \ for (nhsel=0; nhsel < 1; nhsel++) #endif /* CONFIG_IP_ROUTE_MULTIPATH */ @@ -358,7 +358,7 @@ int fib_detect_death(struct fib_info *fi, int order, state = n->nud_state; neigh_release(n); } - if (state==NUD_REACHABLE) + if (state == NUD_REACHABLE) return 0; if ((state&NUD_VALID) && order != dflt) return 0; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 5cb72786a8a..ec0ae490f0b 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2399,8 +2399,8 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) __be32 prf = htonl(mask_pfx(tn->key, tn->pos)); seq_indent(seq, iter->depth-1); - seq_printf(seq, " +-- " NIPQUAD_FMT "/%d %d %d %d\n", - NIPQUAD(prf), tn->pos, tn->bits, tn->full_children, + seq_printf(seq, " +-- %pI4/%d %d %d %d\n", + &prf, tn->pos, tn->bits, tn->full_children, tn->empty_children); } else { @@ -2410,7 +2410,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) __be32 val = htonl(l->key); seq_indent(seq, iter->depth); - seq_printf(seq, " |-- " NIPQUAD_FMT "\n", NIPQUAD(val)); + seq_printf(seq, " |-- %pI4\n", &val); hlist_for_each_entry_rcu(li, node, &l->list, hlist) { struct fib_alias *fa; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 72b2de76f1c..705b33b184a 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -321,12 +321,12 @@ static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd, } static void icmp_push_reply(struct icmp_bxm *icmp_param, - struct ipcm_cookie *ipc, struct rtable *rt) + struct ipcm_cookie *ipc, struct rtable **rt) { struct sock *sk; struct sk_buff *skb; - sk = icmp_sk(dev_net(rt->u.dst.dev)); + sk = icmp_sk(dev_net((*rt)->u.dst.dev)); if (ip_append_data(sk, icmp_glue_bits, icmp_param, icmp_param->data_len+icmp_param->head_len, icmp_param->head_len, @@ -392,7 +392,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) } if (icmpv4_xrlim_allow(net, rt, icmp_param->data.icmph.type, icmp_param->data.icmph.code)) - icmp_push_reply(icmp_param, &ipc, rt); + icmp_push_reply(icmp_param, &ipc, &rt); ip_rt_put(rt); out_unlock: icmp_xmit_unlock(sk); @@ -562,7 +562,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) /* No need to clone since we're just using its address. */ rt2 = rt; - err = xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0); + err = xfrm_lookup(net, (struct dst_entry **)&rt, &fl, NULL, 0); switch (err) { case 0: if (rt != rt2) @@ -601,7 +601,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) if (err) goto relookup_failed; - err = xfrm_lookup((struct dst_entry **)&rt2, &fl, NULL, + err = xfrm_lookup(net, (struct dst_entry **)&rt2, &fl, NULL, XFRM_LOOKUP_ICMP); switch (err) { case 0: @@ -635,7 +635,7 @@ route_done: icmp_param.data_len = room; icmp_param.head_len = sizeof(struct icmphdr); - icmp_push_reply(&icmp_param, &ipc, rt); + icmp_push_reply(&icmp_param, &ipc, &rt); ende: ip_rt_put(rt); out_unlock: @@ -683,10 +683,8 @@ static void icmp_unreach(struct sk_buff *skb) break; case ICMP_FRAG_NEEDED: if (ipv4_config.no_pmtu_disc) { - LIMIT_NETDEBUG(KERN_INFO "ICMP: " NIPQUAD_FMT ": " - "fragmentation needed " - "and DF set.\n", - NIPQUAD(iph->daddr)); + LIMIT_NETDEBUG(KERN_INFO "ICMP: %pI4: fragmentation needed and DF set.\n", + &iph->daddr); } else { info = ip_rt_frag_needed(net, iph, ntohs(icmph->un.frag.mtu), @@ -696,9 +694,8 @@ static void icmp_unreach(struct sk_buff *skb) } break; case ICMP_SR_FAILED: - LIMIT_NETDEBUG(KERN_INFO "ICMP: " NIPQUAD_FMT ": Source " - "Route Failed.\n", - NIPQUAD(iph->daddr)); + LIMIT_NETDEBUG(KERN_INFO "ICMP: %pI4: Source Route Failed.\n", + &iph->daddr); break; default: break; @@ -729,12 +726,12 @@ static void icmp_unreach(struct sk_buff *skb) if (!net->ipv4.sysctl_icmp_ignore_bogus_error_responses && inet_addr_type(net, iph->daddr) == RTN_BROADCAST) { if (net_ratelimit()) - printk(KERN_WARNING NIPQUAD_FMT " sent an invalid ICMP " + printk(KERN_WARNING "%pI4 sent an invalid ICMP " "type %u, code %u " - "error to a broadcast: " NIPQUAD_FMT " on %s\n", - NIPQUAD(ip_hdr(skb)->saddr), + "error to a broadcast: %pI4 on %s\n", + &ip_hdr(skb)->saddr, icmph->type, icmph->code, - NIPQUAD(iph->daddr), + &iph->daddr, skb->dev->name); goto out; } @@ -952,9 +949,8 @@ static void icmp_address_reply(struct sk_buff *skb) break; } if (!ifa && net_ratelimit()) { - printk(KERN_INFO "Wrong address mask " NIPQUAD_FMT " from " - "%s/" NIPQUAD_FMT "\n", - NIPQUAD(*mp), dev->name, NIPQUAD(rt->rt_src)); + printk(KERN_INFO "Wrong address mask %pI4 from %s/%pI4\n", + mp, dev->name, &rt->rt_src); } } rcu_read_unlock(); @@ -976,9 +972,10 @@ int icmp_rcv(struct sk_buff *skb) struct net *net = dev_net(rt->u.dst.dev); if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { + struct sec_path *sp = skb_sec_path(skb); int nh; - if (!(skb->sp && skb->sp->xvec[skb->sp->len - 1]->props.flags & + if (!(sp && sp->xvec[sp->len - 1]->props.flags & XFRM_STATE_ICMP)) goto drop; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index a0d86455c53..9eb6219af61 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -167,7 +167,7 @@ static __inline__ void igmp_stop_timer(struct ip_mc_list *im) spin_lock_bh(&im->lock); if (del_timer(&im->timer)) atomic_dec(&im->refcnt); - im->tm_running=0; + im->tm_running = 0; im->reporter = 0; im->unsolicit_count = 0; spin_unlock_bh(&im->lock); @@ -176,9 +176,9 @@ static __inline__ void igmp_stop_timer(struct ip_mc_list *im) /* It must be called with locked im->lock */ static void igmp_start_timer(struct ip_mc_list *im, int max_delay) { - int tv=net_random() % max_delay; + int tv = net_random() % max_delay; - im->tm_running=1; + im->tm_running = 1; if (!mod_timer(&im->timer, jiffies+tv+2)) atomic_inc(&im->refcnt); } @@ -207,7 +207,7 @@ static void igmp_mod_timer(struct ip_mc_list *im, int max_delay) if (del_timer(&im->timer)) { if ((long)(im->timer.expires-jiffies) < max_delay) { add_timer(&im->timer); - im->tm_running=1; + im->tm_running = 1; spin_unlock_bh(&im->lock); return; } @@ -358,7 +358,7 @@ static int igmpv3_sendpack(struct sk_buff *skb) static int grec_size(struct ip_mc_list *pmc, int type, int gdel, int sdel) { - return sizeof(struct igmpv3_grec) + 4*igmp_scount(pmc,type,gdel,sdel); + return sizeof(struct igmpv3_grec) + 4*igmp_scount(pmc, type, gdel, sdel); } static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc, @@ -653,7 +653,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, return -1; } - skb=alloc_skb(IGMP_SIZE+LL_ALLOCATED_SPACE(dev), GFP_ATOMIC); + skb = alloc_skb(IGMP_SIZE+LL_ALLOCATED_SPACE(dev), GFP_ATOMIC); if (skb == NULL) { ip_rt_put(rt); return -1; @@ -682,11 +682,11 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, ((u8*)&iph[1])[3] = 0; ih = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr)); - ih->type=type; - ih->code=0; - ih->csum=0; - ih->group=group; - ih->csum=ip_compute_csum((void *)ih, sizeof(struct igmphdr)); + ih->type = type; + ih->code = 0; + ih->csum = 0; + ih->group = group; + ih->csum = ip_compute_csum((void *)ih, sizeof(struct igmphdr)); return ip_local_out(skb); } @@ -728,7 +728,7 @@ static void igmp_timer_expire(unsigned long data) struct in_device *in_dev = im->interface; spin_lock(&im->lock); - im->tm_running=0; + im->tm_running = 0; if (im->unsolicit_count) { im->unsolicit_count--; @@ -997,7 +997,7 @@ static void ip_mc_filter_add(struct in_device *in_dev, __be32 addr) --ANK */ if (arp_mc_map(addr, buf, dev, 0) == 0) - dev_mc_add(dev,buf,dev->addr_len,0); + dev_mc_add(dev, buf, dev->addr_len, 0); } /* @@ -1010,7 +1010,7 @@ static void ip_mc_filter_del(struct in_device *in_dev, __be32 addr) struct net_device *dev = in_dev->dev; if (arp_mc_map(addr, buf, dev, 0) == 0) - dev_mc_delete(dev,buf,dev->addr_len,0); + dev_mc_delete(dev, buf, dev->addr_len, 0); } #ifdef CONFIG_IP_MULTICAST @@ -1210,10 +1210,10 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) if (!im) goto out; - im->users=1; - im->interface=in_dev; + im->users = 1; + im->interface = in_dev; in_dev_hold(in_dev); - im->multiaddr=addr; + im->multiaddr = addr; /* initial mode is (EX, empty) */ im->sfmode = MCAST_EXCLUDE; im->sfcount[MCAST_INCLUDE] = 0; @@ -1224,7 +1224,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) atomic_set(&im->refcnt, 1); spin_lock_init(&im->lock); #ifdef CONFIG_IP_MULTICAST - im->tm_running=0; + im->tm_running = 0; setup_timer(&im->timer, &igmp_timer_expire, (unsigned long)im); im->unsolicit_count = IGMP_Unsolicited_Report_Count; im->reporter = 0; @@ -1232,8 +1232,8 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) #endif im->loaded = 0; write_lock_bh(&in_dev->mc_list_lock); - im->next=in_dev->mc_list; - in_dev->mc_list=im; + im->next = in_dev->mc_list; + in_dev->mc_list = im; in_dev->mc_count++; write_unlock_bh(&in_dev->mc_list_lock); #ifdef CONFIG_IP_MULTICAST @@ -1279,7 +1279,7 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) ASSERT_RTNL(); for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) { - if (i->multiaddr==addr) { + if (i->multiaddr == addr) { if (--i->users == 0) { write_lock_bh(&in_dev->mc_list_lock); *ip = i->next; @@ -1738,7 +1738,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) { int err; __be32 addr = imr->imr_multiaddr.s_addr; - struct ip_mc_socklist *iml=NULL, *i; + struct ip_mc_socklist *iml = NULL, *i; struct in_device *in_dev; struct inet_sock *inet = inet_sk(sk); struct net *net = sock_net(sk); @@ -1769,7 +1769,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) err = -ENOBUFS; if (count >= sysctl_igmp_max_memberships) goto done; - iml = sock_kmalloc(sk,sizeof(*iml),GFP_KERNEL); + iml = sock_kmalloc(sk, sizeof(*iml), GFP_KERNEL); if (iml == NULL) goto done; @@ -2275,6 +2275,7 @@ int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 p #if defined(CONFIG_PROC_FS) struct igmp_mc_iter_state { + struct seq_net_private p; struct net_device *dev; struct in_device *in_dev; }; @@ -2283,11 +2284,12 @@ struct igmp_mc_iter_state { static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq) { + struct net *net = seq_file_net(seq); struct ip_mc_list *im = NULL; struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); state->in_dev = NULL; - for_each_netdev(&init_net, state->dev) { + for_each_netdev(net, state->dev) { struct in_device *in_dev; in_dev = in_dev_get(state->dev); if (!in_dev) @@ -2408,7 +2410,7 @@ static const struct seq_operations igmp_mc_seq_ops = { static int igmp_mc_seq_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &igmp_mc_seq_ops, + return seq_open_net(inode, file, &igmp_mc_seq_ops, sizeof(struct igmp_mc_iter_state)); } @@ -2417,10 +2419,11 @@ static const struct file_operations igmp_mc_seq_fops = { .open = igmp_mc_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; struct igmp_mcf_iter_state { + struct seq_net_private p; struct net_device *dev; struct in_device *idev; struct ip_mc_list *im; @@ -2430,13 +2433,14 @@ struct igmp_mcf_iter_state { static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq) { + struct net *net = seq_file_net(seq); struct ip_sf_list *psf = NULL; struct ip_mc_list *im = NULL; struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq); state->idev = NULL; state->im = NULL; - for_each_netdev(&init_net, state->dev) { + for_each_netdev(net, state->dev) { struct in_device *idev; idev = in_dev_get(state->dev); if (unlikely(idev == NULL)) @@ -2567,7 +2571,7 @@ static const struct seq_operations igmp_mcf_seq_ops = { static int igmp_mcf_seq_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &igmp_mcf_seq_ops, + return seq_open_net(inode, file, &igmp_mcf_seq_ops, sizeof(struct igmp_mcf_iter_state)); } @@ -2576,14 +2580,41 @@ static const struct file_operations igmp_mcf_seq_fops = { .open = igmp_mcf_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; -int __init igmp_mc_proc_init(void) +static int igmp_net_init(struct net *net) { - proc_net_fops_create(&init_net, "igmp", S_IRUGO, &igmp_mc_seq_fops); - proc_net_fops_create(&init_net, "mcfilter", S_IRUGO, &igmp_mcf_seq_fops); + struct proc_dir_entry *pde; + + pde = proc_net_fops_create(net, "igmp", S_IRUGO, &igmp_mc_seq_fops); + if (!pde) + goto out_igmp; + pde = proc_net_fops_create(net, "mcfilter", S_IRUGO, &igmp_mcf_seq_fops); + if (!pde) + goto out_mcfilter; return 0; + +out_mcfilter: + proc_net_remove(net, "igmp"); +out_igmp: + return -ENOMEM; +} + +static void igmp_net_exit(struct net *net) +{ + proc_net_remove(net, "mcfilter"); + proc_net_remove(net, "igmp"); +} + +static struct pernet_operations igmp_net_ops = { + .init = igmp_net_init, + .exit = igmp_net_exit, +}; + +int __init igmp_mc_proc_init(void) +{ + return register_pernet_subsys(&igmp_net_ops); } #endif diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index bd1278a2d82..c7cda1ca8e6 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -109,7 +109,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) hashinfo->bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) - if (tb->ib_net == net && tb->port == rover) + if (ib_net(tb) == net && tb->port == rover) goto next; break; next: @@ -137,7 +137,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) hashinfo->bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) - if (tb->ib_net == net && tb->port == snum) + if (ib_net(tb) == net && tb->port == snum) goto tb_found; } tb = NULL; @@ -323,7 +323,7 @@ void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len) EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); -struct dst_entry* inet_csk_route_req(struct sock *sk, +struct dst_entry *inet_csk_route_req(struct sock *sk, const struct request_sock *req) { struct rtable *rt; @@ -344,16 +344,17 @@ struct dst_entry* inet_csk_route_req(struct sock *sk, struct net *net = sock_net(sk); security_req_classify_flow(req, &fl); - if (ip_route_output_flow(net, &rt, &fl, sk, 0)) { - IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); - return NULL; - } - if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) { - ip_rt_put(rt); - IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); - return NULL; - } + if (ip_route_output_flow(net, &rt, &fl, sk, 0)) + goto no_route; + if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) + goto route_err; return &rt->u.dst; + +route_err: + ip_rt_put(rt); +no_route: + IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); + return NULL; } EXPORT_SYMBOL_GPL(inet_csk_route_req); @@ -561,7 +562,7 @@ void inet_csk_destroy_sock(struct sock *sk) sk_refcnt_debug_release(sk); - atomic_dec(sk->sk_prot->orphan_count); + percpu_counter_dec(sk->sk_prot->orphan_count); sock_put(sk); } @@ -632,6 +633,8 @@ void inet_csk_listen_stop(struct sock *sk) acc_req = req->dl_next; + percpu_counter_inc(sk->sk_prot->orphan_count); + local_bh_disable(); bh_lock_sock(child); WARN_ON(sock_owned_by_user(child)); @@ -641,8 +644,6 @@ void inet_csk_listen_stop(struct sock *sk) sock_orphan(child); - atomic_inc(sk->sk_prot->orphan_count); - inet_csk_destroy_sock(child); bh_unlock_sock(child); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 564230dabcb..588a7796e3e 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -718,13 +718,15 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!(r->idiag_states & (TCPF_LISTEN | TCPF_SYN_RECV))) goto skip_listen_ht; - inet_listen_lock(hashinfo); for (i = s_i; i < INET_LHTABLE_SIZE; i++) { struct sock *sk; - struct hlist_node *node; + struct hlist_nulls_node *node; + struct inet_listen_hashbucket *ilb; num = 0; - sk_for_each(sk, node, &hashinfo->listening_hash[i]) { + ilb = &hashinfo->listening_hash[i]; + spin_lock_bh(&ilb->lock); + sk_nulls_for_each(sk, node, &ilb->head) { struct inet_sock *inet = inet_sk(sk); if (num < s_num) { @@ -742,7 +744,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) goto syn_recv; if (inet_csk_diag_dump(sk, skb, cb) < 0) { - inet_listen_unlock(hashinfo); + spin_unlock_bh(&ilb->lock); goto done; } @@ -751,7 +753,7 @@ syn_recv: goto next_listen; if (inet_diag_dump_reqs(skb, sk, cb) < 0) { - inet_listen_unlock(hashinfo); + spin_unlock_bh(&ilb->lock); goto done; } @@ -760,12 +762,12 @@ next_listen: cb->args[4] = 0; ++num; } + spin_unlock_bh(&ilb->lock); s_num = 0; cb->args[3] = 0; cb->args[4] = 0; } - inet_listen_unlock(hashinfo); skip_listen_ht: cb->args[0] = 1; s_i = num = s_num = 0; @@ -776,20 +778,21 @@ skip_listen_ht: for (i = s_i; i < hashinfo->ehash_size; i++) { struct inet_ehash_bucket *head = &hashinfo->ehash[i]; - rwlock_t *lock = inet_ehash_lockp(hashinfo, i); + spinlock_t *lock = inet_ehash_lockp(hashinfo, i); struct sock *sk; - struct hlist_node *node; + struct hlist_nulls_node *node; num = 0; - if (hlist_empty(&head->chain) && hlist_empty(&head->twchain)) + if (hlist_nulls_empty(&head->chain) && + hlist_nulls_empty(&head->twchain)) continue; if (i > s_i) s_num = 0; - read_lock_bh(lock); - sk_for_each(sk, node, &head->chain) { + spin_lock_bh(lock); + sk_nulls_for_each(sk, node, &head->chain) { struct inet_sock *inet = inet_sk(sk); if (num < s_num) @@ -803,7 +806,7 @@ skip_listen_ht: r->id.idiag_dport) goto next_normal; if (inet_csk_diag_dump(sk, skb, cb) < 0) { - read_unlock_bh(lock); + spin_unlock_bh(lock); goto done; } next_normal: @@ -825,14 +828,14 @@ next_normal: r->id.idiag_dport) goto next_dying; if (inet_twsk_diag_dump(tw, skb, cb) < 0) { - read_unlock_bh(lock); + spin_unlock_bh(lock); goto done; } next_dying: ++num; } } - read_unlock_bh(lock); + spin_unlock_bh(lock); } done: diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 44981906fb9..6a1045da48d 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -35,7 +35,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC); if (tb != NULL) { - tb->ib_net = hold_net(net); + write_pnet(&tb->ib_net, hold_net(net)); tb->port = snum; tb->fastreuse = 0; INIT_HLIST_HEAD(&tb->owners); @@ -51,7 +51,7 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket { if (hlist_empty(&tb->owners)) { __hlist_del(&tb->node); - release_net(tb->ib_net); + release_net(ib_net(tb)); kmem_cache_free(cachep, tb); } } @@ -110,33 +110,29 @@ void __inet_inherit_port(struct sock *sk, struct sock *child) EXPORT_SYMBOL_GPL(__inet_inherit_port); -/* - * This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP. - * Look, when several writers sleep and reader wakes them up, all but one - * immediately hit write lock and grab all the cpus. Exclusive sleep solves - * this, _but_ remember, it adds useless work on UP machines (wake up each - * exclusive lock release). It should be ifdefed really. - */ -void inet_listen_wlock(struct inet_hashinfo *hashinfo) - __acquires(hashinfo->lhash_lock) +static inline int compute_score(struct sock *sk, struct net *net, + const unsigned short hnum, const __be32 daddr, + const int dif) { - write_lock(&hashinfo->lhash_lock); - - if (atomic_read(&hashinfo->lhash_users)) { - DEFINE_WAIT(wait); + int score = -1; + struct inet_sock *inet = inet_sk(sk); - for (;;) { - prepare_to_wait_exclusive(&hashinfo->lhash_wait, - &wait, TASK_UNINTERRUPTIBLE); - if (!atomic_read(&hashinfo->lhash_users)) - break; - write_unlock_bh(&hashinfo->lhash_lock); - schedule(); - write_lock_bh(&hashinfo->lhash_lock); + if (net_eq(sock_net(sk), net) && inet->num == hnum && + !ipv6_only_sock(sk)) { + __be32 rcv_saddr = inet->rcv_saddr; + score = sk->sk_family == PF_INET ? 1 : 0; + if (rcv_saddr) { + if (rcv_saddr != daddr) + return -1; + score += 2; + } + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + return -1; + score += 2; } - - finish_wait(&hashinfo->lhash_wait, &wait); } + return score; } /* @@ -145,72 +141,48 @@ void inet_listen_wlock(struct inet_hashinfo *hashinfo) * remote address for the connection. So always assume those are both * wildcarded during the search since they can never be otherwise. */ -static struct sock *inet_lookup_listener_slow(struct net *net, - const struct hlist_head *head, - const __be32 daddr, - const unsigned short hnum, - const int dif) -{ - struct sock *result = NULL, *sk; - const struct hlist_node *node; - int hiscore = -1; - - sk_for_each(sk, node, head) { - const struct inet_sock *inet = inet_sk(sk); - - if (net_eq(sock_net(sk), net) && inet->num == hnum && - !ipv6_only_sock(sk)) { - const __be32 rcv_saddr = inet->rcv_saddr; - int score = sk->sk_family == PF_INET ? 1 : 0; - - if (rcv_saddr) { - if (rcv_saddr != daddr) - continue; - score += 2; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - continue; - score += 2; - } - if (score == 5) - return sk; - if (score > hiscore) { - hiscore = score; - result = sk; - } - } - } - return result; -} -/* Optimize the common listener case. */ + struct sock *__inet_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, const __be32 daddr, const unsigned short hnum, const int dif) { - struct sock *sk = NULL; - const struct hlist_head *head; - - read_lock(&hashinfo->lhash_lock); - head = &hashinfo->listening_hash[inet_lhashfn(net, hnum)]; - if (!hlist_empty(head)) { - const struct inet_sock *inet = inet_sk((sk = __sk_head(head))); - - if (inet->num == hnum && !sk->sk_node.next && - (!inet->rcv_saddr || inet->rcv_saddr == daddr) && - (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && - !sk->sk_bound_dev_if && net_eq(sock_net(sk), net)) - goto sherry_cache; - sk = inet_lookup_listener_slow(net, head, daddr, hnum, dif); + struct sock *sk, *result; + struct hlist_nulls_node *node; + unsigned int hash = inet_lhashfn(net, hnum); + struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; + int score, hiscore; + + rcu_read_lock(); +begin: + result = NULL; + hiscore = -1; + sk_nulls_for_each_rcu(sk, node, &ilb->head) { + score = compute_score(sk, net, hnum, daddr, dif); + if (score > hiscore) { + result = sk; + hiscore = score; + } } - if (sk) { -sherry_cache: - sock_hold(sk); + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE) + goto begin; + if (result) { + if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) + result = NULL; + else if (unlikely(compute_score(result, net, hnum, daddr, + dif) < hiscore)) { + sock_put(result); + goto begin; + } } - read_unlock(&hashinfo->lhash_lock); - return sk; + rcu_read_unlock(); + return result; } EXPORT_SYMBOL_GPL(__inet_lookup_listener); @@ -223,35 +195,65 @@ struct sock * __inet_lookup_established(struct net *net, INET_ADDR_COOKIE(acookie, saddr, daddr) const __portpair ports = INET_COMBINED_PORTS(sport, hnum); struct sock *sk; - const struct hlist_node *node; + const struct hlist_nulls_node *node; /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport); - struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); - rwlock_t *lock = inet_ehash_lockp(hashinfo, hash); + unsigned int slot = hash & (hashinfo->ehash_size - 1); + struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; - prefetch(head->chain.first); - read_lock(lock); - sk_for_each(sk, node, &head->chain) { + rcu_read_lock(); +begin: + sk_nulls_for_each_rcu(sk, node, &head->chain) { if (INET_MATCH(sk, net, hash, acookie, - saddr, daddr, ports, dif)) - goto hit; /* You sunk my battleship! */ + saddr, daddr, ports, dif)) { + if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) + goto begintw; + if (unlikely(!INET_MATCH(sk, net, hash, acookie, + saddr, daddr, ports, dif))) { + sock_put(sk); + goto begin; + } + goto out; + } } + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(node) != slot) + goto begin; +begintw: /* Must check for a TIME_WAIT'er before going to listener hash. */ - sk_for_each(sk, node, &head->twchain) { + sk_nulls_for_each_rcu(sk, node, &head->twchain) { if (INET_TW_MATCH(sk, net, hash, acookie, - saddr, daddr, ports, dif)) - goto hit; + saddr, daddr, ports, dif)) { + if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) { + sk = NULL; + goto out; + } + if (unlikely(!INET_TW_MATCH(sk, net, hash, acookie, + saddr, daddr, ports, dif))) { + sock_put(sk); + goto begintw; + } + goto out; + } } + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(node) != slot) + goto begintw; sk = NULL; out: - read_unlock(lock); + rcu_read_unlock(); return sk; -hit: - sock_hold(sk); - goto out; } EXPORT_SYMBOL_GPL(__inet_lookup_established); @@ -270,16 +272,15 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, struct net *net = sock_net(sk); unsigned int hash = inet_ehashfn(net, daddr, lport, saddr, inet->dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); - rwlock_t *lock = inet_ehash_lockp(hinfo, hash); + spinlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; - const struct hlist_node *node; + const struct hlist_nulls_node *node; struct inet_timewait_sock *tw; - prefetch(head->chain.first); - write_lock(lock); + spin_lock(lock); /* Check TIME-WAIT sockets first. */ - sk_for_each(sk2, node, &head->twchain) { + sk_nulls_for_each(sk2, node, &head->twchain) { tw = inet_twsk(sk2); if (INET_TW_MATCH(sk2, net, hash, acookie, @@ -293,7 +294,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, tw = NULL; /* And established part... */ - sk_for_each(sk2, node, &head->chain) { + sk_nulls_for_each(sk2, node, &head->chain) { if (INET_MATCH(sk2, net, hash, acookie, saddr, daddr, ports, dif)) goto not_unique; @@ -306,9 +307,9 @@ unique: inet->sport = htons(lport); sk->sk_hash = hash; WARN_ON(!sk_unhashed(sk)); - __sk_add_node(sk, &head->chain); + __sk_nulls_add_node_rcu(sk, &head->chain); + spin_unlock(lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - write_unlock(lock); if (twp) { *twp = tw; @@ -324,7 +325,7 @@ unique: return 0; not_unique: - write_unlock(lock); + spin_unlock(lock); return -EADDRNOTAVAIL; } @@ -338,8 +339,8 @@ static inline u32 inet_sk_port_offset(const struct sock *sk) void __inet_hash_nolisten(struct sock *sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; - struct hlist_head *list; - rwlock_t *lock; + struct hlist_nulls_head *list; + spinlock_t *lock; struct inet_ehash_bucket *head; WARN_ON(!sk_unhashed(sk)); @@ -349,18 +350,17 @@ void __inet_hash_nolisten(struct sock *sk) list = &head->chain; lock = inet_ehash_lockp(hashinfo, sk->sk_hash); - write_lock(lock); - __sk_add_node(sk, list); + spin_lock(lock); + __sk_nulls_add_node_rcu(sk, list); + spin_unlock(lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - write_unlock(lock); } EXPORT_SYMBOL_GPL(__inet_hash_nolisten); static void __inet_hash(struct sock *sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; - struct hlist_head *list; - rwlock_t *lock; + struct inet_listen_hashbucket *ilb; if (sk->sk_state != TCP_LISTEN) { __inet_hash_nolisten(sk); @@ -368,14 +368,12 @@ static void __inet_hash(struct sock *sk) } WARN_ON(!sk_unhashed(sk)); - list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; - lock = &hashinfo->lhash_lock; + ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; - inet_listen_wlock(hashinfo); - __sk_add_node(sk, list); + spin_lock(&ilb->lock); + __sk_nulls_add_node_rcu(sk, &ilb->head); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - write_unlock(lock); - wake_up(&hashinfo->lhash_wait); + spin_unlock(&ilb->lock); } void inet_hash(struct sock *sk) @@ -390,27 +388,23 @@ EXPORT_SYMBOL_GPL(inet_hash); void inet_unhash(struct sock *sk) { - rwlock_t *lock; struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; + spinlock_t *lock; + int done; if (sk_unhashed(sk)) - goto out; + return; - if (sk->sk_state == TCP_LISTEN) { - local_bh_disable(); - inet_listen_wlock(hashinfo); - lock = &hashinfo->lhash_lock; - } else { + if (sk->sk_state == TCP_LISTEN) + lock = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)].lock; + else lock = inet_ehash_lockp(hashinfo, sk->sk_hash); - write_lock_bh(lock); - } - if (__sk_del_node_init(sk)) + spin_lock_bh(lock); + done =__sk_nulls_del_node_init_rcu(sk); + if (done) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - write_unlock_bh(lock); -out: - if (sk->sk_state == TCP_LISTEN) - wake_up(&hashinfo->lhash_wait); + spin_unlock_bh(lock); } EXPORT_SYMBOL_GPL(inet_unhash); @@ -449,7 +443,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, * unique enough. */ inet_bind_bucket_for_each(tb, node, &head->chain) { - if (tb->ib_net == net && tb->port == port) { + if (ib_net(tb) == net && tb->port == port) { WARN_ON(hlist_empty(&tb->owners)); if (tb->fastreuse >= 0) goto next_port; @@ -524,3 +518,16 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row, } EXPORT_SYMBOL_GPL(inet_hash_connect); + +void inet_hashinfo_init(struct inet_hashinfo *h) +{ + int i; + + for (i = 0; i < INET_LHTABLE_SIZE; i++) { + spin_lock_init(&h->listening_hash[i].lock); + INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].head, + i + LISTENING_NULLS_BASE); + } +} + +EXPORT_SYMBOL_GPL(inet_hashinfo_init); diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c index cfd034a2b96..6a667dae315 100644 --- a/net/ipv4/inet_lro.c +++ b/net/ipv4/inet_lro.c @@ -120,7 +120,7 @@ static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc) iph->check = ip_fast_csum((u8 *)lro_desc->iph, iph->ihl); tcph->check = 0; - tcp_hdr_csum = csum_partial((u8 *)tcph, TCP_HDR_LEN(tcph), 0); + tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), 0); lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum); tcph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, lro_desc->ip_tot_len - @@ -135,7 +135,7 @@ static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len) __wsum tcp_ps_hdr_csum; tcp_csum = ~csum_unfold(tcph->check); - tcp_hdr_csum = csum_partial((u8 *)tcph, TCP_HDR_LEN(tcph), tcp_csum); + tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), tcp_csum); tcp_ps_hdr_csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, len + TCP_HDR_LEN(tcph), diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 1c5fd38f882..8554d0ea171 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -20,16 +20,16 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_bind_hashbucket *bhead; struct inet_bind_bucket *tb; /* Unlink from established hashes. */ - rwlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); + spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); - write_lock(lock); - if (hlist_unhashed(&tw->tw_node)) { - write_unlock(lock); + spin_lock(lock); + if (hlist_nulls_unhashed(&tw->tw_node)) { + spin_unlock(lock); return; } - __hlist_del(&tw->tw_node); - sk_node_init(&tw->tw_node); - write_unlock(lock); + hlist_nulls_del_rcu(&tw->tw_node); + sk_nulls_node_init(&tw->tw_node); + spin_unlock(lock); /* Disassociate with bind bucket. */ bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num, @@ -76,7 +76,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, const struct inet_sock *inet = inet_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash); - rwlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); + spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); struct inet_bind_hashbucket *bhead; /* Step 1: Put TW into bind hash. Original socket stays there too. Note, that any socket with inet->num != 0 MUST be bound in @@ -90,17 +90,21 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); spin_unlock(&bhead->lock); - write_lock(lock); + spin_lock(lock); - /* Step 2: Remove SK from established hash. */ - if (__sk_del_node_init(sk)) - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - - /* Step 3: Hash TW into TIMEWAIT chain. */ - inet_twsk_add_node(tw, &ehead->twchain); + /* + * Step 2: Hash TW into TIMEWAIT chain. + * Should be done before removing sk from established chain + * because readers are lockless and search established first. + */ atomic_inc(&tw->tw_refcnt); + inet_twsk_add_node_rcu(tw, &ehead->twchain); - write_unlock(lock); + /* Step 3: Remove SK from established hash. */ + if (__sk_nulls_del_node_init_rcu(sk)) + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + + spin_unlock(lock); } EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); @@ -416,17 +420,17 @@ void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo, { struct inet_timewait_sock *tw; struct sock *sk; - struct hlist_node *node; + struct hlist_nulls_node *node; int h; local_bh_disable(); for (h = 0; h < (hashinfo->ehash_size); h++) { struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, h); - rwlock_t *lock = inet_ehash_lockp(hashinfo, h); + spinlock_t *lock = inet_ehash_lockp(hashinfo, h); restart: - write_lock(lock); - sk_for_each(sk, node, &head->twchain) { + spin_lock(lock); + sk_nulls_for_each(sk, node, &head->twchain) { tw = inet_twsk(sk); if (!net_eq(twsk_net(tw), net) || @@ -434,13 +438,13 @@ restart: continue; atomic_inc(&tw->tw_refcnt); - write_unlock(lock); + spin_unlock(lock); inet_twsk_deschedule(tw, twdr); inet_twsk_put(tw); goto restart; } - write_unlock(lock); + spin_unlock(lock); } local_bh_enable(); } diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index a456ceeac3f..b1fbe18feb5 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -144,7 +144,7 @@ static void unlink_from_unused(struct inet_peer *p) * _stack is known to be NULL or not at compile time, * so compiler will optimize the if (_stack) tests. */ -#define lookup(_daddr,_stack) \ +#define lookup(_daddr, _stack) \ ({ \ struct inet_peer *u, **v; \ if (_stack != NULL) { \ diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 450016b89a1..df3fe50bbf0 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -106,7 +106,7 @@ int ip_forward(struct sk_buff *skb) * We now generate an ICMP HOST REDIRECT giving the route * we calculated. */ - if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr && !skb->sp) + if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr && !skb_sec_path(skb)) ip_rt_send_redirect(skb); skb->priority = rt_tos2priority(iph->tos); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index e4f81f54bef..6659ac000ee 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -56,7 +56,7 @@ struct ipfrag_skb_cb int offset; }; -#define FRAG_CB(skb) ((struct ipfrag_skb_cb*)((skb)->cb)) +#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) /* Describe an entry in the "incomplete datagrams" queue. */ struct ipq { @@ -559,9 +559,8 @@ out_nomem: goto out_fail; out_oversize: if (net_ratelimit()) - printk(KERN_INFO - "Oversized IP packet from " NIPQUAD_FMT ".\n", - NIPQUAD(qp->saddr)); + printk(KERN_INFO "Oversized IP packet from %pI4.\n", + &qp->saddr); out_fail: IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_REASMFAILS); return err; @@ -608,7 +607,7 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = { .data = &init_net.ipv4.frags.high_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_IPFRAG_LOW_THRESH, @@ -616,7 +615,7 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = { .data = &init_net.ipv4.frags.low_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_IPFRAG_TIME, @@ -624,8 +623,8 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = { .data = &init_net.ipv4.frags.timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies }, { } }; @@ -637,15 +636,15 @@ static struct ctl_table ip4_frags_ctl_table[] = { .data = &ip4_frags.secret_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies }, { .procname = "ipfrag_max_dist", .data = &sysctl_ipfrag_max_dist, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, + .proc_handler = proc_dointvec_minmax, .extra1 = &zero }, { } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 85c487b8572..0101521f366 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -126,8 +126,6 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev); /* Fallback tunnel: no source, no destination, no key, no options */ -static int ipgre_fb_tunnel_init(struct net_device *dev); - #define HASH_SIZE 16 static int ipgre_net_id; @@ -371,7 +369,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info) by themself??? */ - struct iphdr *iph = (struct iphdr*)skb->data; + struct iphdr *iph = (struct iphdr *)skb->data; __be16 *p = (__be16*)(skb->data+(iph->ihl<<2)); int grehlen = (iph->ihl<<2) + 4; const int type = icmp_hdr(skb)->type; @@ -632,7 +630,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (dev->header_ops && dev->type == ARPHRD_IPGRE) { gre_hlen = 0; - tiph = (struct iphdr*)skb->data; + tiph = (struct iphdr *)skb->data; } else { gre_hlen = tunnel->hlen; tiph = &tunnel->parms.iph; @@ -660,7 +658,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (neigh == NULL) goto tx_error; - addr6 = (struct in6_addr*)&neigh->primary_key; + addr6 = (struct in6_addr *)&neigh->primary_key; addr_type = ipv6_addr_type(addr6); if (addr_type == IPV6_ADDR_ANY) { @@ -726,7 +724,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) } #ifdef CONFIG_IPV6 else if (skb->protocol == htons(ETH_P_IPV6)) { - struct rt6_info *rt6 = (struct rt6_info*)skb->dst; + struct rt6_info *rt6 = (struct rt6_info *)skb->dst; if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) { if ((tunnel->parms.iph.daddr && @@ -800,7 +798,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) iph->ttl = old_iph->ttl; #ifdef CONFIG_IPV6 else if (skb->protocol == htons(ETH_P_IPV6)) - iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit; + iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit; #endif else iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT); @@ -962,7 +960,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) break; } } else { - unsigned nflags=0; + unsigned nflags = 0; t = netdev_priv(dev); @@ -1104,7 +1102,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr) { - struct iphdr *iph = (struct iphdr*) skb_mac_header(skb); + struct iphdr *iph = (struct iphdr *) skb_mac_header(skb); memcpy(haddr, &iph->saddr, 4); return 4; } @@ -1142,6 +1140,7 @@ static int ipgre_open(struct net_device *dev) static int ipgre_close(struct net_device *dev) { struct ip_tunnel *t = netdev_priv(dev); + if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) { struct in_device *in_dev; in_dev = inetdev_by_index(dev_net(dev), t->mlink); @@ -1155,14 +1154,22 @@ static int ipgre_close(struct net_device *dev) #endif +static const struct net_device_ops ipgre_netdev_ops = { + .ndo_init = ipgre_tunnel_init, + .ndo_uninit = ipgre_tunnel_uninit, +#ifdef CONFIG_NET_IPGRE_BROADCAST + .ndo_open = ipgre_open, + .ndo_stop = ipgre_close, +#endif + .ndo_start_xmit = ipgre_tunnel_xmit, + .ndo_do_ioctl = ipgre_tunnel_ioctl, + .ndo_change_mtu = ipgre_tunnel_change_mtu, +}; + static void ipgre_tunnel_setup(struct net_device *dev) { - dev->init = ipgre_tunnel_init; - dev->uninit = ipgre_tunnel_uninit; + dev->netdev_ops = &ipgre_netdev_ops; dev->destructor = free_netdev; - dev->hard_start_xmit = ipgre_tunnel_xmit; - dev->do_ioctl = ipgre_tunnel_ioctl; - dev->change_mtu = ipgre_tunnel_change_mtu; dev->type = ARPHRD_IPGRE; dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4; @@ -1194,8 +1201,6 @@ static int ipgre_tunnel_init(struct net_device *dev) return -EINVAL; dev->flags = IFF_BROADCAST; dev->header_ops = &ipgre_header_ops; - dev->open = ipgre_open; - dev->stop = ipgre_close; } #endif } else @@ -1204,7 +1209,7 @@ static int ipgre_tunnel_init(struct net_device *dev) return 0; } -static int ipgre_fb_tunnel_init(struct net_device *dev) +static void ipgre_fb_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct iphdr *iph = &tunnel->parms.iph; @@ -1220,7 +1225,6 @@ static int ipgre_fb_tunnel_init(struct net_device *dev) dev_hold(dev); ign->tunnels_wc[0] = tunnel; - return 0; } @@ -1264,9 +1268,9 @@ static int ipgre_init_net(struct net *net) err = -ENOMEM; goto err_alloc_dev; } - - ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init; dev_net_set(ign->fb_tunnel_dev, net); + + ipgre_fb_tunnel_init(ign->fb_tunnel_dev); ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops; if ((err = register_netdev(ign->fb_tunnel_dev))) @@ -1397,16 +1401,22 @@ static int ipgre_tap_init(struct net_device *dev) return 0; } +static const struct net_device_ops ipgre_tap_netdev_ops = { + .ndo_init = ipgre_tap_init, + .ndo_uninit = ipgre_tunnel_uninit, + .ndo_start_xmit = ipgre_tunnel_xmit, + .ndo_set_mac_address = eth_mac_addr, + .ndo_validate_addr = eth_validate_addr, + .ndo_change_mtu = ipgre_tunnel_change_mtu, +}; + static void ipgre_tap_setup(struct net_device *dev) { ether_setup(dev); - dev->init = ipgre_tap_init; - dev->uninit = ipgre_tunnel_uninit; + dev->netdev_ops = &ipgre_netdev_ops; dev->destructor = free_netdev; - dev->hard_start_xmit = ipgre_tunnel_xmit; - dev->change_mtu = ipgre_tunnel_change_mtu; dev->iflink = 0; dev->features |= NETIF_F_NETNS_LOCAL; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index cfb38ac9d69..1a58a6fa1dc 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -302,10 +302,8 @@ static inline int ip_rcv_options(struct sk_buff *skb) if (!IN_DEV_SOURCE_ROUTE(in_dev)) { if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) - printk(KERN_INFO "source route option " - NIPQUAD_FMT " -> " NIPQUAD_FMT "\n", - NIPQUAD(iph->saddr), - NIPQUAD(iph->daddr)); + printk(KERN_INFO "source route option %pI4 -> %pI4\n", + &iph->saddr, &iph->daddr); in_dev_put(in_dev); goto drop; } @@ -350,9 +348,9 @@ static int ip_rcv_finish(struct sk_buff *skb) struct ip_rt_acct *st = per_cpu_ptr(ip_rt_acct, smp_processor_id()); u32 idx = skb->dst->tclassid; st[idx&0xFF].o_packets++; - st[idx&0xFF].o_bytes+=skb->len; + st[idx&0xFF].o_bytes += skb->len; st[(idx>>16)&0xFF].i_packets++; - st[(idx>>16)&0xFF].i_bytes+=skb->len; + st[(idx>>16)&0xFF].i_bytes += skb->len; } #endif diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index d2a8f8bb78a..8ebe86dd72a 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -430,7 +430,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) * single device frame, and queue such a frame for sending. */ -int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) +int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) { struct iphdr *iph; int raw = 0; @@ -720,7 +720,7 @@ static inline int ip_ufo_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int hh_len, int fragheaderlen, - int transhdrlen, int mtu,unsigned int flags) + int transhdrlen, int mtu, unsigned int flags) { struct sk_buff *skb; int err; @@ -741,7 +741,7 @@ static inline int ip_ufo_append_data(struct sock *sk, skb_reserve(skb, hh_len); /* create space for UDP/IP header */ - skb_put(skb,fragheaderlen + transhdrlen); + skb_put(skb, fragheaderlen + transhdrlen); /* initialize network header pointer */ skb_reset_network_header(skb); @@ -778,7 +778,7 @@ int ip_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, - struct ipcm_cookie *ipc, struct rtable *rt, + struct ipcm_cookie *ipc, struct rtable **rtp, unsigned int flags) { struct inet_sock *inet = inet_sk(sk); @@ -793,6 +793,7 @@ int ip_append_data(struct sock *sk, int offset = 0; unsigned int maxfraglen, fragheaderlen; int csummode = CHECKSUM_NONE; + struct rtable *rt; if (flags&MSG_PROBE) return 0; @@ -812,7 +813,11 @@ int ip_append_data(struct sock *sk, inet->cork.flags |= IPCORK_OPT; inet->cork.addr = ipc->addr; } - dst_hold(&rt->u.dst); + rt = *rtp; + /* + * We steal reference to this route, caller should not release it + */ + *rtp = NULL; inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path); @@ -1279,7 +1284,12 @@ int ip_push_pending_frames(struct sock *sk) skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - skb->dst = dst_clone(&rt->u.dst); + /* + * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec + * on dst refcount + */ + inet->cork.dst = NULL; + skb->dst = &rt->u.dst; if (iph->protocol == IPPROTO_ICMP) icmp_out_count(net, ((struct icmphdr *) @@ -1391,7 +1401,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar sk->sk_protocol = ip_hdr(skb)->protocol; sk->sk_bound_dev_if = arg->bound_dev_if; ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0, - &ipc, rt, MSG_DONTWAIT); + &ipc, &rt, MSG_DONTWAIT); if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { if (arg->csumoffset >= 0) *((__sum16 *)skb_transport_header(skb) + diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 465abf0a986..43c05854d75 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -48,6 +48,7 @@ #define IP_CMSG_RECVOPTS 8 #define IP_CMSG_RETOPTS 16 #define IP_CMSG_PASSSEC 32 +#define IP_CMSG_ORIGDSTADDR 64 /* * SOL_IP control messages. @@ -94,7 +95,7 @@ static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb) static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) { unsigned char optbuf[sizeof(struct ip_options) + 40]; - struct ip_options * opt = (struct ip_options*)optbuf; + struct ip_options * opt = (struct ip_options *)optbuf; if (IPCB(skb)->opt.optlen == 0) return; @@ -126,6 +127,27 @@ static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb) security_release_secctx(secdata, seclen); } +static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) +{ + struct sockaddr_in sin; + struct iphdr *iph = ip_hdr(skb); + __be16 *ports = (__be16 *)skb_transport_header(skb); + + if (skb_transport_offset(skb) + 4 > skb->len) + return; + + /* All current transport protocols have the port numbers in the + * first four bytes of the transport header and this function is + * written with this assumption in mind. + */ + + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = iph->daddr; + sin.sin_port = ports[1]; + memset(sin.sin_zero, 0, sizeof(sin.sin_zero)); + + put_cmsg(msg, SOL_IP, IP_ORIGDSTADDR, sizeof(sin), &sin); +} void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) { @@ -160,6 +182,12 @@ void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) if (flags & 1) ip_cmsg_recv_security(msg, skb); + + if ((flags>>=1) == 0) + return; + if (flags & 1) + ip_cmsg_recv_dstaddr(msg, skb); + } int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc) @@ -411,7 +439,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { struct inet_sock *inet = inet_sk(sk); - int val=0,err; + int val = 0, err; if (((1<<optname) & ((1<<IP_PKTINFO) | (1<<IP_RECVTTL) | (1<<IP_RECVOPTS) | (1<<IP_RECVTOS) | @@ -421,7 +449,8 @@ static int do_ip_setsockopt(struct sock *sk, int level, (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) | (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT))) || optname == IP_MULTICAST_TTL || - optname == IP_MULTICAST_LOOP) { + optname == IP_MULTICAST_LOOP || + optname == IP_RECVORIGDSTADDR) { if (optlen >= sizeof(int)) { if (get_user(val, (int __user *) optval)) return -EFAULT; @@ -437,7 +466,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, /* If optlen==0, it is equivalent to val == 0 */ if (ip_mroute_opt(optname)) - return ip_mroute_setsockopt(sk,optname,optval,optlen); + return ip_mroute_setsockopt(sk, optname, optval, optlen); err = 0; lock_sock(sk); @@ -509,6 +538,12 @@ static int do_ip_setsockopt(struct sock *sk, int level, else inet->cmsg_flags &= ~IP_CMSG_PASSSEC; break; + case IP_RECVORIGDSTADDR: + if (val) + inet->cmsg_flags |= IP_CMSG_ORIGDSTADDR; + else + inet->cmsg_flags &= ~IP_CMSG_ORIGDSTADDR; + break; case IP_TOS: /* This sets both TOS and Precedence */ if (sk->sk_type == SOCK_STREAM) { val &= ~3; @@ -549,7 +584,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, goto e_inval; if (optlen<1) goto e_inval; - if (val==-1) + if (val == -1) val = 1; if (val < 0 || val > 255) goto e_inval; @@ -573,12 +608,12 @@ static int do_ip_setsockopt(struct sock *sk, int level, err = -EFAULT; if (optlen >= sizeof(struct ip_mreqn)) { - if (copy_from_user(&mreq,optval,sizeof(mreq))) + if (copy_from_user(&mreq, optval, sizeof(mreq))) break; } else { memset(&mreq, 0, sizeof(mreq)); if (optlen >= sizeof(struct in_addr) && - copy_from_user(&mreq.imr_address,optval,sizeof(struct in_addr))) + copy_from_user(&mreq.imr_address, optval, sizeof(struct in_addr))) break; } @@ -626,11 +661,11 @@ static int do_ip_setsockopt(struct sock *sk, int level, goto e_inval; err = -EFAULT; if (optlen >= sizeof(struct ip_mreqn)) { - if (copy_from_user(&mreq,optval,sizeof(mreq))) + if (copy_from_user(&mreq, optval, sizeof(mreq))) break; } else { memset(&mreq, 0, sizeof(mreq)); - if (copy_from_user(&mreq,optval,sizeof(struct ip_mreq))) + if (copy_from_user(&mreq, optval, sizeof(struct ip_mreq))) break; } @@ -808,7 +843,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, err = -ENOBUFS; break; } - gsf = kmalloc(optlen,GFP_KERNEL); + gsf = kmalloc(optlen, GFP_KERNEL); if (!gsf) { err = -ENOBUFS; break; @@ -828,7 +863,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, goto mc_msf_out; } msize = IP_MSFILTER_SIZE(gsf->gf_numsrc); - msf = kmalloc(msize,GFP_KERNEL); + msf = kmalloc(msize, GFP_KERNEL); if (!msf) { err = -ENOBUFS; goto mc_msf_out; @@ -971,9 +1006,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, return -EOPNOTSUPP; if (ip_mroute_opt(optname)) - return ip_mroute_getsockopt(sk,optname,optval,optlen); + return ip_mroute_getsockopt(sk, optname, optval, optlen); - if (get_user(len,optlen)) + if (get_user(len, optlen)) return -EFAULT; if (len < 0) return -EINVAL; @@ -984,7 +1019,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_OPTIONS: { unsigned char optbuf[sizeof(struct ip_options)+40]; - struct ip_options * opt = (struct ip_options*)optbuf; + struct ip_options * opt = (struct ip_options *)optbuf; opt->optlen = 0; if (inet->opt) memcpy(optbuf, inet->opt, @@ -1022,6 +1057,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_PASSSEC: val = (inet->cmsg_flags & IP_CMSG_PASSSEC) != 0; break; + case IP_RECVORIGDSTADDR: + val = (inet->cmsg_flags & IP_CMSG_ORIGDSTADDR) != 0; + break; case IP_TOS: val = inet->tos; break; @@ -1154,13 +1192,13 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, len = 1; if (put_user(len, optlen)) return -EFAULT; - if (copy_to_user(optval,&ucval,1)) + if (copy_to_user(optval, &ucval, 1)) return -EFAULT; } else { len = min_t(unsigned int, sizeof(int), len); if (put_user(len, optlen)) return -EFAULT; - if (copy_to_user(optval,&val,len)) + if (copy_to_user(optval, &val, len)) return -EFAULT; } return 0; @@ -1178,7 +1216,7 @@ int ip_getsockopt(struct sock *sk, int level, !ip_mroute_opt(optname)) { int len; - if (get_user(len,optlen)) + if (get_user(len, optlen)) return -EFAULT; lock_sock(sk); diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 38ccb6dfb02..3262ce06294 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -35,12 +35,12 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) return; spi = htonl(ntohs(ipch->cpi)); - x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, + x = xfrm_state_lookup(&init_net, (xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET); if (!x) return; - NETDEBUG(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/" NIPQUAD_FMT "\n", - spi, NIPQUAD(iph->daddr)); + NETDEBUG(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%pI4\n", + spi, &iph->daddr); xfrm_state_put(x); } @@ -49,7 +49,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) { struct xfrm_state *t; - t = xfrm_state_alloc(); + t = xfrm_state_alloc(&init_net); if (t == NULL) goto out; @@ -85,7 +85,7 @@ static int ipcomp_tunnel_attach(struct xfrm_state *x) int err = 0; struct xfrm_state *t; - t = xfrm_state_lookup((xfrm_address_t *)&x->id.daddr.a4, + t = xfrm_state_lookup(&init_net, (xfrm_address_t *)&x->id.daddr.a4, x->props.saddr.a4, IPPROTO_IPIP, AF_INET); if (!t) { t = ipcomp_tunnel_create(x); diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 42065fff46c..42a0f3dd3fd 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -374,7 +374,7 @@ static int __init ic_defaults(void) */ if (!ic_host_name_set) - sprintf(init_utsname()->nodename, NIPQUAD_FMT, NIPQUAD(ic_myaddr)); + sprintf(init_utsname()->nodename, "%pI4", &ic_myaddr); if (root_server_addr == NONE) root_server_addr = ic_servaddr; @@ -387,11 +387,11 @@ static int __init ic_defaults(void) else if (IN_CLASSC(ntohl(ic_myaddr))) ic_netmask = htonl(IN_CLASSC_NET); else { - printk(KERN_ERR "IP-Config: Unable to guess netmask for address " NIPQUAD_FMT "\n", - NIPQUAD(ic_myaddr)); + printk(KERN_ERR "IP-Config: Unable to guess netmask for address %pI4\n", + &ic_myaddr); return -1; } - printk("IP-Config: Guessing netmask " NIPQUAD_FMT "\n", NIPQUAD(ic_netmask)); + printk("IP-Config: Guessing netmask %pI4\n", &ic_netmask); } return 0; @@ -979,10 +979,8 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str ic_myaddr = b->your_ip; ic_servaddr = server_id; #ifdef IPCONFIG_DEBUG - printk("DHCP: Offered address " NIPQUAD_FMT, - NIPQUAD(ic_myaddr)); - printk(" by server " NIPQUAD_FMT "\n", - NIPQUAD(ic_servaddr)); + printk("DHCP: Offered address %pI4 by server %pI4\n", + &ic_myaddr, &ic_servaddr); #endif /* The DHCP indicated server address takes * precedence over the bootp header one if @@ -1177,11 +1175,11 @@ static int __init ic_dynamic(void) return -1; } - printk("IP-Config: Got %s answer from " NIPQUAD_FMT ", ", + printk("IP-Config: Got %s answer from %pI4, ", ((ic_got_reply & IC_RARP) ? "RARP" : (ic_proto_enabled & IC_USE_DHCP) ? "DHCP" : "BOOTP"), - NIPQUAD(ic_servaddr)); - printk("my address is " NIPQUAD_FMT "\n", NIPQUAD(ic_myaddr)); + &ic_servaddr); + printk("my address is %pI4\n", &ic_myaddr); return 0; } @@ -1206,14 +1204,12 @@ static int pnp_seq_show(struct seq_file *seq, void *v) "domain %s\n", ic_domain); for (i = 0; i < CONF_NAMESERVERS_MAX; i++) { if (ic_nameservers[i] != NONE) - seq_printf(seq, - "nameserver " NIPQUAD_FMT "\n", - NIPQUAD(ic_nameservers[i])); + seq_printf(seq, "nameserver %pI4\n", + &ic_nameservers[i]); } if (ic_servaddr != NONE) - seq_printf(seq, - "bootserver " NIPQUAD_FMT "\n", - NIPQUAD(ic_servaddr)); + seq_printf(seq, "bootserver %pI4\n", + &ic_servaddr); return 0; } @@ -1387,13 +1383,13 @@ static int __init ip_auto_config(void) */ printk("IP-Config: Complete:"); printk("\n device=%s", ic_dev->name); - printk(", addr=" NIPQUAD_FMT, NIPQUAD(ic_myaddr)); - printk(", mask=" NIPQUAD_FMT, NIPQUAD(ic_netmask)); - printk(", gw=" NIPQUAD_FMT, NIPQUAD(ic_gateway)); + printk(", addr=%pI4", &ic_myaddr); + printk(", mask=%pI4", &ic_netmask); + printk(", gw=%pI4", &ic_gateway); printk(",\n host=%s, domain=%s, nis-domain=%s", utsname()->nodename, ic_domain, utsname()->domainname); - printk(",\n bootserver=" NIPQUAD_FMT, NIPQUAD(ic_servaddr)); - printk(", rootserver=" NIPQUAD_FMT, NIPQUAD(root_server_addr)); + printk(",\n bootserver=%pI4", &ic_servaddr); + printk(", rootserver=%pI4", &root_server_addr); printk(", rootpath=%s", root_server_path); printk("\n"); #endif /* !SILENT */ diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 29609d29df7..5079dfbc6f3 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -130,8 +130,8 @@ struct ipip_net { struct net_device *fb_tunnel_dev; }; -static int ipip_fb_tunnel_init(struct net_device *dev); -static int ipip_tunnel_init(struct net_device *dev); +static void ipip_fb_tunnel_init(struct net_device *dev); +static void ipip_tunnel_init(struct net_device *dev); static void ipip_tunnel_setup(struct net_device *dev); static DEFINE_RWLOCK(ipip_lock); @@ -245,9 +245,10 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net, } nt = netdev_priv(dev); - dev->init = ipip_tunnel_init; nt->parms = *parms; + ipip_tunnel_init(dev); + if (register_netdevice(dev) < 0) goto failed_free; @@ -281,7 +282,7 @@ static int ipip_err(struct sk_buff *skb, u32 info) 8 bytes of packet payload. It means, that precise relaying of ICMP in the real Internet is absolutely infeasible. */ - struct iphdr *iph = (struct iphdr*)skb->data; + struct iphdr *iph = (struct iphdr *)skb->data; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; struct ip_tunnel *t; @@ -691,12 +692,17 @@ static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu) return 0; } +static const struct net_device_ops ipip_netdev_ops = { + .ndo_uninit = ipip_tunnel_uninit, + .ndo_start_xmit = ipip_tunnel_xmit, + .ndo_do_ioctl = ipip_tunnel_ioctl, + .ndo_change_mtu = ipip_tunnel_change_mtu, + +}; + static void ipip_tunnel_setup(struct net_device *dev) { - dev->uninit = ipip_tunnel_uninit; - dev->hard_start_xmit = ipip_tunnel_xmit; - dev->do_ioctl = ipip_tunnel_ioctl; - dev->change_mtu = ipip_tunnel_change_mtu; + dev->netdev_ops = &ipip_netdev_ops; dev->destructor = free_netdev; dev->type = ARPHRD_TUNNEL; @@ -708,11 +714,9 @@ static void ipip_tunnel_setup(struct net_device *dev) dev->features |= NETIF_F_NETNS_LOCAL; } -static int ipip_tunnel_init(struct net_device *dev) +static void ipip_tunnel_init(struct net_device *dev) { - struct ip_tunnel *tunnel; - - tunnel = netdev_priv(dev); + struct ip_tunnel *tunnel = netdev_priv(dev); tunnel->dev = dev; strcpy(tunnel->parms.name, dev->name); @@ -721,11 +725,9 @@ static int ipip_tunnel_init(struct net_device *dev) memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); ipip_tunnel_bind_dev(dev); - - return 0; } -static int ipip_fb_tunnel_init(struct net_device *dev) +static void ipip_fb_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct iphdr *iph = &tunnel->parms.iph; @@ -740,7 +742,6 @@ static int ipip_fb_tunnel_init(struct net_device *dev) dev_hold(dev); ipn->tunnels_wc[0] = tunnel; - return 0; } static struct xfrm_tunnel ipip_handler = { @@ -792,10 +793,10 @@ static int ipip_init_net(struct net *net) err = -ENOMEM; goto err_alloc_dev; } - - ipn->fb_tunnel_dev->init = ipip_fb_tunnel_init; dev_net_set(ipn->fb_tunnel_dev, net); + ipip_fb_tunnel_init(ipn->fb_tunnel_dev); + if ((err = register_netdev(ipn->fb_tunnel_dev))) goto err_reg_dev; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 25924b1eb2e..14666449dc1 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -124,8 +124,8 @@ static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) dev = __dev_get_by_name(&init_net, "tunl0"); if (dev) { + const struct net_device_ops *ops = dev->netdev_ops; struct ifreq ifr; - mm_segment_t oldfs; struct ip_tunnel_parm p; memset(&p, 0, sizeof(p)); @@ -137,9 +137,13 @@ static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) sprintf(p.name, "dvmrp%d", v->vifc_vifi); ifr.ifr_ifru.ifru_data = (__force void __user *)&p; - oldfs = get_fs(); set_fs(KERNEL_DS); - dev->do_ioctl(dev, &ifr, SIOCDELTUNNEL); - set_fs(oldfs); + if (ops->ndo_do_ioctl) { + mm_segment_t oldfs = get_fs(); + + set_fs(KERNEL_DS); + ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL); + set_fs(oldfs); + } } } @@ -151,9 +155,9 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v) dev = __dev_get_by_name(&init_net, "tunl0"); if (dev) { + const struct net_device_ops *ops = dev->netdev_ops; int err; struct ifreq ifr; - mm_segment_t oldfs; struct ip_tunnel_parm p; struct in_device *in_dev; @@ -166,9 +170,14 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v) sprintf(p.name, "dvmrp%d", v->vifc_vifi); ifr.ifr_ifru.ifru_data = (__force void __user *)&p; - oldfs = get_fs(); set_fs(KERNEL_DS); - err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL); - set_fs(oldfs); + if (ops->ndo_do_ioctl) { + mm_segment_t oldfs = get_fs(); + + set_fs(KERNEL_DS); + err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL); + set_fs(oldfs); + } else + err = -EOPNOTSUPP; dev = NULL; @@ -213,12 +222,16 @@ static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) return 0; } +static const struct net_device_ops reg_vif_netdev_ops = { + .ndo_start_xmit = reg_vif_xmit, +}; + static void reg_vif_setup(struct net_device *dev) { dev->type = ARPHRD_PIMREG; dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8; dev->flags = IFF_NOARP; - dev->hard_start_xmit = reg_vif_xmit; + dev->netdev_ops = ®_vif_netdev_ops, dev->destructor = free_netdev; } @@ -331,7 +344,7 @@ static void ipmr_destroy_unres(struct mfc_cache *c) atomic_dec(&cache_resolve_queue_len); - while ((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) { + while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) { if (ip_hdr(skb)->version == 0) { struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); nlh->nlmsg_type = NLMSG_ERROR; @@ -477,13 +490,13 @@ static int vif_add(struct vifctl *vifc, int mrtsock) /* * Fill in the VIF structures */ - v->rate_limit=vifc->vifc_rate_limit; - v->local=vifc->vifc_lcl_addr.s_addr; - v->remote=vifc->vifc_rmt_addr.s_addr; - v->flags=vifc->vifc_flags; + v->rate_limit = vifc->vifc_rate_limit; + v->local = vifc->vifc_lcl_addr.s_addr; + v->remote = vifc->vifc_rmt_addr.s_addr; + v->flags = vifc->vifc_flags; if (!mrtsock) v->flags |= VIFF_STATIC; - v->threshold=vifc->vifc_threshold; + v->threshold = vifc->vifc_threshold; v->bytes_in = 0; v->bytes_out = 0; v->pkt_in = 0; @@ -494,7 +507,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock) /* And finish update writing critical data */ write_lock_bh(&mrt_lock); - v->dev=dev; + v->dev = dev; #ifdef CONFIG_IP_PIMSM if (v->flags&VIFF_REGISTER) reg_vif_num = vifi; @@ -507,7 +520,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock) static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp) { - int line=MFC_HASH(mcastgrp,origin); + int line = MFC_HASH(mcastgrp, origin); struct mfc_cache *c; for (c=mfc_cache_array[line]; c; c = c->next) { @@ -522,8 +535,8 @@ static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp) */ static struct mfc_cache *ipmr_cache_alloc(void) { - struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); - if (c==NULL) + struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); + if (c == NULL) return NULL; c->mfc_un.res.minvif = MAXVIFS; return c; @@ -531,8 +544,8 @@ static struct mfc_cache *ipmr_cache_alloc(void) static struct mfc_cache *ipmr_cache_alloc_unres(void) { - struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); - if (c==NULL) + struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); + if (c == NULL) return NULL; skb_queue_head_init(&c->mfc_un.unres.unresolved); c->mfc_un.unres.expires = jiffies + 10*HZ; @@ -552,7 +565,7 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) * Play the pending entries through our router */ - while ((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) { + while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) { if (ip_hdr(skb)->version == 0) { struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); @@ -637,7 +650,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert) * Add our header */ - igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr)); + igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr)); igmp->type = msg->im_msgtype = assert; igmp->code = 0; @@ -653,7 +666,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert) /* * Deliver to mrouted */ - if ((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) { + if ((ret = sock_queue_rcv_skb(mroute_socket, skb))<0) { if (net_ratelimit()) printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n"); kfree_skb(skb); @@ -685,7 +698,7 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb) * Create a new entry if allowable */ - if (atomic_read(&cache_resolve_queue_len)>=10 || + if (atomic_read(&cache_resolve_queue_len) >= 10 || (c=ipmr_cache_alloc_unres())==NULL) { spin_unlock_bh(&mfc_unres_lock); @@ -728,7 +741,7 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb) kfree_skb(skb); err = -ENOBUFS; } else { - skb_queue_tail(&c->mfc_un.unres.unresolved,skb); + skb_queue_tail(&c->mfc_un.unres.unresolved, skb); err = 0; } @@ -745,7 +758,7 @@ static int ipmr_mfc_delete(struct mfcctl *mfc) int line; struct mfc_cache *c, **cp; - line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); + line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) { if (c->mfc_origin == mfc->mfcc_origin.s_addr && @@ -766,7 +779,7 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock) int line; struct mfc_cache *uc, *c, **cp; - line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); + line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) { if (c->mfc_origin == mfc->mfcc_origin.s_addr && @@ -787,13 +800,13 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock) if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr)) return -EINVAL; - c=ipmr_cache_alloc(); - if (c==NULL) + c = ipmr_cache_alloc(); + if (c == NULL) return -ENOMEM; - c->mfc_origin=mfc->mfcc_origin.s_addr; - c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr; - c->mfc_parent=mfc->mfcc_parent; + c->mfc_origin = mfc->mfcc_origin.s_addr; + c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr; + c->mfc_parent = mfc->mfcc_parent; ipmr_update_thresholds(c, mfc->mfcc_ttls); if (!mrtsock) c->mfc_flags |= MFC_STATIC; @@ -846,7 +859,7 @@ static void mroute_clean_tables(struct sock *sk) /* * Wipe the cache */ - for (i=0;i<MFC_LINES;i++) { + for (i=0; i<MFC_LINES; i++) { struct mfc_cache *c, **cp; cp = &mfc_cache_array[i]; @@ -887,7 +900,7 @@ static void mrtsock_destruct(struct sock *sk) IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)--; write_lock_bh(&mrt_lock); - mroute_socket=NULL; + mroute_socket = NULL; write_unlock_bh(&mrt_lock); mroute_clean_tables(sk); @@ -902,7 +915,7 @@ static void mrtsock_destruct(struct sock *sk) * MOSPF/PIM router set up we can clean this up. */ -int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int optlen) +int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen) { int ret; struct vifctl vif; @@ -918,7 +931,7 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt if (sk->sk_type != SOCK_RAW || inet_sk(sk)->num != IPPROTO_IGMP) return -EOPNOTSUPP; - if (optlen!=sizeof(int)) + if (optlen != sizeof(int)) return -ENOPROTOOPT; rtnl_lock(); @@ -930,7 +943,7 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt ret = ip_ra_control(sk, 1, mrtsock_destruct); if (ret == 0) { write_lock_bh(&mrt_lock); - mroute_socket=sk; + mroute_socket = sk; write_unlock_bh(&mrt_lock); IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)++; @@ -938,19 +951,19 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt rtnl_unlock(); return ret; case MRT_DONE: - if (sk!=mroute_socket) + if (sk != mroute_socket) return -EACCES; return ip_ra_control(sk, 0, NULL); case MRT_ADD_VIF: case MRT_DEL_VIF: - if (optlen!=sizeof(vif)) + if (optlen != sizeof(vif)) return -EINVAL; - if (copy_from_user(&vif,optval,sizeof(vif))) + if (copy_from_user(&vif, optval, sizeof(vif))) return -EFAULT; if (vif.vifc_vifi >= MAXVIFS) return -ENFILE; rtnl_lock(); - if (optname==MRT_ADD_VIF) { + if (optname == MRT_ADD_VIF) { ret = vif_add(&vif, sk==mroute_socket); } else { ret = vif_delete(vif.vifc_vifi, 0); @@ -964,12 +977,12 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt */ case MRT_ADD_MFC: case MRT_DEL_MFC: - if (optlen!=sizeof(mfc)) + if (optlen != sizeof(mfc)) return -EINVAL; - if (copy_from_user(&mfc,optval, sizeof(mfc))) + if (copy_from_user(&mfc, optval, sizeof(mfc))) return -EFAULT; rtnl_lock(); - if (optname==MRT_DEL_MFC) + if (optname == MRT_DEL_MFC) ret = ipmr_mfc_delete(&mfc); else ret = ipmr_mfc_add(&mfc, sk==mroute_socket); @@ -1028,12 +1041,12 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt * Getsock opt support for the multicast routing system. */ -int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __user *optlen) +int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen) { int olr; int val; - if (optname!=MRT_VERSION && + if (optname != MRT_VERSION && #ifdef CONFIG_IP_PIMSM optname!=MRT_PIM && #endif @@ -1047,17 +1060,17 @@ int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __u if (olr < 0) return -EINVAL; - if (put_user(olr,optlen)) + if (put_user(olr, optlen)) return -EFAULT; - if (optname==MRT_VERSION) - val=0x0305; + if (optname == MRT_VERSION) + val = 0x0305; #ifdef CONFIG_IP_PIMSM - else if (optname==MRT_PIM) - val=mroute_do_pim; + else if (optname == MRT_PIM) + val = mroute_do_pim; #endif else - val=mroute_do_assert; - if (copy_to_user(optval,&val,olr)) + val = mroute_do_assert; + if (copy_to_user(optval, &val, olr)) return -EFAULT; return 0; } @@ -1075,27 +1088,27 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) switch (cmd) { case SIOCGETVIFCNT: - if (copy_from_user(&vr,arg,sizeof(vr))) + if (copy_from_user(&vr, arg, sizeof(vr))) return -EFAULT; - if (vr.vifi>=maxvif) + if (vr.vifi >= maxvif) return -EINVAL; read_lock(&mrt_lock); vif=&vif_table[vr.vifi]; if (VIF_EXISTS(vr.vifi)) { - vr.icount=vif->pkt_in; - vr.ocount=vif->pkt_out; - vr.ibytes=vif->bytes_in; - vr.obytes=vif->bytes_out; + vr.icount = vif->pkt_in; + vr.ocount = vif->pkt_out; + vr.ibytes = vif->bytes_in; + vr.obytes = vif->bytes_out; read_unlock(&mrt_lock); - if (copy_to_user(arg,&vr,sizeof(vr))) + if (copy_to_user(arg, &vr, sizeof(vr))) return -EFAULT; return 0; } read_unlock(&mrt_lock); return -EADDRNOTAVAIL; case SIOCGETSGCNT: - if (copy_from_user(&sr,arg,sizeof(sr))) + if (copy_from_user(&sr, arg, sizeof(sr))) return -EFAULT; read_lock(&mrt_lock); @@ -1106,7 +1119,7 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) sr.wrong_if = c->mfc_un.res.wrong_if; read_unlock(&mrt_lock); - if (copy_to_user(arg,&sr,sizeof(sr))) + if (copy_to_user(arg, &sr, sizeof(sr))) return -EFAULT; return 0; } @@ -1130,15 +1143,15 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v if (event != NETDEV_UNREGISTER) return NOTIFY_DONE; v=&vif_table[0]; - for (ct=0;ct<maxvif;ct++,v++) { - if (v->dev==dev) + for (ct=0; ct<maxvif; ct++,v++) { + if (v->dev == dev) vif_delete(ct, 1); } return NOTIFY_DONE; } -static struct notifier_block ip_mr_notifier={ +static struct notifier_block ip_mr_notifier = { .notifier_call = ipmr_device_event, }; @@ -1204,7 +1217,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) #ifdef CONFIG_IP_PIMSM if (vif->flags & VIFF_REGISTER) { vif->pkt_out++; - vif->bytes_out+=skb->len; + vif->bytes_out += skb->len; vif->dev->stats.tx_bytes += skb->len; vif->dev->stats.tx_packets++; ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT); @@ -1254,7 +1267,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) } vif->pkt_out++; - vif->bytes_out+=skb->len; + vif->bytes_out += skb->len; dst_release(skb->dst); skb->dst = &rt->u.dst; @@ -1352,7 +1365,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local } vif_table[vif].pkt_in++; - vif_table[vif].bytes_in+=skb->len; + vif_table[vif].bytes_in += skb->len; /* * Forward the frame @@ -1364,7 +1377,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local if (skb2) ipmr_queue_xmit(skb2, cache, psend); } - psend=ct; + psend = ct; } } if (psend != -1) { @@ -1428,7 +1441,7 @@ int ip_mr_input(struct sk_buff *skb) /* * No usable cache entry */ - if (cache==NULL) { + if (cache == NULL) { int vif; if (local) { @@ -1469,29 +1482,13 @@ dont_forward: return 0; } -#ifdef CONFIG_IP_PIMSM_V1 -/* - * Handle IGMP messages of PIMv1 - */ - -int pim_rcv_v1(struct sk_buff * skb) +#ifdef CONFIG_IP_PIMSM +static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen) { - struct igmphdr *pim; - struct iphdr *encap; - struct net_device *reg_dev = NULL; - - if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) - goto drop; + struct net_device *reg_dev = NULL; + struct iphdr *encap; - pim = igmp_hdr(skb); - - if (!mroute_do_pim || - skb->len < sizeof(*pim) + sizeof(*encap) || - pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) - goto drop; - - encap = (struct iphdr *)(skb_transport_header(skb) + - sizeof(struct igmphdr)); + encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); /* Check that: a. packet is really destinted to a multicast group @@ -1500,8 +1497,8 @@ int pim_rcv_v1(struct sk_buff * skb) */ if (!ipv4_is_multicast(encap->daddr) || encap->tot_len == 0 || - ntohs(encap->tot_len) + sizeof(*pim) > skb->len) - goto drop; + ntohs(encap->tot_len) + pimlen > skb->len) + return 1; read_lock(&mrt_lock); if (reg_vif_num >= 0) @@ -1511,7 +1508,7 @@ int pim_rcv_v1(struct sk_buff * skb) read_unlock(&mrt_lock); if (reg_dev == NULL) - goto drop; + return 1; skb->mac_header = skb->network_header; skb_pull(skb, (u8*)encap - skb->data); @@ -1527,9 +1524,33 @@ int pim_rcv_v1(struct sk_buff * skb) nf_reset(skb); netif_rx(skb); dev_put(reg_dev); + return 0; - drop: - kfree_skb(skb); +} +#endif + +#ifdef CONFIG_IP_PIMSM_V1 +/* + * Handle IGMP messages of PIMv1 + */ + +int pim_rcv_v1(struct sk_buff * skb) +{ + struct igmphdr *pim; + + if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) + goto drop; + + pim = igmp_hdr(skb); + + if (!mroute_do_pim || + pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) + goto drop; + + if (__pim_rcv(skb, sizeof(*pim))) { +drop: + kfree_skb(skb); + } return 0; } #endif @@ -1538,10 +1559,8 @@ int pim_rcv_v1(struct sk_buff * skb) static int pim_rcv(struct sk_buff * skb) { struct pimreghdr *pim; - struct iphdr *encap; - struct net_device *reg_dev = NULL; - if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) + if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) goto drop; pim = (struct pimreghdr *)skb_transport_header(skb); @@ -1551,41 +1570,10 @@ static int pim_rcv(struct sk_buff * skb) csum_fold(skb_checksum(skb, 0, skb->len, 0)))) goto drop; - /* check if the inner packet is destined to mcast group */ - encap = (struct iphdr *)(skb_transport_header(skb) + - sizeof(struct pimreghdr)); - if (!ipv4_is_multicast(encap->daddr) || - encap->tot_len == 0 || - ntohs(encap->tot_len) + sizeof(*pim) > skb->len) - goto drop; - - read_lock(&mrt_lock); - if (reg_vif_num >= 0) - reg_dev = vif_table[reg_vif_num].dev; - if (reg_dev) - dev_hold(reg_dev); - read_unlock(&mrt_lock); - - if (reg_dev == NULL) - goto drop; - - skb->mac_header = skb->network_header; - skb_pull(skb, (u8*)encap - skb->data); - skb_reset_network_header(skb); - skb->dev = reg_dev; - skb->protocol = htons(ETH_P_IP); - skb->ip_summed = 0; - skb->pkt_type = PACKET_HOST; - dst_release(skb->dst); - reg_dev->stats.rx_bytes += skb->len; - reg_dev->stats.rx_packets++; - skb->dst = NULL; - nf_reset(skb); - netif_rx(skb); - dev_put(reg_dev); - return 0; - drop: - kfree_skb(skb); + if (__pim_rcv(skb, sizeof(*pim))) { +drop: + kfree_skb(skb); + } return 0; } #endif @@ -1602,13 +1590,13 @@ ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm) if (dev) RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex); - mp_head = (struct rtattr*)skb_put(skb, RTA_LENGTH(0)); + mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0)); for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { if (c->mfc_un.res.ttls[ct] < 255) { if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) goto rtattr_failure; - nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); + nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); nhp->rtnh_flags = 0; nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; nhp->rtnh_ifindex = vif_table[ct].dev->ifindex; @@ -1634,7 +1622,7 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait) read_lock(&mrt_lock); cache = ipmr_cache_find(rt->rt_src, rt->rt_dst); - if (cache==NULL) { + if (cache == NULL) { struct sk_buff *skb2; struct iphdr *iph; struct net_device *dev; @@ -1866,15 +1854,16 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) const struct mfc_cache *mfc = v; const struct ipmr_mfc_iter *it = seq->private; - seq_printf(seq, "%08lX %08lX %-3d %8ld %8ld %8ld", + seq_printf(seq, "%08lX %08lX %-3hd", (unsigned long) mfc->mfc_mcastgrp, (unsigned long) mfc->mfc_origin, - mfc->mfc_parent, - mfc->mfc_un.res.pkt, - mfc->mfc_un.res.bytes, - mfc->mfc_un.res.wrong_if); + mfc->mfc_parent); if (it->cache != &mfc_unres_queue) { + seq_printf(seq, " %8lu %8lu %8lu", + mfc->mfc_un.res.pkt, + mfc->mfc_un.res.bytes, + mfc->mfc_un.res.wrong_if); for (n = mfc->mfc_un.res.minvif; n < mfc->mfc_un.res.maxvif; n++ ) { if (VIF_EXISTS(n) @@ -1883,6 +1872,11 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) " %2d:%-3d", n, mfc->mfc_un.res.ttls[n]); } + } else { + /* unresolved mfc_caches don't contain + * pkt, bytes and wrong_if values + */ + seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul); } seq_putc(seq, '\n'); } diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 6efdb70b3eb..fdf6811c31a 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -66,7 +66,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) #ifdef CONFIG_XFRM if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && xfrm_decode_session(skb, &fl, AF_INET) == 0) - if (xfrm_lookup(&skb->dst, &fl, skb->sk, 0)) + if (xfrm_lookup(net, &skb->dst, &fl, skb->sk, 0)) return -1; #endif @@ -97,7 +97,7 @@ int ip_xfrm_me_harder(struct sk_buff *skb) dst = ((struct xfrm_dst *)dst)->route; dst_hold(dst); - if (xfrm_lookup(&dst, &fl, skb->sk, 0) < 0) + if (xfrm_lookup(dev_net(dst->dev), &dst, &fl, skb->sk, 0) < 0) return -1; dst_release(skb->dst); @@ -125,6 +125,7 @@ struct ip_rt_info { __be32 daddr; __be32 saddr; u_int8_t tos; + u_int32_t mark; }; static void nf_ip_saveroute(const struct sk_buff *skb, @@ -138,6 +139,7 @@ static void nf_ip_saveroute(const struct sk_buff *skb, rt_info->tos = iph->tos; rt_info->daddr = iph->daddr; rt_info->saddr = iph->saddr; + rt_info->mark = skb->mark; } } @@ -150,6 +152,7 @@ static int nf_ip_reroute(struct sk_buff *skb, const struct iphdr *iph = ip_hdr(skb); if (!(iph->tos == rt_info->tos + && skb->mark == rt_info->mark && iph->daddr == rt_info->daddr && iph->saddr == rt_info->saddr)) return ip_route_me_harder(skb, RTN_UNSPEC); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 8d70d29f1cc..7ea88b61cb0 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -142,15 +142,15 @@ static inline int arp_packet_match(const struct arphdr *arphdr, ARPT_INV_TGTIP)) { dprintf("Source or target IP address mismatch.\n"); - dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n", - NIPQUAD(src_ipaddr), - NIPQUAD(arpinfo->smsk.s_addr), - NIPQUAD(arpinfo->src.s_addr), + dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n", + &src_ipaddr, + &arpinfo->smsk.s_addr, + &arpinfo->src.s_addr, arpinfo->invflags & ARPT_INV_SRCIP ? " (INV)" : ""); - dprintf("TGT: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n", - NIPQUAD(tgt_ipaddr), - NIPQUAD(arpinfo->tmsk.s_addr), - NIPQUAD(arpinfo->tgt.s_addr), + dprintf("TGT: %pI4 Mask: %pI4 Target: %pI4.%s\n", + &tgt_ipaddr, + &arpinfo->tmsk.s_addr, + &arpinfo->tgt.s_addr, arpinfo->invflags & ARPT_INV_TGTIP ? " (INV)" : ""); return 0; } diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index bee3d117661..e091187e864 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -75,16 +75,6 @@ static unsigned int arpt_out_hook(unsigned int hook, dev_net(out)->ipv4.arptable_filter); } -static unsigned int arpt_forward_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - return arpt_do_table(skb, hook, in, out, - dev_net(in)->ipv4.arptable_filter); -} - static struct nf_hook_ops arpt_ops[] __read_mostly = { { .hook = arpt_in_hook, @@ -101,7 +91,7 @@ static struct nf_hook_ops arpt_ops[] __read_mostly = { .priority = NF_IP_PRI_FILTER, }, { - .hook = arpt_forward_hook, + .hook = arpt_in_hook, .owner = THIS_MODULE, .pf = NFPROTO_ARP, .hooknum = NF_ARP_FORWARD, diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 213fb27debc..ef8b6ca068b 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -94,15 +94,11 @@ ip_packet_match(const struct iphdr *ip, IPT_INV_DSTIP)) { dprintf("Source or dest mismatch.\n"); - dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n", - NIPQUAD(ip->saddr), - NIPQUAD(ipinfo->smsk.s_addr), - NIPQUAD(ipinfo->src.s_addr), + dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n", + &ip->saddr, &ipinfo->smsk.s_addr, &ipinfo->src.s_addr, ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : ""); - dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n", - NIPQUAD(ip->daddr), - NIPQUAD(ipinfo->dmsk.s_addr), - NIPQUAD(ipinfo->dst.s_addr), + dprintf("DST: %pI4 Mask: %pI4 Target: %pI4.%s\n", + &ip->daddr, &ipinfo->dmsk.s_addr, &ipinfo->dst.s_addr, ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : ""); return false; } diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 7ac1677419a..2e4f98b8552 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -168,7 +168,7 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip, char buffer[16]; /* create proc dir entry */ - sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(ip)); + sprintf(buffer, "%pI4", &ip); c->pde = proc_create_data(buffer, S_IWUSR|S_IRUSR, clusterip_procdir, &clusterip_proc_fops, c); @@ -373,7 +373,7 @@ static bool clusterip_tg_check(const struct xt_tgchk_param *par) config = clusterip_config_find_get(e->ip.dst.s_addr, 1); if (!config) { if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { - printk(KERN_WARNING "CLUSTERIP: no config found for %u.%u.%u.%u, need 'new'\n", NIPQUAD(e->ip.dst.s_addr)); + printk(KERN_WARNING "CLUSTERIP: no config found for %pI4, need 'new'\n", &e->ip.dst.s_addr); return false; } else { struct net_device *dev; @@ -478,9 +478,8 @@ static void arp_print(struct arp_payload *payload) } hbuffer[--k]='\0'; - printk("src %u.%u.%u.%u@%s, dst %u.%u.%u.%u\n", - NIPQUAD(payload->src_ip), hbuffer, - NIPQUAD(payload->dst_ip)); + printk("src %pI4@%s, dst %pI4\n", + &payload->src_ip, hbuffer, &payload->dst_ip); } #endif diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index fc6ce04a3e3..27a78fbbd92 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -54,8 +54,8 @@ static void dump_packet(const struct nf_loginfo *info, /* Important fields: * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */ /* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */ - printk("SRC=%u.%u.%u.%u DST=%u.%u.%u.%u ", - NIPQUAD(ih->saddr), NIPQUAD(ih->daddr)); + printk("SRC=%pI4 DST=%pI4 ", + &ih->saddr, &ih->daddr); /* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */ printk("LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ", @@ -262,8 +262,7 @@ static void dump_packet(const struct nf_loginfo *info, break; case ICMP_REDIRECT: /* Max length: 24 "GATEWAY=255.255.255.255 " */ - printk("GATEWAY=%u.%u.%u.%u ", - NIPQUAD(ich->un.gateway)); + printk("GATEWAY=%pI4 ", &ich->un.gateway); /* Fall through */ case ICMP_DEST_UNREACH: case ICMP_SOURCE_QUENCH: @@ -340,8 +339,8 @@ static void dump_packet(const struct nf_loginfo *info, read_lock_bh(&skb->sk->sk_callback_lock); if (skb->sk->sk_socket && skb->sk->sk_socket->file) printk("UID=%u GID=%u ", - skb->sk->sk_socket->file->f_uid, - skb->sk->sk_socket->file->f_gid); + skb->sk->sk_socket->file->f_cred->fsuid, + skb->sk->sk_socket->file->f_cred->fsgid); read_unlock_bh(&skb->sk->sk_callback_lock); } diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c index 88762f02779..3b216be3bc9 100644 --- a/net/ipv4/netfilter/ipt_addrtype.c +++ b/net/ipv4/netfilter/ipt_addrtype.c @@ -23,24 +23,25 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_DESCRIPTION("Xtables: address type match for IPv4"); -static inline bool match_type(const struct net_device *dev, __be32 addr, - u_int16_t mask) +static inline bool match_type(struct net *net, const struct net_device *dev, + __be32 addr, u_int16_t mask) { - return !!(mask & (1 << inet_dev_addr_type(&init_net, dev, addr))); + return !!(mask & (1 << inet_dev_addr_type(net, dev, addr))); } static bool addrtype_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) { + struct net *net = dev_net(par->in ? par->in : par->out); const struct ipt_addrtype_info *info = par->matchinfo; const struct iphdr *iph = ip_hdr(skb); bool ret = true; if (info->source) - ret &= match_type(NULL, iph->saddr, info->source) ^ + ret &= match_type(net, NULL, iph->saddr, info->source) ^ info->invert_source; if (info->dest) - ret &= match_type(NULL, iph->daddr, info->dest) ^ + ret &= match_type(net, NULL, iph->daddr, info->dest) ^ info->invert_dest; return ret; @@ -49,6 +50,7 @@ addrtype_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) static bool addrtype_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par) { + struct net *net = dev_net(par->in ? par->in : par->out); const struct ipt_addrtype_info_v1 *info = par->matchinfo; const struct iphdr *iph = ip_hdr(skb); const struct net_device *dev = NULL; @@ -60,10 +62,10 @@ addrtype_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par) dev = par->out; if (info->source) - ret &= match_type(dev, iph->saddr, info->source) ^ + ret &= match_type(net, dev, iph->saddr, info->source) ^ (info->flags & IPT_ADDRTYPE_INVERT_SOURCE); if (ret && info->dest) - ret &= match_type(dev, iph->daddr, info->dest) ^ + ret &= match_type(net, dev, iph->daddr, info->dest) ^ !!(info->flags & IPT_ADDRTYPE_INVERT_DEST); return ret; } diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 4a7c3527539..b2141e11575 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -60,9 +60,8 @@ static bool ipv4_invert_tuple(struct nf_conntrack_tuple *tuple, static int ipv4_print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple) { - return seq_printf(s, "src=%u.%u.%u.%u dst=%u.%u.%u.%u ", - NIPQUAD(tuple->src.u3.ip), - NIPQUAD(tuple->dst.u3.ip)); + return seq_printf(s, "src=%pI4 dst=%pI4 ", + &tuple->src.u3.ip, &tuple->dst.u3.ip); } static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, @@ -198,7 +197,7 @@ static ctl_table ip_ct_sysctl_table[] = { .data = &nf_conntrack_max, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_NF_CONNTRACK_COUNT, @@ -206,7 +205,7 @@ static ctl_table ip_ct_sysctl_table[] = { .data = &init_net.ct.count, .maxlen = sizeof(int), .mode = 0444, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_NF_CONNTRACK_BUCKETS, @@ -214,7 +213,7 @@ static ctl_table ip_ct_sysctl_table[] = { .data = &nf_conntrack_htable_size, .maxlen = sizeof(unsigned int), .mode = 0444, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_NF_CONNTRACK_CHECKSUM, @@ -222,7 +221,7 @@ static ctl_table ip_ct_sysctl_table[] = { .data = &init_net.ct.sysctl_checksum, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_NF_CONNTRACK_LOG_INVALID, @@ -230,8 +229,8 @@ static ctl_table ip_ct_sysctl_table[] = { .data = &init_net.ct.sysctl_log_invalid, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &log_invalid_proto_min, .extra2 = &log_invalid_proto_max, }, @@ -284,17 +283,17 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) .tuple.dst.u3.ip; memset(sin.sin_zero, 0, sizeof(sin.sin_zero)); - pr_debug("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n", - NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port)); + pr_debug("SO_ORIGINAL_DST: %pI4 %u\n", + &sin.sin_addr.s_addr, ntohs(sin.sin_port)); nf_ct_put(ct); if (copy_to_user(user, &sin, sizeof(sin)) != 0) return -EFAULT; else return 0; } - pr_debug("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n", - NIPQUAD(tuple.src.u3.ip), ntohs(tuple.src.u.tcp.port), - NIPQUAD(tuple.dst.u3.ip), ntohs(tuple.dst.u.tcp.port)); + pr_debug("SO_ORIGINAL_DST: Can't find %pI4/%u-%pI4/%u.\n", + &tuple.src.u3.ip, ntohs(tuple.src.u.tcp.port), + &tuple.dst.u3.ip, ntohs(tuple.dst.u.tcp.port)); return -ENOENT; } diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 4e887922022..1fd3ef7718b 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -272,7 +272,7 @@ static struct ctl_table icmp_sysctl_table[] = { .data = &nf_ct_icmp_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = 0 @@ -285,7 +285,7 @@ static struct ctl_table icmp_compat_sysctl_table[] = { .data = &nf_ct_icmp_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = 0 diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index ee47bf28c82..7e8e6fc7541 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -119,10 +119,9 @@ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct, (ntohl(addr.ip) & 0xff000000) == 0x7f000000) i = 0; - pr_debug("nf_nat_ras: set signal address " - "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", - NIPQUAD(addr.ip), port, - NIPQUAD(ct->tuplehash[!dir].tuple.dst.u3.ip), + pr_debug("nf_nat_ras: set signal address %pI4:%hu->%pI4:%hu\n", + &addr.ip, port, + &ct->tuplehash[!dir].tuple.dst.u3.ip, info->sig_port[!dir]); return set_h225_addr(skb, data, 0, &taddr[i], &ct->tuplehash[!dir]. @@ -131,10 +130,9 @@ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct, } else if (addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip && port == info->sig_port[dir]) { /* GK->GW */ - pr_debug("nf_nat_ras: set signal address " - "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", - NIPQUAD(addr.ip), port, - NIPQUAD(ct->tuplehash[!dir].tuple.src.u3.ip), + pr_debug("nf_nat_ras: set signal address %pI4:%hu->%pI4:%hu\n", + &addr.ip, port, + &ct->tuplehash[!dir].tuple.src.u3.ip, info->sig_port[!dir]); return set_h225_addr(skb, data, 0, &taddr[i], &ct->tuplehash[!dir]. @@ -162,10 +160,9 @@ static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct, if (get_h225_addr(ct, *data, &taddr[i], &addr, &port) && addr.ip == ct->tuplehash[dir].tuple.src.u3.ip && port == ct->tuplehash[dir].tuple.src.u.udp.port) { - pr_debug("nf_nat_ras: set rasAddress " - "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", - NIPQUAD(addr.ip), ntohs(port), - NIPQUAD(ct->tuplehash[!dir].tuple.dst.u3.ip), + pr_debug("nf_nat_ras: set rasAddress %pI4:%hu->%pI4:%hu\n", + &addr.ip, ntohs(port), + &ct->tuplehash[!dir].tuple.dst.u3.ip, ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port)); return set_h225_addr(skb, data, 0, &taddr[i], &ct->tuplehash[!dir].tuple.dst.u3, @@ -257,15 +254,15 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, } /* Success */ - pr_debug("nf_nat_h323: expect RTP %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", - NIPQUAD(rtp_exp->tuple.src.u3.ip), + pr_debug("nf_nat_h323: expect RTP %pI4:%hu->%pI4:%hu\n", + &rtp_exp->tuple.src.u3.ip, ntohs(rtp_exp->tuple.src.u.udp.port), - NIPQUAD(rtp_exp->tuple.dst.u3.ip), + &rtp_exp->tuple.dst.u3.ip, ntohs(rtp_exp->tuple.dst.u.udp.port)); - pr_debug("nf_nat_h323: expect RTCP %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", - NIPQUAD(rtcp_exp->tuple.src.u3.ip), + pr_debug("nf_nat_h323: expect RTCP %pI4:%hu->%pI4:%hu\n", + &rtcp_exp->tuple.src.u3.ip, ntohs(rtcp_exp->tuple.src.u.udp.port), - NIPQUAD(rtcp_exp->tuple.dst.u3.ip), + &rtcp_exp->tuple.dst.u3.ip, ntohs(rtcp_exp->tuple.dst.u.udp.port)); return 0; @@ -307,10 +304,10 @@ static int nat_t120(struct sk_buff *skb, struct nf_conn *ct, return -1; } - pr_debug("nf_nat_h323: expect T.120 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", - NIPQUAD(exp->tuple.src.u3.ip), + pr_debug("nf_nat_h323: expect T.120 %pI4:%hu->%pI4:%hu\n", + &exp->tuple.src.u3.ip, ntohs(exp->tuple.src.u.tcp.port), - NIPQUAD(exp->tuple.dst.u3.ip), + &exp->tuple.dst.u3.ip, ntohs(exp->tuple.dst.u.tcp.port)); return 0; @@ -361,10 +358,10 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct, return -1; } - pr_debug("nf_nat_q931: expect H.245 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", - NIPQUAD(exp->tuple.src.u3.ip), + pr_debug("nf_nat_q931: expect H.245 %pI4:%hu->%pI4:%hu\n", + &exp->tuple.src.u3.ip, ntohs(exp->tuple.src.u.tcp.port), - NIPQUAD(exp->tuple.dst.u3.ip), + &exp->tuple.dst.u3.ip, ntohs(exp->tuple.dst.u.tcp.port)); return 0; @@ -455,10 +452,10 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, } /* Success */ - pr_debug("nf_nat_ras: expect Q.931 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", - NIPQUAD(exp->tuple.src.u3.ip), + pr_debug("nf_nat_ras: expect Q.931 %pI4:%hu->%pI4:%hu\n", + &exp->tuple.src.u3.ip, ntohs(exp->tuple.src.u.tcp.port), - NIPQUAD(exp->tuple.dst.u3.ip), + &exp->tuple.dst.u3.ip, ntohs(exp->tuple.dst.u.tcp.port)); return 0; @@ -524,11 +521,10 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct, } /* Success */ - pr_debug("nf_nat_q931: expect Call Forwarding " - "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", - NIPQUAD(exp->tuple.src.u3.ip), + pr_debug("nf_nat_q931: expect Call Forwarding %pI4:%hu->%pI4:%hu\n", + &exp->tuple.src.u3.ip, ntohs(exp->tuple.src.u.tcp.port), - NIPQUAD(exp->tuple.dst.u3.ip), + &exp->tuple.dst.u3.ip, ntohs(exp->tuple.dst.u.tcp.port)); return 0; diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c index fe6f9cef6c8..ea83a886b03 100644 --- a/net/ipv4/netfilter/nf_nat_irc.c +++ b/net/ipv4/netfilter/nf_nat_irc.c @@ -55,8 +55,8 @@ static unsigned int help(struct sk_buff *skb, ip = ntohl(exp->master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip); sprintf(buffer, "%u %u", ip, port); - pr_debug("nf_nat_irc: inserting '%s' == %u.%u.%u.%u, port %u\n", - buffer, NIPQUAD(ip), port); + pr_debug("nf_nat_irc: inserting '%s' == %pI4, port %u\n", + buffer, &ip, port); ret = nf_nat_mangle_tcp_packet(skb, exp->master, ctinfo, matchoff, matchlen, buffer, diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index bea54a68510..a7eb0471904 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -61,7 +61,7 @@ static struct static struct xt_table nat_table = { .name = "nat", .valid_hooks = NAT_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(__nat_table.lock), + .lock = __RW_LOCK_UNLOCKED(nat_table.lock), .me = THIS_MODULE, .af = AF_INET, }; @@ -86,25 +86,6 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_target_param *par) return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_SRC); } -/* Before 2.6.11 we did implicit source NAT if required. Warn about change. */ -static void warn_if_extra_mangle(struct net *net, __be32 dstip, __be32 srcip) -{ - static int warned = 0; - struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } }; - struct rtable *rt; - - if (ip_route_output_key(net, &rt, &fl) != 0) - return; - - if (rt->rt_src != srcip && !warned) { - printk("NAT: no longer support implicit source local NAT\n"); - printk("NAT: packet src %u.%u.%u.%u -> dst %u.%u.%u.%u\n", - NIPQUAD(srcip), NIPQUAD(dstip)); - warned = 1; - } - ip_rt_put(rt); -} - static unsigned int ipt_dnat_target(struct sk_buff *skb, const struct xt_target_param *par) { @@ -120,11 +101,6 @@ ipt_dnat_target(struct sk_buff *skb, const struct xt_target_param *par) /* Connection must be valid and new. */ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); - if (par->hooknum == NF_INET_LOCAL_OUT && - mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) - warn_if_extra_mangle(dev_net(par->out), ip_hdr(skb)->daddr, - mr->range[0].min_ip); - return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_DST); } @@ -166,8 +142,7 @@ alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) struct nf_nat_range range = { IP_NAT_RANGE_MAP_IPS, ip, ip, { 0 }, { 0 } }; - pr_debug("Allocating NULL binding for %p (%u.%u.%u.%u)\n", - ct, NIPQUAD(ip)); + pr_debug("Allocating NULL binding for %p (%pI4)\n", ct, &ip); return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum)); } diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index 14544320c54..07d61a57613 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -74,8 +74,7 @@ static int map_addr(struct sk_buff *skb, if (newaddr == addr->ip && newport == port) return 1; - buflen = sprintf(buffer, "%u.%u.%u.%u:%u", - NIPQUAD(newaddr), ntohs(newport)); + buflen = sprintf(buffer, "%pI4:%u", &newaddr, ntohs(newport)); return mangle_packet(skb, dptr, datalen, matchoff, matchlen, buffer, buflen); @@ -152,8 +151,8 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, &addr) > 0 && addr.ip == ct->tuplehash[dir].tuple.src.u3.ip && addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) { - __be32 ip = ct->tuplehash[!dir].tuple.dst.u3.ip; - buflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(ip)); + buflen = sprintf(buffer, "%pI4", + &ct->tuplehash[!dir].tuple.dst.u3.ip); if (!mangle_packet(skb, dptr, datalen, poff, plen, buffer, buflen)) return NF_DROP; @@ -166,8 +165,8 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, &addr) > 0 && addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip && addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) { - __be32 ip = ct->tuplehash[!dir].tuple.src.u3.ip; - buflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(ip)); + buflen = sprintf(buffer, "%pI4", + &ct->tuplehash[!dir].tuple.src.u3.ip); if (!mangle_packet(skb, dptr, datalen, poff, plen, buffer, buflen)) return NF_DROP; @@ -279,8 +278,7 @@ static unsigned int ip_nat_sip_expect(struct sk_buff *skb, if (exp->tuple.dst.u3.ip != exp->saved_ip || exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) { - buflen = sprintf(buffer, "%u.%u.%u.%u:%u", - NIPQUAD(newip), port); + buflen = sprintf(buffer, "%pI4:%u", &newip, port); if (!mangle_packet(skb, dptr, datalen, matchoff, matchlen, buffer, buflen)) goto err; @@ -345,7 +343,7 @@ static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, const char **dptr, char buffer[sizeof("nnn.nnn.nnn.nnn")]; unsigned int buflen; - buflen = sprintf(buffer, NIPQUAD_FMT, NIPQUAD(addr->ip)); + buflen = sprintf(buffer, "%pI4", &addr->ip); if (mangle_sdp_packet(skb, dptr, dataoff, datalen, type, term, buffer, buflen)) return 0; @@ -380,7 +378,7 @@ static unsigned int ip_nat_sdp_session(struct sk_buff *skb, const char **dptr, unsigned int buflen; /* Mangle session description owner and contact addresses */ - buflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(addr->ip)); + buflen = sprintf(buffer, "%pI4", &addr->ip); if (mangle_sdp_packet(skb, dptr, dataoff, datalen, SDP_HDR_OWNER_IP4, SDP_HDR_MEDIA, buffer, buflen)) diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 8303e4b406c..182f845de92 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -930,8 +930,8 @@ static inline void mangle_address(unsigned char *begin, } if (debug) - printk(KERN_DEBUG "bsalg: mapped %u.%u.%u.%u to " - "%u.%u.%u.%u\n", NIPQUAD(old), NIPQUAD(*addr)); + printk(KERN_DEBUG "bsalg: mapped %pI4 to %pI4\n", + &old, addr); } } @@ -1267,9 +1267,8 @@ static int help(struct sk_buff *skb, unsigned int protoff, */ if (ntohs(udph->len) != skb->len - (iph->ihl << 2)) { if (net_ratelimit()) - printk(KERN_WARNING "SNMP: dropping malformed packet " - "src=%u.%u.%u.%u dst=%u.%u.%u.%u\n", - NIPQUAD(iph->saddr), NIPQUAD(iph->daddr)); + printk(KERN_WARNING "SNMP: dropping malformed packet src=%pI4 dst=%pI4\n", + &iph->saddr, &iph->daddr); return NF_DROP; } diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index a631a1f110c..614958b7c27 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -54,8 +54,9 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) socket_seq_show(seq); seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", sock_prot_inuse_get(net, &tcp_prot), - atomic_read(&tcp_orphan_count), - tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated), + (int)percpu_counter_sum_positive(&tcp_orphan_count), + tcp_death_row.tw_count, + (int)percpu_counter_sum_positive(&tcp_sockets_allocated), atomic_read(&tcp_memory_allocated)); seq_printf(seq, "UDP: inuse %d mem %d\n", sock_prot_inuse_get(net, &udp_prot), @@ -234,6 +235,9 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPSpuriousRTOs", LINUX_MIB_TCPSPURIOUSRTOS), SNMP_MIB_ITEM("TCPMD5NotFound", LINUX_MIB_TCPMD5NOTFOUND), SNMP_MIB_ITEM("TCPMD5Unexpected", LINUX_MIB_TCPMD5UNEXPECTED), + SNMP_MIB_ITEM("TCPSackShifted", LINUX_MIB_SACKSHIFTED), + SNMP_MIB_ITEM("TCPSackMerged", LINUX_MIB_SACKMERGED), + SNMP_MIB_ITEM("TCPSackShiftFallback", LINUX_MIB_SACKSHIFTFALLBACK), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index cd975743bcd..dff8bc4e0fa 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -247,7 +247,7 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info) } if (inet->recverr) { - struct iphdr *iph = (struct iphdr*)skb->data; + struct iphdr *iph = (struct iphdr *)skb->data; u8 *payload = skb->data + (iph->ihl << 2); if (inet->hdrincl) @@ -465,7 +465,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, */ if (msg->msg_namelen) { - struct sockaddr_in *usin = (struct sockaddr_in*)msg->msg_name; + struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name; err = -EINVAL; if (msg->msg_namelen < sizeof(*usin)) goto out; @@ -572,7 +572,7 @@ back_from_confirm: ipc.addr = rt->rt_dst; lock_sock(sk); err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0, - &ipc, rt, msg->msg_flags); + &ipc, &rt, msg->msg_flags); if (err) ip_flush_pending_frames(sk); else if (!(msg->msg_flags & MSG_MORE)) @@ -851,7 +851,7 @@ struct proto raw_prot = { static struct sock *raw_get_first(struct seq_file *seq) { struct sock *sk; - struct raw_iter_state* state = raw_seq_private(seq); + struct raw_iter_state *state = raw_seq_private(seq); for (state->bucket = 0; state->bucket < RAW_HTABLE_SIZE; ++state->bucket) { @@ -868,7 +868,7 @@ found: static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk) { - struct raw_iter_state* state = raw_seq_private(seq); + struct raw_iter_state *state = raw_seq_private(seq); do { sk = sk_next(sk); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 2ea6dcc3e2c..77bfba97595 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -129,6 +129,7 @@ static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ; static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; static int ip_rt_min_advmss __read_mostly = 256; static int ip_rt_secret_interval __read_mostly = 10 * 60 * HZ; +static int rt_chain_length_max __read_mostly = 20; static void rt_worker_func(struct work_struct *work); static DECLARE_DELAYED_WORK(expires_work, rt_worker_func); @@ -145,6 +146,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); static void ipv4_link_failure(struct sk_buff *skb); static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu); static int rt_garbage_collect(struct dst_ops *ops); +static void rt_emergency_hash_rebuild(struct net *net); static struct dst_ops ipv4_dst_ops = { @@ -158,7 +160,6 @@ static struct dst_ops ipv4_dst_ops = { .link_failure = ipv4_link_failure, .update_pmtu = ip_rt_update_pmtu, .local_out = __ip_local_out, - .entry_size = sizeof(struct rtable), .entries = ATOMIC_INIT(0), }; @@ -201,6 +202,7 @@ const __u8 ip_tos2prio[16] = { struct rt_hash_bucket { struct rtable *chain; }; + #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \ defined(CONFIG_PROVE_LOCKING) /* @@ -674,6 +676,20 @@ static inline u32 rt_score(struct rtable *rt) return score; } +static inline bool rt_caching(const struct net *net) +{ + return net->ipv4.current_rt_cache_rebuild_count <= + net->ipv4.sysctl_rt_cache_rebuild_count; +} + +static inline bool compare_hash_inputs(const struct flowi *fl1, + const struct flowi *fl2) +{ + return (__force u32)(((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) | + (fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr) | + (fl1->iif ^ fl2->iif)) == 0); +} + static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) { return ((__force u32)((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) | @@ -753,11 +769,24 @@ static void rt_do_flush(int process_context) } } +/* + * While freeing expired entries, we compute average chain length + * and standard deviation, using fixed-point arithmetic. + * This to have an estimation of rt_chain_length_max + * rt_chain_length_max = max(elasticity, AVG + 4*SD) + * We use 3 bits for frational part, and 29 (or 61) for magnitude. + */ + +#define FRACT_BITS 3 +#define ONE (1UL << FRACT_BITS) + static void rt_check_expire(void) { static unsigned int rover; unsigned int i = rover, goal; struct rtable *rth, **rthp; + unsigned long length = 0, samples = 0; + unsigned long sum = 0, sum2 = 0; u64 mult; mult = ((u64)ip_rt_gc_interval) << rt_hash_log; @@ -766,6 +795,7 @@ static void rt_check_expire(void) goal = (unsigned int)mult; if (goal > rt_hash_mask) goal = rt_hash_mask + 1; + length = 0; for (; goal > 0; goal--) { unsigned long tmo = ip_rt_gc_timeout; @@ -775,6 +805,8 @@ static void rt_check_expire(void) if (need_resched()) cond_resched(); + samples++; + if (*rthp == NULL) continue; spin_lock_bh(rt_hash_lock_addr(i)); @@ -789,11 +821,29 @@ static void rt_check_expire(void) if (time_before_eq(jiffies, rth->u.dst.expires)) { tmo >>= 1; rthp = &rth->u.dst.rt_next; + /* + * Only bump our length if the hash + * inputs on entries n and n+1 are not + * the same, we only count entries on + * a chain with equal hash inputs once + * so that entries for different QOS + * levels, and other non-hash input + * attributes don't unfairly skew + * the length computation + */ + if ((*rthp == NULL) || + !compare_hash_inputs(&(*rthp)->fl, + &rth->fl)) + length += ONE; continue; } } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) { tmo >>= 1; rthp = &rth->u.dst.rt_next; + if ((*rthp == NULL) || + !compare_hash_inputs(&(*rthp)->fl, + &rth->fl)) + length += ONE; continue; } @@ -802,6 +852,15 @@ static void rt_check_expire(void) rt_free(rth); } spin_unlock_bh(rt_hash_lock_addr(i)); + sum += length; + sum2 += length*length; + } + if (samples) { + unsigned long avg = sum / samples; + unsigned long sd = int_sqrt(sum2 / samples - avg*avg); + rt_chain_length_max = max_t(unsigned long, + ip_rt_gc_elasticity, + (avg + 4*sd) >> FRACT_BITS); } rover = i; } @@ -851,6 +910,26 @@ static void rt_secret_rebuild(unsigned long __net) mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval); } +static void rt_secret_rebuild_oneshot(struct net *net) +{ + del_timer_sync(&net->ipv4.rt_secret_timer); + rt_cache_invalidate(net); + if (ip_rt_secret_interval) { + net->ipv4.rt_secret_timer.expires += ip_rt_secret_interval; + add_timer(&net->ipv4.rt_secret_timer); + } +} + +static void rt_emergency_hash_rebuild(struct net *net) +{ + if (net_ratelimit()) { + printk(KERN_WARNING "Route hash chain too long!\n"); + printk(KERN_WARNING "Adjust your secret_interval!\n"); + } + + rt_secret_rebuild_oneshot(net); +} + /* Short description of GC goals. @@ -989,6 +1068,7 @@ out: return 0; static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp) { struct rtable *rth, **rthp; + struct rtable *rthi; unsigned long now; struct rtable *cand, **candp; u32 min_score; @@ -1002,7 +1082,13 @@ restart: candp = NULL; now = jiffies; + if (!rt_caching(dev_net(rt->u.dst.dev))) { + rt_drop(rt); + return 0; + } + rthp = &rt_hash_table[hash].chain; + rthi = NULL; spin_lock_bh(rt_hash_lock_addr(hash)); while ((rth = *rthp) != NULL) { @@ -1048,6 +1134,17 @@ restart: chain_length++; rthp = &rth->u.dst.rt_next; + + /* + * check to see if the next entry in the chain + * contains the same hash input values as rt. If it does + * This is where we will insert into the list, instead of + * at the head. This groups entries that differ by aspects not + * relvant to the hash function together, which we use to adjust + * our chain length + */ + if (*rthp && compare_hash_inputs(&(*rthp)->fl, &rt->fl)) + rthi = rth; } if (cand) { @@ -1061,6 +1158,16 @@ restart: *candp = cand->u.dst.rt_next; rt_free(cand); } + } else { + if (chain_length > rt_chain_length_max) { + struct net *net = dev_net(rt->u.dst.dev); + int num = ++net->ipv4.current_rt_cache_rebuild_count; + if (!rt_caching(dev_net(rt->u.dst.dev))) { + printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n", + rt->u.dst.dev->name, num); + } + rt_emergency_hash_rebuild(dev_net(rt->u.dst.dev)); + } } /* Try to bind route to arp only if it is output @@ -1098,14 +1205,17 @@ restart: } } - rt->u.dst.rt_next = rt_hash_table[hash].chain; + if (rthi) + rt->u.dst.rt_next = rthi->u.dst.rt_next; + else + rt->u.dst.rt_next = rt_hash_table[hash].chain; + #if RT_CACHE_DEBUG >= 2 if (rt->u.dst.rt_next) { struct rtable *trt; - printk(KERN_DEBUG "rt_cache @%02x: " NIPQUAD_FMT, hash, - NIPQUAD(rt->rt_dst)); + printk(KERN_DEBUG "rt_cache @%02x: %pI4", hash, &rt->rt_dst); for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next) - printk(" . " NIPQUAD_FMT, NIPQUAD(trt->rt_dst)); + printk(" . %pI4", &trt->rt_dst); printk("\n"); } #endif @@ -1114,7 +1224,11 @@ restart: * previous writes to rt are comitted to memory * before making rt visible to other CPUS. */ - rcu_assign_pointer(rt_hash_table[hash].chain, rt); + if (rthi) + rcu_assign_pointer(rthi->u.dst.rt_next, rt); + else + rcu_assign_pointer(rt_hash_table[hash].chain, rt); + spin_unlock_bh(rt_hash_lock_addr(hash)); *rp = rt; return 0; @@ -1217,6 +1331,9 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, || ipv4_is_zeronet(new_gw)) goto reject_redirect; + if (!rt_caching(net)) + goto reject_redirect; + if (!IN_DEV_SHARED_MEDIA(in_dev)) { if (!inet_addr_onlink(in_dev, new_gw, old_gw)) goto reject_redirect; @@ -1267,7 +1384,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, /* Copy all the information. */ *rt = *rth; - INIT_RCU_HEAD(&rt->u.dst.rcu_head); rt->u.dst.__use = 1; atomic_set(&rt->u.dst.__refcnt, 1); rt->u.dst.child = NULL; @@ -1280,7 +1396,9 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, rt->u.dst.path = &rt->u.dst; rt->u.dst.neighbour = NULL; rt->u.dst.hh = NULL; +#ifdef CONFIG_XFRM rt->u.dst.xfrm = NULL; +#endif rt->rt_genid = rt_genid(net); rt->rt_flags |= RTCF_REDIRECTED; @@ -1324,11 +1442,10 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, reject_redirect: #ifdef CONFIG_IP_ROUTE_VERBOSE if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) - printk(KERN_INFO "Redirect from " NIPQUAD_FMT " on %s about " - NIPQUAD_FMT " ignored.\n" - " Advised path = " NIPQUAD_FMT " -> " NIPQUAD_FMT "\n", - NIPQUAD(old_gw), dev->name, NIPQUAD(new_gw), - NIPQUAD(saddr), NIPQUAD(daddr)); + printk(KERN_INFO "Redirect from %pI4 on %s about %pI4 ignored.\n" + " Advised path = %pI4 -> %pI4\n", + &old_gw, dev->name, &new_gw, + &saddr, &daddr); #endif in_dev_put(in_dev); } @@ -1348,9 +1465,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) rt->fl.oif, rt_genid(dev_net(dst->dev))); #if RT_CACHE_DEBUG >= 1 - printk(KERN_DEBUG "ipv4_negative_advice: redirect to " - NIPQUAD_FMT "/%02x dropped\n", - NIPQUAD(rt->rt_dst), rt->fl.fl4_tos); + printk(KERN_DEBUG "ipv4_negative_advice: redirect to %pI4/%02x dropped\n", + &rt->rt_dst, rt->fl.fl4_tos); #endif rt_del(hash, rt); ret = NULL; @@ -1414,10 +1530,9 @@ void ip_rt_send_redirect(struct sk_buff *skb) if (IN_DEV_LOG_MARTIANS(in_dev) && rt->u.dst.rate_tokens == ip_rt_redirect_number && net_ratelimit()) - printk(KERN_WARNING "host " NIPQUAD_FMT "/if%d ignores " - "redirects for " NIPQUAD_FMT " to " NIPQUAD_FMT ".\n", - NIPQUAD(rt->rt_src), rt->rt_iif, - NIPQUAD(rt->rt_dst), NIPQUAD(rt->rt_gateway)); + printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", + &rt->rt_src, rt->rt_iif, + &rt->rt_dst, &rt->rt_gateway); #endif } out: @@ -1610,8 +1725,8 @@ static void ipv4_link_failure(struct sk_buff *skb) static int ip_rt_bug(struct sk_buff *skb) { - printk(KERN_DEBUG "ip_rt_bug: " NIPQUAD_FMT " -> " NIPQUAD_FMT ", %s\n", - NIPQUAD(ip_hdr(skb)->saddr), NIPQUAD(ip_hdr(skb)->daddr), + printk(KERN_DEBUG "ip_rt_bug: %pI4 -> %pI4, %s\n", + &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, skb->dev ? skb->dev->name : "?"); kfree_skb(skb); return 0; @@ -1788,9 +1903,8 @@ static void ip_handle_martian_source(struct net_device *dev, * RFC1812 recommendation, if source is martian, * the only hint is MAC header. */ - printk(KERN_WARNING "martian source " NIPQUAD_FMT " from " - NIPQUAD_FMT", on dev %s\n", - NIPQUAD(daddr), NIPQUAD(saddr), dev->name); + printk(KERN_WARNING "martian source %pI4 from %pI4, on dev %s\n", + &daddr, &saddr, dev->name); if (dev->hard_header_len && skb_mac_header_was_set(skb)) { int i; const unsigned char *p = skb_mac_header(skb); @@ -2099,9 +2213,8 @@ martian_destination: RT_CACHE_STAT_INC(in_martian_dst); #ifdef CONFIG_IP_ROUTE_VERBOSE if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) - printk(KERN_WARNING "martian destination " NIPQUAD_FMT " from " - NIPQUAD_FMT ", dev %s\n", - NIPQUAD(daddr), NIPQUAD(saddr), dev->name); + printk(KERN_WARNING "martian destination %pI4 from %pI4, dev %s\n", + &daddr, &saddr, dev->name); #endif e_hostunreach: @@ -2130,6 +2243,10 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, struct net *net; net = dev_net(dev); + + if (!rt_caching(net)) + goto skip_cache; + tos &= IPTOS_RT_MASK; hash = rt_hash(daddr, saddr, iif, rt_genid(net)); @@ -2154,6 +2271,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, } rcu_read_unlock(); +skip_cache: /* Multicast recognition logic is moved from route cache to here. The problem was that too many Ethernet cards have broken/missing hardware multicast filters :-( As result the host on multicasting @@ -2539,6 +2657,9 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, unsigned hash; struct rtable *rth; + if (!rt_caching(net)) + goto slow_output; + hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net)); rcu_read_lock_bh(); @@ -2563,6 +2684,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, } rcu_read_unlock_bh(); +slow_output: return ip_route_output_slow(net, rp, flp); } @@ -2578,7 +2700,6 @@ static struct dst_ops ipv4_dst_blackhole_ops = { .destroy = ipv4_dst_destroy, .check = ipv4_dst_check, .update_pmtu = ipv4_rt_blackhole_update_pmtu, - .entry_size = sizeof(struct rtable), .entries = ATOMIC_INIT(0), }; @@ -2640,7 +2761,7 @@ int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp, flp->fl4_src = (*rp)->rt_src; if (!flp->fl4_dst) flp->fl4_dst = (*rp)->rt_dst; - err = __xfrm_lookup((struct dst_entry **)rp, flp, sk, + err = __xfrm_lookup(net, (struct dst_entry **)rp, flp, sk, flags ? XFRM_LOOKUP_WAIT : 0); if (err == -EREMOTE) err = ipv4_dst_blackhole(net, rp, flp); @@ -2995,7 +3116,7 @@ static ctl_table ipv4_route_table[] = { .data = &ipv4_dst_ops.gc_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_ROUTE_MAX_SIZE, @@ -3003,7 +3124,7 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_max_size, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { /* Deprecated. Use gc_min_interval_ms */ @@ -3013,8 +3134,8 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_gc_min_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV4_ROUTE_GC_MIN_INTERVAL_MS, @@ -3022,8 +3143,8 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_gc_min_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_ms_jiffies, - .strategy = &sysctl_ms_jiffies, + .proc_handler = proc_dointvec_ms_jiffies, + .strategy = sysctl_ms_jiffies, }, { .ctl_name = NET_IPV4_ROUTE_GC_TIMEOUT, @@ -3031,8 +3152,8 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_gc_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV4_ROUTE_GC_INTERVAL, @@ -3040,8 +3161,8 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_gc_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV4_ROUTE_REDIRECT_LOAD, @@ -3049,7 +3170,7 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_redirect_load, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_ROUTE_REDIRECT_NUMBER, @@ -3057,7 +3178,7 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_redirect_number, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_ROUTE_REDIRECT_SILENCE, @@ -3065,7 +3186,7 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_redirect_silence, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_ROUTE_ERROR_COST, @@ -3073,7 +3194,7 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_error_cost, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_ROUTE_ERROR_BURST, @@ -3081,7 +3202,7 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_error_burst, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_ROUTE_GC_ELASTICITY, @@ -3089,7 +3210,7 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_gc_elasticity, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_ROUTE_MTU_EXPIRES, @@ -3097,8 +3218,8 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_mtu_expires, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV4_ROUTE_MIN_PMTU, @@ -3106,7 +3227,7 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_min_pmtu, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_ROUTE_MIN_ADVMSS, @@ -3114,7 +3235,7 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_min_advmss, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_ROUTE_SECRET_INTERVAL, @@ -3122,8 +3243,8 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_secret_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &ipv4_sysctl_rt_secret_interval, - .strategy = &ipv4_sysctl_rt_secret_interval_strategy, + .proc_handler = ipv4_sysctl_rt_secret_interval, + .strategy = ipv4_sysctl_rt_secret_interval_strategy, }, { .ctl_name = 0 } }; @@ -3151,8 +3272,8 @@ static struct ctl_table ipv4_route_flush_table[] = { .procname = "flush", .maxlen = sizeof(int), .mode = 0200, - .proc_handler = &ipv4_sysctl_rtcache_flush, - .strategy = &ipv4_sysctl_rtcache_flush_strategy, + .proc_handler = ipv4_sysctl_rtcache_flush, + .strategy = ipv4_sysctl_rtcache_flush_strategy, }, { .ctl_name = 0 }, }; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 1bb10df8ce7..4710d219f06 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -195,7 +195,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_timestamps, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_TCP_WINDOW_SCALING, @@ -203,7 +203,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_window_scaling, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_TCP_SACK, @@ -211,7 +211,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_sack, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_TCP_RETRANS_COLLAPSE, @@ -219,7 +219,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_retrans_collapse, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_DEFAULT_TTL, @@ -227,8 +227,8 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_ip_default_ttl, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &ipv4_doint_and_flush, - .strategy = &ipv4_doint_and_flush_strategy, + .proc_handler = ipv4_doint_and_flush, + .strategy = ipv4_doint_and_flush_strategy, .extra2 = &init_net, }, { @@ -237,7 +237,7 @@ static struct ctl_table ipv4_table[] = { .data = &ipv4_config.no_pmtu_disc, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_NONLOCAL_BIND, @@ -245,7 +245,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_ip_nonlocal_bind, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_TCP_SYN_RETRIES, @@ -253,7 +253,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_syn_retries, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_SYNACK_RETRIES, @@ -261,7 +261,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_synack_retries, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_MAX_ORPHANS, @@ -269,7 +269,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_max_orphans, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_MAX_TW_BUCKETS, @@ -277,7 +277,7 @@ static struct ctl_table ipv4_table[] = { .data = &tcp_death_row.sysctl_max_tw_buckets, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_DYNADDR, @@ -285,7 +285,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_ip_dynaddr, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_TCP_KEEPALIVE_TIME, @@ -293,8 +293,8 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_keepalive_time, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies }, { .ctl_name = NET_IPV4_TCP_KEEPALIVE_PROBES, @@ -302,7 +302,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_keepalive_probes, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_TCP_KEEPALIVE_INTVL, @@ -310,8 +310,8 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_keepalive_intvl, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies }, { .ctl_name = NET_IPV4_TCP_RETRIES1, @@ -319,8 +319,8 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_retries1, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra2 = &tcp_retr1_max }, { @@ -329,7 +329,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_retries2, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_TCP_FIN_TIMEOUT, @@ -337,8 +337,8 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_fin_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies }, #ifdef CONFIG_SYN_COOKIES { @@ -347,7 +347,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_syncookies, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, #endif { @@ -356,7 +356,7 @@ static struct ctl_table ipv4_table[] = { .data = &tcp_death_row.sysctl_tw_recycle, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_ABORT_ON_OVERFLOW, @@ -364,7 +364,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_abort_on_overflow, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_STDURG, @@ -372,7 +372,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_stdurg, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_RFC1337, @@ -380,7 +380,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_rfc1337, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_MAX_SYN_BACKLOG, @@ -388,7 +388,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_max_syn_backlog, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_LOCAL_PORT_RANGE, @@ -396,8 +396,8 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_local_ports.range, .maxlen = sizeof(sysctl_local_ports.range), .mode = 0644, - .proc_handler = &ipv4_local_port_range, - .strategy = &ipv4_sysctl_local_port_range, + .proc_handler = ipv4_local_port_range, + .strategy = ipv4_sysctl_local_port_range, }, #ifdef CONFIG_IP_MULTICAST { @@ -406,7 +406,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_igmp_max_memberships, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, #endif @@ -416,7 +416,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_igmp_max_msf, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_INET_PEER_THRESHOLD, @@ -424,7 +424,7 @@ static struct ctl_table ipv4_table[] = { .data = &inet_peer_threshold, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_INET_PEER_MINTTL, @@ -432,8 +432,8 @@ static struct ctl_table ipv4_table[] = { .data = &inet_peer_minttl, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies }, { .ctl_name = NET_IPV4_INET_PEER_MAXTTL, @@ -441,8 +441,8 @@ static struct ctl_table ipv4_table[] = { .data = &inet_peer_maxttl, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies }, { .ctl_name = NET_IPV4_INET_PEER_GC_MINTIME, @@ -450,8 +450,8 @@ static struct ctl_table ipv4_table[] = { .data = &inet_peer_gc_mintime, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies }, { .ctl_name = NET_IPV4_INET_PEER_GC_MAXTIME, @@ -459,8 +459,8 @@ static struct ctl_table ipv4_table[] = { .data = &inet_peer_gc_maxtime, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies }, { .ctl_name = NET_TCP_ORPHAN_RETRIES, @@ -468,7 +468,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_orphan_retries, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_FACK, @@ -476,7 +476,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_fack, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_REORDERING, @@ -484,7 +484,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_reordering, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_ECN, @@ -492,7 +492,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_ecn, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_DSACK, @@ -500,7 +500,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_dsack, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_MEM, @@ -508,7 +508,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_mem, .maxlen = sizeof(sysctl_tcp_mem), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_WMEM, @@ -516,7 +516,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_wmem, .maxlen = sizeof(sysctl_tcp_wmem), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_RMEM, @@ -524,7 +524,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_rmem, .maxlen = sizeof(sysctl_tcp_rmem), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_APP_WIN, @@ -532,7 +532,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_app_win, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_ADV_WIN_SCALE, @@ -540,7 +540,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_adv_win_scale, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_TW_REUSE, @@ -548,7 +548,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_tw_reuse, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_FRTO, @@ -556,7 +556,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_frto, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_FRTO_RESPONSE, @@ -564,7 +564,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_frto_response, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_LOW_LATENCY, @@ -572,7 +572,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_low_latency, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_TCP_NO_METRICS_SAVE, @@ -580,7 +580,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_nometrics_save, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_TCP_MODERATE_RCVBUF, @@ -588,7 +588,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_moderate_rcvbuf, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_TCP_TSO_WIN_DIVISOR, @@ -596,15 +596,15 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_tso_win_divisor, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_TCP_CONG_CONTROL, .procname = "tcp_congestion_control", .mode = 0644, .maxlen = TCP_CA_NAME_MAX, - .proc_handler = &proc_tcp_congestion_control, - .strategy = &sysctl_tcp_congestion_control, + .proc_handler = proc_tcp_congestion_control, + .strategy = sysctl_tcp_congestion_control, }, { .ctl_name = NET_TCP_ABC, @@ -612,7 +612,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_abc, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_TCP_MTU_PROBING, @@ -620,7 +620,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_mtu_probing, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_TCP_BASE_MSS, @@ -628,7 +628,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_base_mss, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS, @@ -636,7 +636,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_workaround_signed_windows, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, #ifdef CONFIG_NET_DMA { @@ -645,7 +645,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_dma_copybreak, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, #endif { @@ -654,7 +654,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_slow_start_after_idle, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, #ifdef CONFIG_NETLABEL { @@ -663,7 +663,7 @@ static struct ctl_table ipv4_table[] = { .data = &cipso_v4_cache_enabled, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_CIPSOV4_CACHE_BUCKET_SIZE, @@ -671,7 +671,7 @@ static struct ctl_table ipv4_table[] = { .data = &cipso_v4_cache_bucketsize, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_CIPSOV4_RBM_OPTFMT, @@ -679,7 +679,7 @@ static struct ctl_table ipv4_table[] = { .data = &cipso_v4_rbm_optfmt, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_CIPSOV4_RBM_STRICTVALID, @@ -687,22 +687,22 @@ static struct ctl_table ipv4_table[] = { .data = &cipso_v4_rbm_strictvalid, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, #endif /* CONFIG_NETLABEL */ { .procname = "tcp_available_congestion_control", .maxlen = TCP_CA_BUF_MAX, .mode = 0444, - .proc_handler = &proc_tcp_available_congestion_control, + .proc_handler = proc_tcp_available_congestion_control, }, { .ctl_name = NET_TCP_ALLOWED_CONG_CONTROL, .procname = "tcp_allowed_congestion_control", .maxlen = TCP_CA_BUF_MAX, .mode = 0644, - .proc_handler = &proc_allowed_congestion_control, - .strategy = &strategy_allowed_congestion_control, + .proc_handler = proc_allowed_congestion_control, + .strategy = strategy_allowed_congestion_control, }, { .ctl_name = NET_TCP_MAX_SSTHRESH, @@ -710,7 +710,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_max_ssthresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = CTL_UNNUMBERED, @@ -718,8 +718,8 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_udp_mem, .maxlen = sizeof(sysctl_udp_mem), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &zero }, { @@ -728,8 +728,8 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_udp_rmem_min, .maxlen = sizeof(sysctl_udp_rmem_min), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &zero }, { @@ -738,8 +738,8 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_udp_wmem_min, .maxlen = sizeof(sysctl_udp_wmem_min), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &zero }, { .ctl_name = 0 } @@ -752,7 +752,7 @@ static struct ctl_table ipv4_net_table[] = { .data = &init_net.ipv4.sysctl_icmp_echo_ignore_all, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_ICMP_ECHO_IGNORE_BROADCASTS, @@ -760,7 +760,7 @@ static struct ctl_table ipv4_net_table[] = { .data = &init_net.ipv4.sysctl_icmp_echo_ignore_broadcasts, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_ICMP_IGNORE_BOGUS_ERROR_RESPONSES, @@ -768,7 +768,7 @@ static struct ctl_table ipv4_net_table[] = { .data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR, @@ -776,7 +776,7 @@ static struct ctl_table ipv4_net_table[] = { .data = &init_net.ipv4.sysctl_icmp_errors_use_inbound_ifaddr, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV4_ICMP_RATELIMIT, @@ -784,8 +784,8 @@ static struct ctl_table ipv4_net_table[] = { .data = &init_net.ipv4.sysctl_icmp_ratelimit, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_ms_jiffies, - .strategy = &sysctl_ms_jiffies + .proc_handler = proc_dointvec_ms_jiffies, + .strategy = sysctl_ms_jiffies }, { .ctl_name = NET_IPV4_ICMP_RATEMASK, @@ -793,7 +793,15 @@ static struct ctl_table ipv4_net_table[] = { .data = &init_net.ipv4.sysctl_icmp_ratemask, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "rt_cache_rebuild_count", + .data = &init_net.ipv4.sysctl_rt_cache_rebuild_count, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec }, { } }; @@ -827,8 +835,12 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) &net->ipv4.sysctl_icmp_ratelimit; table[5].data = &net->ipv4.sysctl_icmp_ratemask; + table[6].data = + &net->ipv4.sysctl_rt_cache_rebuild_count; } + net->ipv4.sysctl_rt_cache_rebuild_count = 4; + net->ipv4.ipv4_hdr = register_net_sysctl_table(net, net_ipv4_ctl_path, table); if (net->ipv4.ipv4_hdr == NULL) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c5aca0bb116..1f3d52946b3 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -277,8 +277,7 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; -atomic_t tcp_orphan_count = ATOMIC_INIT(0); - +struct percpu_counter tcp_orphan_count; EXPORT_SYMBOL_GPL(tcp_orphan_count); int sysctl_tcp_mem[3] __read_mostly; @@ -290,9 +289,12 @@ EXPORT_SYMBOL(sysctl_tcp_rmem); EXPORT_SYMBOL(sysctl_tcp_wmem); atomic_t tcp_memory_allocated; /* Current allocated memory. */ -atomic_t tcp_sockets_allocated; /* Current number of TCP sockets. */ - EXPORT_SYMBOL(tcp_memory_allocated); + +/* + * Current number of TCP sockets. + */ +struct percpu_counter tcp_sockets_allocated; EXPORT_SYMBOL(tcp_sockets_allocated); /* @@ -1680,7 +1682,7 @@ void tcp_set_state(struct sock *sk, int state) inet_put_port(sk); /* fall through */ default: - if (oldstate==TCP_ESTABLISHED) + if (oldstate == TCP_ESTABLISHED) TCP_DEC_STATS(sock_net(sk), TCP_MIB_CURRESTAB); } @@ -1690,7 +1692,7 @@ void tcp_set_state(struct sock *sk, int state) sk->sk_state = state; #ifdef STATE_TRACE - SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n",sk, statename[oldstate],statename[state]); + SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]); #endif } EXPORT_SYMBOL_GPL(tcp_set_state); @@ -1834,7 +1836,7 @@ adjudge_to_death: state = sk->sk_state; sock_hold(sk); sock_orphan(sk); - atomic_inc(sk->sk_prot->orphan_count); + percpu_counter_inc(sk->sk_prot->orphan_count); /* It is the last release_sock in its life. It will remove backlog. */ release_sock(sk); @@ -1885,9 +1887,11 @@ adjudge_to_death: } } if (sk->sk_state != TCP_CLOSE) { + int orphan_count = percpu_counter_read_positive( + sk->sk_prot->orphan_count); + sk_mem_reclaim(sk); - if (tcp_too_many_orphans(sk, - atomic_read(sk->sk_prot->orphan_count))) { + if (tcp_too_many_orphans(sk, orphan_count)) { if (net_ratelimit()) printk(KERN_INFO "TCP: too many of orphaned " "sockets\n"); @@ -2461,6 +2465,106 @@ out: } EXPORT_SYMBOL(tcp_tso_segment); +struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) +{ + struct sk_buff **pp = NULL; + struct sk_buff *p; + struct tcphdr *th; + struct tcphdr *th2; + unsigned int thlen; + unsigned int flags; + unsigned int total; + unsigned int mss = 1; + int flush = 1; + + if (!pskb_may_pull(skb, sizeof(*th))) + goto out; + + th = tcp_hdr(skb); + thlen = th->doff * 4; + if (thlen < sizeof(*th)) + goto out; + + if (!pskb_may_pull(skb, thlen)) + goto out; + + th = tcp_hdr(skb); + __skb_pull(skb, thlen); + + flags = tcp_flag_word(th); + + for (; (p = *head); head = &p->next) { + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + th2 = tcp_hdr(p); + + if (th->source != th2->source || th->dest != th2->dest) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + + goto found; + } + + goto out_check_final; + +found: + flush = NAPI_GRO_CB(p)->flush; + flush |= flags & TCP_FLAG_CWR; + flush |= (flags ^ tcp_flag_word(th2)) & + ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH); + flush |= th->ack_seq != th2->ack_seq || th->window != th2->window; + flush |= memcmp(th + 1, th2 + 1, thlen - sizeof(*th)); + + total = p->len; + mss = total; + if (skb_shinfo(p)->frag_list) + mss = skb_shinfo(p)->frag_list->len; + + flush |= skb->len > mss || skb->len <= 0; + flush |= ntohl(th2->seq) + total != ntohl(th->seq); + + if (flush || skb_gro_receive(head, skb)) { + mss = 1; + goto out_check_final; + } + + p = *head; + th2 = tcp_hdr(p); + tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH); + +out_check_final: + flush = skb->len < mss; + flush |= flags & (TCP_FLAG_URG | TCP_FLAG_PSH | TCP_FLAG_RST | + TCP_FLAG_SYN | TCP_FLAG_FIN); + + if (p && (!NAPI_GRO_CB(skb)->same_flow || flush)) + pp = head; + +out: + NAPI_GRO_CB(skb)->flush |= flush; + + return pp; +} + +int tcp_gro_complete(struct sk_buff *skb) +{ + struct tcphdr *th = tcp_hdr(skb); + + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct tcphdr, check); + skb->ip_summed = CHECKSUM_PARTIAL; + + skb_shinfo(skb)->gso_size = skb_shinfo(skb)->frag_list->len; + skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; + + if (th->cwr) + skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; + + return 0; +} + #ifdef CONFIG_TCP_MD5SIG static unsigned long tcp_md5sig_users; static struct tcp_md5sig_pool **tcp_md5sig_pool; @@ -2650,7 +2754,7 @@ EXPORT_SYMBOL(tcp_md5_hash_key); void tcp_done(struct sock *sk) { - if(sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) + if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS); tcp_set_state(sk, TCP_CLOSE); @@ -2685,6 +2789,8 @@ void __init tcp_init(void) BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); + percpu_counter_init(&tcp_sockets_allocated, 0); + percpu_counter_init(&tcp_orphan_count, 0); tcp_hashinfo.bind_bucket_cachep = kmem_cache_create("tcp_bind_bucket", sizeof(struct inet_bind_bucket), 0, @@ -2707,8 +2813,8 @@ void __init tcp_init(void) thash_entries ? 0 : 512 * 1024); tcp_hashinfo.ehash_size = 1 << tcp_hashinfo.ehash_size; for (i = 0; i < tcp_hashinfo.ehash_size; i++) { - INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].chain); - INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].twchain); + INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].chain, i); + INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].twchain, i); } if (inet_ehash_locks_alloc(&tcp_hashinfo)) panic("TCP: failed to alloc ehash_locks"); diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 4a1221e5e8e..ee467ec40c4 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -1,13 +1,23 @@ /* - * TCP CUBIC: Binary Increase Congestion control for TCP v2.2 + * TCP CUBIC: Binary Increase Congestion control for TCP v2.3 * Home page: * http://netsrv.csc.ncsu.edu/twiki/bin/view/Main/BIC * This is from the implementation of CUBIC TCP in - * Injong Rhee, Lisong Xu. - * "CUBIC: A New TCP-Friendly High-Speed TCP Variant - * in PFLDnet 2005 + * Sangtae Ha, Injong Rhee and Lisong Xu, + * "CUBIC: A New TCP-Friendly High-Speed TCP Variant" + * in ACM SIGOPS Operating System Review, July 2008. * Available from: - * http://netsrv.csc.ncsu.edu/export/cubic-paper.pdf + * http://netsrv.csc.ncsu.edu/export/cubic_a_new_tcp_2008.pdf + * + * CUBIC integrates a new slow start algorithm, called HyStart. + * The details of HyStart are presented in + * Sangtae Ha and Injong Rhee, + * "Taming the Elephants: New TCP Slow Start", NCSU TechReport 2008. + * Available from: + * http://netsrv.csc.ncsu.edu/export/hystart_techreport_2008.pdf + * + * All testing results are available from: + * http://netsrv.csc.ncsu.edu/wiki/index.php/TCP_Testing * * Unless CUBIC is enabled and congestion window is large * this behaves the same as the original Reno. @@ -23,12 +33,26 @@ */ #define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */ +/* Two methods of hybrid slow start */ +#define HYSTART_ACK_TRAIN 0x1 +#define HYSTART_DELAY 0x2 + +/* Number of delay samples for detecting the increase of delay */ +#define HYSTART_MIN_SAMPLES 8 +#define HYSTART_DELAY_MIN (2U<<3) +#define HYSTART_DELAY_MAX (16U<<3) +#define HYSTART_DELAY_THRESH(x) clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX) + static int fast_convergence __read_mostly = 1; static int beta __read_mostly = 717; /* = 717/1024 (BICTCP_BETA_SCALE) */ static int initial_ssthresh __read_mostly; static int bic_scale __read_mostly = 41; static int tcp_friendliness __read_mostly = 1; +static int hystart __read_mostly = 1; +static int hystart_detect __read_mostly = HYSTART_ACK_TRAIN | HYSTART_DELAY; +static int hystart_low_window __read_mostly = 16; + static u32 cube_rtt_scale __read_mostly; static u32 beta_scale __read_mostly; static u64 cube_factor __read_mostly; @@ -44,6 +68,13 @@ module_param(bic_scale, int, 0444); MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_scale/1024)"); module_param(tcp_friendliness, int, 0644); MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness"); +module_param(hystart, int, 0644); +MODULE_PARM_DESC(hystart, "turn on/off hybrid slow start algorithm"); +module_param(hystart_detect, int, 0644); +MODULE_PARM_DESC(hystart_detect, "hyrbrid slow start detection mechanisms" + " 1: packet-train 2: delay 3: both packet-train and delay"); +module_param(hystart_low_window, int, 0644); +MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start"); /* BIC TCP Parameters */ struct bictcp { @@ -59,7 +90,13 @@ struct bictcp { u32 ack_cnt; /* number of acks */ u32 tcp_cwnd; /* estimated tcp cwnd */ #define ACK_RATIO_SHIFT 4 - u32 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */ + u16 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */ + u8 sample_cnt; /* number of samples to decide curr_rtt */ + u8 found; /* the exit point is found? */ + u32 round_start; /* beginning of each round */ + u32 end_seq; /* end_seq of the round */ + u32 last_jiffies; /* last time when the ACK spacing is close */ + u32 curr_rtt; /* the minimum rtt of current round */ }; static inline void bictcp_reset(struct bictcp *ca) @@ -76,12 +113,28 @@ static inline void bictcp_reset(struct bictcp *ca) ca->delayed_ack = 2 << ACK_RATIO_SHIFT; ca->ack_cnt = 0; ca->tcp_cwnd = 0; + ca->found = 0; +} + +static inline void bictcp_hystart_reset(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bictcp *ca = inet_csk_ca(sk); + + ca->round_start = ca->last_jiffies = jiffies; + ca->end_seq = tp->snd_nxt; + ca->curr_rtt = 0; + ca->sample_cnt = 0; } static void bictcp_init(struct sock *sk) { bictcp_reset(inet_csk_ca(sk)); - if (initial_ssthresh) + + if (hystart) + bictcp_hystart_reset(sk); + + if (!hystart && initial_ssthresh) tcp_sk(sk)->snd_ssthresh = initial_ssthresh; } @@ -235,9 +288,11 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) if (!tcp_is_cwnd_limited(sk, in_flight)) return; - if (tp->snd_cwnd <= tp->snd_ssthresh) + if (tp->snd_cwnd <= tp->snd_ssthresh) { + if (hystart && after(ack, ca->end_seq)) + bictcp_hystart_reset(sk); tcp_slow_start(tp); - else { + } else { bictcp_update(ca, tp->snd_cwnd); /* In dangerous area, increase slowly. @@ -281,8 +336,45 @@ static u32 bictcp_undo_cwnd(struct sock *sk) static void bictcp_state(struct sock *sk, u8 new_state) { - if (new_state == TCP_CA_Loss) + if (new_state == TCP_CA_Loss) { bictcp_reset(inet_csk_ca(sk)); + bictcp_hystart_reset(sk); + } +} + +static void hystart_update(struct sock *sk, u32 delay) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bictcp *ca = inet_csk_ca(sk); + + if (!(ca->found & hystart_detect)) { + u32 curr_jiffies = jiffies; + + /* first detection parameter - ack-train detection */ + if (curr_jiffies - ca->last_jiffies <= msecs_to_jiffies(2)) { + ca->last_jiffies = curr_jiffies; + if (curr_jiffies - ca->round_start >= ca->delay_min>>4) + ca->found |= HYSTART_ACK_TRAIN; + } + + /* obtain the minimum delay of more than sampling packets */ + if (ca->sample_cnt < HYSTART_MIN_SAMPLES) { + if (ca->curr_rtt == 0 || ca->curr_rtt > delay) + ca->curr_rtt = delay; + + ca->sample_cnt++; + } else { + if (ca->curr_rtt > ca->delay_min + + HYSTART_DELAY_THRESH(ca->delay_min>>4)) + ca->found |= HYSTART_DELAY; + } + /* + * Either one of two conditions are met, + * we exit from slow start immediately. + */ + if (ca->found & hystart_detect) + tp->snd_ssthresh = tp->snd_cwnd; + } } /* Track delayed acknowledgment ratio using sliding window @@ -291,6 +383,7 @@ static void bictcp_state(struct sock *sk, u8 new_state) static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) { const struct inet_connection_sock *icsk = inet_csk(sk); + const struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); u32 delay; @@ -314,6 +407,11 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) /* first time call or link delay decreases */ if (ca->delay_min == 0 || ca->delay_min > delay) ca->delay_min = delay; + + /* hystart triggers when cwnd is larger than some threshold */ + if (hystart && tp->snd_cwnd <= tp->snd_ssthresh && + tp->snd_cwnd >= hystart_low_window) + hystart_update(sk, delay); } static struct tcp_congestion_ops cubictcp = { @@ -372,4 +470,4 @@ module_exit(cubictcp_unregister); MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("CUBIC TCP"); -MODULE_VERSION("2.2"); +MODULE_VERSION("2.3"); diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 838d491dfda..fcbcd4ff6c5 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -34,7 +34,7 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, tcp_get_info(sk, info); } -static struct inet_diag_handler tcp_diag_handler = { +static const struct inet_diag_handler tcp_diag_handler = { .idiag_hashinfo = &tcp_hashinfo, .idiag_get_info = tcp_diag_get_info, .idiag_type = TCPDIAG_GETSOCK, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d77c0d29e23..99b7ecbe889 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -701,13 +701,10 @@ static inline void tcp_set_rto(struct sock *sk) * all the algo is pure shit and should be replaced * with correct one. It is exactly, which we pretend to do. */ -} -/* NOTE: clamping at TCP_RTO_MIN is not required, current algo - * guarantees that rto is higher. - */ -static inline void tcp_bound_rto(struct sock *sk) -{ + /* NOTE: clamping at TCP_RTO_MIN is not required, current algo + * guarantees that rto is higher. + */ if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX) inet_csk(sk)->icsk_rto = TCP_RTO_MAX; } @@ -928,7 +925,6 @@ static void tcp_init_metrics(struct sock *sk) tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); } tcp_set_rto(sk); - tcp_bound_rto(sk); if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp) goto reset; tp->snd_cwnd = tcp_init_cwnd(tp, dst); @@ -1002,7 +998,8 @@ static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb) } } -void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb) +static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, + struct sk_buff *skb) { tcp_verify_retransmit_hint(tp, skb); @@ -1236,31 +1233,58 @@ static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb, return dup_sack; } +struct tcp_sacktag_state { + int reord; + int fack_count; + int flag; +}; + /* Check if skb is fully within the SACK block. In presence of GSO skbs, * the incoming SACK may not exactly match but we can find smaller MSS * aligned portion of it that matches. Therefore we might need to fragment * which may fail and creates some hassle (caller must handle error case * returns). + * + * FIXME: this could be merged to shift decision code */ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, u32 start_seq, u32 end_seq) { int in_sack, err; unsigned int pkt_len; + unsigned int mss; in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) && !before(end_seq, TCP_SKB_CB(skb)->end_seq); if (tcp_skb_pcount(skb) > 1 && !in_sack && after(TCP_SKB_CB(skb)->end_seq, start_seq)) { - + mss = tcp_skb_mss(skb); in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq); - if (!in_sack) + if (!in_sack) { pkt_len = start_seq - TCP_SKB_CB(skb)->seq; - else + if (pkt_len < mss) + pkt_len = mss; + } else { pkt_len = end_seq - TCP_SKB_CB(skb)->seq; - err = tcp_fragment(sk, skb, pkt_len, skb_shinfo(skb)->gso_size); + if (pkt_len < mss) + return -EINVAL; + } + + /* Round if necessary so that SACKs cover only full MSSes + * and/or the remaining small portion (if present) + */ + if (pkt_len > mss) { + unsigned int new_len = (pkt_len / mss) * mss; + if (!in_sack && new_len < pkt_len) { + new_len += mss; + if (new_len > skb->len) + return 0; + } + pkt_len = new_len; + } + err = tcp_fragment(sk, skb, pkt_len, mss); if (err < 0) return err; } @@ -1268,24 +1292,25 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, return in_sack; } -static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, - int *reord, int dup_sack, int fack_count) +static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, + struct tcp_sacktag_state *state, + int dup_sack, int pcount) { struct tcp_sock *tp = tcp_sk(sk); u8 sacked = TCP_SKB_CB(skb)->sacked; - int flag = 0; + int fack_count = state->fack_count; /* Account D-SACK for retransmitted packet. */ if (dup_sack && (sacked & TCPCB_RETRANS)) { if (after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker)) tp->undo_retrans--; if (sacked & TCPCB_SACKED_ACKED) - *reord = min(fack_count, *reord); + state->reord = min(fack_count, state->reord); } /* Nothing to do; acked frame is about to be dropped (was ACKed). */ if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) - return flag; + return sacked; if (!(sacked & TCPCB_SACKED_ACKED)) { if (sacked & TCPCB_SACKED_RETRANS) { @@ -1294,10 +1319,9 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, * that retransmission is still in flight. */ if (sacked & TCPCB_LOST) { - TCP_SKB_CB(skb)->sacked &= - ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); - tp->lost_out -= tcp_skb_pcount(skb); - tp->retrans_out -= tcp_skb_pcount(skb); + sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); + tp->lost_out -= pcount; + tp->retrans_out -= pcount; } } else { if (!(sacked & TCPCB_RETRANS)) { @@ -1306,56 +1330,280 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, */ if (before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) - *reord = min(fack_count, *reord); + state->reord = min(fack_count, + state->reord); /* SACK enhanced F-RTO (RFC4138; Appendix B) */ if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) - flag |= FLAG_ONLY_ORIG_SACKED; + state->flag |= FLAG_ONLY_ORIG_SACKED; } if (sacked & TCPCB_LOST) { - TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; - tp->lost_out -= tcp_skb_pcount(skb); + sacked &= ~TCPCB_LOST; + tp->lost_out -= pcount; } } - TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED; - flag |= FLAG_DATA_SACKED; - tp->sacked_out += tcp_skb_pcount(skb); + sacked |= TCPCB_SACKED_ACKED; + state->flag |= FLAG_DATA_SACKED; + tp->sacked_out += pcount; - fack_count += tcp_skb_pcount(skb); + fack_count += pcount; /* Lost marker hint past SACKed? Tweak RFC3517 cnt */ if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) && before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq)) - tp->lost_cnt_hint += tcp_skb_pcount(skb); + tp->lost_cnt_hint += pcount; if (fack_count > tp->fackets_out) tp->fackets_out = fack_count; - - if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) - tcp_advance_highest_sack(sk, skb); } /* D-SACK. We can detect redundant retransmission in S|R and plain R * frames and clear it. undo_retrans is decreased above, L|R frames * are accounted above as well. */ - if (dup_sack && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)) { - TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; - tp->retrans_out -= tcp_skb_pcount(skb); + if (dup_sack && (sacked & TCPCB_SACKED_RETRANS)) { + sacked &= ~TCPCB_SACKED_RETRANS; + tp->retrans_out -= pcount; } - return flag; + return sacked; +} + +static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, + struct tcp_sacktag_state *state, + unsigned int pcount, int shifted, int mss) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *prev = tcp_write_queue_prev(sk, skb); + + BUG_ON(!pcount); + + /* Tweak before seqno plays */ + if (!tcp_is_fack(tp) && tcp_is_sack(tp) && tp->lost_skb_hint && + !before(TCP_SKB_CB(tp->lost_skb_hint)->seq, TCP_SKB_CB(skb)->seq)) + tp->lost_cnt_hint += pcount; + + TCP_SKB_CB(prev)->end_seq += shifted; + TCP_SKB_CB(skb)->seq += shifted; + + skb_shinfo(prev)->gso_segs += pcount; + BUG_ON(skb_shinfo(skb)->gso_segs < pcount); + skb_shinfo(skb)->gso_segs -= pcount; + + /* When we're adding to gso_segs == 1, gso_size will be zero, + * in theory this shouldn't be necessary but as long as DSACK + * code can come after this skb later on it's better to keep + * setting gso_size to something. + */ + if (!skb_shinfo(prev)->gso_size) { + skb_shinfo(prev)->gso_size = mss; + skb_shinfo(prev)->gso_type = sk->sk_gso_type; + } + + /* CHECKME: To clear or not to clear? Mimics normal skb currently */ + if (skb_shinfo(skb)->gso_segs <= 1) { + skb_shinfo(skb)->gso_size = 0; + skb_shinfo(skb)->gso_type = 0; + } + + /* We discard results */ + tcp_sacktag_one(skb, sk, state, 0, pcount); + + /* Difference in this won't matter, both ACKed by the same cumul. ACK */ + TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS); + + if (skb->len > 0) { + BUG_ON(!tcp_skb_pcount(skb)); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTED); + return 0; + } + + /* Whole SKB was eaten :-) */ + + if (skb == tp->retransmit_skb_hint) + tp->retransmit_skb_hint = prev; + if (skb == tp->scoreboard_skb_hint) + tp->scoreboard_skb_hint = prev; + if (skb == tp->lost_skb_hint) { + tp->lost_skb_hint = prev; + tp->lost_cnt_hint -= tcp_skb_pcount(prev); + } + + TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(prev)->flags; + if (skb == tcp_highest_sack(sk)) + tcp_advance_highest_sack(sk, skb); + + tcp_unlink_write_queue(skb, sk); + sk_wmem_free_skb(sk, skb); + + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKMERGED); + + return 1; +} + +/* I wish gso_size would have a bit more sane initialization than + * something-or-zero which complicates things + */ +static int tcp_skb_seglen(struct sk_buff *skb) +{ + return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb); +} + +/* Shifting pages past head area doesn't work */ +static int skb_can_shift(struct sk_buff *skb) +{ + return !skb_headlen(skb) && skb_is_nonlinear(skb); +} + +/* Try collapsing SACK blocks spanning across multiple skbs to a single + * skb. + */ +static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, + struct tcp_sacktag_state *state, + u32 start_seq, u32 end_seq, + int dup_sack) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *prev; + int mss; + int pcount = 0; + int len; + int in_sack; + + if (!sk_can_gso(sk)) + goto fallback; + + /* Normally R but no L won't result in plain S */ + if (!dup_sack && + (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS) + goto fallback; + if (!skb_can_shift(skb)) + goto fallback; + /* This frame is about to be dropped (was ACKed). */ + if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) + goto fallback; + + /* Can only happen with delayed DSACK + discard craziness */ + if (unlikely(skb == tcp_write_queue_head(sk))) + goto fallback; + prev = tcp_write_queue_prev(sk, skb); + + if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) + goto fallback; + + in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) && + !before(end_seq, TCP_SKB_CB(skb)->end_seq); + + if (in_sack) { + len = skb->len; + pcount = tcp_skb_pcount(skb); + mss = tcp_skb_seglen(skb); + + /* TODO: Fix DSACKs to not fragment already SACKed and we can + * drop this restriction as unnecessary + */ + if (mss != tcp_skb_seglen(prev)) + goto fallback; + } else { + if (!after(TCP_SKB_CB(skb)->end_seq, start_seq)) + goto noop; + /* CHECKME: This is non-MSS split case only?, this will + * cause skipped skbs due to advancing loop btw, original + * has that feature too + */ + if (tcp_skb_pcount(skb) <= 1) + goto noop; + + in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq); + if (!in_sack) { + /* TODO: head merge to next could be attempted here + * if (!after(TCP_SKB_CB(skb)->end_seq, end_seq)), + * though it might not be worth of the additional hassle + * + * ...we can probably just fallback to what was done + * previously. We could try merging non-SACKed ones + * as well but it probably isn't going to buy off + * because later SACKs might again split them, and + * it would make skb timestamp tracking considerably + * harder problem. + */ + goto fallback; + } + + len = end_seq - TCP_SKB_CB(skb)->seq; + BUG_ON(len < 0); + BUG_ON(len > skb->len); + + /* MSS boundaries should be honoured or else pcount will + * severely break even though it makes things bit trickier. + * Optimize common case to avoid most of the divides + */ + mss = tcp_skb_mss(skb); + + /* TODO: Fix DSACKs to not fragment already SACKed and we can + * drop this restriction as unnecessary + */ + if (mss != tcp_skb_seglen(prev)) + goto fallback; + + if (len == mss) { + pcount = 1; + } else if (len < mss) { + goto noop; + } else { + pcount = len / mss; + len = pcount * mss; + } + } + + if (!skb_shift(prev, skb, len)) + goto fallback; + if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss)) + goto out; + + /* Hole filled allows collapsing with the next as well, this is very + * useful when hole on every nth skb pattern happens + */ + if (prev == tcp_write_queue_tail(sk)) + goto out; + skb = tcp_write_queue_next(sk, prev); + + if (!skb_can_shift(skb) || + (skb == tcp_send_head(sk)) || + ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) || + (mss != tcp_skb_seglen(skb))) + goto out; + + len = skb->len; + if (skb_shift(prev, skb, len)) { + pcount += tcp_skb_pcount(skb); + tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss); + } + +out: + state->fack_count += pcount; + return prev; + +noop: + return skb; + +fallback: + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK); + return NULL; } static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, struct tcp_sack_block *next_dup, + struct tcp_sacktag_state *state, u32 start_seq, u32 end_seq, - int dup_sack_in, int *fack_count, - int *reord, int *flag) + int dup_sack_in) { + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *tmp; + tcp_for_write_queue_from(skb, sk) { int in_sack = 0; int dup_sack = dup_sack_in; @@ -1376,17 +1624,42 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, dup_sack = 1; } - if (in_sack <= 0) - in_sack = tcp_match_skb_to_sack(sk, skb, start_seq, - end_seq); + /* skb reference here is a bit tricky to get right, since + * shifting can eat and free both this skb and the next, + * so not even _safe variant of the loop is enough. + */ + if (in_sack <= 0) { + tmp = tcp_shift_skb_data(sk, skb, state, + start_seq, end_seq, dup_sack); + if (tmp != NULL) { + if (tmp != skb) { + skb = tmp; + continue; + } + + in_sack = 0; + } else { + in_sack = tcp_match_skb_to_sack(sk, skb, + start_seq, + end_seq); + } + } + if (unlikely(in_sack < 0)) break; - if (in_sack) - *flag |= tcp_sacktag_one(skb, sk, reord, dup_sack, - *fack_count); + if (in_sack) { + TCP_SKB_CB(skb)->sacked = tcp_sacktag_one(skb, sk, + state, + dup_sack, + tcp_skb_pcount(skb)); + + if (!before(TCP_SKB_CB(skb)->seq, + tcp_highest_sack_seq(tp))) + tcp_advance_highest_sack(sk, skb); + } - *fack_count += tcp_skb_pcount(skb); + state->fack_count += tcp_skb_pcount(skb); } return skb; } @@ -1395,16 +1668,17 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, * a normal way */ static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk, - u32 skip_to_seq, int *fack_count) + struct tcp_sacktag_state *state, + u32 skip_to_seq) { tcp_for_write_queue_from(skb, sk) { if (skb == tcp_send_head(sk)) break; - if (!before(TCP_SKB_CB(skb)->end_seq, skip_to_seq)) + if (after(TCP_SKB_CB(skb)->end_seq, skip_to_seq)) break; - *fack_count += tcp_skb_pcount(skb); + state->fack_count += tcp_skb_pcount(skb); } return skb; } @@ -1412,18 +1686,17 @@ static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk, static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb, struct sock *sk, struct tcp_sack_block *next_dup, - u32 skip_to_seq, - int *fack_count, int *reord, - int *flag) + struct tcp_sacktag_state *state, + u32 skip_to_seq) { if (next_dup == NULL) return skb; if (before(next_dup->start_seq, skip_to_seq)) { - skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq, fack_count); - skb = tcp_sacktag_walk(skb, sk, NULL, - next_dup->start_seq, next_dup->end_seq, - 1, fack_count, reord, flag); + skb = tcp_sacktag_skip(skb, sk, state, next_dup->start_seq); + skb = tcp_sacktag_walk(skb, sk, NULL, state, + next_dup->start_seq, next_dup->end_seq, + 1); } return skb; @@ -1445,16 +1718,17 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2); struct tcp_sack_block sp[TCP_NUM_SACKS]; struct tcp_sack_block *cache; + struct tcp_sacktag_state state; struct sk_buff *skb; int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3); int used_sacks; - int reord = tp->packets_out; - int flag = 0; int found_dup_sack = 0; - int fack_count; int i, j; int first_sack_index; + state.flag = 0; + state.reord = tp->packets_out; + if (!tp->sacked_out) { if (WARN_ON(tp->fackets_out)) tp->fackets_out = 0; @@ -1464,7 +1738,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire, num_sacks, prior_snd_una); if (found_dup_sack) - flag |= FLAG_DSACKING_ACK; + state.flag |= FLAG_DSACKING_ACK; /* Eliminate too old ACKs, but take into * account more or less fresh ones, they can @@ -1533,7 +1807,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, } skb = tcp_write_queue_head(sk); - fack_count = 0; + state.fack_count = 0; i = 0; if (!tp->sacked_out) { @@ -1558,7 +1832,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, /* Event "B" in the comment above. */ if (after(end_seq, tp->high_seq)) - flag |= FLAG_DATA_LOST; + state.flag |= FLAG_DATA_LOST; /* Skip too early cached blocks */ while (tcp_sack_cache_ok(tp, cache) && @@ -1571,13 +1845,13 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, /* Head todo? */ if (before(start_seq, cache->start_seq)) { - skb = tcp_sacktag_skip(skb, sk, start_seq, - &fack_count); + skb = tcp_sacktag_skip(skb, sk, &state, + start_seq); skb = tcp_sacktag_walk(skb, sk, next_dup, + &state, start_seq, cache->start_seq, - dup_sack, &fack_count, - &reord, &flag); + dup_sack); } /* Rest of the block already fully processed? */ @@ -1585,9 +1859,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, goto advance_sp; skb = tcp_maybe_skipping_dsack(skb, sk, next_dup, - cache->end_seq, - &fack_count, &reord, - &flag); + &state, + cache->end_seq); /* ...tail remains todo... */ if (tcp_highest_sack_seq(tp) == cache->end_seq) { @@ -1595,13 +1868,12 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, skb = tcp_highest_sack(sk); if (skb == NULL) break; - fack_count = tp->fackets_out; + state.fack_count = tp->fackets_out; cache++; goto walk; } - skb = tcp_sacktag_skip(skb, sk, cache->end_seq, - &fack_count); + skb = tcp_sacktag_skip(skb, sk, &state, cache->end_seq); /* Check overlap against next cached too (past this one already) */ cache++; continue; @@ -1611,20 +1883,20 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, skb = tcp_highest_sack(sk); if (skb == NULL) break; - fack_count = tp->fackets_out; + state.fack_count = tp->fackets_out; } - skb = tcp_sacktag_skip(skb, sk, start_seq, &fack_count); + skb = tcp_sacktag_skip(skb, sk, &state, start_seq); walk: - skb = tcp_sacktag_walk(skb, sk, next_dup, start_seq, end_seq, - dup_sack, &fack_count, &reord, &flag); + skb = tcp_sacktag_walk(skb, sk, next_dup, &state, + start_seq, end_seq, dup_sack); advance_sp: /* SACK enhanced FRTO (RFC4138, Appendix B): Clearing correct * due to in-order walk */ if (after(end_seq, tp->frto_highmark)) - flag &= ~FLAG_ONLY_ORIG_SACKED; + state.flag &= ~FLAG_ONLY_ORIG_SACKED; i++; } @@ -1641,10 +1913,10 @@ advance_sp: tcp_verify_left_out(tp); - if ((reord < tp->fackets_out) && + if ((state.reord < tp->fackets_out) && ((icsk->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker) && (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark))) - tcp_update_reordering(sk, tp->fackets_out - reord, 0); + tcp_update_reordering(sk, tp->fackets_out - state.reord, 0); out: @@ -1654,13 +1926,13 @@ out: WARN_ON((int)tp->retrans_out < 0); WARN_ON((int)tcp_packets_in_flight(tp) < 0); #endif - return flag; + return state.flag; } /* Limits sacked_out so that sum with lost_out isn't ever larger than * packets_out. Returns zero if sacked_out adjustement wasn't necessary. */ -int tcp_limit_reno_sacked(struct tcp_sock *tp) +static int tcp_limit_reno_sacked(struct tcp_sock *tp) { u32 holes; @@ -2336,9 +2608,9 @@ static void DBGUNDO(struct sock *sk, const char *msg) struct inet_sock *inet = inet_sk(sk); if (sk->sk_family == AF_INET) { - printk(KERN_DEBUG "Undo %s " NIPQUAD_FMT "/%u c%u l%u ss%u/%u p%u\n", + printk(KERN_DEBUG "Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n", msg, - NIPQUAD(inet->daddr), ntohs(inet->dport), + &inet->daddr, ntohs(inet->dport), tp->snd_cwnd, tcp_left_out(tp), tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); @@ -2346,9 +2618,9 @@ static void DBGUNDO(struct sock *sk, const char *msg) #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) else if (sk->sk_family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); - printk(KERN_DEBUG "Undo %s " NIP6_FMT "/%u c%u l%u ss%u/%u p%u\n", + printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", msg, - NIP6(np->daddr), ntohs(inet->dport), + &np->daddr, ntohs(inet->dport), tp->snd_cwnd, tcp_left_out(tp), tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); @@ -2559,6 +2831,56 @@ static void tcp_mtup_probe_success(struct sock *sk, struct sk_buff *skb) tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); } +/* Do a simple retransmit without using the backoff mechanisms in + * tcp_timer. This is used for path mtu discovery. + * The socket is already locked here. + */ +void tcp_simple_retransmit(struct sock *sk) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb; + unsigned int mss = tcp_current_mss(sk, 0); + u32 prior_lost = tp->lost_out; + + tcp_for_write_queue(skb, sk) { + if (skb == tcp_send_head(sk)) + break; + if (tcp_skb_seglen(skb) > mss && + !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { + if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { + TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; + tp->retrans_out -= tcp_skb_pcount(skb); + } + tcp_skb_mark_lost_uncond_verify(tp, skb); + } + } + + tcp_clear_retrans_hints_partial(tp); + + if (prior_lost == tp->lost_out) + return; + + if (tcp_is_reno(tp)) + tcp_limit_reno_sacked(tp); + + tcp_verify_left_out(tp); + + /* Don't muck with the congestion window here. + * Reason is that we do not increase amount of _data_ + * in network, but units changed and effective + * cwnd/ssthresh really reduced now. + */ + if (icsk->icsk_ca_state != TCP_CA_Loss) { + tp->high_seq = tp->snd_nxt; + tp->snd_ssthresh = tcp_current_ssthresh(sk); + tp->prior_ssthresh = 0; + tp->undo_marker = 0; + tcp_set_ca_state(sk, TCP_CA_Loss); + } + tcp_xmit_retransmit_queue(sk); +} + /* Process an event, which can update packets-in-flight not trivially. * Main goal of this function is to calculate new estimate for left_out, * taking into account both packets sitting in receiver's buffer and @@ -2730,6 +3052,13 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) tcp_xmit_retransmit_queue(sk); } +static void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt) +{ + tcp_rtt_estimator(sk, seq_rtt); + tcp_set_rto(sk); + inet_csk(sk)->icsk_backoff = 0; +} + /* Read draft-ietf-tcplw-high-performance before mucking * with this code. (Supersedes RFC1323) */ @@ -2751,11 +3080,8 @@ static void tcp_ack_saw_tstamp(struct sock *sk, int flag) * in window is lost... Voila. --ANK (010210) */ struct tcp_sock *tp = tcp_sk(sk); - const __u32 seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr; - tcp_rtt_estimator(sk, seq_rtt); - tcp_set_rto(sk); - inet_csk(sk)->icsk_backoff = 0; - tcp_bound_rto(sk); + + tcp_valid_rtt_meas(sk, tcp_time_stamp - tp->rx_opt.rcv_tsecr); } static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag) @@ -2772,10 +3098,7 @@ static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag) if (flag & FLAG_RETRANS_DATA_ACKED) return; - tcp_rtt_estimator(sk, seq_rtt); - tcp_set_rto(sk); - inet_csk(sk)->icsk_backoff = 0; - tcp_bound_rto(sk); + tcp_valid_rtt_meas(sk, seq_rtt); } static inline void tcp_ack_update_rtt(struct sock *sk, const int flag, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5c8fa7f1e32..10172487921 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -97,11 +97,7 @@ struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr) } #endif -struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { - .lhash_lock = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock), - .lhash_users = ATOMIC_INIT(0), - .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait), -}; +struct inet_hashinfo tcp_hashinfo; static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb) { @@ -492,7 +488,7 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) skb->csum_offset = offsetof(struct tcphdr, check); } else { th->check = tcp_v4_check(len, inet->saddr, inet->daddr, - csum_partial((char *)th, + csum_partial(th, th->doff << 2, skb->csum)); } @@ -726,7 +722,7 @@ static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req, th->check = tcp_v4_check(skb->len, ireq->loc_addr, ireq->rmt_addr, - csum_partial((char *)th, skb->len, + csum_partial(th, skb->len, skb->csum)); err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, @@ -1139,10 +1135,9 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) if (genhash || memcmp(hash_location, newhash, 16) != 0) { if (net_ratelimit()) { - printk(KERN_INFO "MD5 Hash failed for " - "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)%s\n", - NIPQUAD(iph->saddr), ntohs(th->source), - NIPQUAD(iph->daddr), ntohs(th->dest), + printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n", + &iph->saddr, ntohs(th->source), + &iph->daddr, ntohs(th->dest), genhash ? " tcp_v4_calc_md5_hash failed" : ""); } return 1; @@ -1297,10 +1292,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * to destinations, already remembered * to the moment of synflood. */ - LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open " - "request from " NIPQUAD_FMT "/%u\n", - NIPQUAD(saddr), - ntohs(tcp_hdr(skb)->source)); + LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n", + &saddr, ntohs(tcp_hdr(skb)->source)); goto drop_and_release; } @@ -1804,7 +1797,7 @@ static int tcp_v4_init_sock(struct sock *sk) sk->sk_sndbuf = sysctl_tcp_wmem[1]; sk->sk_rcvbuf = sysctl_tcp_rmem[1]; - atomic_inc(&tcp_sockets_allocated); + percpu_counter_inc(&tcp_sockets_allocated); return 0; } @@ -1852,7 +1845,7 @@ void tcp_v4_destroy_sock(struct sock *sk) sk->sk_sndmsg_page = NULL; } - atomic_dec(&tcp_sockets_allocated); + percpu_counter_dec(&tcp_sockets_allocated); } EXPORT_SYMBOL(tcp_v4_destroy_sock); @@ -1860,32 +1853,35 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock); #ifdef CONFIG_PROC_FS /* Proc filesystem TCP sock list dumping. */ -static inline struct inet_timewait_sock *tw_head(struct hlist_head *head) +static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head) { - return hlist_empty(head) ? NULL : + return hlist_nulls_empty(head) ? NULL : list_entry(head->first, struct inet_timewait_sock, tw_node); } static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw) { - return tw->tw_node.next ? - hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; + return !is_a_nulls(tw->tw_node.next) ? + hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; } static void *listening_get_next(struct seq_file *seq, void *cur) { struct inet_connection_sock *icsk; - struct hlist_node *node; + struct hlist_nulls_node *node; struct sock *sk = cur; - struct tcp_iter_state* st = seq->private; + struct inet_listen_hashbucket *ilb; + struct tcp_iter_state *st = seq->private; struct net *net = seq_file_net(seq); if (!sk) { st->bucket = 0; - sk = sk_head(&tcp_hashinfo.listening_hash[0]); + ilb = &tcp_hashinfo.listening_hash[0]; + spin_lock_bh(&ilb->lock); + sk = sk_nulls_head(&ilb->head); goto get_sk; } - + ilb = &tcp_hashinfo.listening_hash[st->bucket]; ++st->num; if (st->state == TCP_SEQ_STATE_OPENREQ) { @@ -1918,7 +1914,7 @@ get_req: sk = sk_next(sk); } get_sk: - sk_for_each_from(sk, node) { + sk_nulls_for_each_from(sk, node) { if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) { cur = sk; goto out; @@ -1935,8 +1931,11 @@ start_req: } read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); } + spin_unlock_bh(&ilb->lock); if (++st->bucket < INET_LHTABLE_SIZE) { - sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]); + ilb = &tcp_hashinfo.listening_hash[st->bucket]; + spin_lock_bh(&ilb->lock); + sk = sk_nulls_head(&ilb->head); goto get_sk; } cur = NULL; @@ -1957,28 +1956,28 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos) static inline int empty_bucket(struct tcp_iter_state *st) { - return hlist_empty(&tcp_hashinfo.ehash[st->bucket].chain) && - hlist_empty(&tcp_hashinfo.ehash[st->bucket].twchain); + return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) && + hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain); } static void *established_get_first(struct seq_file *seq) { - struct tcp_iter_state* st = seq->private; + struct tcp_iter_state *st = seq->private; struct net *net = seq_file_net(seq); void *rc = NULL; for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) { struct sock *sk; - struct hlist_node *node; + struct hlist_nulls_node *node; struct inet_timewait_sock *tw; - rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); + spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); /* Lockless fast path for the common case of empty buckets */ if (empty_bucket(st)) continue; - read_lock_bh(lock); - sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { + spin_lock_bh(lock); + sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { if (sk->sk_family != st->family || !net_eq(sock_net(sk), net)) { continue; @@ -1996,7 +1995,7 @@ static void *established_get_first(struct seq_file *seq) rc = tw; goto out; } - read_unlock_bh(lock); + spin_unlock_bh(lock); st->state = TCP_SEQ_STATE_ESTABLISHED; } out: @@ -2007,8 +2006,8 @@ static void *established_get_next(struct seq_file *seq, void *cur) { struct sock *sk = cur; struct inet_timewait_sock *tw; - struct hlist_node *node; - struct tcp_iter_state* st = seq->private; + struct hlist_nulls_node *node; + struct tcp_iter_state *st = seq->private; struct net *net = seq_file_net(seq); ++st->num; @@ -2024,7 +2023,7 @@ get_tw: cur = tw; goto out; } - read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); + spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); st->state = TCP_SEQ_STATE_ESTABLISHED; /* Look for next non empty bucket */ @@ -2034,12 +2033,12 @@ get_tw: if (st->bucket >= tcp_hashinfo.ehash_size) return NULL; - read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); - sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain); + spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); + sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain); } else - sk = sk_next(sk); + sk = sk_nulls_next(sk); - sk_for_each_from(sk, node) { + sk_nulls_for_each_from(sk, node) { if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) goto found; } @@ -2067,14 +2066,12 @@ static void *established_get_idx(struct seq_file *seq, loff_t pos) static void *tcp_get_idx(struct seq_file *seq, loff_t pos) { void *rc; - struct tcp_iter_state* st = seq->private; + struct tcp_iter_state *st = seq->private; - inet_listen_lock(&tcp_hashinfo); st->state = TCP_SEQ_STATE_LISTENING; rc = listening_get_idx(seq, &pos); if (!rc) { - inet_listen_unlock(&tcp_hashinfo); st->state = TCP_SEQ_STATE_ESTABLISHED; rc = established_get_idx(seq, pos); } @@ -2084,7 +2081,7 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos) static void *tcp_seq_start(struct seq_file *seq, loff_t *pos) { - struct tcp_iter_state* st = seq->private; + struct tcp_iter_state *st = seq->private; st->state = TCP_SEQ_STATE_LISTENING; st->num = 0; return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; @@ -2093,7 +2090,7 @@ static void *tcp_seq_start(struct seq_file *seq, loff_t *pos) static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) { void *rc = NULL; - struct tcp_iter_state* st; + struct tcp_iter_state *st; if (v == SEQ_START_TOKEN) { rc = tcp_get_idx(seq, 0); @@ -2106,7 +2103,6 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) case TCP_SEQ_STATE_LISTENING: rc = listening_get_next(seq, v); if (!rc) { - inet_listen_unlock(&tcp_hashinfo); st->state = TCP_SEQ_STATE_ESTABLISHED; rc = established_get_first(seq); } @@ -2123,7 +2119,7 @@ out: static void tcp_seq_stop(struct seq_file *seq, void *v) { - struct tcp_iter_state* st = seq->private; + struct tcp_iter_state *st = seq->private; switch (st->state) { case TCP_SEQ_STATE_OPENREQ: @@ -2133,12 +2129,12 @@ static void tcp_seq_stop(struct seq_file *seq, void *v) } case TCP_SEQ_STATE_LISTENING: if (v != SEQ_START_TOKEN) - inet_listen_unlock(&tcp_hashinfo); + spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock); break; case TCP_SEQ_STATE_TIME_WAIT: case TCP_SEQ_STATE_ESTABLISHED: if (v) - read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); + spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); break; } } @@ -2284,7 +2280,7 @@ static void get_timewait4_sock(struct inet_timewait_sock *tw, static int tcp4_seq_show(struct seq_file *seq, void *v) { - struct tcp_iter_state* st; + struct tcp_iter_state *st; int len; if (v == SEQ_START_TOKEN) { @@ -2350,6 +2346,41 @@ void tcp4_proc_exit(void) } #endif /* CONFIG_PROC_FS */ +struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) +{ + struct iphdr *iph = ip_hdr(skb); + + switch (skb->ip_summed) { + case CHECKSUM_COMPLETE: + if (!tcp_v4_check(skb->len, iph->saddr, iph->daddr, + skb->csum)) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + break; + } + + /* fall through */ + case CHECKSUM_NONE: + NAPI_GRO_CB(skb)->flush = 1; + return NULL; + } + + return tcp_gro_receive(head, skb); +} +EXPORT_SYMBOL(tcp4_gro_receive); + +int tcp4_gro_complete(struct sk_buff *skb) +{ + struct iphdr *iph = ip_hdr(skb); + struct tcphdr *th = tcp_hdr(skb); + + th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb), + iph->saddr, iph->daddr, 0); + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; + + return tcp_gro_complete(skb); +} +EXPORT_SYMBOL(tcp4_gro_complete); + struct proto tcp_prot = { .name = "TCP", .owner = THIS_MODULE, @@ -2378,6 +2409,7 @@ struct proto tcp_prot = { .sysctl_rmem = sysctl_tcp_rmem, .max_header = MAX_TCP_HEADER, .obj_size = sizeof(struct tcp_sock), + .slab_flags = SLAB_DESTROY_BY_RCU, .twsk_prot = &tcp_timewait_sock_ops, .rsk_prot = &tcp_request_sock_ops, .h.hashinfo = &tcp_hashinfo, @@ -2407,6 +2439,7 @@ static struct pernet_operations __net_initdata tcp_sk_ops = { void __init tcp_v4_init(void) { + inet_hashinfo_init(&tcp_hashinfo); if (register_pernet_device(&tcp_sk_ops)) panic("Failed to create the TCP control socket.\n"); } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 779f2e9d068..f67effbb102 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -491,7 +491,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, * as a request_sock. */ -struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, +struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct request_sock **prev) { diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index fe3b4bdfd25..557fe16cbfb 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -42,7 +42,7 @@ /* People can turn this off for buggy TCP's found in printers etc. */ int sysctl_tcp_retrans_collapse __read_mostly = 1; -/* People can turn this on to work with those rare, broken TCPs that +/* People can turn this on to work with those rare, broken TCPs that * interpret the window field as a signed quantity. */ int sysctl_tcp_workaround_signed_windows __read_mostly = 0; @@ -484,7 +484,7 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, } if (likely(sysctl_tcp_window_scaling)) { opts->ws = tp->rx_opt.rcv_wscale; - if(likely(opts->ws)) + if (likely(opts->ws)) size += TCPOLEN_WSCALE_ALIGNED; } if (likely(sysctl_tcp_sack)) { @@ -526,7 +526,7 @@ static unsigned tcp_synack_options(struct sock *sk, if (likely(ireq->wscale_ok)) { opts->ws = ireq->rcv_wscale; - if(likely(opts->ws)) + if (likely(opts->ws)) size += TCPOLEN_WSCALE_ALIGNED; } if (likely(doing_ts)) { @@ -663,10 +663,14 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, th->urg_ptr = 0; /* The urg_mode check is necessary during a below snd_una win probe */ - if (unlikely(tcp_urg_mode(tp) && - between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF))) { - th->urg_ptr = htons(tp->snd_up - tcb->seq); - th->urg = 1; + if (unlikely(tcp_urg_mode(tp))) { + if (between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF)) { + th->urg_ptr = htons(tp->snd_up - tcb->seq); + th->urg = 1; + } else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) { + th->urg_ptr = 0xFFFF; + th->urg = 1; + } } tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location); @@ -1168,7 +1172,7 @@ static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, static inline int tcp_minshall_check(const struct tcp_sock *tp) { - return after(tp->snd_sml,tp->snd_una) && + return after(tp->snd_sml, tp->snd_una) && !after(tp->snd_sml, tp->snd_nxt); } @@ -1334,7 +1338,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) /* Defer for less than two clock ticks. */ if (tp->tso_deferred && - ((jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1) + (((u32)jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1) goto send_now; in_flight = tcp_packets_in_flight(tp); @@ -1519,7 +1523,8 @@ static int tcp_mtu_probe(struct sock *sk) * Returns 1, if no segments are in flight and we have queued segments, but * cannot send anything now because of SWS or another problem. */ -static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) +static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, + int push_one, gfp_t gfp) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; @@ -1527,20 +1532,16 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) int cwnd_quota; int result; - /* If we are closed, the bytes will have to remain here. - * In time closedown will finish, we empty the write queue and all - * will be happy. - */ - if (unlikely(sk->sk_state == TCP_CLOSE)) - return 0; - sent_pkts = 0; - /* Do MTU probing. */ - if ((result = tcp_mtu_probe(sk)) == 0) { - return 0; - } else if (result > 0) { - sent_pkts = 1; + if (!push_one) { + /* Do MTU probing. */ + result = tcp_mtu_probe(sk); + if (!result) { + return 0; + } else if (result > 0) { + sent_pkts = 1; + } } while ((skb = tcp_send_head(sk))) { @@ -1562,7 +1563,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) nonagle : TCP_NAGLE_PUSH)))) break; } else { - if (tcp_tso_should_defer(sk, skb)) + if (!push_one && tcp_tso_should_defer(sk, skb)) break; } @@ -1577,7 +1578,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) TCP_SKB_CB(skb)->when = tcp_time_stamp; - if (unlikely(tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC))) + if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp))) break; /* Advance the send_head. This one is sent out. @@ -1587,6 +1588,9 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) tcp_minshall_update(tp, mss_now, skb); sent_pkts++; + + if (push_one) + break; } if (likely(sent_pkts)) { @@ -1605,10 +1609,18 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, { struct sk_buff *skb = tcp_send_head(sk); - if (skb) { - if (tcp_write_xmit(sk, cur_mss, nonagle)) - tcp_check_probe_timer(sk); - } + if (!skb) + return; + + /* If we are closed, the bytes will have to remain here. + * In time closedown will finish, we empty the write queue and + * all will be happy. + */ + if (unlikely(sk->sk_state == TCP_CLOSE)) + return; + + if (tcp_write_xmit(sk, cur_mss, nonagle, 0, GFP_ATOMIC)) + tcp_check_probe_timer(sk); } /* Send _single_ skb sitting at the send head. This function requires @@ -1616,38 +1628,11 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, */ void tcp_push_one(struct sock *sk, unsigned int mss_now) { - struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb = tcp_send_head(sk); - unsigned int tso_segs, cwnd_quota; BUG_ON(!skb || skb->len < mss_now); - tso_segs = tcp_init_tso_segs(sk, skb, mss_now); - cwnd_quota = tcp_snd_test(sk, skb, mss_now, TCP_NAGLE_PUSH); - - if (likely(cwnd_quota)) { - unsigned int limit; - - BUG_ON(!tso_segs); - - limit = mss_now; - if (tso_segs > 1 && !tcp_urg_mode(tp)) - limit = tcp_mss_split_point(sk, skb, mss_now, - cwnd_quota); - - if (skb->len > limit && - unlikely(tso_fragment(sk, skb, limit, mss_now))) - return; - - /* Send it out now. */ - TCP_SKB_CB(skb)->when = tcp_time_stamp; - - if (likely(!tcp_transmit_skb(sk, skb, 1, sk->sk_allocation))) { - tcp_event_new_data_sent(sk, skb); - tcp_cwnd_validate(sk); - return; - } - } + tcp_write_xmit(sk, mss_now, TCP_NAGLE_PUSH, 1, sk->sk_allocation); } /* This function returns the amount that we can raise the @@ -1767,46 +1752,22 @@ u32 __tcp_select_window(struct sock *sk) return window; } -/* Attempt to collapse two adjacent SKB's during retransmission. */ -static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, - int mss_now) +/* Collapses two adjacent SKB's during retransmission. */ +static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *next_skb = tcp_write_queue_next(sk, skb); int skb_size, next_skb_size; u16 flags; - /* The first test we must make is that neither of these two - * SKB's are still referenced by someone else. - */ - if (skb_cloned(skb) || skb_cloned(next_skb)) - return; - skb_size = skb->len; next_skb_size = next_skb->len; flags = TCP_SKB_CB(skb)->flags; - /* Also punt if next skb has been SACK'd. */ - if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED) - return; - - /* Next skb is out of window. */ - if (after(TCP_SKB_CB(next_skb)->end_seq, tcp_wnd_end(tp))) - return; - - /* Punt if not enough space exists in the first SKB for - * the data in the second, or the total combined payload - * would exceed the MSS. - */ - if ((next_skb_size > skb_tailroom(skb)) || - ((skb_size + next_skb_size) > mss_now)) - return; - BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1); tcp_highest_sack_combine(sk, next_skb, skb); - /* Ok. We will be able to collapse the packet. */ tcp_unlink_write_queue(next_skb, sk); skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size), @@ -1848,54 +1809,60 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, sk_wmem_free_skb(sk, next_skb); } -/* Do a simple retransmit without using the backoff mechanisms in - * tcp_timer. This is used for path mtu discovery. - * The socket is already locked here. - */ -void tcp_simple_retransmit(struct sock *sk) +static int tcp_can_collapse(struct sock *sk, struct sk_buff *skb) +{ + if (tcp_skb_pcount(skb) > 1) + return 0; + /* TODO: SACK collapsing could be used to remove this condition */ + if (skb_shinfo(skb)->nr_frags != 0) + return 0; + if (skb_cloned(skb)) + return 0; + if (skb == tcp_send_head(sk)) + return 0; + /* Some heurestics for collapsing over SACK'd could be invented */ + if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) + return 0; + + return 1; +} + +static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, + int space) { - const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb; - unsigned int mss = tcp_current_mss(sk, 0); - u32 prior_lost = tp->lost_out; + struct sk_buff *skb = to, *tmp; + int first = 1; - tcp_for_write_queue(skb, sk) { - if (skb == tcp_send_head(sk)) + if (!sysctl_tcp_retrans_collapse) + return; + if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) + return; + + tcp_for_write_queue_from_safe(skb, tmp, sk) { + if (!tcp_can_collapse(sk, skb)) break; - if (skb->len > mss && - !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { - if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { - TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; - tp->retrans_out -= tcp_skb_pcount(skb); - } - tcp_skb_mark_lost_uncond_verify(tp, skb); - } - } - tcp_clear_retrans_hints_partial(tp); + space -= skb->len; - if (prior_lost == tp->lost_out) - return; + if (first) { + first = 0; + continue; + } - if (tcp_is_reno(tp)) - tcp_limit_reno_sacked(tp); + if (space < 0) + break; + /* Punt if not enough space exists in the first SKB for + * the data in the second + */ + if (skb->len > skb_tailroom(to)) + break; - tcp_verify_left_out(tp); + if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp))) + break; - /* Don't muck with the congestion window here. - * Reason is that we do not increase amount of _data_ - * in network, but units changed and effective - * cwnd/ssthresh really reduced now. - */ - if (icsk->icsk_ca_state != TCP_CA_Loss) { - tp->high_seq = tp->snd_nxt; - tp->snd_ssthresh = tcp_current_ssthresh(sk); - tp->prior_ssthresh = 0; - tp->undo_marker = 0; - tcp_set_ca_state(sk, TCP_CA_Loss); + tcp_collapse_retrans(sk, to); } - tcp_xmit_retransmit_queue(sk); } /* This retransmits one SKB. Policy decisions and retransmit queue @@ -1947,17 +1914,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) return -ENOMEM; /* We'll try again later. */ } - /* Collapse two adjacent packets if worthwhile and we can. */ - if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) && - (skb->len < (cur_mss >> 1)) && - (!tcp_skb_is_last(sk, skb)) && - (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) && - (skb_shinfo(skb)->nr_frags == 0 && - skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) && - (tcp_skb_pcount(skb) == 1 && - tcp_skb_pcount(tcp_write_queue_next(sk, skb)) == 1) && - (sysctl_tcp_retrans_collapse != 0)) - tcp_retrans_try_collapse(sk, skb, cur_mss); + tcp_retrans_try_collapse(sk, skb, cur_mss); /* Some Solaris stacks overoptimize and ignore the FIN on a * retransmit when old data is attached. So strip it off diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 7ddc30f0744..25524d4e372 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -153,12 +153,11 @@ static int tcpprobe_sprint(char *tbuf, int n) = ktime_to_timespec(ktime_sub(p->tstamp, tcp_probe.start)); return snprintf(tbuf, n, - "%lu.%09lu " NIPQUAD_FMT ":%u " NIPQUAD_FMT ":%u" - " %d %#x %#x %u %u %u %u\n", + "%lu.%09lu %pI4:%u %pI4:%u %d %#x %#x %u %u %u %u\n", (unsigned long) tv.tv_sec, (unsigned long) tv.tv_nsec, - NIPQUAD(p->saddr), ntohs(p->sport), - NIPQUAD(p->daddr), ntohs(p->dport), + &p->saddr, ntohs(p->sport), + &p->daddr, ntohs(p->dport), p->length, p->snd_nxt, p->snd_una, p->snd_cwnd, p->ssthresh, p->snd_wnd, p->srtt); } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 6b6dff1164b..0170e914f1b 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -65,7 +65,7 @@ static void tcp_write_err(struct sock *sk) static int tcp_out_of_resources(struct sock *sk, int do_reset) { struct tcp_sock *tp = tcp_sk(sk); - int orphans = atomic_read(&tcp_orphan_count); + int orphans = percpu_counter_read_positive(&tcp_orphan_count); /* If peer does not open window for long time, or did not transmit * anything for long time, penalize it. */ @@ -171,7 +171,7 @@ static int tcp_write_timeout(struct sock *sk) static void tcp_delack_timer(unsigned long data) { - struct sock *sk = (struct sock*)data; + struct sock *sk = (struct sock *)data; struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); @@ -299,15 +299,15 @@ static void tcp_retransmit_timer(struct sock *sk) #ifdef TCP_DEBUG struct inet_sock *inet = inet_sk(sk); if (sk->sk_family == AF_INET) { - LIMIT_NETDEBUG(KERN_DEBUG "TCP: Treason uncloaked! Peer " NIPQUAD_FMT ":%u/%u shrinks window %u:%u. Repaired.\n", - NIPQUAD(inet->daddr), ntohs(inet->dport), + LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n", + &inet->daddr, ntohs(inet->dport), inet->num, tp->snd_una, tp->snd_nxt); } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) else if (sk->sk_family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); - LIMIT_NETDEBUG(KERN_DEBUG "TCP: Treason uncloaked! Peer " NIP6_FMT ":%u/%u shrinks window %u:%u. Repaired.\n", - NIP6(np->daddr), ntohs(inet->dport), + LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n", + &np->daddr, ntohs(inet->dport), inet->num, tp->snd_una, tp->snd_nxt); } #endif @@ -396,7 +396,7 @@ out:; static void tcp_write_timer(unsigned long data) { - struct sock *sk = (struct sock*)data; + struct sock *sk = (struct sock *)data; struct inet_connection_sock *icsk = inet_csk(sk); int event; diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 7cd22262de3..a453aac91bd 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -40,18 +40,14 @@ #include "tcp_vegas.h" -/* Default values of the Vegas variables, in fixed-point representation - * with V_PARAM_SHIFT bits to the right of the binary point. - */ -#define V_PARAM_SHIFT 1 -static int alpha = 2<<V_PARAM_SHIFT; -static int beta = 4<<V_PARAM_SHIFT; -static int gamma = 1<<V_PARAM_SHIFT; +static int alpha = 2; +static int beta = 4; +static int gamma = 1; module_param(alpha, int, 0644); -MODULE_PARM_DESC(alpha, "lower bound of packets in network (scale by 2)"); +MODULE_PARM_DESC(alpha, "lower bound of packets in network"); module_param(beta, int, 0644); -MODULE_PARM_DESC(beta, "upper bound of packets in network (scale by 2)"); +MODULE_PARM_DESC(beta, "upper bound of packets in network"); module_param(gamma, int, 0644); MODULE_PARM_DESC(gamma, "limit on increase (scale by 2)"); @@ -172,49 +168,13 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) return; } - /* The key players are v_beg_snd_una and v_beg_snd_nxt. - * - * These are so named because they represent the approximate values - * of snd_una and snd_nxt at the beginning of the current RTT. More - * precisely, they represent the amount of data sent during the RTT. - * At the end of the RTT, when we receive an ACK for v_beg_snd_nxt, - * we will calculate that (v_beg_snd_nxt - v_beg_snd_una) outstanding - * bytes of data have been ACKed during the course of the RTT, giving - * an "actual" rate of: - * - * (v_beg_snd_nxt - v_beg_snd_una) / (rtt duration) - * - * Unfortunately, v_beg_snd_una is not exactly equal to snd_una, - * because delayed ACKs can cover more than one segment, so they - * don't line up nicely with the boundaries of RTTs. - * - * Another unfortunate fact of life is that delayed ACKs delay the - * advance of the left edge of our send window, so that the number - * of bytes we send in an RTT is often less than our cwnd will allow. - * So we keep track of our cwnd separately, in v_beg_snd_cwnd. - */ - if (after(ack, vegas->beg_snd_nxt)) { /* Do the Vegas once-per-RTT cwnd adjustment. */ - u32 old_wnd, old_snd_cwnd; - - - /* Here old_wnd is essentially the window of data that was - * sent during the previous RTT, and has all - * been acknowledged in the course of the RTT that ended - * with the ACK we just received. Likewise, old_snd_cwnd - * is the cwnd during the previous RTT. - */ - old_wnd = (vegas->beg_snd_nxt - vegas->beg_snd_una) / - tp->mss_cache; - old_snd_cwnd = vegas->beg_snd_cwnd; /* Save the extent of the current window so we can use this * at the end of the next RTT. */ - vegas->beg_snd_una = vegas->beg_snd_nxt; vegas->beg_snd_nxt = tp->snd_nxt; - vegas->beg_snd_cwnd = tp->snd_cwnd; /* We do the Vegas calculations only if we got enough RTT * samples that we can be reasonably sure that we got @@ -252,22 +212,14 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) * * This is: * (actual rate in segments) * baseRTT - * We keep it as a fixed point number with - * V_PARAM_SHIFT bits to the right of the binary point. */ - target_cwnd = ((u64)old_wnd * vegas->baseRTT); - target_cwnd <<= V_PARAM_SHIFT; - do_div(target_cwnd, rtt); + target_cwnd = tp->snd_cwnd * vegas->baseRTT / rtt; /* Calculate the difference between the window we had, * and the window we would like to have. This quantity * is the "Diff" from the Arizona Vegas papers. - * - * Again, this is a fixed point number with - * V_PARAM_SHIFT bits to the right of the binary - * point. */ - diff = (old_wnd << V_PARAM_SHIFT) - target_cwnd; + diff = tp->snd_cwnd * (rtt-vegas->baseRTT) / vegas->baseRTT; if (diff > gamma && tp->snd_ssthresh > 2 ) { /* Going too fast. Time to slow down @@ -282,16 +234,13 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) * truncation robs us of full link * utilization. */ - tp->snd_cwnd = min(tp->snd_cwnd, - ((u32)target_cwnd >> - V_PARAM_SHIFT)+1); + tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1); } else if (tp->snd_cwnd <= tp->snd_ssthresh) { /* Slow start. */ tcp_slow_start(tp); } else { /* Congestion avoidance. */ - u32 next_snd_cwnd; /* Figure out where we would like cwnd * to be. @@ -300,26 +249,17 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) /* The old window was too fast, so * we slow down. */ - next_snd_cwnd = old_snd_cwnd - 1; + tp->snd_cwnd--; } else if (diff < alpha) { /* We don't have enough extra packets * in the network, so speed up. */ - next_snd_cwnd = old_snd_cwnd + 1; + tp->snd_cwnd++; } else { /* Sending just as fast as we * should be. */ - next_snd_cwnd = old_snd_cwnd; } - - /* Adjust cwnd upward or downward, toward the - * desired value. - */ - if (next_snd_cwnd > tp->snd_cwnd) - tp->snd_cwnd++; - else if (next_snd_cwnd < tp->snd_cwnd) - tp->snd_cwnd--; } if (tp->snd_cwnd < 2) diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index e03b10183a8..9ec843a9bbb 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -83,7 +83,7 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) else if (!yeah->doing_reno_now) { /* Scalable */ - tp->snd_cwnd_cnt+=yeah->pkts_acked; + tp->snd_cwnd_cnt += yeah->pkts_acked; if (tp->snd_cwnd_cnt > min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)){ if (tp->snd_cwnd < tp->snd_cwnd_clamp) tp->snd_cwnd++; @@ -224,7 +224,7 @@ static u32 tcp_yeah_ssthresh(struct sock *sk) { reduction = max( reduction, tp->snd_cwnd >> TCP_YEAH_DELTA); } else - reduction = max(tp->snd_cwnd>>1,2U); + reduction = max(tp->snd_cwnd>>1, 2U); yeah->fast_count = 0; yeah->reno_count = max(yeah->reno_count>>1, 2U); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 98c1fd09be8..cf5ab0581eb 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -81,6 +81,8 @@ #include <asm/uaccess.h> #include <asm/ioctls.h> #include <linux/bootmem.h> +#include <linux/highmem.h> +#include <linux/swap.h> #include <linux/types.h> #include <linux/fcntl.h> #include <linux/module.h> @@ -104,12 +106,8 @@ #include <net/xfrm.h> #include "udp_impl.h" -/* - * Snmp MIB for the UDP layer - */ - -struct hlist_head udp_hash[UDP_HTABLE_SIZE]; -DEFINE_RWLOCK(udp_hash_lock); +struct udp_table udp_table; +EXPORT_SYMBOL(udp_table); int sysctl_udp_mem[3] __read_mostly; int sysctl_udp_rmem_min __read_mostly; @@ -123,15 +121,15 @@ atomic_t udp_memory_allocated; EXPORT_SYMBOL(udp_memory_allocated); static int udp_lib_lport_inuse(struct net *net, __u16 num, - const struct hlist_head udptable[], + const struct udp_hslot *hslot, struct sock *sk, int (*saddr_comp)(const struct sock *sk1, const struct sock *sk2)) { struct sock *sk2; - struct hlist_node *node; + struct hlist_nulls_node *node; - sk_for_each(sk2, node, &udptable[udp_hashfn(net, num)]) + sk_nulls_for_each(sk2, node, &hslot->head) if (net_eq(sock_net(sk2), net) && sk2 != sk && sk2->sk_hash == num && @@ -154,12 +152,11 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, int (*saddr_comp)(const struct sock *sk1, const struct sock *sk2 ) ) { - struct hlist_head *udptable = sk->sk_prot->h.udp_hash; + struct udp_hslot *hslot; + struct udp_table *udptable = sk->sk_prot->h.udp_table; int error = 1; struct net *net = sock_net(sk); - write_lock_bh(&udp_hash_lock); - if (!snum) { int low, high, remaining; unsigned rand; @@ -171,26 +168,34 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, rand = net_random(); snum = first = rand % remaining + low; rand |= 1; - while (udp_lib_lport_inuse(net, snum, udptable, sk, - saddr_comp)) { + for (;;) { + hslot = &udptable->hash[udp_hashfn(net, snum)]; + spin_lock_bh(&hslot->lock); + if (!udp_lib_lport_inuse(net, snum, hslot, sk, saddr_comp)) + break; + spin_unlock_bh(&hslot->lock); do { snum = snum + rand; } while (snum < low || snum > high); if (snum == first) goto fail; } - } else if (udp_lib_lport_inuse(net, snum, udptable, sk, saddr_comp)) - goto fail; - + } else { + hslot = &udptable->hash[udp_hashfn(net, snum)]; + spin_lock_bh(&hslot->lock); + if (udp_lib_lport_inuse(net, snum, hslot, sk, saddr_comp)) + goto fail_unlock; + } inet_sk(sk)->num = snum; sk->sk_hash = snum; if (sk_unhashed(sk)) { - sk_add_node(sk, &udptable[udp_hashfn(net, snum)]); + sk_nulls_add_node_rcu(sk, &hslot->head); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); } error = 0; +fail_unlock: + spin_unlock_bh(&hslot->lock); fail: - write_unlock_bh(&udp_hash_lock); return error; } @@ -208,63 +213,91 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum) return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal); } +static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr, + unsigned short hnum, + __be16 sport, __be32 daddr, __be16 dport, int dif) +{ + int score = -1; + + if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && + !ipv6_only_sock(sk)) { + struct inet_sock *inet = inet_sk(sk); + + score = (sk->sk_family == PF_INET ? 1 : 0); + if (inet->rcv_saddr) { + if (inet->rcv_saddr != daddr) + return -1; + score += 2; + } + if (inet->daddr) { + if (inet->daddr != saddr) + return -1; + score += 2; + } + if (inet->dport) { + if (inet->dport != sport) + return -1; + score += 2; + } + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + return -1; + score += 2; + } + } + return score; +} + /* UDP is nearly always wildcards out the wazoo, it makes no sense to try * harder than this. -DaveM */ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, - int dif, struct hlist_head udptable[]) + int dif, struct udp_table *udptable) { - struct sock *sk, *result = NULL; - struct hlist_node *node; + struct sock *sk, *result; + struct hlist_nulls_node *node; unsigned short hnum = ntohs(dport); - int badness = -1; - - read_lock(&udp_hash_lock); - sk_for_each(sk, node, &udptable[udp_hashfn(net, hnum)]) { - struct inet_sock *inet = inet_sk(sk); - - if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && - !ipv6_only_sock(sk)) { - int score = (sk->sk_family == PF_INET ? 1 : 0); - if (inet->rcv_saddr) { - if (inet->rcv_saddr != daddr) - continue; - score+=2; - } - if (inet->daddr) { - if (inet->daddr != saddr) - continue; - score+=2; - } - if (inet->dport) { - if (inet->dport != sport) - continue; - score+=2; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - continue; - score+=2; - } - if (score == 9) { - result = sk; - break; - } else if (score > badness) { - result = sk; - badness = score; - } + unsigned int hash = udp_hashfn(net, hnum); + struct udp_hslot *hslot = &udptable->hash[hash]; + int score, badness; + + rcu_read_lock(); +begin: + result = NULL; + badness = -1; + sk_nulls_for_each_rcu(sk, node, &hslot->head) { + score = compute_score(sk, net, saddr, hnum, sport, + daddr, dport, dif); + if (score > badness) { + result = sk; + badness = score; } } - if (result) - sock_hold(result); - read_unlock(&udp_hash_lock); + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(node) != hash) + goto begin; + + if (result) { + if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) + result = NULL; + else if (unlikely(compute_score(result, net, saddr, hnum, sport, + daddr, dport, dif) < badness)) { + sock_put(result); + goto begin; + } + } + rcu_read_unlock(); return result; } static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport, - struct hlist_head udptable[]) + struct udp_table *udptable) { struct sock *sk; const struct iphdr *iph = ip_hdr(skb); @@ -280,7 +313,7 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif) { - return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, udp_hash); + return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table); } EXPORT_SYMBOL_GPL(udp4_lib_lookup); @@ -289,11 +322,11 @@ static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk, __be16 rmt_port, __be32 rmt_addr, int dif) { - struct hlist_node *node; + struct hlist_nulls_node *node; struct sock *s = sk; unsigned short hnum = ntohs(loc_port); - sk_for_each_from(s, node) { + sk_nulls_for_each_from(s, node) { struct inet_sock *inet = inet_sk(s); if (!net_eq(sock_net(s), net) || @@ -324,7 +357,7 @@ found: * to find the appropriate port. */ -void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[]) +void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) { struct inet_sock *inet; struct iphdr *iph = (struct iphdr*)skb->data; @@ -393,7 +426,7 @@ out: void udp_err(struct sk_buff *skb, u32 info) { - __udp4_lib_err(skb, info, udp_hash); + __udp4_lib_err(skb, info, &udp_table); } /* @@ -686,7 +719,7 @@ do_append_data: up->len += ulen; getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; err = ip_append_data(sk, getfrag, msg->msg_iov, ulen, - sizeof(struct udphdr), &ipc, rt, + sizeof(struct udphdr), &ipc, &rt, corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); if (err) udp_flush_pending_frames(sk); @@ -935,6 +968,23 @@ int udp_disconnect(struct sock *sk, int flags) return 0; } +void udp_lib_unhash(struct sock *sk) +{ + if (sk_hashed(sk)) { + struct udp_table *udptable = sk->sk_prot->h.udp_table; + unsigned int hash = udp_hashfn(sock_net(sk), sk->sk_hash); + struct udp_hslot *hslot = &udptable->hash[hash]; + + spin_lock_bh(&hslot->lock); + if (sk_nulls_del_node_init_rcu(sk)) { + inet_sk(sk)->num = 0; + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + } + spin_unlock_bh(&hslot->lock); + } +} +EXPORT_SYMBOL(udp_lib_unhash); + static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int is_udplite = IS_UDPLITE(sk); @@ -1073,13 +1123,14 @@ drop: static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, struct udphdr *uh, __be32 saddr, __be32 daddr, - struct hlist_head udptable[]) + struct udp_table *udptable) { struct sock *sk; + struct udp_hslot *hslot = &udptable->hash[udp_hashfn(net, ntohs(uh->dest))]; int dif; - read_lock(&udp_hash_lock); - sk = sk_head(&udptable[udp_hashfn(net, ntohs(uh->dest))]); + spin_lock(&hslot->lock); + sk = sk_nulls_head(&hslot->head); dif = skb->dev->ifindex; sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); if (sk) { @@ -1088,7 +1139,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, do { struct sk_buff *skb1 = skb; - sknext = udp_v4_mcast_next(net, sk_next(sk), uh->dest, + sknext = udp_v4_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr, uh->source, saddr, dif); if (sknext) @@ -1105,7 +1156,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, } while (sknext); } else kfree_skb(skb); - read_unlock(&udp_hash_lock); + spin_unlock(&hslot->lock); return 0; } @@ -1151,7 +1202,7 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, * All we need to do is get the socket, and then do a checksum. */ -int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], +int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, int proto) { struct sock *sk; @@ -1219,13 +1270,13 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], return 0; short_packet: - LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From " NIPQUAD_FMT ":%u %d/%d to " NIPQUAD_FMT ":%u\n", + LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%u\n", proto == IPPROTO_UDPLITE ? "-Lite" : "", - NIPQUAD(saddr), + &saddr, ntohs(uh->source), ulen, skb->len, - NIPQUAD(daddr), + &daddr, ntohs(uh->dest)); goto drop; @@ -1234,11 +1285,11 @@ csum_error: * RFC1122: OK. Discards the bad packet silently (as far as * the network is concerned, anyway) as per 4.1.3.4 (MUST). */ - LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From " NIPQUAD_FMT ":%u to " NIPQUAD_FMT ":%u ulen %d\n", + LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %pI4:%u to %pI4:%u ulen %d\n", proto == IPPROTO_UDPLITE ? "-Lite" : "", - NIPQUAD(saddr), + &saddr, ntohs(uh->source), - NIPQUAD(daddr), + &daddr, ntohs(uh->dest), ulen); drop: @@ -1249,7 +1300,7 @@ drop: int udp_rcv(struct sk_buff *skb) { - return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP); + return __udp4_lib_rcv(skb, &udp_table, IPPROTO_UDP); } void udp_destroy_sock(struct sock *sk) @@ -1491,7 +1542,8 @@ struct proto udp_prot = { .sysctl_wmem = &sysctl_udp_wmem_min, .sysctl_rmem = &sysctl_udp_rmem_min, .obj_size = sizeof(struct udp_sock), - .h.udp_hash = udp_hash, + .slab_flags = SLAB_DESTROY_BY_RCU, + .h.udp_table = &udp_table, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udp_setsockopt, .compat_getsockopt = compat_udp_getsockopt, @@ -1501,20 +1553,23 @@ struct proto udp_prot = { /* ------------------------------------------------------------------------ */ #ifdef CONFIG_PROC_FS -static struct sock *udp_get_first(struct seq_file *seq) +static struct sock *udp_get_first(struct seq_file *seq, int start) { struct sock *sk; struct udp_iter_state *state = seq->private; struct net *net = seq_file_net(seq); - for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { - struct hlist_node *node; - sk_for_each(sk, node, state->hashtable + state->bucket) { + for (state->bucket = start; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { + struct hlist_nulls_node *node; + struct udp_hslot *hslot = &state->udp_table->hash[state->bucket]; + spin_lock_bh(&hslot->lock); + sk_nulls_for_each(sk, node, &hslot->head) { if (!net_eq(sock_net(sk), net)) continue; if (sk->sk_family == state->family) goto found; } + spin_unlock_bh(&hslot->lock); } sk = NULL; found: @@ -1527,21 +1582,19 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) struct net *net = seq_file_net(seq); do { - sk = sk_next(sk); -try_again: - ; + sk = sk_nulls_next(sk); } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); - if (!sk && ++state->bucket < UDP_HTABLE_SIZE) { - sk = sk_head(state->hashtable + state->bucket); - goto try_again; + if (!sk) { + spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); + return udp_get_first(seq, state->bucket + 1); } return sk; } static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos) { - struct sock *sk = udp_get_first(seq); + struct sock *sk = udp_get_first(seq, 0); if (sk) while (pos && (sk = udp_get_next(seq, sk)) != NULL) @@ -1550,9 +1603,7 @@ static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos) } static void *udp_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(udp_hash_lock) { - read_lock(&udp_hash_lock); return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN; } @@ -1570,9 +1621,11 @@ static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void udp_seq_stop(struct seq_file *seq, void *v) - __releases(udp_hash_lock) { - read_unlock(&udp_hash_lock); + struct udp_iter_state *state = seq->private; + + if (state->bucket < UDP_HTABLE_SIZE) + spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); } static int udp_seq_open(struct inode *inode, struct file *file) @@ -1588,7 +1641,7 @@ static int udp_seq_open(struct inode *inode, struct file *file) s = ((struct seq_file *)file->private_data)->private; s->family = afinfo->family; - s->hashtable = afinfo->hashtable; + s->udp_table = afinfo->udp_table; return err; } @@ -1660,7 +1713,7 @@ int udp4_seq_show(struct seq_file *seq, void *v) static struct udp_seq_afinfo udp4_seq_afinfo = { .name = "udp", .family = AF_INET, - .hashtable = udp_hash, + .udp_table = &udp_table, .seq_fops = { .owner = THIS_MODULE, }, @@ -1695,16 +1748,28 @@ void udp4_proc_exit(void) } #endif /* CONFIG_PROC_FS */ +void __init udp_table_init(struct udp_table *table) +{ + int i; + + for (i = 0; i < UDP_HTABLE_SIZE; i++) { + INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i); + spin_lock_init(&table->hash[i].lock); + } +} + void __init udp_init(void) { - unsigned long limit; + unsigned long nr_pages, limit; + udp_table_init(&udp_table); /* Set the pressure threshold up by the same strategy of TCP. It is a * fraction of global memory that is up to 1/2 at 256 MB, decreasing * toward zero with the amount of memory, with a floor of 128 pages. */ - limit = min(nr_all_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); - limit = (limit * (nr_all_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); + nr_pages = totalram_pages - totalhigh_pages; + limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); + limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); limit = max(limit, 128UL); sysctl_udp_mem[0] = limit / 4 * 3; sysctl_udp_mem[1] = limit; @@ -1715,8 +1780,6 @@ void __init udp_init(void) } EXPORT_SYMBOL(udp_disconnect); -EXPORT_SYMBOL(udp_hash); -EXPORT_SYMBOL(udp_hash_lock); EXPORT_SYMBOL(udp_ioctl); EXPORT_SYMBOL(udp_prot); EXPORT_SYMBOL(udp_sendmsg); diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index 2e9bad2fa1b..9f4a6165f72 100644 --- a/net/ipv4/udp_impl.h +++ b/net/ipv4/udp_impl.h @@ -5,8 +5,8 @@ #include <net/protocol.h> #include <net/inet_common.h> -extern int __udp4_lib_rcv(struct sk_buff *, struct hlist_head [], int ); -extern void __udp4_lib_err(struct sk_buff *, u32, struct hlist_head []); +extern int __udp4_lib_rcv(struct sk_buff *, struct udp_table *, int ); +extern void __udp4_lib_err(struct sk_buff *, u32, struct udp_table *); extern int udp_v4_get_port(struct sock *sk, unsigned short snum); diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 3c807964da9..c784891cb7e 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -12,16 +12,17 @@ */ #include "udp_impl.h" -struct hlist_head udplite_hash[UDP_HTABLE_SIZE]; +struct udp_table udplite_table; +EXPORT_SYMBOL(udplite_table); static int udplite_rcv(struct sk_buff *skb) { - return __udp4_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE); + return __udp4_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE); } static void udplite_err(struct sk_buff *skb, u32 info) { - __udp4_lib_err(skb, info, udplite_hash); + __udp4_lib_err(skb, info, &udplite_table); } static struct net_protocol udplite_protocol = { @@ -50,7 +51,8 @@ struct proto udplite_prot = { .unhash = udp_lib_unhash, .get_port = udp_v4_get_port, .obj_size = sizeof(struct udp_sock), - .h.udp_hash = udplite_hash, + .slab_flags = SLAB_DESTROY_BY_RCU, + .h.udp_table = &udplite_table, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udp_setsockopt, .compat_getsockopt = compat_udp_getsockopt, @@ -71,7 +73,7 @@ static struct inet_protosw udplite4_protosw = { static struct udp_seq_afinfo udplite4_seq_afinfo = { .name = "udplite", .family = AF_INET, - .hashtable = udplite_hash, + .udp_table = &udplite_table, .seq_fops = { .owner = THIS_MODULE, }, @@ -108,6 +110,7 @@ static inline int udplite4_proc_init(void) void __init udplite4_register(void) { + udp_table_init(&udplite_table); if (proto_register(&udplite_prot, 1)) goto out_register_err; @@ -126,5 +129,4 @@ out_register_err: printk(KERN_CRIT "%s: Cannot add UDP-Lite protocol.\n", __func__); } -EXPORT_SYMBOL(udplite_hash); EXPORT_SYMBOL(udplite_prot); diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 390dcb1354a..4ec2162a437 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -78,7 +78,6 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) struct udphdr *uh; struct iphdr *iph; int iphlen, len; - int ret; __u8 *udpdata; __be32 *udpdata32; @@ -152,8 +151,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) skb_reset_transport_header(skb); /* process ESP */ - ret = xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, encap_type); - return ret; + return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, encap_type); drop: kfree_skb(skb); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index c63de0a72ab..2ad24ba31f9 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -18,7 +18,8 @@ static struct dst_ops xfrm4_dst_ops; static struct xfrm_policy_afinfo xfrm4_policy_afinfo; -static struct dst_entry *xfrm4_dst_lookup(int tos, xfrm_address_t *saddr, +static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, + xfrm_address_t *saddr, xfrm_address_t *daddr) { struct flowi fl = { @@ -36,19 +37,20 @@ static struct dst_entry *xfrm4_dst_lookup(int tos, xfrm_address_t *saddr, if (saddr) fl.fl4_src = saddr->a4; - err = __ip_route_output_key(&init_net, &rt, &fl); + err = __ip_route_output_key(net, &rt, &fl); dst = &rt->u.dst; if (err) dst = ERR_PTR(err); return dst; } -static int xfrm4_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr) +static int xfrm4_get_saddr(struct net *net, + xfrm_address_t *saddr, xfrm_address_t *daddr) { struct dst_entry *dst; struct rtable *rt; - dst = xfrm4_dst_lookup(0, NULL, daddr); + dst = xfrm4_dst_lookup(net, 0, NULL, daddr); if (IS_ERR(dst)) return -EHOSTUNREACH; @@ -65,7 +67,7 @@ __xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy) read_lock_bh(&policy->lock); for (dst = policy->bundles; dst; dst = dst->next) { - struct xfrm_dst *xdst = (struct xfrm_dst*)dst; + struct xfrm_dst *xdst = (struct xfrm_dst *)dst; if (xdst->u.rt.fl.oif == fl->oif && /*XXX*/ xdst->u.rt.fl.fl4_dst == fl->fl4_dst && xdst->u.rt.fl.fl4_src == fl->fl4_src && @@ -187,7 +189,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) static inline int xfrm4_garbage_collect(struct dst_ops *ops) { - xfrm4_policy_afinfo.garbage_collect(); + xfrm4_policy_afinfo.garbage_collect(&init_net); return (atomic_read(&xfrm4_dst_ops.entries) > xfrm4_dst_ops.gc_thresh*2); } @@ -246,7 +248,6 @@ static struct dst_ops xfrm4_dst_ops = { .ifdown = xfrm4_dst_ifdown, .local_out = __ip_local_out, .gc_thresh = 1024, - .entry_size = sizeof(struct xfrm_dst), .entries = ATOMIC_INIT(0), }; diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 55dc6beab9a..1ef1366a0a0 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -13,8 +13,6 @@ #include <linux/ipsec.h> #include <linux/netfilter_ipv4.h> -static struct xfrm_state_afinfo xfrm4_state_afinfo; - static int xfrm4_init_flags(struct xfrm_state *x) { if (ipv4_config.no_pmtu_disc) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d9da5eb9dcb..e92ad8455c6 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2031,8 +2031,8 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg) #if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) if (dev->type == ARPHRD_SIT) { + const struct net_device_ops *ops = dev->netdev_ops; struct ifreq ifr; - mm_segment_t oldfs; struct ip_tunnel_parm p; err = -EADDRNOTAVAIL; @@ -2048,9 +2048,14 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg) p.iph.ttl = 64; ifr.ifr_ifru.ifru_data = (__force void __user *)&p; - oldfs = get_fs(); set_fs(KERNEL_DS); - err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL); - set_fs(oldfs); + if (ops->ndo_do_ioctl) { + mm_segment_t oldfs = get_fs(); + + set_fs(KERNEL_DS); + err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL); + set_fs(oldfs); + } else + err = -EOPNOTSUPP; if (err == 0) { err = -ENOBUFS; @@ -2988,9 +2993,8 @@ static void if6_seq_stop(struct seq_file *seq, void *v) static int if6_seq_show(struct seq_file *seq, void *v) { struct inet6_ifaddr *ifp = (struct inet6_ifaddr *)v; - seq_printf(seq, - NIP6_SEQFMT " %02x %02x %02x %02x %8s\n", - NIP6(ifp->addr), + seq_printf(seq, "%pi6 %02x %02x %02x %02x %8s\n", + &ifp->addr, ifp->idev->dev->ifindex, ifp->prefix_len, ifp->scope, @@ -4033,8 +4037,8 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.forwarding, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &addrconf_sysctl_forward, - .strategy = &addrconf_sysctl_forward_strategy, + .proc_handler = addrconf_sysctl_forward, + .strategy = addrconf_sysctl_forward_strategy, }, { .ctl_name = NET_IPV6_HOP_LIMIT, @@ -4050,7 +4054,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.mtu6, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_ACCEPT_RA, @@ -4058,7 +4062,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.accept_ra, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_ACCEPT_REDIRECTS, @@ -4066,7 +4070,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.accept_redirects, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_AUTOCONF, @@ -4074,7 +4078,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.autoconf, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_DAD_TRANSMITS, @@ -4082,7 +4086,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.dad_transmits, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_RTR_SOLICITS, @@ -4090,7 +4094,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.rtr_solicits, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_RTR_SOLICIT_INTERVAL, @@ -4098,8 +4102,8 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.rtr_solicit_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV6_RTR_SOLICIT_DELAY, @@ -4107,8 +4111,8 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.rtr_solicit_delay, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV6_FORCE_MLD_VERSION, @@ -4116,7 +4120,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.force_mld_version, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, #ifdef CONFIG_IPV6_PRIVACY { @@ -4125,7 +4129,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.use_tempaddr, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_TEMP_VALID_LFT, @@ -4133,7 +4137,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.temp_valid_lft, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_TEMP_PREFERED_LFT, @@ -4141,7 +4145,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.temp_prefered_lft, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_REGEN_MAX_RETRY, @@ -4149,7 +4153,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.regen_max_retry, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_MAX_DESYNC_FACTOR, @@ -4157,7 +4161,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.max_desync_factor, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, #endif { @@ -4166,7 +4170,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.max_addresses, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_ACCEPT_RA_DEFRTR, @@ -4174,7 +4178,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.accept_ra_defrtr, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_ACCEPT_RA_PINFO, @@ -4182,7 +4186,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.accept_ra_pinfo, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, #ifdef CONFIG_IPV6_ROUTER_PREF { @@ -4191,7 +4195,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.accept_ra_rtr_pref, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_RTR_PROBE_INTERVAL, @@ -4199,8 +4203,8 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.rtr_probe_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, #ifdef CONFIG_IPV6_ROUTE_INFO { @@ -4209,7 +4213,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.accept_ra_rt_info_max_plen, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, #endif #endif @@ -4219,7 +4223,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.proxy_ndp, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_ACCEPT_SOURCE_ROUTE, @@ -4227,7 +4231,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.accept_source_route, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, #ifdef CONFIG_IPV6_OPTIMISTIC_DAD { @@ -4236,7 +4240,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.optimistic_dad, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, #endif @@ -4247,7 +4251,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.mc_forwarding, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, #endif { @@ -4256,7 +4260,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.disable_ipv6, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = CTL_UNNUMBERED, @@ -4264,7 +4268,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.accept_dad, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = 0, /* sentinel */ diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 08909039d87..6ff73c4c126 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -186,10 +186,8 @@ u32 ipv6_addr_label(struct net *net, label = p ? p->label : IPV6_ADDR_LABEL_DEFAULT; rcu_read_unlock(); - ADDRLABEL(KERN_DEBUG "%s(addr=" NIP6_FMT ", type=%d, ifindex=%d) => %08x\n", - __func__, - NIP6(*addr), type, ifindex, - label); + ADDRLABEL(KERN_DEBUG "%s(addr=%pI6, type=%d, ifindex=%d) => %08x\n", + __func__, addr, type, ifindex, label); return label; } @@ -203,11 +201,8 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net, struct ip6addrlbl_entry *newp; int addrtype; - ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d, label=%u)\n", - __func__, - NIP6(*prefix), prefixlen, - ifindex, - (unsigned int)label); + ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u)\n", + __func__, prefix, prefixlen, ifindex, (unsigned int)label); addrtype = ipv6_addr_type(prefix) & (IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK); @@ -294,12 +289,9 @@ static int ip6addrlbl_add(struct net *net, struct ip6addrlbl_entry *newp; int ret = 0; - ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n", - __func__, - NIP6(*prefix), prefixlen, - ifindex, - (unsigned int)label, - replace); + ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n", + __func__, prefix, prefixlen, ifindex, (unsigned int)label, + replace); newp = ip6addrlbl_alloc(net, prefix, prefixlen, ifindex, label); if (IS_ERR(newp)) @@ -321,10 +313,8 @@ static int __ip6addrlbl_del(struct net *net, struct hlist_node *pos, *n; int ret = -ESRCH; - ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d)\n", - __func__, - NIP6(*prefix), prefixlen, - ifindex); + ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", + __func__, prefix, prefixlen, ifindex); hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) { if (p->prefixlen == prefixlen && @@ -347,10 +337,8 @@ static int ip6addrlbl_del(struct net *net, struct in6_addr prefix_buf; int ret; - ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d)\n", - __func__, - NIP6(*prefix), prefixlen, - ifindex); + ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", + __func__, prefix, prefixlen, ifindex); ipv6_addr_prefix(&prefix_buf, prefix, prefixlen); spin_lock(&ip6addrlbl_table.lock); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 01edac88851..437b750b98f 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -637,7 +637,7 @@ int inet6_sk_rebuild_header(struct sock *sk) if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { + if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) { sk->sk_err_soft = -err; return err; } diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 2ff0c8233e4..52449f7a1b7 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -407,6 +407,7 @@ out: static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __be32 info) { + struct net *net = dev_net(skb->dev); struct ipv6hdr *iph = (struct ipv6hdr*)skb->data; struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+offset); struct xfrm_state *x; @@ -415,12 +416,12 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, type != ICMPV6_PKT_TOOBIG) return; - x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6); + x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6); if (!x) return; - NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/" NIP6_FMT "\n", - ntohl(ah->spi), NIP6(iph->daddr)); + NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/%pI6\n", + ntohl(ah->spi), &iph->daddr); xfrm_state_put(x); } @@ -509,9 +510,7 @@ static void ah6_destroy(struct xfrm_state *x) return; kfree(ahp->work_icv); - ahp->work_icv = NULL; crypto_free_hash(ahp->tfm); - ahp->tfm = NULL; kfree(ahp); } diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 8336cd81cb4..1ae58bec1de 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -512,11 +512,9 @@ static int ac6_seq_show(struct seq_file *seq, void *v) struct ifacaddr6 *im = (struct ifacaddr6 *)v; struct ac6_iter_state *state = ac6_seq_private(seq); - seq_printf(seq, - "%-4d %-15s " NIP6_SEQFMT " %5d\n", + seq_printf(seq, "%-4d %-15s %pi6 %5d\n", state->dev->ifindex, state->dev->name, - NIP6(im->aca_addr), - im->aca_users); + &im->aca_addr, im->aca_users); return 0; } diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index e44deb8d4df..e2bdc6d83a4 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -175,7 +175,8 @@ ipv4_connected: if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) { + err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); + if (err < 0) { if (err == -EREMOTE) err = ip6_dst_blackhole(sk, &dst, &fl); if (err < 0) diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index b181b08fb76..c2f250150db 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -356,6 +356,7 @@ static u32 esp6_get_mtu(struct xfrm_state *x, int mtu) static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __be32 info) { + struct net *net = dev_net(skb->dev); struct ipv6hdr *iph = (struct ipv6hdr*)skb->data; struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data + offset); struct xfrm_state *x; @@ -364,11 +365,11 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, type != ICMPV6_PKT_TOOBIG) return; - x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6); + x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6); if (!x) return; - printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/" NIP6_FMT "\n", - ntohl(esph->spi), NIP6(iph->daddr)); + printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%pI6\n", + ntohl(esph->spi), &iph->daddr); xfrm_state_put(x); } diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 6bfffec2371..1c7f400a3cf 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -219,7 +219,7 @@ static int ipv6_dest_hao(struct sk_buff *skb, int optoff) if (!(ipv6_addr_type(&hao->addr) & IPV6_ADDR_UNICAST)) { LIMIT_NETDEBUG( - KERN_DEBUG "hao is not an unicast addr: " NIP6_FMT "\n", NIP6(hao->addr)); + KERN_DEBUG "hao is not an unicast addr: %pI6\n", &hao->addr); goto discard; } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 9b7d19ae5ce..4f433847d95 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -233,7 +233,7 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct icmp6h->icmp6_cksum = 0; if (skb_queue_len(&sk->sk_write_queue) == 1) { - skb->csum = csum_partial((char *)icmp6h, + skb->csum = csum_partial(icmp6h, sizeof(struct icmp6hdr), skb->csum); icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst, @@ -246,7 +246,7 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct tmp_csum = csum_add(tmp_csum, skb->csum); } - tmp_csum = csum_partial((char *)icmp6h, + tmp_csum = csum_partial(icmp6h, sizeof(struct icmp6hdr), tmp_csum); icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst, @@ -427,7 +427,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, /* No need to clone since we're just using its address. */ dst2 = dst; - err = xfrm_lookup(&dst, &fl, sk, 0); + err = xfrm_lookup(net, &dst, &fl, sk, 0); switch (err) { case 0: if (dst != dst2) @@ -446,7 +446,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, if (ip6_dst_lookup(sk, &dst2, &fl)) goto relookup_failed; - err = xfrm_lookup(&dst2, &fl, sk, XFRM_LOOKUP_ICMP); + err = xfrm_lookup(net, &dst2, &fl, sk, XFRM_LOOKUP_ICMP); switch (err) { case 0: dst_release(dst); @@ -552,7 +552,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) err = ip6_dst_lookup(sk, &dst, &fl); if (err) goto out; - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) + if ((err = xfrm_lookup(net, &dst, &fl, sk, 0)) < 0) goto out; if (ipv6_addr_is_multicast(&fl.fl6_dst)) @@ -646,9 +646,10 @@ static int icmpv6_rcv(struct sk_buff *skb) int type; if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { + struct sec_path *sp = skb_sec_path(skb); int nh; - if (!(skb->sp && skb->sp->xvec[skb->sp->len - 1]->props.flags & + if (!(sp && sp->xvec[sp->len - 1]->props.flags & XFRM_STATE_ICMP)) goto drop_no_count; @@ -680,8 +681,8 @@ static int icmpv6_rcv(struct sk_buff *skb) skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, 0)); if (__skb_checksum_complete(skb)) { - LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [" NIP6_FMT " > " NIP6_FMT "]\n", - NIP6(*saddr), NIP6(*daddr)); + LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%pI6 > %pI6]\n", + saddr, daddr); goto discard_it; } } @@ -955,8 +956,8 @@ ctl_table ipv6_icmp_table_template[] = { .data = &init_net.ipv6.sysctl.icmpv6_time, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_ms_jiffies, - .strategy = &sysctl_ms_jiffies + .proc_handler = proc_dointvec_ms_jiffies, + .strategy = sysctl_ms_jiffies }, { .ctl_name = 0 }, }; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 16d43f20b32..3c3732d50c1 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -219,7 +219,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { + if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) { sk->sk_route_caps = 0; kfree_skb(skb); return err; diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 1646a565825..8fe267feb81 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -25,26 +25,30 @@ void __inet6_hash(struct sock *sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; - struct hlist_head *list; - rwlock_t *lock; WARN_ON(!sk_unhashed(sk)); if (sk->sk_state == TCP_LISTEN) { - list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; - lock = &hashinfo->lhash_lock; - inet_listen_wlock(hashinfo); + struct inet_listen_hashbucket *ilb; + + ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; + spin_lock(&ilb->lock); + __sk_nulls_add_node_rcu(sk, &ilb->head); + spin_unlock(&ilb->lock); } else { unsigned int hash; + struct hlist_nulls_head *list; + spinlock_t *lock; + sk->sk_hash = hash = inet6_sk_ehashfn(sk); list = &inet_ehash_bucket(hashinfo, hash)->chain; lock = inet_ehash_lockp(hashinfo, hash); - write_lock(lock); + spin_lock(lock); + __sk_nulls_add_node_rcu(sk, list); + spin_unlock(lock); } - __sk_add_node(sk, list); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - write_unlock(lock); } EXPORT_SYMBOL(__inet6_hash); @@ -63,77 +67,122 @@ struct sock *__inet6_lookup_established(struct net *net, const int dif) { struct sock *sk; - const struct hlist_node *node; + const struct hlist_nulls_node *node; const __portpair ports = INET_COMBINED_PORTS(sport, hnum); /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ unsigned int hash = inet6_ehashfn(net, daddr, hnum, saddr, sport); - struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); - rwlock_t *lock = inet_ehash_lockp(hashinfo, hash); + unsigned int slot = hash & (hashinfo->ehash_size - 1); + struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; - prefetch(head->chain.first); - read_lock(lock); - sk_for_each(sk, node, &head->chain) { + + rcu_read_lock(); +begin: + sk_nulls_for_each_rcu(sk, node, &head->chain) { /* For IPV6 do the cheaper port and family tests first. */ - if (INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) - goto hit; /* You sunk my battleship! */ + if (INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) { + if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) + goto begintw; + if (!INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) { + sock_put(sk); + goto begin; + } + goto out; + } } + if (get_nulls_value(node) != slot) + goto begin; + +begintw: /* Must check for a TIME_WAIT'er before going to listener hash. */ - sk_for_each(sk, node, &head->twchain) { - if (INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) - goto hit; + sk_nulls_for_each_rcu(sk, node, &head->twchain) { + if (INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) { + if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) { + sk = NULL; + goto out; + } + if (!INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) { + sock_put(sk); + goto begintw; + } + goto out; + } } - read_unlock(lock); - return NULL; - -hit: - sock_hold(sk); - read_unlock(lock); + if (get_nulls_value(node) != slot) + goto begintw; + sk = NULL; +out: + rcu_read_unlock(); return sk; } EXPORT_SYMBOL(__inet6_lookup_established); +static int inline compute_score(struct sock *sk, struct net *net, + const unsigned short hnum, + const struct in6_addr *daddr, + const int dif) +{ + int score = -1; + + if (net_eq(sock_net(sk), net) && inet_sk(sk)->num == hnum && + sk->sk_family == PF_INET6) { + const struct ipv6_pinfo *np = inet6_sk(sk); + + score = 1; + if (!ipv6_addr_any(&np->rcv_saddr)) { + if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) + return -1; + score++; + } + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + return -1; + score++; + } + } + return score; +} + struct sock *inet6_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, const struct in6_addr *daddr, const unsigned short hnum, const int dif) { struct sock *sk; - const struct hlist_node *node; - struct sock *result = NULL; - int score, hiscore = 0; - - read_lock(&hashinfo->lhash_lock); - sk_for_each(sk, node, - &hashinfo->listening_hash[inet_lhashfn(net, hnum)]) { - if (net_eq(sock_net(sk), net) && inet_sk(sk)->num == hnum && - sk->sk_family == PF_INET6) { - const struct ipv6_pinfo *np = inet6_sk(sk); - - score = 1; - if (!ipv6_addr_any(&np->rcv_saddr)) { - if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) - continue; - score++; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - continue; - score++; - } - if (score == 3) { - result = sk; - break; - } - if (score > hiscore) { - hiscore = score; - result = sk; - } + const struct hlist_nulls_node *node; + struct sock *result; + int score, hiscore; + unsigned int hash = inet_lhashfn(net, hnum); + struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; + + rcu_read_lock(); +begin: + result = NULL; + hiscore = -1; + sk_nulls_for_each(sk, node, &ilb->head) { + score = compute_score(sk, net, hnum, daddr, dif); + if (score > hiscore) { + hiscore = score; + result = sk; + } + } + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE) + goto begin; + if (result) { + if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) + result = NULL; + else if (unlikely(compute_score(result, net, hnum, daddr, + dif) < hiscore)) { + sock_put(result); + goto begin; } } - if (result) - sock_hold(result); - read_unlock(&hashinfo->lhash_lock); + rcu_read_unlock(); return result; } @@ -170,16 +219,15 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr, inet->dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); - rwlock_t *lock = inet_ehash_lockp(hinfo, hash); + spinlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; - const struct hlist_node *node; + const struct hlist_nulls_node *node; struct inet_timewait_sock *tw; - prefetch(head->chain.first); - write_lock(lock); + spin_lock(lock); /* Check TIME-WAIT sockets first. */ - sk_for_each(sk2, node, &head->twchain) { + sk_nulls_for_each(sk2, node, &head->twchain) { tw = inet_twsk(sk2); if (INET6_TW_MATCH(sk2, net, hash, saddr, daddr, ports, dif)) { @@ -192,7 +240,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, tw = NULL; /* And established part... */ - sk_for_each(sk2, node, &head->chain) { + sk_nulls_for_each(sk2, node, &head->chain) { if (INET6_MATCH(sk2, net, hash, saddr, daddr, ports, dif)) goto not_unique; } @@ -203,10 +251,10 @@ unique: inet->num = lport; inet->sport = htons(lport); WARN_ON(!sk_unhashed(sk)); - __sk_add_node(sk, &head->chain); + __sk_nulls_add_node_rcu(sk, &head->chain); sk->sk_hash = hash; + spin_unlock(lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - write_unlock(lock); if (twp != NULL) { *twp = tw; @@ -221,7 +269,7 @@ unique: return 0; not_unique: - write_unlock(lock); + spin_unlock(lock); return -EADDRNOTAVAIL; } diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 37a4e777e34..c62dd247774 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -388,7 +388,7 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval, fl->owner = current->pid; break; case IPV6_FL_S_USER: - fl->owner = current->euid; + fl->owner = current_euid(); break; default: err = -EINVAL; @@ -464,7 +464,7 @@ static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl, int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) { - int err; + int uninitialized_var(err); struct net *net = sock_net(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct in6_flowlabel_req freq; @@ -696,14 +696,14 @@ static int ip6fl_seq_show(struct seq_file *seq, void *v) else { struct ip6_flowlabel *fl = v; seq_printf(seq, - "%05X %-1d %-6d %-6d %-6ld %-8ld " NIP6_SEQFMT " %-4d\n", + "%05X %-1d %-6d %-6d %-6ld %-8ld %pi6 %-4d\n", (unsigned)ntohl(fl->label), fl->share, (unsigned)fl->owner, atomic_read(&fl->users), fl->linger/HZ, (long)(fl->expires - jiffies)/HZ, - NIP6(fl->dst), + &fl->dst, fl->opt ? fl->opt->opt_nflen : 0); } return 0; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index c77db0b95e2..4b15938bef4 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -137,7 +137,8 @@ static int ip6_output2(struct sk_buff *skb) struct inet6_dev *idev = ip6_dst_idev(skb->dst); if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) && - ((mroute6_socket && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || + ((mroute6_socket(dev_net(dev)) && + !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr, &ipv6_hdr(skb)->saddr))) { struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); @@ -490,7 +491,7 @@ int ip6_forward(struct sk_buff *skb) We don't send redirects to frames decapsulated from IPsec. */ if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 && - !skb->sp) { + !skb_sec_path(skb)) { struct in6_addr *target = NULL; struct rt6_info *rt; struct neighbour *n = dst->neighbour; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 64ce3d33d9c..58e2b0d9375 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -74,8 +74,8 @@ MODULE_LICENSE("GPL"); (addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \ (HASH_SIZE - 1)) -static int ip6_fb_tnl_dev_init(struct net_device *dev); -static int ip6_tnl_dev_init(struct net_device *dev); +static void ip6_fb_tnl_dev_init(struct net_device *dev); +static void ip6_tnl_dev_init(struct net_device *dev); static void ip6_tnl_dev_setup(struct net_device *dev); static int ip6_tnl_net_id; @@ -249,7 +249,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct ip6_tnl_parm *p) } t = netdev_priv(dev); - dev->init = ip6_tnl_dev_init; + ip6_tnl_dev_init(dev); t->parms = *p; if ((err = register_netdevice(dev)) < 0) @@ -846,6 +846,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, int encap_limit, __u32 *pmtu) { + struct net *net = dev_net(dev); struct ip6_tnl *t = netdev_priv(dev); struct net_device_stats *stats = &t->dev->stats; struct ipv6hdr *ipv6h = ipv6_hdr(skb); @@ -861,9 +862,9 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, if ((dst = ip6_tnl_dst_check(t)) != NULL) dst_hold(dst); else { - dst = ip6_route_output(dev_net(dev), NULL, fl); + dst = ip6_route_output(net, NULL, fl); - if (dst->error || xfrm_lookup(&dst, fl, NULL, 0) < 0) + if (dst->error || xfrm_lookup(net, &dst, fl, NULL, 0) < 0) goto tx_err_link_failure; } @@ -1150,7 +1151,6 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) * ip6_tnl_change - update the tunnel parameters * @t: tunnel to be changed * @p: tunnel configuration parameters - * @active: != 0 if tunnel is ready for use * * Description: * ip6_tnl_change() updates the tunnel parameters @@ -1306,6 +1306,14 @@ ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) return 0; } + +static const struct net_device_ops ip6_tnl_netdev_ops = { + .ndo_uninit = ip6_tnl_dev_uninit, + .ndo_start_xmit = ip6_tnl_xmit, + .ndo_do_ioctl = ip6_tnl_ioctl, + .ndo_change_mtu = ip6_tnl_change_mtu, +}; + /** * ip6_tnl_dev_setup - setup virtual tunnel device * @dev: virtual device associated with tunnel @@ -1316,11 +1324,8 @@ ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) static void ip6_tnl_dev_setup(struct net_device *dev) { - dev->uninit = ip6_tnl_dev_uninit; + dev->netdev_ops = &ip6_tnl_netdev_ops; dev->destructor = free_netdev; - dev->hard_start_xmit = ip6_tnl_xmit; - dev->do_ioctl = ip6_tnl_ioctl; - dev->change_mtu = ip6_tnl_change_mtu; dev->type = ARPHRD_TUNNEL6; dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr); @@ -1349,13 +1354,11 @@ ip6_tnl_dev_init_gen(struct net_device *dev) * @dev: virtual device associated with tunnel **/ -static int -ip6_tnl_dev_init(struct net_device *dev) +static void ip6_tnl_dev_init(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); ip6_tnl_dev_init_gen(dev); ip6_tnl_link_config(t); - return 0; } /** @@ -1365,8 +1368,7 @@ ip6_tnl_dev_init(struct net_device *dev) * Return: 0 **/ -static int -ip6_fb_tnl_dev_init(struct net_device *dev) +static void ip6_fb_tnl_dev_init(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); struct net *net = dev_net(dev); @@ -1376,7 +1378,6 @@ ip6_fb_tnl_dev_init(struct net_device *dev) t->parms.proto = IPPROTO_IPV6; dev_hold(dev); ip6n->tnls_wc[0] = t; - return 0; } static struct xfrm6_tunnel ip4ip6_handler = { @@ -1428,10 +1429,10 @@ static int ip6_tnl_init_net(struct net *net) if (!ip6n->fb_tnl_dev) goto err_alloc_dev; - - ip6n->fb_tnl_dev->init = ip6_fb_tnl_dev_init; dev_net_set(ip6n->fb_tnl_dev, net); + ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev); + err = register_netdev(ip6n->fb_tnl_dev); if (err < 0) goto err_register; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 0524769632e..3c51b2d827f 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -49,9 +49,6 @@ #include <net/addrconf.h> #include <linux/netfilter_ipv6.h> -struct sock *mroute6_socket; - - /* Big lock, protecting vif table, mrt cache and mroute socket state. Note that the changes are semaphored via rtnl_lock. */ @@ -62,22 +59,9 @@ static DEFINE_RWLOCK(mrt_lock); * Multicast router control variables */ -static struct mif_device vif6_table[MAXMIFS]; /* Devices */ -static int maxvif; - -#define MIF_EXISTS(idx) (vif6_table[idx].dev != NULL) - -static int mroute_do_assert; /* Set in PIM assert */ -#ifdef CONFIG_IPV6_PIMSM_V2 -static int mroute_do_pim; -#else -#define mroute_do_pim 0 -#endif - -static struct mfc6_cache *mfc6_cache_array[MFC6_LINES]; /* Forwarding cache */ +#define MIF_EXISTS(_net, _idx) ((_net)->ipv6.vif6_table[_idx].dev != NULL) static struct mfc6_cache *mfc_unres_queue; /* Queue of unresolved entries */ -static atomic_t cache_resolve_queue_len; /* Size of unresolved */ /* Special spinlock for queue of unresolved entries */ static DEFINE_SPINLOCK(mfc_unres_lock); @@ -93,8 +77,10 @@ static DEFINE_SPINLOCK(mfc_unres_lock); static struct kmem_cache *mrt_cachep __read_mostly; static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache); -static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert); +static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, + mifi_t mifi, int assert); static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm); +static void mroute_clean_tables(struct net *net); #ifdef CONFIG_IPV6_PIMSM_V2 static struct inet6_protocol pim6_protocol; @@ -106,19 +92,22 @@ static struct timer_list ipmr_expire_timer; #ifdef CONFIG_PROC_FS struct ipmr_mfc_iter { + struct seq_net_private p; struct mfc6_cache **cache; int ct; }; -static struct mfc6_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos) +static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net, + struct ipmr_mfc_iter *it, loff_t pos) { struct mfc6_cache *mfc; - it->cache = mfc6_cache_array; + it->cache = net->ipv6.mfc6_cache_array; read_lock(&mrt_lock); - for (it->ct = 0; it->ct < ARRAY_SIZE(mfc6_cache_array); it->ct++) - for (mfc = mfc6_cache_array[it->ct]; mfc; mfc = mfc->next) + for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) + for (mfc = net->ipv6.mfc6_cache_array[it->ct]; + mfc; mfc = mfc->next) if (pos-- == 0) return mfc; read_unlock(&mrt_lock); @@ -126,7 +115,8 @@ static struct mfc6_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos) it->cache = &mfc_unres_queue; spin_lock_bh(&mfc_unres_lock); for (mfc = mfc_unres_queue; mfc; mfc = mfc->next) - if (pos-- == 0) + if (net_eq(mfc6_net(mfc), net) && + pos-- == 0) return mfc; spin_unlock_bh(&mfc_unres_lock); @@ -142,17 +132,19 @@ static struct mfc6_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos) */ struct ipmr_vif_iter { + struct seq_net_private p; int ct; }; -static struct mif_device *ip6mr_vif_seq_idx(struct ipmr_vif_iter *iter, +static struct mif_device *ip6mr_vif_seq_idx(struct net *net, + struct ipmr_vif_iter *iter, loff_t pos) { - for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) { - if (!MIF_EXISTS(iter->ct)) + for (iter->ct = 0; iter->ct < net->ipv6.maxvif; ++iter->ct) { + if (!MIF_EXISTS(net, iter->ct)) continue; if (pos-- == 0) - return &vif6_table[iter->ct]; + return &net->ipv6.vif6_table[iter->ct]; } return NULL; } @@ -160,23 +152,26 @@ static struct mif_device *ip6mr_vif_seq_idx(struct ipmr_vif_iter *iter, static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos) __acquires(mrt_lock) { + struct net *net = seq_file_net(seq); + read_lock(&mrt_lock); - return (*pos ? ip6mr_vif_seq_idx(seq->private, *pos - 1) - : SEQ_START_TOKEN); + return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1) + : SEQ_START_TOKEN; } static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct ipmr_vif_iter *iter = seq->private; + struct net *net = seq_file_net(seq); ++*pos; if (v == SEQ_START_TOKEN) - return ip6mr_vif_seq_idx(iter, 0); + return ip6mr_vif_seq_idx(net, iter, 0); - while (++iter->ct < maxvif) { - if (!MIF_EXISTS(iter->ct)) + while (++iter->ct < net->ipv6.maxvif) { + if (!MIF_EXISTS(net, iter->ct)) continue; - return &vif6_table[iter->ct]; + return &net->ipv6.vif6_table[iter->ct]; } return NULL; } @@ -189,6 +184,8 @@ static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v) static int ip6mr_vif_seq_show(struct seq_file *seq, void *v) { + struct net *net = seq_file_net(seq); + if (v == SEQ_START_TOKEN) { seq_puts(seq, "Interface BytesIn PktsIn BytesOut PktsOut Flags\n"); @@ -198,7 +195,7 @@ static int ip6mr_vif_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%2td %-10s %8ld %7ld %8ld %7ld %05X\n", - vif - vif6_table, + vif - net->ipv6.vif6_table, name, vif->bytes_in, vif->pkt_in, vif->bytes_out, vif->pkt_out, vif->flags); @@ -215,8 +212,8 @@ static struct seq_operations ip6mr_vif_seq_ops = { static int ip6mr_vif_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &ip6mr_vif_seq_ops, - sizeof(struct ipmr_vif_iter)); + return seq_open_net(inode, file, &ip6mr_vif_seq_ops, + sizeof(struct ipmr_vif_iter)); } static struct file_operations ip6mr_vif_fops = { @@ -224,24 +221,27 @@ static struct file_operations ip6mr_vif_fops = { .open = ip6mr_vif_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) { - return (*pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1) - : SEQ_START_TOKEN); + struct net *net = seq_file_net(seq); + + return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1) + : SEQ_START_TOKEN; } static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct mfc6_cache *mfc = v; struct ipmr_mfc_iter *it = seq->private; + struct net *net = seq_file_net(seq); ++*pos; if (v == SEQ_START_TOKEN) - return ipmr_mfc_seq_idx(seq->private, 0); + return ipmr_mfc_seq_idx(net, seq->private, 0); if (mfc->next) return mfc->next; @@ -249,10 +249,10 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (it->cache == &mfc_unres_queue) goto end_of_list; - BUG_ON(it->cache != mfc6_cache_array); + BUG_ON(it->cache != net->ipv6.mfc6_cache_array); - while (++it->ct < ARRAY_SIZE(mfc6_cache_array)) { - mfc = mfc6_cache_array[it->ct]; + while (++it->ct < MFC6_LINES) { + mfc = net->ipv6.mfc6_cache_array[it->ct]; if (mfc) return mfc; } @@ -277,16 +277,18 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) { struct ipmr_mfc_iter *it = seq->private; + struct net *net = seq_file_net(seq); if (it->cache == &mfc_unres_queue) spin_unlock_bh(&mfc_unres_lock); - else if (it->cache == mfc6_cache_array) + else if (it->cache == net->ipv6.mfc6_cache_array) read_unlock(&mrt_lock); } static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) { int n; + struct net *net = seq_file_net(seq); if (v == SEQ_START_TOKEN) { seq_puts(seq, @@ -297,23 +299,28 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) const struct mfc6_cache *mfc = v; const struct ipmr_mfc_iter *it = seq->private; - seq_printf(seq, - NIP6_FMT " " NIP6_FMT " %-3d %8ld %8ld %8ld", - NIP6(mfc->mf6c_mcastgrp), NIP6(mfc->mf6c_origin), - mfc->mf6c_parent, - mfc->mfc_un.res.pkt, - mfc->mfc_un.res.bytes, - mfc->mfc_un.res.wrong_if); + seq_printf(seq, "%pI6 %pI6 %-3hd", + &mfc->mf6c_mcastgrp, &mfc->mf6c_origin, + mfc->mf6c_parent); if (it->cache != &mfc_unres_queue) { + seq_printf(seq, " %8lu %8lu %8lu", + mfc->mfc_un.res.pkt, + mfc->mfc_un.res.bytes, + mfc->mfc_un.res.wrong_if); for (n = mfc->mfc_un.res.minvif; n < mfc->mfc_un.res.maxvif; n++) { - if (MIF_EXISTS(n) && + if (MIF_EXISTS(net, n) && mfc->mfc_un.res.ttls[n] < 255) seq_printf(seq, " %2d:%-3d", n, mfc->mfc_un.res.ttls[n]); } + } else { + /* unresolved mfc_caches don't contain + * pkt, bytes and wrong_if values + */ + seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul); } seq_putc(seq, '\n'); } @@ -329,8 +336,8 @@ static struct seq_operations ipmr_mfc_seq_ops = { static int ipmr_mfc_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &ipmr_mfc_seq_ops, - sizeof(struct ipmr_mfc_iter)); + return seq_open_net(inode, file, &ipmr_mfc_seq_ops, + sizeof(struct ipmr_mfc_iter)); } static struct file_operations ip6mr_mfc_fops = { @@ -338,18 +345,19 @@ static struct file_operations ip6mr_mfc_fops = { .open = ipmr_mfc_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; #endif #ifdef CONFIG_IPV6_PIMSM_V2 -static int reg_vif_num = -1; static int pim6_rcv(struct sk_buff *skb) { struct pimreghdr *pim; struct ipv6hdr *encap; struct net_device *reg_dev = NULL; + struct net *net = dev_net(skb->dev); + int reg_vif_num = net->ipv6.mroute_reg_vif_num; if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) goto drop; @@ -372,7 +380,7 @@ static int pim6_rcv(struct sk_buff *skb) read_lock(&mrt_lock); if (reg_vif_num >= 0) - reg_dev = vif6_table[reg_vif_num].dev; + reg_dev = net->ipv6.vif6_table[reg_vif_num].dev; if (reg_dev) dev_hold(reg_dev); read_unlock(&mrt_lock); @@ -408,25 +416,32 @@ static struct inet6_protocol pim6_protocol = { static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) { + struct net *net = dev_net(dev); + read_lock(&mrt_lock); dev->stats.tx_bytes += skb->len; dev->stats.tx_packets++; - ip6mr_cache_report(skb, reg_vif_num, MRT6MSG_WHOLEPKT); + ip6mr_cache_report(net, skb, net->ipv6.mroute_reg_vif_num, + MRT6MSG_WHOLEPKT); read_unlock(&mrt_lock); kfree_skb(skb); return 0; } +static const struct net_device_ops reg_vif_netdev_ops = { + .ndo_start_xmit = reg_vif_xmit, +}; + static void reg_vif_setup(struct net_device *dev) { dev->type = ARPHRD_PIMREG; dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8; dev->flags = IFF_NOARP; - dev->hard_start_xmit = reg_vif_xmit; + dev->netdev_ops = ®_vif_netdev_ops; dev->destructor = free_netdev; } -static struct net_device *ip6mr_reg_vif(void) +static struct net_device *ip6mr_reg_vif(struct net *net) { struct net_device *dev; @@ -434,6 +449,8 @@ static struct net_device *ip6mr_reg_vif(void) if (dev == NULL) return NULL; + dev_net_set(dev, net); + if (register_netdevice(dev)) { free_netdev(dev); return NULL; @@ -460,14 +477,14 @@ failure: * Delete a VIF entry */ -static int mif6_delete(int vifi) +static int mif6_delete(struct net *net, int vifi) { struct mif_device *v; struct net_device *dev; - if (vifi < 0 || vifi >= maxvif) + if (vifi < 0 || vifi >= net->ipv6.maxvif) return -EADDRNOTAVAIL; - v = &vif6_table[vifi]; + v = &net->ipv6.vif6_table[vifi]; write_lock_bh(&mrt_lock); dev = v->dev; @@ -479,17 +496,17 @@ static int mif6_delete(int vifi) } #ifdef CONFIG_IPV6_PIMSM_V2 - if (vifi == reg_vif_num) - reg_vif_num = -1; + if (vifi == net->ipv6.mroute_reg_vif_num) + net->ipv6.mroute_reg_vif_num = -1; #endif - if (vifi + 1 == maxvif) { + if (vifi + 1 == net->ipv6.maxvif) { int tmp; for (tmp = vifi - 1; tmp >= 0; tmp--) { - if (MIF_EXISTS(tmp)) + if (MIF_EXISTS(net, tmp)) break; } - maxvif = tmp + 1; + net->ipv6.maxvif = tmp + 1; } write_unlock_bh(&mrt_lock); @@ -503,6 +520,12 @@ static int mif6_delete(int vifi) return 0; } +static inline void ip6mr_cache_free(struct mfc6_cache *c) +{ + release_net(mfc6_net(c)); + kmem_cache_free(mrt_cachep, c); +} + /* Destroy an unresolved cache entry, killing queued skbs and reporting error to netlink readers. */ @@ -510,8 +533,9 @@ static int mif6_delete(int vifi) static void ip6mr_destroy_unres(struct mfc6_cache *c) { struct sk_buff *skb; + struct net *net = mfc6_net(c); - atomic_dec(&cache_resolve_queue_len); + atomic_dec(&net->ipv6.cache_resolve_queue_len); while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) { if (ipv6_hdr(skb)->version == 0) { @@ -520,12 +544,12 @@ static void ip6mr_destroy_unres(struct mfc6_cache *c) nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); skb_trim(skb, nlh->nlmsg_len); ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT; - rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid); + rtnl_unicast(skb, net, NETLINK_CB(skb).pid); } else kfree_skb(skb); } - kmem_cache_free(mrt_cachep, c); + ip6mr_cache_free(c); } @@ -553,7 +577,7 @@ static void ipmr_do_expire_process(unsigned long dummy) ip6mr_destroy_unres(c); } - if (atomic_read(&cache_resolve_queue_len)) + if (mfc_unres_queue != NULL) mod_timer(&ipmr_expire_timer, jiffies + expires); } @@ -564,7 +588,7 @@ static void ipmr_expire_process(unsigned long dummy) return; } - if (atomic_read(&cache_resolve_queue_len)) + if (mfc_unres_queue != NULL) ipmr_do_expire_process(dummy); spin_unlock(&mfc_unres_lock); @@ -575,13 +599,15 @@ static void ipmr_expire_process(unsigned long dummy) static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttls) { int vifi; + struct net *net = mfc6_net(cache); cache->mfc_un.res.minvif = MAXMIFS; cache->mfc_un.res.maxvif = 0; memset(cache->mfc_un.res.ttls, 255, MAXMIFS); - for (vifi = 0; vifi < maxvif; vifi++) { - if (MIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) { + for (vifi = 0; vifi < net->ipv6.maxvif; vifi++) { + if (MIF_EXISTS(net, vifi) && + ttls[vifi] && ttls[vifi] < 255) { cache->mfc_un.res.ttls[vifi] = ttls[vifi]; if (cache->mfc_un.res.minvif > vifi) cache->mfc_un.res.minvif = vifi; @@ -591,15 +617,15 @@ static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttl } } -static int mif6_add(struct mif6ctl *vifc, int mrtsock) +static int mif6_add(struct net *net, struct mif6ctl *vifc, int mrtsock) { int vifi = vifc->mif6c_mifi; - struct mif_device *v = &vif6_table[vifi]; + struct mif_device *v = &net->ipv6.vif6_table[vifi]; struct net_device *dev; int err; /* Is vif busy ? */ - if (MIF_EXISTS(vifi)) + if (MIF_EXISTS(net, vifi)) return -EADDRINUSE; switch (vifc->mif6c_flags) { @@ -609,9 +635,9 @@ static int mif6_add(struct mif6ctl *vifc, int mrtsock) * Special Purpose VIF in PIM * All the packets will be sent to the daemon */ - if (reg_vif_num >= 0) + if (net->ipv6.mroute_reg_vif_num >= 0) return -EADDRINUSE; - dev = ip6mr_reg_vif(); + dev = ip6mr_reg_vif(net); if (!dev) return -ENOBUFS; err = dev_set_allmulti(dev, 1); @@ -623,7 +649,7 @@ static int mif6_add(struct mif6ctl *vifc, int mrtsock) break; #endif case 0: - dev = dev_get_by_index(&init_net, vifc->mif6c_pifi); + dev = dev_get_by_index(net, vifc->mif6c_pifi); if (!dev) return -EADDRNOTAVAIL; err = dev_set_allmulti(dev, 1); @@ -657,20 +683,22 @@ static int mif6_add(struct mif6ctl *vifc, int mrtsock) v->dev = dev; #ifdef CONFIG_IPV6_PIMSM_V2 if (v->flags & MIFF_REGISTER) - reg_vif_num = vifi; + net->ipv6.mroute_reg_vif_num = vifi; #endif - if (vifi + 1 > maxvif) - maxvif = vifi + 1; + if (vifi + 1 > net->ipv6.maxvif) + net->ipv6.maxvif = vifi + 1; write_unlock_bh(&mrt_lock); return 0; } -static struct mfc6_cache *ip6mr_cache_find(struct in6_addr *origin, struct in6_addr *mcastgrp) +static struct mfc6_cache *ip6mr_cache_find(struct net *net, + struct in6_addr *origin, + struct in6_addr *mcastgrp) { int line = MFC6_HASH(mcastgrp, origin); struct mfc6_cache *c; - for (c = mfc6_cache_array[line]; c; c = c->next) { + for (c = net->ipv6.mfc6_cache_array[line]; c; c = c->next) { if (ipv6_addr_equal(&c->mf6c_origin, origin) && ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) break; @@ -681,24 +709,24 @@ static struct mfc6_cache *ip6mr_cache_find(struct in6_addr *origin, struct in6_a /* * Allocate a multicast cache entry */ -static struct mfc6_cache *ip6mr_cache_alloc(void) +static struct mfc6_cache *ip6mr_cache_alloc(struct net *net) { - struct mfc6_cache *c = kmem_cache_alloc(mrt_cachep, GFP_KERNEL); + struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); if (c == NULL) return NULL; - memset(c, 0, sizeof(*c)); c->mfc_un.res.minvif = MAXMIFS; + mfc6_net_set(c, net); return c; } -static struct mfc6_cache *ip6mr_cache_alloc_unres(void) +static struct mfc6_cache *ip6mr_cache_alloc_unres(struct net *net) { - struct mfc6_cache *c = kmem_cache_alloc(mrt_cachep, GFP_ATOMIC); + struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); if (c == NULL) return NULL; - memset(c, 0, sizeof(*c)); skb_queue_head_init(&c->mfc_un.unres.unresolved); c->mfc_un.unres.expires = jiffies + 10 * HZ; + mfc6_net_set(c, net); return c; } @@ -727,7 +755,7 @@ static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c) skb_trim(skb, nlh->nlmsg_len); ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE; } - err = rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid); + err = rtnl_unicast(skb, mfc6_net(uc), NETLINK_CB(skb).pid); } else ip6_mr_forward(skb, c); } @@ -740,7 +768,8 @@ static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c) * Called under mrt_lock. */ -static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert) +static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi, + int assert) { struct sk_buff *skb; struct mrt6msg *msg; @@ -776,7 +805,7 @@ static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert) msg = (struct mrt6msg *)skb_transport_header(skb); msg->im6_mbz = 0; msg->im6_msgtype = MRT6MSG_WHOLEPKT; - msg->im6_mif = reg_vif_num; + msg->im6_mif = net->ipv6.mroute_reg_vif_num; msg->im6_pad = 0; ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr); ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr); @@ -813,7 +842,7 @@ static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert) skb_pull(skb, sizeof(struct ipv6hdr)); } - if (mroute6_socket == NULL) { + if (net->ipv6.mroute6_sk == NULL) { kfree_skb(skb); return -EINVAL; } @@ -821,7 +850,8 @@ static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert) /* * Deliver to user space multicast routing algorithms */ - if ((ret = sock_queue_rcv_skb(mroute6_socket, skb)) < 0) { + ret = sock_queue_rcv_skb(net->ipv6.mroute6_sk, skb); + if (ret < 0) { if (net_ratelimit()) printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n"); kfree_skb(skb); @@ -835,14 +865,15 @@ static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert) */ static int -ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb) +ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb) { int err; struct mfc6_cache *c; spin_lock_bh(&mfc_unres_lock); for (c = mfc_unres_queue; c; c = c->next) { - if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) && + if (net_eq(mfc6_net(c), net) && + ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) && ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) break; } @@ -852,8 +883,8 @@ ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb) * Create a new entry if allowable */ - if (atomic_read(&cache_resolve_queue_len) >= 10 || - (c = ip6mr_cache_alloc_unres()) == NULL) { + if (atomic_read(&net->ipv6.cache_resolve_queue_len) >= 10 || + (c = ip6mr_cache_alloc_unres(net)) == NULL) { spin_unlock_bh(&mfc_unres_lock); kfree_skb(skb); @@ -870,18 +901,19 @@ ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb) /* * Reflect first query at pim6sd */ - if ((err = ip6mr_cache_report(skb, mifi, MRT6MSG_NOCACHE)) < 0) { + err = ip6mr_cache_report(net, skb, mifi, MRT6MSG_NOCACHE); + if (err < 0) { /* If the report failed throw the cache entry out - Brad Parker */ spin_unlock_bh(&mfc_unres_lock); - kmem_cache_free(mrt_cachep, c); + ip6mr_cache_free(c); kfree_skb(skb); return err; } - atomic_inc(&cache_resolve_queue_len); + atomic_inc(&net->ipv6.cache_resolve_queue_len); c->next = mfc_unres_queue; mfc_unres_queue = c; @@ -907,21 +939,22 @@ ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb) * MFC6 cache manipulation by user space */ -static int ip6mr_mfc_delete(struct mf6cctl *mfc) +static int ip6mr_mfc_delete(struct net *net, struct mf6cctl *mfc) { int line; struct mfc6_cache *c, **cp; line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr); - for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) { + for (cp = &net->ipv6.mfc6_cache_array[line]; + (c = *cp) != NULL; cp = &c->next) { if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) { write_lock_bh(&mrt_lock); *cp = c->next; write_unlock_bh(&mrt_lock); - kmem_cache_free(mrt_cachep, c); + ip6mr_cache_free(c); return 0; } } @@ -932,19 +965,17 @@ static int ip6mr_device_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; + struct net *net = dev_net(dev); struct mif_device *v; int ct; - if (!net_eq(dev_net(dev), &init_net)) - return NOTIFY_DONE; - if (event != NETDEV_UNREGISTER) return NOTIFY_DONE; - v = &vif6_table[0]; - for (ct = 0; ct < maxvif; ct++, v++) { + v = &net->ipv6.vif6_table[0]; + for (ct = 0; ct < net->ipv6.maxvif; ct++, v++) { if (v->dev == dev) - mif6_delete(ct); + mif6_delete(net, ct); } return NOTIFY_DONE; } @@ -957,6 +988,66 @@ static struct notifier_block ip6_mr_notifier = { * Setup for IP multicast routing */ +static int __net_init ip6mr_net_init(struct net *net) +{ + int err = 0; + net->ipv6.vif6_table = kcalloc(MAXMIFS, sizeof(struct mif_device), + GFP_KERNEL); + if (!net->ipv6.vif6_table) { + err = -ENOMEM; + goto fail; + } + + /* Forwarding cache */ + net->ipv6.mfc6_cache_array = kcalloc(MFC6_LINES, + sizeof(struct mfc6_cache *), + GFP_KERNEL); + if (!net->ipv6.mfc6_cache_array) { + err = -ENOMEM; + goto fail_mfc6_cache; + } + +#ifdef CONFIG_IPV6_PIMSM_V2 + net->ipv6.mroute_reg_vif_num = -1; +#endif + +#ifdef CONFIG_PROC_FS + err = -ENOMEM; + if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops)) + goto proc_vif_fail; + if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops)) + goto proc_cache_fail; +#endif + return 0; + +#ifdef CONFIG_PROC_FS +proc_cache_fail: + proc_net_remove(net, "ip6_mr_vif"); +proc_vif_fail: + kfree(net->ipv6.mfc6_cache_array); +#endif +fail_mfc6_cache: + kfree(net->ipv6.vif6_table); +fail: + return err; +} + +static void __net_exit ip6mr_net_exit(struct net *net) +{ +#ifdef CONFIG_PROC_FS + proc_net_remove(net, "ip6_mr_cache"); + proc_net_remove(net, "ip6_mr_vif"); +#endif + mroute_clean_tables(net); + kfree(net->ipv6.mfc6_cache_array); + kfree(net->ipv6.vif6_table); +} + +static struct pernet_operations ip6mr_net_ops = { + .init = ip6mr_net_init, + .exit = ip6mr_net_exit, +}; + int __init ip6_mr_init(void) { int err; @@ -968,43 +1059,32 @@ int __init ip6_mr_init(void) if (!mrt_cachep) return -ENOMEM; + err = register_pernet_subsys(&ip6mr_net_ops); + if (err) + goto reg_pernet_fail; + setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0); err = register_netdevice_notifier(&ip6_mr_notifier); if (err) goto reg_notif_fail; -#ifdef CONFIG_PROC_FS - err = -ENOMEM; - if (!proc_net_fops_create(&init_net, "ip6_mr_vif", 0, &ip6mr_vif_fops)) - goto proc_vif_fail; - if (!proc_net_fops_create(&init_net, "ip6_mr_cache", - 0, &ip6mr_mfc_fops)) - goto proc_cache_fail; -#endif return 0; -#ifdef CONFIG_PROC_FS -proc_cache_fail: - proc_net_remove(&init_net, "ip6_mr_vif"); -proc_vif_fail: - unregister_netdevice_notifier(&ip6_mr_notifier); -#endif reg_notif_fail: del_timer(&ipmr_expire_timer); + unregister_pernet_subsys(&ip6mr_net_ops); +reg_pernet_fail: kmem_cache_destroy(mrt_cachep); return err; } void ip6_mr_cleanup(void) { -#ifdef CONFIG_PROC_FS - proc_net_remove(&init_net, "ip6_mr_cache"); - proc_net_remove(&init_net, "ip6_mr_vif"); -#endif unregister_netdevice_notifier(&ip6_mr_notifier); del_timer(&ipmr_expire_timer); + unregister_pernet_subsys(&ip6mr_net_ops); kmem_cache_destroy(mrt_cachep); } -static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock) +static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock) { int line; struct mfc6_cache *uc, *c, **cp; @@ -1020,7 +1100,8 @@ static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock) line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr); - for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) { + for (cp = &net->ipv6.mfc6_cache_array[line]; + (c = *cp) != NULL; cp = &c->next) { if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) break; @@ -1039,7 +1120,7 @@ static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock) if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr)) return -EINVAL; - c = ip6mr_cache_alloc(); + c = ip6mr_cache_alloc(net); if (c == NULL) return -ENOMEM; @@ -1051,8 +1132,8 @@ static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock) c->mfc_flags |= MFC_STATIC; write_lock_bh(&mrt_lock); - c->next = mfc6_cache_array[line]; - mfc6_cache_array[line] = c; + c->next = net->ipv6.mfc6_cache_array[line]; + net->ipv6.mfc6_cache_array[line] = c; write_unlock_bh(&mrt_lock); /* @@ -1062,19 +1143,21 @@ static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock) spin_lock_bh(&mfc_unres_lock); for (cp = &mfc_unres_queue; (uc = *cp) != NULL; cp = &uc->next) { - if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) && + if (net_eq(mfc6_net(uc), net) && + ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) && ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) { *cp = uc->next; - if (atomic_dec_and_test(&cache_resolve_queue_len)) - del_timer(&ipmr_expire_timer); + atomic_dec(&net->ipv6.cache_resolve_queue_len); break; } } + if (mfc_unres_queue == NULL) + del_timer(&ipmr_expire_timer); spin_unlock_bh(&mfc_unres_lock); if (uc) { ip6mr_cache_resolve(uc, c); - kmem_cache_free(mrt_cachep, uc); + ip6mr_cache_free(uc); } return 0; } @@ -1083,25 +1166,25 @@ static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock) * Close the multicast socket, and clear the vif tables etc */ -static void mroute_clean_tables(struct sock *sk) +static void mroute_clean_tables(struct net *net) { int i; /* * Shut down all active vif entries */ - for (i = 0; i < maxvif; i++) { - if (!(vif6_table[i].flags & VIFF_STATIC)) - mif6_delete(i); + for (i = 0; i < net->ipv6.maxvif; i++) { + if (!(net->ipv6.vif6_table[i].flags & VIFF_STATIC)) + mif6_delete(net, i); } /* * Wipe the cache */ - for (i = 0; i < ARRAY_SIZE(mfc6_cache_array); i++) { + for (i = 0; i < MFC6_LINES; i++) { struct mfc6_cache *c, **cp; - cp = &mfc6_cache_array[i]; + cp = &net->ipv6.mfc6_cache_array[i]; while ((c = *cp) != NULL) { if (c->mfc_flags & MFC_STATIC) { cp = &c->next; @@ -1111,22 +1194,22 @@ static void mroute_clean_tables(struct sock *sk) *cp = c->next; write_unlock_bh(&mrt_lock); - kmem_cache_free(mrt_cachep, c); + ip6mr_cache_free(c); } } - if (atomic_read(&cache_resolve_queue_len) != 0) { - struct mfc6_cache *c; + if (atomic_read(&net->ipv6.cache_resolve_queue_len) != 0) { + struct mfc6_cache *c, **cp; spin_lock_bh(&mfc_unres_lock); - while (mfc_unres_queue != NULL) { - c = mfc_unres_queue; - mfc_unres_queue = c->next; - spin_unlock_bh(&mfc_unres_lock); - + cp = &mfc_unres_queue; + while ((c = *cp) != NULL) { + if (!net_eq(mfc6_net(c), net)) { + cp = &c->next; + continue; + } + *cp = c->next; ip6mr_destroy_unres(c); - - spin_lock_bh(&mfc_unres_lock); } spin_unlock_bh(&mfc_unres_lock); } @@ -1135,11 +1218,12 @@ static void mroute_clean_tables(struct sock *sk) static int ip6mr_sk_init(struct sock *sk) { int err = 0; + struct net *net = sock_net(sk); rtnl_lock(); write_lock_bh(&mrt_lock); - if (likely(mroute6_socket == NULL)) - mroute6_socket = sk; + if (likely(net->ipv6.mroute6_sk == NULL)) + net->ipv6.mroute6_sk = sk; else err = -EADDRINUSE; write_unlock_bh(&mrt_lock); @@ -1152,14 +1236,15 @@ static int ip6mr_sk_init(struct sock *sk) int ip6mr_sk_done(struct sock *sk) { int err = 0; + struct net *net = sock_net(sk); rtnl_lock(); - if (sk == mroute6_socket) { + if (sk == net->ipv6.mroute6_sk) { write_lock_bh(&mrt_lock); - mroute6_socket = NULL; + net->ipv6.mroute6_sk = NULL; write_unlock_bh(&mrt_lock); - mroute_clean_tables(sk); + mroute_clean_tables(net); } else err = -EACCES; rtnl_unlock(); @@ -1180,9 +1265,10 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int struct mif6ctl vif; struct mf6cctl mfc; mifi_t mifi; + struct net *net = sock_net(sk); if (optname != MRT6_INIT) { - if (sk != mroute6_socket && !capable(CAP_NET_ADMIN)) + if (sk != net->ipv6.mroute6_sk && !capable(CAP_NET_ADMIN)) return -EACCES; } @@ -1207,7 +1293,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int if (vif.mif6c_mifi >= MAXMIFS) return -ENFILE; rtnl_lock(); - ret = mif6_add(&vif, sk == mroute6_socket); + ret = mif6_add(net, &vif, sk == net->ipv6.mroute6_sk); rtnl_unlock(); return ret; @@ -1217,7 +1303,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int if (copy_from_user(&mifi, optval, sizeof(mifi_t))) return -EFAULT; rtnl_lock(); - ret = mif6_delete(mifi); + ret = mif6_delete(net, mifi); rtnl_unlock(); return ret; @@ -1233,9 +1319,10 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int return -EFAULT; rtnl_lock(); if (optname == MRT6_DEL_MFC) - ret = ip6mr_mfc_delete(&mfc); + ret = ip6mr_mfc_delete(net, &mfc); else - ret = ip6mr_mfc_add(&mfc, sk == mroute6_socket); + ret = ip6mr_mfc_add(net, &mfc, + sk == net->ipv6.mroute6_sk); rtnl_unlock(); return ret; @@ -1247,7 +1334,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int int v; if (get_user(v, (int __user *)optval)) return -EFAULT; - mroute_do_assert = !!v; + net->ipv6.mroute_do_assert = !!v; return 0; } @@ -1260,10 +1347,10 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int v = !!v; rtnl_lock(); ret = 0; - if (v != mroute_do_pim) { - mroute_do_pim = v; - mroute_do_assert = v; - if (mroute_do_pim) + if (v != net->ipv6.mroute_do_pim) { + net->ipv6.mroute_do_pim = v; + net->ipv6.mroute_do_assert = v; + if (net->ipv6.mroute_do_pim) ret = inet6_add_protocol(&pim6_protocol, IPPROTO_PIM); else @@ -1295,6 +1382,7 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, { int olr; int val; + struct net *net = sock_net(sk); switch (optname) { case MRT6_VERSION: @@ -1302,11 +1390,11 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, break; #ifdef CONFIG_IPV6_PIMSM_V2 case MRT6_PIM: - val = mroute_do_pim; + val = net->ipv6.mroute_do_pim; break; #endif case MRT6_ASSERT: - val = mroute_do_assert; + val = net->ipv6.mroute_do_assert; break; default: return -ENOPROTOOPT; @@ -1336,16 +1424,17 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) struct sioc_mif_req6 vr; struct mif_device *vif; struct mfc6_cache *c; + struct net *net = sock_net(sk); switch (cmd) { case SIOCGETMIFCNT_IN6: if (copy_from_user(&vr, arg, sizeof(vr))) return -EFAULT; - if (vr.mifi >= maxvif) + if (vr.mifi >= net->ipv6.maxvif) return -EINVAL; read_lock(&mrt_lock); - vif = &vif6_table[vr.mifi]; - if (MIF_EXISTS(vr.mifi)) { + vif = &net->ipv6.vif6_table[vr.mifi]; + if (MIF_EXISTS(net, vr.mifi)) { vr.icount = vif->pkt_in; vr.ocount = vif->pkt_out; vr.ibytes = vif->bytes_in; @@ -1363,7 +1452,7 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) return -EFAULT; read_lock(&mrt_lock); - c = ip6mr_cache_find(&sr.src.sin6_addr, &sr.grp.sin6_addr); + c = ip6mr_cache_find(net, &sr.src.sin6_addr, &sr.grp.sin6_addr); if (c) { sr.pktcnt = c->mfc_un.res.pkt; sr.bytecnt = c->mfc_un.res.bytes; @@ -1396,7 +1485,8 @@ static inline int ip6mr_forward2_finish(struct sk_buff *skb) static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi) { struct ipv6hdr *ipv6h; - struct mif_device *vif = &vif6_table[vifi]; + struct net *net = mfc6_net(c); + struct mif_device *vif = &net->ipv6.vif6_table[vifi]; struct net_device *dev; struct dst_entry *dst; struct flowi fl; @@ -1410,9 +1500,8 @@ static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi) vif->bytes_out += skb->len; vif->dev->stats.tx_bytes += skb->len; vif->dev->stats.tx_packets++; - ip6mr_cache_report(skb, vifi, MRT6MSG_WHOLEPKT); - kfree_skb(skb); - return 0; + ip6mr_cache_report(net, skb, vifi, MRT6MSG_WHOLEPKT); + goto out_free; } #endif @@ -1425,7 +1514,7 @@ static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi) } }; - dst = ip6_route_output(&init_net, NULL, &fl); + dst = ip6_route_output(net, NULL, &fl); if (!dst) goto out_free; @@ -1468,9 +1557,10 @@ out_free: static int ip6mr_find_vif(struct net_device *dev) { + struct net *net = dev_net(dev); int ct; - for (ct = maxvif - 1; ct >= 0; ct--) { - if (vif6_table[ct].dev == dev) + for (ct = net->ipv6.maxvif - 1; ct >= 0; ct--) { + if (net->ipv6.vif6_table[ct].dev == dev) break; } return ct; @@ -1480,6 +1570,7 @@ static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache) { int psend = -1; int vif, ct; + struct net *net = mfc6_net(cache); vif = cache->mf6c_parent; cache->mfc_un.res.pkt++; @@ -1488,29 +1579,30 @@ static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache) /* * Wrong interface: drop packet and (maybe) send PIM assert. */ - if (vif6_table[vif].dev != skb->dev) { + if (net->ipv6.vif6_table[vif].dev != skb->dev) { int true_vifi; cache->mfc_un.res.wrong_if++; true_vifi = ip6mr_find_vif(skb->dev); - if (true_vifi >= 0 && mroute_do_assert && + if (true_vifi >= 0 && net->ipv6.mroute_do_assert && /* pimsm uses asserts, when switching from RPT to SPT, so that we cannot check that packet arrived on an oif. It is bad, but otherwise we would need to move pretty large chunk of pimd to kernel. Ough... --ANK */ - (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) && + (net->ipv6.mroute_do_pim || + cache->mfc_un.res.ttls[true_vifi] < 255) && time_after(jiffies, cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { cache->mfc_un.res.last_assert = jiffies; - ip6mr_cache_report(skb, true_vifi, MRT6MSG_WRONGMIF); + ip6mr_cache_report(net, skb, true_vifi, MRT6MSG_WRONGMIF); } goto dont_forward; } - vif6_table[vif].pkt_in++; - vif6_table[vif].bytes_in += skb->len; + net->ipv6.vif6_table[vif].pkt_in++; + net->ipv6.vif6_table[vif].bytes_in += skb->len; /* * Forward the frame @@ -1543,9 +1635,11 @@ dont_forward: int ip6_mr_input(struct sk_buff *skb) { struct mfc6_cache *cache; + struct net *net = dev_net(skb->dev); read_lock(&mrt_lock); - cache = ip6mr_cache_find(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); + cache = ip6mr_cache_find(net, + &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); /* * No usable cache entry @@ -1555,7 +1649,7 @@ int ip6_mr_input(struct sk_buff *skb) vif = ip6mr_find_vif(skb->dev); if (vif >= 0) { - int err = ip6mr_cache_unresolved(vif, skb); + int err = ip6mr_cache_unresolved(net, vif, skb); read_unlock(&mrt_lock); return err; @@ -1578,7 +1672,8 @@ ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm) { int ct; struct rtnexthop *nhp; - struct net_device *dev = vif6_table[c->mf6c_parent].dev; + struct net *net = mfc6_net(c); + struct net_device *dev = net->ipv6.vif6_table[c->mf6c_parent].dev; u8 *b = skb_tail_pointer(skb); struct rtattr *mp_head; @@ -1594,7 +1689,7 @@ ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm) nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); nhp->rtnh_flags = 0; nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; - nhp->rtnh_ifindex = vif6_table[ct].dev->ifindex; + nhp->rtnh_ifindex = net->ipv6.vif6_table[ct].dev->ifindex; nhp->rtnh_len = sizeof(*nhp); } } @@ -1608,14 +1703,15 @@ rtattr_failure: return -EMSGSIZE; } -int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait) +int ip6mr_get_route(struct net *net, + struct sk_buff *skb, struct rtmsg *rtm, int nowait) { int err; struct mfc6_cache *cache; struct rt6_info *rt = (struct rt6_info *)skb->dst; read_lock(&mrt_lock); - cache = ip6mr_cache_find(&rt->rt6i_src.addr, &rt->rt6i_dst.addr); + cache = ip6mr_cache_find(net, &rt->rt6i_src.addr, &rt->rt6i_dst.addr); if (!cache) { struct sk_buff *skb2; @@ -1658,7 +1754,7 @@ int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait) ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr); ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr); - err = ip6mr_cache_unresolved(vif, skb2); + err = ip6mr_cache_unresolved(net, vif, skb2); read_unlock(&mrt_lock); return err; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 4545e430686..3a0b3be7ece 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -63,12 +63,12 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return; spi = htonl(ntohs(ipcomph->cpi)); - x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET6); + x = xfrm_state_lookup(&init_net, (xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET6); if (!x) return; - printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/" NIP6_FMT "\n", - spi, NIP6(iph->daddr)); + printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%pI6\n", + spi, &iph->daddr); xfrm_state_put(x); } @@ -76,7 +76,7 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) { struct xfrm_state *t = NULL; - t = xfrm_state_alloc(); + t = xfrm_state_alloc(&init_net); if (!t) goto out; @@ -114,7 +114,7 @@ static int ipcomp6_tunnel_attach(struct xfrm_state *x) spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&x->props.saddr); if (spi) - t = xfrm_state_lookup((xfrm_address_t *)&x->id.daddr, + t = xfrm_state_lookup(&init_net, (xfrm_address_t *)&x->id.daddr, spi, IPPROTO_IPV6, AF_INET6); if (!t) { t = ipcomp6_tunnel_create(x); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 2aa294be0c7..eeeaad2e8b5 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -395,6 +395,28 @@ sticky_done: break; } + case IPV6_PKTINFO: + { + struct in6_pktinfo pkt; + + if (optlen == 0) + goto e_inval; + else if (optlen < sizeof(struct in6_pktinfo) || optval == NULL) + goto e_inval; + + if (copy_from_user(&pkt, optval, optlen)) { + retv = -EFAULT; + break; + } + if (sk->sk_bound_dev_if && pkt.ipi6_ifindex != sk->sk_bound_dev_if) + goto e_inval; + + np->sticky_pktinfo.ipi6_ifindex = pkt.ipi6_ifindex; + ipv6_addr_copy(&np->sticky_pktinfo.ipi6_addr, &pkt.ipi6_addr); + retv = 0; + break; + } + case IPV6_2292PKTOPTIONS: { struct ipv6_txoptions *opt = NULL; @@ -916,8 +938,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, } else { if (np->rxopt.bits.rxinfo) { struct in6_pktinfo src_info; - src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : sk->sk_bound_dev_if; - ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr); + src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : + np->sticky_pktinfo.ipi6_ifindex; + np->mcast_oif? ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr) : + ipv6_addr_copy(&src_info.ipi6_addr, &(np->sticky_pktinfo.ipi6_addr)); put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info); } if (np->rxopt.bits.rxhlim) { @@ -926,8 +950,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, } if (np->rxopt.bits.rxoinfo) { struct in6_pktinfo src_info; - src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : sk->sk_bound_dev_if; - ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr); + src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : + np->sticky_pktinfo.ipi6_ifindex; + np->mcast_oif? ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr) : + ipv6_addr_copy(&src_info.ipi6_addr, &(np->sticky_pktinfo.ipi6_addr)); put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info); } if (np->rxopt.bits.rxohlim) { diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index d7b3c6d398a..a51fb33e686 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -303,20 +303,23 @@ static struct inet6_dev *ip6_mc_find_dev(struct net *net, dev = dev_get_by_index(net, ifindex); if (!dev) - return NULL; + goto nodev; idev = in6_dev_get(dev); - if (!idev) { - dev_put(dev); - return NULL; - } + if (!idev) + goto release; read_lock_bh(&idev->lock); - if (idev->dead) { - read_unlock_bh(&idev->lock); - in6_dev_put(idev); - dev_put(dev); - return NULL; - } + if (idev->dead) + goto unlock_release; + return idev; + +unlock_release: + read_unlock_bh(&idev->lock); + in6_dev_put(idev); +release: + dev_put(dev); +nodev: + return NULL; } void ipv6_sock_mc_close(struct sock *sk) @@ -1466,7 +1469,7 @@ static void mld_sendpack(struct sk_buff *skb) &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); - err = xfrm_lookup(&skb->dst, &fl, NULL, 0); + err = xfrm_lookup(net, &skb->dst, &fl, NULL, 0); if (err) goto err_out; @@ -1817,7 +1820,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) hdr->icmp6_cksum = csum_ipv6_magic(saddr, snd_addr, len, IPPROTO_ICMPV6, - csum_partial((__u8 *) hdr, len, 0)); + csum_partial(hdr, len, 0)); idev = in6_dev_get(skb->dev); @@ -1831,7 +1834,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); - err = xfrm_lookup(&skb->dst, &fl, NULL, 0); + err = xfrm_lookup(net, &skb->dst, &fl, NULL, 0); if (err) goto err_out; @@ -2430,9 +2433,9 @@ static int igmp6_mc_seq_show(struct seq_file *seq, void *v) struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq); seq_printf(seq, - "%-4d %-15s " NIP6_SEQFMT " %5d %08X %ld\n", + "%-4d %-15s %pi6 %5d %08X %ld\n", state->dev->ifindex, state->dev->name, - NIP6(im->mca_addr), + &im->mca_addr, im->mca_users, im->mca_flags, (im->mca_flags&MAF_TIMER_RUNNING) ? jiffies_to_clock_t(im->mca_timer.expires-jiffies) : 0); @@ -2591,10 +2594,10 @@ static int igmp6_mcf_seq_show(struct seq_file *seq, void *v) "Source Address", "INC", "EXC"); } else { seq_printf(seq, - "%3d %6.6s " NIP6_SEQFMT " " NIP6_SEQFMT " %6lu %6lu\n", + "%3d %6.6s %pi6 %pi6 %6lu %6lu\n", state->dev->ifindex, state->dev->name, - NIP6(state->im->mca_addr), - NIP6(psf->sf_addr), + &state->im->mca_addr, + &psf->sf_addr, psf->sf_count[MCAST_INCLUDE], psf->sf_count[MCAST_EXCLUDE]); } diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 31295c8f619..f995e19c87a 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -205,6 +205,7 @@ static inline int mip6_report_rl_allow(struct timeval *stamp, static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct flowi *fl) { + struct net *net = xs_net(x); struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb; struct ipv6_destopt_hao *hao = NULL; struct xfrm_selector sel; @@ -247,7 +248,7 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct sel.sport_mask = htons(~0); sel.ifindex = fl->oif; - err = km_report(IPPROTO_DSTOPTS, &sel, + err = km_report(net, IPPROTO_DSTOPTS, &sel, (hao ? (xfrm_address_t *)&hao->addr : NULL)); out: diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 172438320ee..3e2970841bd 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -437,38 +437,20 @@ static void pndisc_destructor(struct pneigh_entry *n) ipv6_dev_mc_dec(dev, &maddr); } -/* - * Send a Neighbour Advertisement - */ -static void __ndisc_send(struct net_device *dev, - struct neighbour *neigh, - const struct in6_addr *daddr, - const struct in6_addr *saddr, - struct icmp6hdr *icmp6h, const struct in6_addr *target, - int llinfo) +struct sk_buff *ndisc_build_skb(struct net_device *dev, + const struct in6_addr *daddr, + const struct in6_addr *saddr, + struct icmp6hdr *icmp6h, + const struct in6_addr *target, + int llinfo) { - struct flowi fl; - struct dst_entry *dst; struct net *net = dev_net(dev); struct sock *sk = net->ipv6.ndisc_sk; struct sk_buff *skb; struct icmp6hdr *hdr; - struct inet6_dev *idev; int len; int err; - u8 *opt, type; - - type = icmp6h->icmp6_type; - - icmpv6_flow_init(sk, &fl, type, saddr, daddr, dev->ifindex); - - dst = icmp6_dst_alloc(dev, neigh, daddr); - if (!dst) - return; - - err = xfrm_lookup(&dst, &fl, NULL, 0); - if (err < 0) - return; + u8 *opt; if (!dev->addr_len) llinfo = 0; @@ -485,8 +467,7 @@ static void __ndisc_send(struct net_device *dev, ND_PRINTK0(KERN_ERR "ICMPv6 ND: %s() failed to allocate an skb.\n", __func__); - dst_release(dst); - return; + return NULL; } skb_reserve(skb, LL_RESERVED_SPACE(dev)); @@ -510,9 +491,45 @@ static void __ndisc_send(struct net_device *dev, hdr->icmp6_cksum = csum_ipv6_magic(saddr, daddr, len, IPPROTO_ICMPV6, - csum_partial((__u8 *) hdr, + csum_partial(hdr, len, 0)); + return skb; +} + +EXPORT_SYMBOL(ndisc_build_skb); + +void ndisc_send_skb(struct sk_buff *skb, + struct net_device *dev, + struct neighbour *neigh, + const struct in6_addr *daddr, + const struct in6_addr *saddr, + struct icmp6hdr *icmp6h) +{ + struct flowi fl; + struct dst_entry *dst; + struct net *net = dev_net(dev); + struct sock *sk = net->ipv6.ndisc_sk; + struct inet6_dev *idev; + int err; + u8 type; + + type = icmp6h->icmp6_type; + + icmpv6_flow_init(sk, &fl, type, saddr, daddr, dev->ifindex); + + dst = icmp6_dst_alloc(dev, neigh, daddr); + if (!dst) { + kfree_skb(skb); + return; + } + + err = xfrm_lookup(net, &dst, &fl, NULL, 0); + if (err < 0) { + kfree_skb(skb); + return; + } + skb->dst = dst; idev = in6_dev_get(dst->dev); @@ -529,6 +546,27 @@ static void __ndisc_send(struct net_device *dev, in6_dev_put(idev); } +EXPORT_SYMBOL(ndisc_send_skb); + +/* + * Send a Neighbour Discover packet + */ +static void __ndisc_send(struct net_device *dev, + struct neighbour *neigh, + const struct in6_addr *daddr, + const struct in6_addr *saddr, + struct icmp6hdr *icmp6h, const struct in6_addr *target, + int llinfo) +{ + struct sk_buff *skb; + + skb = ndisc_build_skb(dev, daddr, saddr, icmp6h, target, llinfo); + if (!skb) + return; + + ndisc_send_skb(skb, dev, neigh, daddr, saddr, icmp6h); +} + static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, const struct in6_addr *daddr, const struct in6_addr *solicited_addr, @@ -647,11 +685,8 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) if ((probes -= neigh->parms->ucast_probes) < 0) { if (!(neigh->nud_state & NUD_VALID)) { - ND_PRINTK1(KERN_DEBUG - "%s(): trying to ucast probe in NUD_INVALID: " - NIP6_FMT "\n", - __func__, - NIP6(*target)); + ND_PRINTK1(KERN_DEBUG "%s(): trying to ucast probe in NUD_INVALID: %pI6\n", + __func__, target); } ndisc_send_ns(dev, neigh, target, target, saddr); } else if ((probes -= neigh->parms->app_probes) < 0) { @@ -912,8 +947,13 @@ static void ndisc_recv_na(struct sk_buff *skb) is invalid, but ndisc specs say nothing about it. It could be misconfiguration, or an smart proxy agent tries to help us :-) + + We should not print the error if NA has been + received from loopback - it is just our own + unsolicited advertisement. */ - ND_PRINTK1(KERN_WARNING + if (skb->pkt_type != PACKET_LOOPBACK) + ND_PRINTK1(KERN_WARNING "ICMPv6 NA: someone advertises our address on %s!\n", ifp->idev->dev->name); in6_ifa_put(ifp); @@ -1489,7 +1529,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, if (dst == NULL) return; - err = xfrm_lookup(&dst, &fl, NULL, 0); + err = xfrm_lookup(net, &dst, &fl, NULL, 0); if (err) return; @@ -1577,7 +1617,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, icmph->icmp6_cksum = csum_ipv6_magic(&saddr_buf, &ipv6_hdr(skb)->saddr, len, IPPROTO_ICMPV6, - csum_partial((u8 *) icmph, len, 0)); + csum_partial(icmph, len, 0)); buff->dst = dst; idev = in6_dev_get(dst->dev); diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index fd5b3a4e332..834cea69fb5 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -29,7 +29,7 @@ int ip6_route_me_harder(struct sk_buff *skb) #ifdef CONFIG_XFRM if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && xfrm_decode_session(skb, &fl, AF_INET6) == 0) - if (xfrm_lookup(&skb->dst, &fl, skb->sk, 0)) + if (xfrm_lookup(net, &skb->dst, &fl, skb->sk, 0)) return -1; #endif @@ -56,6 +56,7 @@ EXPORT_SYMBOL(ip6_route_me_harder); struct ip6_rt_info { struct in6_addr daddr; struct in6_addr saddr; + u_int32_t mark; }; static void nf_ip6_saveroute(const struct sk_buff *skb, @@ -68,6 +69,7 @@ static void nf_ip6_saveroute(const struct sk_buff *skb, rt_info->daddr = iph->daddr; rt_info->saddr = iph->saddr; + rt_info->mark = skb->mark; } } @@ -79,7 +81,8 @@ static int nf_ip6_reroute(struct sk_buff *skb, if (entry->hook == NF_INET_LOCAL_OUT) { struct ipv6hdr *iph = ipv6_hdr(skb); if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) || - !ipv6_addr_equal(&iph->saddr, &rt_info->saddr)) + !ipv6_addr_equal(&iph->saddr, &rt_info->saddr) || + skb->mark != rt_info->mark) return ip6_route_me_harder(skb); } return 0; diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index caa441d0956..37adf5abc51 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -61,7 +61,7 @@ static void dump_packet(const struct nf_loginfo *info, } /* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */ - printk("SRC=" NIP6_FMT " DST=" NIP6_FMT " ", NIP6(ih->saddr), NIP6(ih->daddr)); + printk("SRC=%pI6 DST=%pI6 ", &ih->saddr, &ih->daddr); /* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */ printk("LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ", @@ -364,8 +364,8 @@ static void dump_packet(const struct nf_loginfo *info, read_lock_bh(&skb->sk->sk_callback_lock); if (skb->sk->sk_socket && skb->sk->sk_socket->file) printk("UID=%u GID=%u ", - skb->sk->sk_socket->file->f_uid, - skb->sk->sk_socket->file->f_gid); + skb->sk->sk_socket->file->f_cred->fsuid, + skb->sk->sk_socket->file->f_cred->fsgid); read_unlock_bh(&skb->sk->sk_callback_lock); } @@ -424,9 +424,8 @@ ip6t_log_packet(u_int8_t pf, if (skb->dev->type == ARPHRD_SIT) { const struct iphdr *iph = (struct iphdr *)skb_mac_header(skb); - printk("TUNNEL=%u.%u.%u.%u->%u.%u.%u.%u ", - NIPQUAD(iph->saddr), - NIPQUAD(iph->daddr)); + printk("TUNNEL=%pI4->%pI4 ", + &iph->saddr, &iph->daddr); } } else printk(" "); diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 0981b4ccb8b..5a2d0a41694 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -97,7 +97,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) dst = ip6_route_output(net, NULL, &fl); if (dst == NULL) return; - if (dst->error || xfrm_lookup(&dst, &fl, NULL, 0)) + if (dst->error || xfrm_lookup(net, &dst, &fl, NULL, 0)) return; hh_len = (dst->dev->hard_header_len + 15)&~15; diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index b110a8a85a1..40d2e36d8fa 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -61,7 +61,7 @@ static struct xt_table packet_filter = { /* The work comes in here from netfilter.c. */ static unsigned int -ip6t_local_in_hook(unsigned int hook, +ip6t_in_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -72,17 +72,6 @@ ip6t_local_in_hook(unsigned int hook, } static unsigned int -ip6t_forward_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - return ip6t_do_table(skb, hook, in, out, - dev_net(in)->ipv6.ip6table_filter); -} - -static unsigned int ip6t_local_out_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in, @@ -105,14 +94,14 @@ ip6t_local_out_hook(unsigned int hook, static struct nf_hook_ops ip6t_ops[] __read_mostly = { { - .hook = ip6t_local_in_hook, + .hook = ip6t_in_hook, .owner = THIS_MODULE, .pf = PF_INET6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_FILTER, }, { - .hook = ip6t_forward_hook, + .hook = ip6t_in_hook, .owner = THIS_MODULE, .pf = PF_INET6, .hooknum = NF_INET_FORWARD, diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index e91db16611d..727b9530448 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -56,9 +56,8 @@ static bool ipv6_invert_tuple(struct nf_conntrack_tuple *tuple, static int ipv6_print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple) { - return seq_printf(s, "src=" NIP6_FMT " dst=" NIP6_FMT " ", - NIP6(*((struct in6_addr *)tuple->src.u3.ip6)), - NIP6(*((struct in6_addr *)tuple->dst.u3.ip6))); + return seq_printf(s, "src=%pI6 dst=%pI6 ", + tuple->src.u3.ip6, tuple->dst.u3.ip6); } /* diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 05726177903..bd52151d31e 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -253,7 +253,7 @@ static struct ctl_table icmpv6_sysctl_table[] = { .data = &nf_ct_icmpv6_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = 0 diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 9967ac7a01a..ed4d79a9e4a 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -80,7 +80,7 @@ struct ctl_table nf_ct_ipv6_sysctl_table[] = { .data = &nf_init_frags.timeout, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = NET_NF_CONNTRACK_FRAG6_LOW_THRESH, @@ -88,7 +88,7 @@ struct ctl_table nf_ct_ipv6_sysctl_table[] = { .data = &nf_init_frags.low_thresh, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_NF_CONNTRACK_FRAG6_HIGH_THRESH, @@ -96,7 +96,7 @@ struct ctl_table nf_ct_ipv6_sysctl_table[] = { .data = &nf_init_frags.high_thresh, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = 0 } }; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 2ba04d41dc2..61f6827e590 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -860,7 +860,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) { + err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); + if (err < 0) { if (err == -EREMOTE) err = ip6_dst_blackhole(sk, &dst, &fl); if (err < 0) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index af12de071f4..3c575118fca 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -642,7 +642,7 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = { .data = &init_net.ipv6.frags.high_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV6_IP6FRAG_LOW_THRESH, @@ -650,7 +650,7 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = { .data = &init_net.ipv6.frags.low_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IPV6_IP6FRAG_TIME, @@ -658,8 +658,8 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = { .data = &init_net.ipv6.frags.timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { } }; @@ -671,8 +671,8 @@ static struct ctl_table ip6_frags_ctl_table[] = { .data = &ip6_frags.secret_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies }, { } }; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 89dc6992434..18c486cf498 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -108,7 +108,6 @@ static struct dst_ops ip6_dst_ops_template = { .link_failure = ip6_link_failure, .update_pmtu = ip6_rt_update_pmtu, .local_out = __ip6_local_out, - .entry_size = sizeof(struct rt6_info), .entries = ATOMIC_INIT(0), }; @@ -122,7 +121,6 @@ static struct dst_ops ip6_dst_blackhole_ops = { .destroy = ip6_dst_destroy, .check = ip6_dst_check, .update_pmtu = ip6_rt_blackhole_update_pmtu, - .entry_size = sizeof(struct rt6_info), .entries = ATOMIC_INIT(0), }; @@ -2196,7 +2194,7 @@ static int rt6_fill_node(struct net *net, if (iif) { #ifdef CONFIG_IPV6_MROUTE if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) { - int err = ip6mr_get_route(skb, rtm, nowait); + int err = ip6mr_get_route(net, skb, rtm, nowait); if (err <= 0) { if (!nowait) { if (err == 0) @@ -2408,19 +2406,16 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) { struct seq_file *m = p_arg; - seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_dst.addr), - rt->rt6i_dst.plen); + seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); #ifdef CONFIG_IPV6_SUBTREES - seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_src.addr), - rt->rt6i_src.plen); + seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen); #else seq_puts(m, "00000000000000000000000000000000 00 "); #endif if (rt->rt6i_nexthop) { - seq_printf(m, NIP6_SEQFMT, - NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key))); + seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key); } else { seq_puts(m, "00000000000000000000000000000000"); } @@ -2502,7 +2497,7 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.flush_delay, .maxlen = sizeof(int), .mode = 0200, - .proc_handler = &ipv6_sysctl_rtcache_flush + .proc_handler = ipv6_sysctl_rtcache_flush }, { .ctl_name = NET_IPV6_ROUTE_GC_THRESH, @@ -2510,7 +2505,7 @@ ctl_table ipv6_route_table_template[] = { .data = &ip6_dst_ops_template.gc_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_ROUTE_MAX_SIZE, @@ -2518,7 +2513,7 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.ip6_rt_max_size, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL, @@ -2526,8 +2521,8 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT, @@ -2535,8 +2530,8 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL, @@ -2544,8 +2539,8 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY, @@ -2553,8 +2548,8 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES, @@ -2562,8 +2557,8 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS, @@ -2571,8 +2566,8 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS, @@ -2580,8 +2575,8 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_ms_jiffies, - .strategy = &sysctl_ms_jiffies, + .proc_handler = proc_dointvec_ms_jiffies, + .strategy = sysctl_ms_jiffies, }, { .ctl_name = 0 } }; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index b7a50e96850..d3467e563f0 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -62,8 +62,8 @@ #define HASH_SIZE 16 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) -static int ipip6_fb_tunnel_init(struct net_device *dev); -static int ipip6_tunnel_init(struct net_device *dev); +static void ipip6_fb_tunnel_init(struct net_device *dev); +static void ipip6_tunnel_init(struct net_device *dev); static void ipip6_tunnel_setup(struct net_device *dev); static int sit_net_id; @@ -188,7 +188,8 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct net *net, } nt = netdev_priv(dev); - dev->init = ipip6_tunnel_init; + ipip6_tunnel_init(dev); + nt->parms = *parms; if (parms->i_flags & SIT_ISATAP) @@ -926,13 +927,17 @@ static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu) return 0; } +static const struct net_device_ops ipip6_netdev_ops = { + .ndo_uninit = ipip6_tunnel_uninit, + .ndo_start_xmit = ipip6_tunnel_xmit, + .ndo_do_ioctl = ipip6_tunnel_ioctl, + .ndo_change_mtu = ipip6_tunnel_change_mtu, +}; + static void ipip6_tunnel_setup(struct net_device *dev) { - dev->uninit = ipip6_tunnel_uninit; + dev->netdev_ops = &ipip6_netdev_ops; dev->destructor = free_netdev; - dev->hard_start_xmit = ipip6_tunnel_xmit; - dev->do_ioctl = ipip6_tunnel_ioctl; - dev->change_mtu = ipip6_tunnel_change_mtu; dev->type = ARPHRD_SIT; dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); @@ -943,11 +948,9 @@ static void ipip6_tunnel_setup(struct net_device *dev) dev->features |= NETIF_F_NETNS_LOCAL; } -static int ipip6_tunnel_init(struct net_device *dev) +static void ipip6_tunnel_init(struct net_device *dev) { - struct ip_tunnel *tunnel; - - tunnel = netdev_priv(dev); + struct ip_tunnel *tunnel = netdev_priv(dev); tunnel->dev = dev; strcpy(tunnel->parms.name, dev->name); @@ -956,11 +959,9 @@ static int ipip6_tunnel_init(struct net_device *dev) memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); ipip6_tunnel_bind_dev(dev); - - return 0; } -static int ipip6_fb_tunnel_init(struct net_device *dev) +static void ipip6_fb_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct iphdr *iph = &tunnel->parms.iph; @@ -977,7 +978,6 @@ static int ipip6_fb_tunnel_init(struct net_device *dev) dev_hold(dev); sitn->tunnels_wc[0] = tunnel; - return 0; } static struct xfrm_tunnel sit_handler = { @@ -1025,16 +1025,17 @@ static int sit_init_net(struct net *net) err = -ENOMEM; goto err_alloc_dev; } - - sitn->fb_tunnel_dev->init = ipip6_fb_tunnel_init; dev_net_set(sitn->fb_tunnel_dev, net); + ipip6_fb_tunnel_init(sitn->fb_tunnel_dev); + if ((err = register_netdev(sitn->fb_tunnel_dev))) goto err_reg_dev; return 0; err_reg_dev: + dev_put(sitn->fb_tunnel_dev); free_netdev(sitn->fb_tunnel_dev); err_alloc_dev: /* nothing */ diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 676c80b5b14..711175e0571 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -259,7 +259,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0) + if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) goto out_free; } diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 587f8f60c48..9048fe7e7ea 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -35,7 +35,7 @@ static ctl_table ipv6_table_template[] = { .data = &init_net.ipv6.sysctl.bindv6only, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = 0 } }; @@ -47,7 +47,7 @@ static ctl_table ipv6_table[] = { .data = &sysctl_mld_max_msf, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = 0 } }; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b6b356b7912..8702b06cb60 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -260,7 +260,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) { + err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); + if (err < 0) { if (err == -EREMOTE) err = ip6_dst_blackhole(sk, &dst, &fl); if (err < 0) @@ -390,7 +391,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; } - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { + if ((err = xfrm_lookup(net, &dst, &fl, sk, 0)) < 0) { sk->sk_err_soft = -err; goto out; } @@ -492,7 +493,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req) goto done; if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) + if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) goto done; skb = tcp_make_synack(sk, dst, req); @@ -501,7 +502,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req) th->check = tcp_v6_check(th, skb->len, &treq->loc_addr, &treq->rmt_addr, - csum_partial((char *)th, skb->len, skb->csum)); + csum_partial(th, skb->len, skb->csum)); ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); err = ip6_xmit(sk, skb, &fl, opt, 0); @@ -872,12 +873,10 @@ static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb) if (genhash || memcmp(hash_location, newhash, 16) != 0) { if (net_ratelimit()) { - printk(KERN_INFO "MD5 Hash %s for " - "(" NIP6_FMT ", %u)->" - "(" NIP6_FMT ", %u)\n", + printk(KERN_INFO "MD5 Hash %s for (%pI6, %u)->(%pI6, %u)\n", genhash ? "failed" : "mismatch", - NIP6(ip6h->saddr), ntohs(th->source), - NIP6(ip6h->daddr), ntohs(th->dest)); + &ip6h->saddr, ntohs(th->source), + &ip6h->daddr, ntohs(th->dest)); } return 1; } @@ -917,7 +916,7 @@ static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb) skb->csum_offset = offsetof(struct tcphdr, check); } else { th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, - csum_partial((char *)th, th->doff<<2, + csum_partial(th, th->doff<<2, skb->csum)); } } @@ -999,7 +998,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, } #endif - buff->csum = csum_partial((char *)t1, tot_len, 0); + buff->csum = csum_partial(t1, tot_len, 0); memset(&fl, 0, sizeof(fl)); ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr); @@ -1020,7 +1019,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, * namespace */ if (!ip6_dst_lookup(ctl_sk, &buff->dst, &fl)) { - if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) { + if (xfrm_lookup(net, &buff->dst, &fl, NULL, 0) >= 0) { ip6_xmit(ctl_sk, buff, &fl, NULL, 0); TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); if (rst) @@ -1318,7 +1317,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0) + if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) goto out; } @@ -1831,7 +1830,7 @@ static int tcp_v6_init_sock(struct sock *sk) sk->sk_sndbuf = sysctl_tcp_wmem[1]; sk->sk_rcvbuf = sysctl_tcp_rmem[1]; - atomic_inc(&tcp_sockets_allocated); + percpu_counter_inc(&tcp_sockets_allocated); return 0; } @@ -2045,6 +2044,7 @@ struct proto tcpv6_prot = { .sysctl_rmem = sysctl_tcp_rmem, .max_header = MAX_TCP_HEADER, .obj_size = sizeof(struct tcp6_sock), + .slab_flags = SLAB_DESTROY_BY_RCU, .twsk_prot = &tcp6_timewait_sock_ops, .rsk_prot = &tcp6_request_sock_ops, .h.hashinfo = &tcp_hashinfo, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 8b48512ebf6..84b1a296eec 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -54,62 +54,91 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum) return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal); } +static inline int compute_score(struct sock *sk, struct net *net, + unsigned short hnum, + struct in6_addr *saddr, __be16 sport, + struct in6_addr *daddr, __be16 dport, + int dif) +{ + int score = -1; + + if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && + sk->sk_family == PF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + struct inet_sock *inet = inet_sk(sk); + + score = 0; + if (inet->dport) { + if (inet->dport != sport) + return -1; + score++; + } + if (!ipv6_addr_any(&np->rcv_saddr)) { + if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) + return -1; + score++; + } + if (!ipv6_addr_any(&np->daddr)) { + if (!ipv6_addr_equal(&np->daddr, saddr)) + return -1; + score++; + } + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + return -1; + score++; + } + } + return score; +} + static struct sock *__udp6_lib_lookup(struct net *net, struct in6_addr *saddr, __be16 sport, struct in6_addr *daddr, __be16 dport, - int dif, struct hlist_head udptable[]) + int dif, struct udp_table *udptable) { - struct sock *sk, *result = NULL; - struct hlist_node *node; + struct sock *sk, *result; + struct hlist_nulls_node *node; unsigned short hnum = ntohs(dport); - int badness = -1; - - read_lock(&udp_hash_lock); - sk_for_each(sk, node, &udptable[udp_hashfn(net, hnum)]) { - struct inet_sock *inet = inet_sk(sk); - - if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && - sk->sk_family == PF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); - int score = 0; - if (inet->dport) { - if (inet->dport != sport) - continue; - score++; - } - if (!ipv6_addr_any(&np->rcv_saddr)) { - if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) - continue; - score++; - } - if (!ipv6_addr_any(&np->daddr)) { - if (!ipv6_addr_equal(&np->daddr, saddr)) - continue; - score++; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - continue; - score++; - } - if (score == 4) { - result = sk; - break; - } else if (score > badness) { - result = sk; - badness = score; - } + unsigned int hash = udp_hashfn(net, hnum); + struct udp_hslot *hslot = &udptable->hash[hash]; + int score, badness; + + rcu_read_lock(); +begin: + result = NULL; + badness = -1; + sk_nulls_for_each_rcu(sk, node, &hslot->head) { + score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif); + if (score > badness) { + result = sk; + badness = score; } } - if (result) - sock_hold(result); - read_unlock(&udp_hash_lock); + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(node) != hash) + goto begin; + + if (result) { + if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) + result = NULL; + else if (unlikely(compute_score(result, net, hnum, saddr, sport, + daddr, dport, dif) < badness)) { + sock_put(result); + goto begin; + } + } + rcu_read_unlock(); return result; } static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport, - struct hlist_head udptable[]) + struct udp_table *udptable) { struct sock *sk; struct ipv6hdr *iph = ipv6_hdr(skb); @@ -253,7 +282,7 @@ csum_copy_err: void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __be32 info, - struct hlist_head udptable[] ) + struct udp_table *udptable) { struct ipv6_pinfo *np; struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data; @@ -289,7 +318,7 @@ static __inline__ void udpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __be32 info ) { - __udp6_lib_err(skb, opt, type, code, offset, info, udp_hash); + __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table); } int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) @@ -347,11 +376,11 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk, __be16 rmt_port, struct in6_addr *rmt_addr, int dif) { - struct hlist_node *node; + struct hlist_nulls_node *node; struct sock *s = sk; unsigned short num = ntohs(loc_port); - sk_for_each_from(s, node) { + sk_nulls_for_each_from(s, node) { struct inet_sock *inet = inet_sk(s); if (!net_eq(sock_net(s), net)) @@ -388,14 +417,15 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk, */ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, struct in6_addr *saddr, struct in6_addr *daddr, - struct hlist_head udptable[]) + struct udp_table *udptable) { struct sock *sk, *sk2; const struct udphdr *uh = udp_hdr(skb); + struct udp_hslot *hslot = &udptable->hash[udp_hashfn(net, ntohs(uh->dest))]; int dif; - read_lock(&udp_hash_lock); - sk = sk_head(&udptable[udp_hashfn(net, ntohs(uh->dest))]); + spin_lock(&hslot->lock); + sk = sk_nulls_head(&hslot->head); dif = inet6_iif(skb); sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); if (!sk) { @@ -404,7 +434,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, } sk2 = sk; - while ((sk2 = udp_v6_mcast_next(net, sk_next(sk2), uh->dest, daddr, + while ((sk2 = udp_v6_mcast_next(net, sk_nulls_next(sk2), uh->dest, daddr, uh->source, saddr, dif))) { struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC); if (buff) { @@ -423,7 +453,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, sk_add_backlog(sk, skb); bh_unlock_sock(sk); out: - read_unlock(&udp_hash_lock); + spin_unlock(&hslot->lock); return 0; } @@ -461,7 +491,7 @@ static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, return 0; } -int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], +int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, int proto) { struct sock *sk; @@ -558,7 +588,7 @@ discard: static __inline__ int udpv6_rcv(struct sk_buff *skb) { - return __udp6_lib_rcv(skb, udp_hash, IPPROTO_UDP); + return __udp6_lib_rcv(skb, &udp_table, IPPROTO_UDP); } /* @@ -763,6 +793,9 @@ do_udp_sendmsg: if (!fl.oif) fl.oif = sk->sk_bound_dev_if; + if (!fl.oif) + fl.oif = np->sticky_pktinfo.ipi6_ifindex; + if (msg->msg_controllen) { opt = &opt_space; memset(opt, 0, sizeof(struct ipv6_txoptions)); @@ -819,7 +852,8 @@ do_udp_sendmsg: if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) { + err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); + if (err < 0) { if (err == -EREMOTE) err = ip6_dst_blackhole(sk, &dst, &fl); if (err < 0) @@ -1022,7 +1056,7 @@ int udp6_seq_show(struct seq_file *seq, void *v) static struct udp_seq_afinfo udp6_seq_afinfo = { .name = "udp6", .family = AF_INET6, - .hashtable = udp_hash, + .udp_table = &udp_table, .seq_fops = { .owner = THIS_MODULE, }, @@ -1064,7 +1098,8 @@ struct proto udpv6_prot = { .sysctl_wmem = &sysctl_udp_wmem_min, .sysctl_rmem = &sysctl_udp_rmem_min, .obj_size = sizeof(struct udp6_sock), - .h.udp_hash = udp_hash, + .slab_flags = SLAB_DESTROY_BY_RCU, + .h.udp_table = &udp_table, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udpv6_setsockopt, .compat_getsockopt = compat_udpv6_getsockopt, diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index 92dd7da766d..23779208c33 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -7,9 +7,9 @@ #include <net/inet_common.h> #include <net/transp_v6.h> -extern int __udp6_lib_rcv(struct sk_buff *, struct hlist_head [], int ); +extern int __udp6_lib_rcv(struct sk_buff *, struct udp_table *, int ); extern void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, - int , int , int , __be32 , struct hlist_head []); + int , int , int , __be32 , struct udp_table *); extern int udp_v6_get_port(struct sock *sk, unsigned short snum); diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 3cd1a1ac3d6..ba162a82458 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -15,14 +15,14 @@ static int udplitev6_rcv(struct sk_buff *skb) { - return __udp6_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE); + return __udp6_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE); } static void udplitev6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __be32 info) { - __udp6_lib_err(skb, opt, type, code, offset, info, udplite_hash); + __udp6_lib_err(skb, opt, type, code, offset, info, &udplite_table); } static struct inet6_protocol udplitev6_protocol = { @@ -49,7 +49,8 @@ struct proto udplitev6_prot = { .unhash = udp_lib_unhash, .get_port = udp_v6_get_port, .obj_size = sizeof(struct udp6_sock), - .h.udp_hash = udplite_hash, + .slab_flags = SLAB_DESTROY_BY_RCU, + .h.udp_table = &udplite_table, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udpv6_setsockopt, .compat_getsockopt = compat_udpv6_getsockopt, @@ -95,7 +96,7 @@ void udplitev6_exit(void) static struct udp_seq_afinfo udplite6_seq_afinfo = { .name = "udplite6", .family = AF_INET6, - .hashtable = udplite_hash, + .udp_table = &udplite_table, .seq_fops = { .owner = THIS_MODULE, }, diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index a71c7ddcb41..9084582d236 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -58,6 +58,7 @@ EXPORT_SYMBOL(xfrm6_rcv); int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto) { + struct net *net = dev_net(skb->dev); struct xfrm_state *x = NULL; int i = 0; @@ -67,7 +68,7 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, sp = secpath_dup(skb->sp); if (!sp) { - XFRM_INC_STATS(LINUX_MIB_XFRMINERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR); goto drop; } if (skb->sp) @@ -76,7 +77,7 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, } if (1 + skb->sp->len == XFRM_MAX_DEPTH) { - XFRM_INC_STATS(LINUX_MIB_XFRMINBUFFERERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); goto drop; } @@ -100,7 +101,7 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, break; } - x = xfrm_state_lookup_byaddr(dst, src, proto, AF_INET6); + x = xfrm_state_lookup_byaddr(net, dst, src, proto, AF_INET6); if (!x) continue; @@ -122,7 +123,7 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, } if (!x) { - XFRM_INC_STATS(LINUX_MIB_XFRMINNOSTATES); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES); xfrm_audit_state_notfound_simple(skb, AF_INET6); goto drop; } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 08e4cbbe3f0..97ab068e8cc 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -27,7 +27,8 @@ static struct dst_ops xfrm6_dst_ops; static struct xfrm_policy_afinfo xfrm6_policy_afinfo; -static struct dst_entry *xfrm6_dst_lookup(int tos, xfrm_address_t *saddr, +static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, + xfrm_address_t *saddr, xfrm_address_t *daddr) { struct flowi fl = {}; @@ -38,7 +39,7 @@ static struct dst_entry *xfrm6_dst_lookup(int tos, xfrm_address_t *saddr, if (saddr) memcpy(&fl.fl6_src, saddr, sizeof(fl.fl6_src)); - dst = ip6_route_output(&init_net, NULL, &fl); + dst = ip6_route_output(net, NULL, &fl); err = dst->error; if (dst->error) { @@ -49,12 +50,13 @@ static struct dst_entry *xfrm6_dst_lookup(int tos, xfrm_address_t *saddr, return dst; } -static int xfrm6_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr) +static int xfrm6_get_saddr(struct net *net, + xfrm_address_t *saddr, xfrm_address_t *daddr) { struct dst_entry *dst; struct net_device *dev; - dst = xfrm6_dst_lookup(0, NULL, daddr); + dst = xfrm6_dst_lookup(net, 0, NULL, daddr); if (IS_ERR(dst)) return -EHOSTUNREACH; @@ -144,6 +146,7 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) static inline void _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) { + int onlyproto = 0; u16 offset = skb_network_header_len(skb); struct ipv6hdr *hdr = ipv6_hdr(skb); struct ipv6_opt_hdr *exthdr; @@ -159,6 +162,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) exthdr = (struct ipv6_opt_hdr *)(nh + offset); switch (nexthdr) { + case NEXTHDR_FRAGMENT: + onlyproto = 1; case NEXTHDR_ROUTING: case NEXTHDR_HOP: case NEXTHDR_DEST: @@ -172,7 +177,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) case IPPROTO_TCP: case IPPROTO_SCTP: case IPPROTO_DCCP: - if (pskb_may_pull(skb, nh + offset + 4 - skb->data)) { + if (!onlyproto && pskb_may_pull(skb, nh + offset + 4 - skb->data)) { __be16 *ports = (__be16 *)exthdr; fl->fl_ip_sport = ports[!!reverse]; @@ -182,7 +187,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) return; case IPPROTO_ICMPV6: - if (pskb_may_pull(skb, nh + offset + 2 - skb->data)) { + if (!onlyproto && pskb_may_pull(skb, nh + offset + 2 - skb->data)) { u8 *icmp = (u8 *)exthdr; fl->fl_icmp_type = icmp[0]; @@ -193,7 +198,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) case IPPROTO_MH: - if (pskb_may_pull(skb, nh + offset + 3 - skb->data)) { + if (!onlyproto && pskb_may_pull(skb, nh + offset + 3 - skb->data)) { struct ip6_mh *mh; mh = (struct ip6_mh *)exthdr; @@ -217,7 +222,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) static inline int xfrm6_garbage_collect(struct dst_ops *ops) { - xfrm6_policy_afinfo.garbage_collect(); + xfrm6_policy_afinfo.garbage_collect(&init_net); return (atomic_read(&xfrm6_dst_ops.entries) > xfrm6_dst_ops.gc_thresh*2); } @@ -274,7 +279,6 @@ static struct dst_ops xfrm6_dst_ops = { .ifdown = xfrm6_dst_ifdown, .local_out = __ip6_local_out, .gc_thresh = 1024, - .entry_size = sizeof(struct xfrm_dst), .entries = ATOMIC_INIT(0), }; diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 60c78cfc273..0e685b05496 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -19,8 +19,6 @@ #include <net/ipv6.h> #include <net/addrconf.h> -static struct xfrm_state_afinfo xfrm6_state_afinfo; - static void __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl, struct xfrm_tmpl *tmpl, diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index c2b27813860..80193db224d 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -345,24 +345,23 @@ static struct xfrm6_tunnel xfrm46_tunnel_handler = { static int __init xfrm6_tunnel_init(void) { if (xfrm_register_type(&xfrm6_tunnel_type, AF_INET6) < 0) - return -EAGAIN; - - if (xfrm6_tunnel_register(&xfrm6_tunnel_handler, AF_INET6)) { - xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6); - return -EAGAIN; - } - if (xfrm6_tunnel_register(&xfrm46_tunnel_handler, AF_INET)) { - xfrm6_tunnel_deregister(&xfrm6_tunnel_handler, AF_INET6); - xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6); - return -EAGAIN; - } - if (xfrm6_tunnel_spi_init() < 0) { - xfrm6_tunnel_deregister(&xfrm46_tunnel_handler, AF_INET); - xfrm6_tunnel_deregister(&xfrm6_tunnel_handler, AF_INET6); - xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6); - return -EAGAIN; - } + goto err; + if (xfrm6_tunnel_register(&xfrm6_tunnel_handler, AF_INET6)) + goto unreg; + if (xfrm6_tunnel_register(&xfrm46_tunnel_handler, AF_INET)) + goto dereg6; + if (xfrm6_tunnel_spi_init() < 0) + goto dereg46; return 0; + +dereg46: + xfrm6_tunnel_deregister(&xfrm46_tunnel_handler, AF_INET); +dereg6: + xfrm6_tunnel_deregister(&xfrm6_tunnel_handler, AF_INET6); +unreg: + xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6); +err: + return -EAGAIN; } static void __exit xfrm6_tunnel_fini(void) diff --git a/net/ipx/sysctl_net_ipx.c b/net/ipx/sysctl_net_ipx.c index 92fef864e85..633fcab3558 100644 --- a/net/ipx/sysctl_net_ipx.c +++ b/net/ipx/sysctl_net_ipx.c @@ -23,7 +23,7 @@ static struct ctl_table ipx_table[] = { .data = &sysctl_ipx_pprop_broadcasting, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { 0 }, }; diff --git a/net/irda/irlan/irlan_client.c b/net/irda/irlan/irlan_client.c index 6be1ec26b30..42f7d960d05 100644 --- a/net/irda/irlan/irlan_client.c +++ b/net/irda/irlan/irlan_client.c @@ -436,7 +436,6 @@ static void irlan_check_response_param(struct irlan_cb *self, char *param, __u16 tmp_cpu; /* Temporary value in host order */ __u8 *bytes; int i; - DECLARE_MAC_BUF(mac); IRDA_DEBUG(4, "%s(), parm=%s\n", __func__ , param); @@ -521,8 +520,7 @@ static void irlan_check_response_param(struct irlan_cb *self, char *param, /* FILTER_ENTRY, have we got an ethernet address? */ if (strcmp(param, "FILTER_ENTRY") == 0) { bytes = value; - IRDA_DEBUG(4, "Ethernet address = %s\n", - print_mac(mac, bytes)); + IRDA_DEBUG(4, "Ethernet address = %pM\n", bytes); for (i = 0; i < 6; i++) self->dev->dev_addr[i] = bytes[i]; } diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c index 9a1cd87e714..774d73a7685 100644 --- a/net/irda/irlan/irlan_common.c +++ b/net/irda/irlan/irlan_common.c @@ -207,7 +207,7 @@ static struct irlan_cb *irlan_open(__u32 saddr, __u32 daddr) if (!dev) return NULL; - self = dev->priv; + self = netdev_priv(dev); self->dev = dev; /* diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c index f17b65af9c9..2562ebc1b22 100644 --- a/net/irda/irlap_frame.c +++ b/net/irda/irlap_frame.c @@ -1325,6 +1325,7 @@ int irlap_driver_rcv(struct sk_buff *skb, struct net_device *dev, struct irlap_cb *self; int command; __u8 control; + int ret = -1; if (!net_eq(dev_net(dev), &init_net)) goto out; @@ -1333,25 +1334,21 @@ int irlap_driver_rcv(struct sk_buff *skb, struct net_device *dev, self = (struct irlap_cb *) dev->atalk_ptr; /* If the net device is down, then IrLAP is gone! */ - if (!self || self->magic != LAP_MAGIC) { - dev_kfree_skb(skb); - return -1; - } + if (!self || self->magic != LAP_MAGIC) + goto err; /* We are no longer an "old" protocol, so we need to handle * share and non linear skbs. This should never happen, so * we don't need to be clever about it. Jean II */ if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) { IRDA_ERROR("%s: can't clone shared skb!\n", __func__); - dev_kfree_skb(skb); - return -1; + goto err; } /* Check if frame is large enough for parsing */ if (!pskb_may_pull(skb, 2)) { IRDA_ERROR("%s: frame too short!\n", __func__); - dev_kfree_skb(skb); - return -1; + goto err; } command = skb->data[0] & CMD_FRAME; @@ -1442,7 +1439,9 @@ int irlap_driver_rcv(struct sk_buff *skb, struct net_device *dev, break; } out: + ret = 0; +err: /* Always drop our reference on the skb */ dev_kfree_skb(skb); - return 0; + return ret; } diff --git a/net/irda/irsysctl.c b/net/irda/irsysctl.c index 9ab3df15425..57f8817c397 100644 --- a/net/irda/irsysctl.c +++ b/net/irda/irsysctl.c @@ -118,8 +118,8 @@ static ctl_table irda_table[] = { .data = &sysctl_discovery, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &do_discovery, - .strategy = &sysctl_intvec + .proc_handler = do_discovery, + .strategy = sysctl_intvec }, { .ctl_name = NET_IRDA_DEVNAME, @@ -127,8 +127,8 @@ static ctl_table irda_table[] = { .data = sysctl_devname, .maxlen = 65, .mode = 0644, - .proc_handler = &do_devname, - .strategy = &sysctl_string + .proc_handler = do_devname, + .strategy = sysctl_string }, #ifdef CONFIG_IRDA_DEBUG { @@ -137,7 +137,7 @@ static ctl_table irda_table[] = { .data = &irda_debug, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, #endif #ifdef CONFIG_IRDA_FAST_RR @@ -147,7 +147,7 @@ static ctl_table irda_table[] = { .data = &sysctl_fast_poll_increase, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, #endif { @@ -156,8 +156,8 @@ static ctl_table irda_table[] = { .data = &sysctl_discovery_slots, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_discovery_slots, .extra2 = &max_discovery_slots }, @@ -167,7 +167,7 @@ static ctl_table irda_table[] = { .data = &sysctl_discovery_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = NET_IRDA_SLOT_TIMEOUT, @@ -175,8 +175,8 @@ static ctl_table irda_table[] = { .data = &sysctl_slot_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_slot_timeout, .extra2 = &max_slot_timeout }, @@ -186,8 +186,8 @@ static ctl_table irda_table[] = { .data = &sysctl_max_baud_rate, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_max_baud_rate, .extra2 = &max_max_baud_rate }, @@ -197,8 +197,8 @@ static ctl_table irda_table[] = { .data = &sysctl_min_tx_turn_time, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_min_tx_turn_time, .extra2 = &max_min_tx_turn_time }, @@ -208,8 +208,8 @@ static ctl_table irda_table[] = { .data = &sysctl_max_tx_data_size, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_max_tx_data_size, .extra2 = &max_max_tx_data_size }, @@ -219,8 +219,8 @@ static ctl_table irda_table[] = { .data = &sysctl_max_tx_window, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_max_tx_window, .extra2 = &max_max_tx_window }, @@ -230,8 +230,8 @@ static ctl_table irda_table[] = { .data = &sysctl_max_noreply_time, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_max_noreply_time, .extra2 = &max_max_noreply_time }, @@ -241,8 +241,8 @@ static ctl_table irda_table[] = { .data = &sysctl_warn_noreply_time, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_warn_noreply_time, .extra2 = &max_warn_noreply_time }, @@ -252,8 +252,8 @@ static ctl_table irda_table[] = { .data = &sysctl_lap_keepalive_time, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_lap_keepalive_time, .extra2 = &max_lap_keepalive_time }, diff --git a/net/irda/irttp.c b/net/irda/irttp.c index 74e439e8082..ecf4eb2717c 100644 --- a/net/irda/irttp.c +++ b/net/irda/irttp.c @@ -201,7 +201,7 @@ static void irttp_todo_expired(unsigned long data) * * Flushes (removes all frames) in transitt-buffer (tx_list) */ -void irttp_flush_queues(struct tsap_cb *self) +static void irttp_flush_queues(struct tsap_cb *self) { struct sk_buff* skb; @@ -1266,9 +1266,9 @@ static void irttp_connect_confirm(void *instance, void *sap, * Some other device is connecting to this TSAP * */ -void irttp_connect_indication(void *instance, void *sap, struct qos_info *qos, - __u32 max_seg_size, __u8 max_header_size, - struct sk_buff *skb) +static void irttp_connect_indication(void *instance, void *sap, + struct qos_info *qos, __u32 max_seg_size, __u8 max_header_size, + struct sk_buff *skb) { struct tsap_cb *self; struct lsap_cb *lsap; @@ -1579,8 +1579,8 @@ EXPORT_SYMBOL(irttp_disconnect_request); * Disconnect indication, TSAP disconnected by peer? * */ -void irttp_disconnect_indication(void *instance, void *sap, LM_REASON reason, - struct sk_buff *skb) +static void irttp_disconnect_indication(void *instance, void *sap, + LM_REASON reason, struct sk_buff *skb) { struct tsap_cb *self; @@ -1664,7 +1664,7 @@ static void irttp_do_data_indication(struct tsap_cb *self, struct sk_buff *skb) * Check if we have any frames to be transmitted, or if we have any * available credit to give away. */ -void irttp_run_rx_queue(struct tsap_cb *self) +static void irttp_run_rx_queue(struct tsap_cb *self) { struct sk_buff *skb; int more = 0; diff --git a/net/irda/timer.c b/net/irda/timer.c index d730099080a..0335ba0cc59 100644 --- a/net/irda/timer.c +++ b/net/irda/timer.c @@ -219,7 +219,7 @@ static void irlap_backoff_timer_expired(void *data) * * */ -void irlap_media_busy_expired(void* data) +static void irlap_media_busy_expired(void *data) { struct irlap_cb *self = (struct irlap_cb *) data; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 29f7baa2511..af3192d2a5a 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -8,6 +8,9 @@ * Author(s): Jennifer Hunt <jenhunt@us.ibm.com> */ +#define KMSG_COMPONENT "af_iucv" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include <linux/module.h> #include <linux/types.h> #include <linux/list.h> @@ -616,6 +619,8 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct iucv_sock *iucv = iucv_sk(sk); struct sk_buff *skb; struct iucv_message txmsg; + char user_id[9]; + char appl_id[9]; int err; err = sock_error(sk); @@ -651,8 +656,15 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, err = iucv_message_send(iucv->path, &txmsg, 0, 0, (void *) skb->data, skb->len); if (err) { - if (err == 3) - printk(KERN_ERR "AF_IUCV msg limit exceeded\n"); + if (err == 3) { + user_id[8] = 0; + memcpy(user_id, iucv->dst_user_id, 8); + appl_id[8] = 0; + memcpy(appl_id, iucv->dst_name, 8); + pr_err("Application %s on z/VM guest %s" + " exceeds message limit\n", + user_id, appl_id); + } skb_unlink(skb, &iucv->send_skb_q); err = -EPIPE; goto fail; @@ -1190,7 +1202,8 @@ static int __init afiucv_init(void) int err; if (!MACHINE_IS_VM) { - printk(KERN_ERR "AF_IUCV connection needs VM as base\n"); + pr_err("The af_iucv module cannot be loaded" + " without z/VM\n"); err = -EPROTONOSUPPORT; goto out; } diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index d7b54b5bfa6..8f57d4f4328 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -30,6 +30,9 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#define KMSG_COMPONENT "iucv" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/spinlock.h> @@ -424,8 +427,8 @@ static void iucv_declare_cpu(void *data) err = "Paging or storage error"; break; } - printk(KERN_WARNING "iucv_register: iucv_declare_buffer " - "on cpu %i returned error 0x%02x (%s)\n", cpu, rc, err); + pr_warning("Defining an interrupt buffer on CPU %i" + " failed with 0x%02x (%s)\n", cpu, rc, err); return; } @@ -957,7 +960,52 @@ int iucv_message_purge(struct iucv_path *path, struct iucv_message *msg, EXPORT_SYMBOL(iucv_message_purge); /** - * iucv_message_receive + * iucv_message_receive_iprmdata + * @path: address of iucv path structure + * @msg: address of iucv msg structure + * @flags: how the message is received (IUCV_IPBUFLST) + * @buffer: address of data buffer or address of struct iucv_array + * @size: length of data buffer + * @residual: + * + * Internal function used by iucv_message_receive and __iucv_message_receive + * to receive RMDATA data stored in struct iucv_message. + */ +static int iucv_message_receive_iprmdata(struct iucv_path *path, + struct iucv_message *msg, + u8 flags, void *buffer, + size_t size, size_t *residual) +{ + struct iucv_array *array; + u8 *rmmsg; + size_t copy; + + /* + * Message is 8 bytes long and has been stored to the + * message descriptor itself. + */ + if (residual) + *residual = abs(size - 8); + rmmsg = msg->rmmsg; + if (flags & IUCV_IPBUFLST) { + /* Copy to struct iucv_array. */ + size = (size < 8) ? size : 8; + for (array = buffer; size > 0; array++) { + copy = min_t(size_t, size, array->length); + memcpy((u8 *)(addr_t) array->address, + rmmsg, copy); + rmmsg += copy; + size -= copy; + } + } else { + /* Copy to direct buffer. */ + memcpy(buffer, rmmsg, min_t(size_t, size, 8)); + } + return 0; +} + +/** + * __iucv_message_receive * @path: address of iucv path structure * @msg: address of iucv msg structure * @flags: how the message is received (IUCV_IPBUFLST) @@ -969,44 +1017,19 @@ EXPORT_SYMBOL(iucv_message_purge); * established paths. This function will deal with RMDATA messages * embedded in struct iucv_message as well. * + * Locking: no locking + * * Returns the result from the CP IUCV call. */ -int iucv_message_receive(struct iucv_path *path, struct iucv_message *msg, - u8 flags, void *buffer, size_t size, size_t *residual) +int __iucv_message_receive(struct iucv_path *path, struct iucv_message *msg, + u8 flags, void *buffer, size_t size, size_t *residual) { union iucv_param *parm; - struct iucv_array *array; - u8 *rmmsg; - size_t copy; int rc; - if (msg->flags & IUCV_IPRMDATA) { - /* - * Message is 8 bytes long and has been stored to the - * message descriptor itself. - */ - rc = (size < 8) ? 5 : 0; - if (residual) - *residual = abs(size - 8); - rmmsg = msg->rmmsg; - if (flags & IUCV_IPBUFLST) { - /* Copy to struct iucv_array. */ - size = (size < 8) ? size : 8; - for (array = buffer; size > 0; array++) { - copy = min_t(size_t, size, array->length); - memcpy((u8 *)(addr_t) array->address, - rmmsg, copy); - rmmsg += copy; - size -= copy; - } - } else { - /* Copy to direct buffer. */ - memcpy(buffer, rmmsg, min_t(size_t, size, 8)); - } - return 0; - } - - local_bh_disable(); + if (msg->flags & IUCV_IPRMDATA) + return iucv_message_receive_iprmdata(path, msg, flags, + buffer, size, residual); parm = iucv_param[smp_processor_id()]; memset(parm, 0, sizeof(union iucv_param)); parm->db.ipbfadr1 = (u32)(addr_t) buffer; @@ -1022,6 +1045,37 @@ int iucv_message_receive(struct iucv_path *path, struct iucv_message *msg, if (residual) *residual = parm->db.ipbfln1f; } + return rc; +} +EXPORT_SYMBOL(__iucv_message_receive); + +/** + * iucv_message_receive + * @path: address of iucv path structure + * @msg: address of iucv msg structure + * @flags: how the message is received (IUCV_IPBUFLST) + * @buffer: address of data buffer or address of struct iucv_array + * @size: length of data buffer + * @residual: + * + * This function receives messages that are being sent to you over + * established paths. This function will deal with RMDATA messages + * embedded in struct iucv_message as well. + * + * Locking: local_bh_enable/local_bh_disable + * + * Returns the result from the CP IUCV call. + */ +int iucv_message_receive(struct iucv_path *path, struct iucv_message *msg, + u8 flags, void *buffer, size_t size, size_t *residual) +{ + int rc; + + if (msg->flags & IUCV_IPRMDATA) + return iucv_message_receive_iprmdata(path, msg, flags, + buffer, size, residual); + local_bh_disable(); + rc = __iucv_message_receive(path, msg, flags, buffer, size, residual); local_bh_enable(); return rc; } @@ -1101,7 +1155,7 @@ int iucv_message_reply(struct iucv_path *path, struct iucv_message *msg, EXPORT_SYMBOL(iucv_message_reply); /** - * iucv_message_send + * __iucv_message_send * @path: address of iucv path structure * @msg: address of iucv msg structure * @flags: how the message is sent (IUCV_IPRMDATA, IUCV_IPPRTY, IUCV_IPBUFLST) @@ -1113,15 +1167,16 @@ EXPORT_SYMBOL(iucv_message_reply); * transmitted is in a buffer and this is a one-way message and the * receiver will not reply to the message. * + * Locking: no locking + * * Returns the result from the CP IUCV call. */ -int iucv_message_send(struct iucv_path *path, struct iucv_message *msg, +int __iucv_message_send(struct iucv_path *path, struct iucv_message *msg, u8 flags, u32 srccls, void *buffer, size_t size) { union iucv_param *parm; int rc; - local_bh_disable(); parm = iucv_param[smp_processor_id()]; memset(parm, 0, sizeof(union iucv_param)); if (flags & IUCV_IPRMDATA) { @@ -1144,6 +1199,34 @@ int iucv_message_send(struct iucv_path *path, struct iucv_message *msg, rc = iucv_call_b2f0(IUCV_SEND, parm); if (!rc) msg->id = parm->db.ipmsgid; + return rc; +} +EXPORT_SYMBOL(__iucv_message_send); + +/** + * iucv_message_send + * @path: address of iucv path structure + * @msg: address of iucv msg structure + * @flags: how the message is sent (IUCV_IPRMDATA, IUCV_IPPRTY, IUCV_IPBUFLST) + * @srccls: source class of message + * @buffer: address of send buffer or address of struct iucv_array + * @size: length of send buffer + * + * This function transmits data to another application. Data to be + * transmitted is in a buffer and this is a one-way message and the + * receiver will not reply to the message. + * + * Locking: local_bh_enable/local_bh_disable + * + * Returns the result from the CP IUCV call. + */ +int iucv_message_send(struct iucv_path *path, struct iucv_message *msg, + u8 flags, u32 srccls, void *buffer, size_t size) +{ + int rc; + + local_bh_disable(); + rc = __iucv_message_send(path, msg, flags, srccls, buffer, size); local_bh_enable(); return rc; } @@ -1572,7 +1655,7 @@ static void iucv_external_interrupt(u16 code) BUG_ON(p->iptype < 0x01 || p->iptype > 0x09); work = kmalloc(sizeof(struct iucv_irq_list), GFP_ATOMIC); if (!work) { - printk(KERN_WARNING "iucv_external_interrupt: out of memory\n"); + pr_warning("iucv_external_interrupt: out of memory\n"); return; } memcpy(&work->data, p, sizeof(work->data)); diff --git a/net/key/af_key.c b/net/key/af_key.c index 5b22e011653..f8bd8df5e25 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -27,6 +27,7 @@ #include <linux/proc_fs.h> #include <linux/init.h> #include <net/net_namespace.h> +#include <net/netns/generic.h> #include <net/xfrm.h> #include <net/sock.h> @@ -34,15 +35,16 @@ #define _X2KEY(x) ((x) == XFRM_INF ? 0 : (x)) #define _KEY2X(x) ((x) == 0 ? XFRM_INF : (x)) - -/* List of all pfkey sockets. */ -static HLIST_HEAD(pfkey_table); +static int pfkey_net_id; +struct netns_pfkey { + /* List of all pfkey sockets. */ + struct hlist_head table; + atomic_t socks_nr; +}; static DECLARE_WAIT_QUEUE_HEAD(pfkey_table_wait); static DEFINE_RWLOCK(pfkey_table_lock); static atomic_t pfkey_table_users = ATOMIC_INIT(0); -static atomic_t pfkey_socks_nr = ATOMIC_INIT(0); - struct pfkey_sock { /* struct sock must be the first member of struct pfkey_sock */ struct sock sk; @@ -89,6 +91,9 @@ static void pfkey_terminate_dump(struct pfkey_sock *pfk) static void pfkey_sock_destruct(struct sock *sk) { + struct net *net = sock_net(sk); + struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); + pfkey_terminate_dump(pfkey_sk(sk)); skb_queue_purge(&sk->sk_receive_queue); @@ -100,7 +105,7 @@ static void pfkey_sock_destruct(struct sock *sk) WARN_ON(atomic_read(&sk->sk_rmem_alloc)); WARN_ON(atomic_read(&sk->sk_wmem_alloc)); - atomic_dec(&pfkey_socks_nr); + atomic_dec(&net_pfkey->socks_nr); } static void pfkey_table_grab(void) @@ -151,8 +156,11 @@ static const struct proto_ops pfkey_ops; static void pfkey_insert(struct sock *sk) { + struct net *net = sock_net(sk); + struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); + pfkey_table_grab(); - sk_add_node(sk, &pfkey_table); + sk_add_node(sk, &net_pfkey->table); pfkey_table_ungrab(); } @@ -171,12 +179,10 @@ static struct proto key_proto = { static int pfkey_create(struct net *net, struct socket *sock, int protocol) { + struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); struct sock *sk; int err; - if (net != &init_net) - return -EAFNOSUPPORT; - if (!capable(CAP_NET_ADMIN)) return -EPERM; if (sock->type != SOCK_RAW) @@ -195,7 +201,7 @@ static int pfkey_create(struct net *net, struct socket *sock, int protocol) sk->sk_family = PF_KEY; sk->sk_destruct = pfkey_sock_destruct; - atomic_inc(&pfkey_socks_nr); + atomic_inc(&net_pfkey->socks_nr); pfkey_insert(sk); @@ -255,8 +261,10 @@ static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2, #define BROADCAST_REGISTERED 2 #define BROADCAST_PROMISC_ONLY 4 static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, - int broadcast_flags, struct sock *one_sk) + int broadcast_flags, struct sock *one_sk, + struct net *net) { + struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); struct sock *sk; struct hlist_node *node; struct sk_buff *skb2 = NULL; @@ -269,7 +277,7 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, return -ENOMEM; pfkey_lock_table(); - sk_for_each(sk, node, &pfkey_table) { + sk_for_each(sk, node, &net_pfkey->table) { struct pfkey_sock *pfk = pfkey_sk(sk); int err2; @@ -328,7 +336,7 @@ static int pfkey_do_dump(struct pfkey_sock *pfk) hdr->sadb_msg_seq = 0; hdr->sadb_msg_errno = rc; pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, - &pfk->sk); + &pfk->sk, sock_net(&pfk->sk)); pfk->dump.skb = NULL; } @@ -367,7 +375,7 @@ static int pfkey_error(struct sadb_msg *orig, int err, struct sock *sk) hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); - pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ONE, sk); + pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ONE, sk, sock_net(sk)); return 0; } @@ -645,7 +653,7 @@ int pfkey_sadb_addr2xfrm_addr(struct sadb_address *addr, xfrm_address_t *xaddr) xaddr); } -static struct xfrm_state *pfkey_xfrm_state_lookup(struct sadb_msg *hdr, void **ext_hdrs) +static struct xfrm_state *pfkey_xfrm_state_lookup(struct net *net, struct sadb_msg *hdr, void **ext_hdrs) { struct sadb_sa *sa; struct sadb_address *addr; @@ -683,7 +691,7 @@ static struct xfrm_state *pfkey_xfrm_state_lookup(struct sadb_msg *hdr, void ** if (!xaddr) return NULL; - return xfrm_state_lookup(xaddr, sa->sadb_sa_spi, proto, family); + return xfrm_state_lookup(net, xaddr, sa->sadb_sa_spi, proto, family); } #define PFKEY_ALIGN8(a) (1 + (((a) - 1) | (8 - 1))) @@ -1058,7 +1066,8 @@ static inline struct sk_buff *pfkey_xfrm_state2msg_expire(struct xfrm_state *x, return __pfkey_xfrm_state2msg(x, 0, hsc); } -static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr, +static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, + struct sadb_msg *hdr, void **ext_hdrs) { struct xfrm_state *x; @@ -1122,7 +1131,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr, (key->sadb_key_bits+7) / 8 > key->sadb_key_len * sizeof(uint64_t))) return ERR_PTR(-EINVAL); - x = xfrm_state_alloc(); + x = xfrm_state_alloc(net); if (x == NULL) return ERR_PTR(-ENOBUFS); @@ -1298,6 +1307,7 @@ static int pfkey_reserved(struct sock *sk, struct sk_buff *skb, struct sadb_msg static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) { + struct net *net = sock_net(sk); struct sk_buff *resp_skb; struct sadb_x_sa2 *sa2; struct sadb_address *saddr, *daddr; @@ -1348,7 +1358,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h } if (hdr->sadb_msg_seq) { - x = xfrm_find_acq_byseq(hdr->sadb_msg_seq); + x = xfrm_find_acq_byseq(net, hdr->sadb_msg_seq); if (x && xfrm_addr_cmp(&x->id.daddr, xdaddr, family)) { xfrm_state_put(x); x = NULL; @@ -1356,7 +1366,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h } if (!x) - x = xfrm_find_acq(mode, reqid, proto, xdaddr, xsaddr, 1, family); + x = xfrm_find_acq(net, mode, reqid, proto, xdaddr, xsaddr, 1, family); if (x == NULL) return -ENOENT; @@ -1389,13 +1399,14 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h xfrm_state_put(x); - pfkey_broadcast(resp_skb, GFP_KERNEL, BROADCAST_ONE, sk); + pfkey_broadcast(resp_skb, GFP_KERNEL, BROADCAST_ONE, sk, net); return 0; } static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) { + struct net *net = sock_net(sk); struct xfrm_state *x; if (hdr->sadb_msg_len != sizeof(struct sadb_msg)/8) @@ -1404,14 +1415,14 @@ static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, struct sadb_msg * if (hdr->sadb_msg_seq == 0 || hdr->sadb_msg_errno == 0) return 0; - x = xfrm_find_acq_byseq(hdr->sadb_msg_seq); + x = xfrm_find_acq_byseq(net, hdr->sadb_msg_seq); if (x == NULL) return 0; spin_lock_bh(&x->lock); if (x->km.state == XFRM_STATE_ACQ) { x->km.state = XFRM_STATE_ERROR; - wake_up(&km_waitq); + wake_up(&net->xfrm.km_waitq); } spin_unlock_bh(&x->lock); xfrm_state_put(x); @@ -1476,18 +1487,19 @@ static int key_notify_sa(struct xfrm_state *x, struct km_event *c) hdr->sadb_msg_seq = c->seq; hdr->sadb_msg_pid = c->pid; - pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL); + pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xs_net(x)); return 0; } static int pfkey_add(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) { + struct net *net = sock_net(sk); struct xfrm_state *x; int err; struct km_event c; - x = pfkey_msg2xfrm_state(hdr, ext_hdrs); + x = pfkey_msg2xfrm_state(net, hdr, ext_hdrs); if (IS_ERR(x)) return PTR_ERR(x); @@ -1521,6 +1533,7 @@ out: static int pfkey_delete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) { + struct net *net = sock_net(sk); struct xfrm_state *x; struct km_event c; int err; @@ -1530,7 +1543,7 @@ static int pfkey_delete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h ext_hdrs[SADB_EXT_ADDRESS_DST-1])) return -EINVAL; - x = pfkey_xfrm_state_lookup(hdr, ext_hdrs); + x = pfkey_xfrm_state_lookup(net, hdr, ext_hdrs); if (x == NULL) return -ESRCH; @@ -1562,6 +1575,7 @@ out: static int pfkey_get(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) { + struct net *net = sock_net(sk); __u8 proto; struct sk_buff *out_skb; struct sadb_msg *out_hdr; @@ -1572,7 +1586,7 @@ static int pfkey_get(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, ext_hdrs[SADB_EXT_ADDRESS_DST-1])) return -EINVAL; - x = pfkey_xfrm_state_lookup(hdr, ext_hdrs); + x = pfkey_xfrm_state_lookup(net, hdr, ext_hdrs); if (x == NULL) return -ESRCH; @@ -1590,7 +1604,7 @@ static int pfkey_get(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, out_hdr->sadb_msg_reserved = 0; out_hdr->sadb_msg_seq = hdr->sadb_msg_seq; out_hdr->sadb_msg_pid = hdr->sadb_msg_pid; - pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk); + pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk, sock_net(sk)); return 0; } @@ -1691,7 +1705,7 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, struct sadb_msg return -ENOBUFS; } - pfkey_broadcast(supp_skb, GFP_KERNEL, BROADCAST_REGISTERED, sk); + pfkey_broadcast(supp_skb, GFP_KERNEL, BROADCAST_REGISTERED, sk, sock_net(sk)); return 0; } @@ -1713,13 +1727,14 @@ static int key_notify_sa_flush(struct km_event *c) hdr->sadb_msg_errno = (uint8_t) 0; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); - pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL); + pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net); return 0; } static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) { + struct net *net = sock_net(sk); unsigned proto; struct km_event c; struct xfrm_audit audit_info; @@ -1732,13 +1747,14 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hd audit_info.loginuid = audit_get_loginuid(current); audit_info.sessionid = audit_get_sessionid(current); audit_info.secid = 0; - err = xfrm_state_flush(proto, &audit_info); + err = xfrm_state_flush(net, proto, &audit_info); if (err) return err; c.data.proto = proto; c.seq = hdr->sadb_msg_seq; c.pid = hdr->sadb_msg_pid; c.event = XFRM_MSG_FLUSHSA; + c.net = net; km_state_notify(NULL, &c); return 0; @@ -1768,7 +1784,7 @@ static int dump_sa(struct xfrm_state *x, int count, void *ptr) if (pfk->dump.skb) pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, - &pfk->sk); + &pfk->sk, sock_net(&pfk->sk)); pfk->dump.skb = out_skb; return 0; @@ -1776,7 +1792,8 @@ static int dump_sa(struct xfrm_state *x, int count, void *ptr) static int pfkey_dump_sa(struct pfkey_sock *pfk) { - return xfrm_state_walk(&pfk->dump.u.state, dump_sa, (void *) pfk); + struct net *net = sock_net(&pfk->sk); + return xfrm_state_walk(net, &pfk->dump.u.state, dump_sa, (void *) pfk); } static void pfkey_dump_sa_done(struct pfkey_sock *pfk) @@ -1817,7 +1834,7 @@ static int pfkey_promisc(struct sock *sk, struct sk_buff *skb, struct sadb_msg * return -EINVAL; pfk->promisc = satype; } - pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, BROADCAST_ALL, NULL); + pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, BROADCAST_ALL, NULL, sock_net(sk)); return 0; } @@ -1833,7 +1850,7 @@ static int check_reqid(struct xfrm_policy *xp, int dir, int count, void *ptr) return 0; } -static u32 gen_reqid(void) +static u32 gen_reqid(struct net *net) { struct xfrm_policy_walk walk; u32 start; @@ -1846,7 +1863,7 @@ static u32 gen_reqid(void) if (reqid == 0) reqid = IPSEC_MANUAL_REQID_MAX+1; xfrm_policy_walk_init(&walk, XFRM_POLICY_TYPE_MAIN); - rc = xfrm_policy_walk(&walk, check_reqid, (void*)&reqid); + rc = xfrm_policy_walk(net, &walk, check_reqid, (void*)&reqid); xfrm_policy_walk_done(&walk); if (rc != -EEXIST) return reqid; @@ -1857,6 +1874,7 @@ static u32 gen_reqid(void) static int parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq) { + struct net *net = xp_net(xp); struct xfrm_tmpl *t = xp->xfrm_vec + xp->xfrm_nr; int mode; @@ -1876,7 +1894,7 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq) t->reqid = rq->sadb_x_ipsecrequest_reqid; if (t->reqid > IPSEC_MANUAL_REQID_MAX) t->reqid = 0; - if (!t->reqid && !(t->reqid = gen_reqid())) + if (!t->reqid && !(t->reqid = gen_reqid(net))) return -ENOBUFS; } @@ -2147,7 +2165,7 @@ static int key_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_seq = c->seq; out_hdr->sadb_msg_pid = c->pid; - pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, NULL); + pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xp_net(xp)); out: return 0; @@ -2155,6 +2173,7 @@ out: static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) { + struct net *net = sock_net(sk); int err = 0; struct sadb_lifetime *lifetime; struct sadb_address *sa; @@ -2174,7 +2193,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h if (!pol->sadb_x_policy_dir || pol->sadb_x_policy_dir >= IPSEC_DIR_MAX) return -EINVAL; - xp = xfrm_policy_alloc(GFP_KERNEL); + xp = xfrm_policy_alloc(net, GFP_KERNEL); if (xp == NULL) return -ENOBUFS; @@ -2275,6 +2294,7 @@ out: static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) { + struct net *net = sock_net(sk); int err; struct sadb_address *sa; struct sadb_x_policy *pol; @@ -2324,7 +2344,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg return err; } - xp = xfrm_policy_bysel_ctx(XFRM_POLICY_TYPE_MAIN, + xp = xfrm_policy_bysel_ctx(net, XFRM_POLICY_TYPE_MAIN, pol->sadb_x_policy_dir - 1, &sel, pol_ctx, 1, &err); security_xfrm_policy_free(pol_ctx); @@ -2372,7 +2392,7 @@ static int key_pol_get_resp(struct sock *sk, struct xfrm_policy *xp, struct sadb out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_seq = hdr->sadb_msg_seq; out_hdr->sadb_msg_pid = hdr->sadb_msg_pid; - pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk); + pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk, xp_net(xp)); err = 0; out: @@ -2557,6 +2577,7 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) { + struct net *net = sock_net(sk); unsigned int dir; int err = 0, delete; struct sadb_x_policy *pol; @@ -2571,8 +2592,8 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h return -EINVAL; delete = (hdr->sadb_msg_type == SADB_X_SPDDELETE2); - xp = xfrm_policy_byid(XFRM_POLICY_TYPE_MAIN, dir, pol->sadb_x_policy_id, - delete, &err); + xp = xfrm_policy_byid(net, XFRM_POLICY_TYPE_MAIN, dir, + pol->sadb_x_policy_id, delete, &err); if (xp == NULL) return -ENOENT; @@ -2625,7 +2646,7 @@ static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr) if (pfk->dump.skb) pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, - &pfk->sk); + &pfk->sk, sock_net(&pfk->sk)); pfk->dump.skb = out_skb; return 0; @@ -2633,7 +2654,8 @@ static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr) static int pfkey_dump_sp(struct pfkey_sock *pfk) { - return xfrm_policy_walk(&pfk->dump.u.policy, dump_sp, (void *) pfk); + struct net *net = sock_net(&pfk->sk); + return xfrm_policy_walk(net, &pfk->dump.u.policy, dump_sp, (void *) pfk); } static void pfkey_dump_sp_done(struct pfkey_sock *pfk) @@ -2672,13 +2694,14 @@ static int key_notify_policy_flush(struct km_event *c) hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_errno = (uint8_t) 0; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); - pfkey_broadcast(skb_out, GFP_ATOMIC, BROADCAST_ALL, NULL); + pfkey_broadcast(skb_out, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net); return 0; } static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) { + struct net *net = sock_net(sk); struct km_event c; struct xfrm_audit audit_info; int err; @@ -2686,13 +2709,14 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg audit_info.loginuid = audit_get_loginuid(current); audit_info.sessionid = audit_get_sessionid(current); audit_info.secid = 0; - err = xfrm_policy_flush(XFRM_POLICY_TYPE_MAIN, &audit_info); + err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info); if (err) return err; c.data.type = XFRM_POLICY_TYPE_MAIN; c.event = XFRM_MSG_FLUSHPOLICY; c.pid = hdr->sadb_msg_pid; c.seq = hdr->sadb_msg_seq; + c.net = net; km_policy_notify(NULL, 0, &c); return 0; @@ -2732,7 +2756,7 @@ static int pfkey_process(struct sock *sk, struct sk_buff *skb, struct sadb_msg * int err; pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, - BROADCAST_PROMISC_ONLY, NULL); + BROADCAST_PROMISC_ONLY, NULL, sock_net(sk)); memset(ext_hdrs, 0, sizeof(ext_hdrs)); err = parse_exthdrs(skb, hdr, ext_hdrs); @@ -2935,13 +2959,16 @@ static int key_notify_sa_expire(struct xfrm_state *x, struct km_event *c) out_hdr->sadb_msg_seq = 0; out_hdr->sadb_msg_pid = 0; - pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL); + pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, xs_net(x)); return 0; } static int pfkey_send_notify(struct xfrm_state *x, struct km_event *c) { - if (atomic_read(&pfkey_socks_nr) == 0) + struct net *net = x ? xs_net(x) : c->net; + struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); + + if (atomic_read(&net_pfkey->socks_nr) == 0) return 0; switch (c->event) { @@ -3103,12 +3130,13 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_ctx->ctx_len); } - return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL); + return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, xs_net(x)); } static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, u8 *data, int len, int *dir) { + struct net *net = sock_net(sk); struct xfrm_policy *xp; struct sadb_x_policy *pol = (struct sadb_x_policy*)data; struct sadb_x_sec_ctx *sec_ctx; @@ -3141,7 +3169,7 @@ static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, (!pol->sadb_x_policy_dir || pol->sadb_x_policy_dir > IPSEC_DIR_OUTBOUND)) return NULL; - xp = xfrm_policy_alloc(GFP_ATOMIC); + xp = xfrm_policy_alloc(net, GFP_ATOMIC); if (xp == NULL) { *dir = -ENOBUFS; return NULL; @@ -3300,7 +3328,7 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, n_port->sadb_x_nat_t_port_port = sport; n_port->sadb_x_nat_t_port_reserved = 0; - return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL); + return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, xs_net(x)); } #ifdef CONFIG_NET_KEY_MIGRATE @@ -3491,7 +3519,7 @@ static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, } /* broadcast migrate message to sockets */ - pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL); + pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, &init_net); return 0; @@ -3645,6 +3673,8 @@ static int pfkey_seq_show(struct seq_file *f, void *v) static void *pfkey_seq_start(struct seq_file *f, loff_t *ppos) { + struct net *net = seq_file_net(f); + struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); struct sock *s; struct hlist_node *node; loff_t pos = *ppos; @@ -3653,7 +3683,7 @@ static void *pfkey_seq_start(struct seq_file *f, loff_t *ppos) if (pos == 0) return SEQ_START_TOKEN; - sk_for_each(s, node, &pfkey_table) + sk_for_each(s, node, &net_pfkey->table) if (pos-- == 1) return s; @@ -3662,9 +3692,12 @@ static void *pfkey_seq_start(struct seq_file *f, loff_t *ppos) static void *pfkey_seq_next(struct seq_file *f, void *v, loff_t *ppos) { + struct net *net = seq_file_net(f); + struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); + ++*ppos; return (v == SEQ_START_TOKEN) ? - sk_head(&pfkey_table) : + sk_head(&net_pfkey->table) : sk_next((struct sock *)v); } @@ -3682,38 +3715,39 @@ static struct seq_operations pfkey_seq_ops = { static int pfkey_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &pfkey_seq_ops); + return seq_open_net(inode, file, &pfkey_seq_ops, + sizeof(struct seq_net_private)); } static struct file_operations pfkey_proc_ops = { .open = pfkey_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_net, }; -static int pfkey_init_proc(void) +static int __net_init pfkey_init_proc(struct net *net) { struct proc_dir_entry *e; - e = proc_net_fops_create(&init_net, "pfkey", 0, &pfkey_proc_ops); + e = proc_net_fops_create(net, "pfkey", 0, &pfkey_proc_ops); if (e == NULL) return -ENOMEM; return 0; } -static void pfkey_exit_proc(void) +static void pfkey_exit_proc(struct net *net) { - proc_net_remove(&init_net, "pfkey"); + proc_net_remove(net, "pfkey"); } #else -static inline int pfkey_init_proc(void) +static int __net_init pfkey_init_proc(struct net *net) { return 0; } -static inline void pfkey_exit_proc(void) +static void pfkey_exit_proc(struct net *net) { } #endif @@ -3729,10 +3763,51 @@ static struct xfrm_mgr pfkeyv2_mgr = .migrate = pfkey_send_migrate, }; +static int __net_init pfkey_net_init(struct net *net) +{ + struct netns_pfkey *net_pfkey; + int rv; + + net_pfkey = kmalloc(sizeof(struct netns_pfkey), GFP_KERNEL); + if (!net_pfkey) { + rv = -ENOMEM; + goto out_kmalloc; + } + INIT_HLIST_HEAD(&net_pfkey->table); + atomic_set(&net_pfkey->socks_nr, 0); + rv = net_assign_generic(net, pfkey_net_id, net_pfkey); + if (rv < 0) + goto out_assign; + rv = pfkey_init_proc(net); + if (rv < 0) + goto out_proc; + return 0; + +out_proc: +out_assign: + kfree(net_pfkey); +out_kmalloc: + return rv; +} + +static void __net_exit pfkey_net_exit(struct net *net) +{ + struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); + + pfkey_exit_proc(net); + BUG_ON(!hlist_empty(&net_pfkey->table)); + kfree(net_pfkey); +} + +static struct pernet_operations pfkey_net_ops = { + .init = pfkey_net_init, + .exit = pfkey_net_exit, +}; + static void __exit ipsec_pfkey_exit(void) { + unregister_pernet_gen_subsys(pfkey_net_id, &pfkey_net_ops); xfrm_unregister_km(&pfkeyv2_mgr); - pfkey_exit_proc(); sock_unregister(PF_KEY); proto_unregister(&key_proto); } @@ -3747,16 +3822,16 @@ static int __init ipsec_pfkey_init(void) err = sock_register(&pfkey_family_ops); if (err != 0) goto out_unregister_key_proto; - err = pfkey_init_proc(); + err = xfrm_register_km(&pfkeyv2_mgr); if (err != 0) goto out_sock_unregister; - err = xfrm_register_km(&pfkeyv2_mgr); + err = register_pernet_gen_subsys(&pfkey_net_id, &pfkey_net_ops); if (err != 0) - goto out_remove_proc_entry; + goto out_xfrm_unregister_km; out: return err; -out_remove_proc_entry: - pfkey_exit_proc(); +out_xfrm_unregister_km: + xfrm_unregister_km(&pfkeyv2_mgr); out_sock_unregister: sock_unregister(PF_KEY); out_unregister_key_proto: diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 5bcc452a247..56fd85ab358 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -103,7 +103,6 @@ static inline u8 llc_ui_header_len(struct sock *sk, struct sockaddr_llc *addr) * llc_ui_send_data - send data via reliable llc2 connection * @sk: Connection the socket is using. * @skb: Data the user wishes to send. - * @addr: Source and destination fields provided by the user. * @noblock: can we block waiting for data? * * Send data via reliable llc2 connection. diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c index 48212c0a961..b58bd7c6cdf 100644 --- a/net/llc/llc_proc.c +++ b/net/llc/llc_proc.c @@ -27,8 +27,7 @@ static void llc_ui_format_mac(struct seq_file *seq, u8 *addr) { - DECLARE_MAC_BUF(mac); - seq_printf(seq, "%s", print_mac(mac, addr)); + seq_printf(seq, "%pM", addr); } static struct sock *llc_get_sk_idx(loff_t pos) diff --git a/net/llc/sysctl_net_llc.c b/net/llc/sysctl_net_llc.c index 5bef1dcf18e..57b9304d444 100644 --- a/net/llc/sysctl_net_llc.c +++ b/net/llc/sysctl_net_llc.c @@ -20,8 +20,8 @@ static struct ctl_table llc2_timeout_table[] = { .data = &sysctl_llc2_ack_timeout, .maxlen = sizeof(long), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_LLC2_BUSY_TIMEOUT, @@ -29,8 +29,8 @@ static struct ctl_table llc2_timeout_table[] = { .data = &sysctl_llc2_busy_timeout, .maxlen = sizeof(long), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_LLC2_P_TIMEOUT, @@ -38,8 +38,8 @@ static struct ctl_table llc2_timeout_table[] = { .data = &sysctl_llc2_p_timeout, .maxlen = sizeof(long), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { .ctl_name = NET_LLC2_REJ_TIMEOUT, @@ -47,8 +47,8 @@ static struct ctl_table llc2_timeout_table[] = { .data = &sysctl_llc2_rej_timeout, .maxlen = sizeof(long), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { 0 }, }; @@ -60,8 +60,8 @@ static struct ctl_table llc_station_table[] = { .data = &sysctl_llc_station_ack_timeout, .maxlen = sizeof(long), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, + .strategy = sysctl_jiffies, }, { 0 }, }; diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 7f710a27e91..60c16162474 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -16,20 +16,20 @@ menu "Rate control algorithm selection" config MAC80211_RC_PID bool "PID controller based rate control algorithm" if EMBEDDED - default y ---help--- This option enables a TX rate control algorithm for mac80211 that uses a PID controller to select the TX rate. config MAC80211_RC_MINSTREL - bool "Minstrel" + bool "Minstrel" if EMBEDDED + default y ---help--- This option enables the 'minstrel' TX rate control algorithm choice prompt "Default rate control algorithm" - default MAC80211_RC_DEFAULT_PID + default MAC80211_RC_DEFAULT_MINSTREL ---help--- This option selects the default rate control algorithm mac80211 will use. Note that this default can still be @@ -55,8 +55,8 @@ endchoice config MAC80211_RC_DEFAULT string - default "pid" if MAC80211_RC_DEFAULT_PID default "minstrel" if MAC80211_RC_DEFAULT_MINSTREL + default "pid" if MAC80211_RC_DEFAULT_PID default "" endmenu diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index 31cfd1f89a7..7d4971aa443 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -46,3 +46,5 @@ rc80211_minstrel-$(CONFIG_MAC80211_DEBUGFS) += rc80211_minstrel_debugfs.o mac80211-$(CONFIG_MAC80211_RC_PID) += $(rc80211_pid-y) mac80211-$(CONFIG_MAC80211_RC_MINSTREL) += $(rc80211_minstrel-y) + +ccflags-y += -D__CHECK_ENDIAN__ diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 855126a3039..9d4e4d846ec 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -17,13 +17,6 @@ #include "rate.h" #include "mesh.h" -struct ieee80211_hw *wiphy_to_hw(struct wiphy *wiphy) -{ - struct ieee80211_local *local = wiphy_priv(wiphy); - return &local->hw; -} -EXPORT_SYMBOL(wiphy_to_hw); - static bool nl80211_type_check(enum nl80211_iftype type) { switch (type) { @@ -33,6 +26,8 @@ static bool nl80211_type_check(enum nl80211_iftype type) #ifdef CONFIG_MAC80211_MESH case NL80211_IFTYPE_MESH_POINT: #endif + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_WDS: return true; default: @@ -315,12 +310,35 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->filled = STATION_INFO_INACTIVE_TIME | STATION_INFO_RX_BYTES | - STATION_INFO_TX_BYTES; + STATION_INFO_TX_BYTES | + STATION_INFO_TX_BITRATE; sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx); sinfo->rx_bytes = sta->rx_bytes; sinfo->tx_bytes = sta->tx_bytes; + if (sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) { + sinfo->filled |= STATION_INFO_SIGNAL; + sinfo->signal = (s8)sta->last_signal; + } + + sinfo->txrate.flags = 0; + if (sta->last_tx_rate.flags & IEEE80211_TX_RC_MCS) + sinfo->txrate.flags |= RATE_INFO_FLAGS_MCS; + if (sta->last_tx_rate.flags & IEEE80211_TX_RC_40_MHZ_WIDTH) + sinfo->txrate.flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH; + if (sta->last_tx_rate.flags & IEEE80211_TX_RC_SHORT_GI) + sinfo->txrate.flags |= RATE_INFO_FLAGS_SHORT_GI; + + if (!(sta->last_tx_rate.flags & IEEE80211_TX_RC_MCS)) { + struct ieee80211_supported_band *sband; + sband = sta->local->hw.wiphy->bands[ + sta->local->hw.conf.channel->band]; + sinfo->txrate.legacy = + sband->bitrates[sta->last_tx_rate.idx].bitrate; + } else + sinfo->txrate.mcs = sta->last_tx_rate.idx; + if (ieee80211_vif_is_mesh(&sdata->vif)) { #ifdef CONFIG_MAC80211_MESH sinfo->filled |= STATION_INFO_LLID | @@ -401,8 +419,10 @@ static int ieee80211_config_beacon(struct ieee80211_sub_if_data *sdata, */ if (params->interval) { sdata->local->hw.conf.beacon_int = params->interval; - if (ieee80211_hw_config(sdata->local)) - return -EINVAL; + err = ieee80211_hw_config(sdata->local, + IEEE80211_CONF_CHANGE_BEACON_INTERVAL); + if (err < 0) + return err; /* * We updated some parameter so if below bails out * it's not an error. @@ -589,6 +609,8 @@ static void sta_apply_parameters(struct ieee80211_local *local, struct ieee80211_supported_band *sband; struct ieee80211_sub_if_data *sdata = sta->sdata; + sband = local->hw.wiphy->bands[local->oper_channel->band]; + /* * FIXME: updating the flags is racy when this function is * called from ieee80211_change_station(), this will @@ -629,7 +651,6 @@ static void sta_apply_parameters(struct ieee80211_local *local, if (params->supported_rates) { rates = 0; - sband = local->hw.wiphy->bands[local->oper_channel->band]; for (i = 0; i < params->supported_rates_len; i++) { int rate = (params->supported_rates[i] & 0x7f) * 5; @@ -641,10 +662,10 @@ static void sta_apply_parameters(struct ieee80211_local *local, sta->sta.supp_rates[local->oper_channel->band] = rates; } - if (params->ht_capa) { - ieee80211_ht_cap_ie_to_ht_info(params->ht_capa, - &sta->sta.ht_info); - } + if (params->ht_capa) + ieee80211_ht_cap_ie_to_sta_ht_cap(sband, + params->ht_capa, + &sta->sta.ht_cap); if (ieee80211_vif_is_mesh(&sdata->vif) && params->plink_action) { switch (params->plink_action) { @@ -665,6 +686,7 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, struct sta_info *sta; struct ieee80211_sub_if_data *sdata; int err; + int layer2_update; /* Prevent a race with changing the rate control algorithm */ if (!netif_running(dev)) @@ -695,17 +717,25 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, rate_control_rate_init(sta); + layer2_update = sdata->vif.type == NL80211_IFTYPE_AP_VLAN || + sdata->vif.type == NL80211_IFTYPE_AP; + rcu_read_lock(); err = sta_info_insert(sta); if (err) { /* STA has been freed */ + if (err == -EEXIST && layer2_update) { + /* Need to update layer 2 devices on reassociation */ + sta = sta_info_get(local, mac); + if (sta) + ieee80211_send_layer2_update(sta); + } rcu_read_unlock(); return err; } - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN || - sdata->vif.type == NL80211_IFTYPE_AP) + if (layer2_update) ieee80211_send_layer2_update(sta); rcu_read_unlock(); @@ -957,6 +987,72 @@ static int ieee80211_dump_mpath(struct wiphy *wiphy, struct net_device *dev, rcu_read_unlock(); return 0; } + +static int ieee80211_get_mesh_params(struct wiphy *wiphy, + struct net_device *dev, + struct mesh_config *conf) +{ + struct ieee80211_sub_if_data *sdata; + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + if (sdata->vif.type != NL80211_IFTYPE_MESH_POINT) + return -ENOTSUPP; + memcpy(conf, &(sdata->u.mesh.mshcfg), sizeof(struct mesh_config)); + return 0; +} + +static inline bool _chg_mesh_attr(enum nl80211_meshconf_params parm, u32 mask) +{ + return (mask >> (parm-1)) & 0x1; +} + +static int ieee80211_set_mesh_params(struct wiphy *wiphy, + struct net_device *dev, + const struct mesh_config *nconf, u32 mask) +{ + struct mesh_config *conf; + struct ieee80211_sub_if_data *sdata; + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + if (sdata->vif.type != NL80211_IFTYPE_MESH_POINT) + return -ENOTSUPP; + + /* Set the config options which we are interested in setting */ + conf = &(sdata->u.mesh.mshcfg); + if (_chg_mesh_attr(NL80211_MESHCONF_RETRY_TIMEOUT, mask)) + conf->dot11MeshRetryTimeout = nconf->dot11MeshRetryTimeout; + if (_chg_mesh_attr(NL80211_MESHCONF_CONFIRM_TIMEOUT, mask)) + conf->dot11MeshConfirmTimeout = nconf->dot11MeshConfirmTimeout; + if (_chg_mesh_attr(NL80211_MESHCONF_HOLDING_TIMEOUT, mask)) + conf->dot11MeshHoldingTimeout = nconf->dot11MeshHoldingTimeout; + if (_chg_mesh_attr(NL80211_MESHCONF_MAX_PEER_LINKS, mask)) + conf->dot11MeshMaxPeerLinks = nconf->dot11MeshMaxPeerLinks; + if (_chg_mesh_attr(NL80211_MESHCONF_MAX_RETRIES, mask)) + conf->dot11MeshMaxRetries = nconf->dot11MeshMaxRetries; + if (_chg_mesh_attr(NL80211_MESHCONF_TTL, mask)) + conf->dot11MeshTTL = nconf->dot11MeshTTL; + if (_chg_mesh_attr(NL80211_MESHCONF_AUTO_OPEN_PLINKS, mask)) + conf->auto_open_plinks = nconf->auto_open_plinks; + if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES, mask)) + conf->dot11MeshHWMPmaxPREQretries = + nconf->dot11MeshHWMPmaxPREQretries; + if (_chg_mesh_attr(NL80211_MESHCONF_PATH_REFRESH_TIME, mask)) + conf->path_refresh_time = nconf->path_refresh_time; + if (_chg_mesh_attr(NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT, mask)) + conf->min_discovery_timeout = nconf->min_discovery_timeout; + if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT, mask)) + conf->dot11MeshHWMPactivePathTimeout = + nconf->dot11MeshHWMPactivePathTimeout; + if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL, mask)) + conf->dot11MeshHWMPpreqMinInterval = + nconf->dot11MeshHWMPpreqMinInterval; + if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME, + mask)) + conf->dot11MeshHWMPnetDiameterTraversalTime = + nconf->dot11MeshHWMPnetDiameterTraversalTime; + return 0; +} + #endif static int ieee80211_change_bss(struct wiphy *wiphy, @@ -972,25 +1068,79 @@ static int ieee80211_change_bss(struct wiphy *wiphy, return -EINVAL; if (params->use_cts_prot >= 0) { - sdata->bss_conf.use_cts_prot = params->use_cts_prot; + sdata->vif.bss_conf.use_cts_prot = params->use_cts_prot; changed |= BSS_CHANGED_ERP_CTS_PROT; } if (params->use_short_preamble >= 0) { - sdata->bss_conf.use_short_preamble = + sdata->vif.bss_conf.use_short_preamble = params->use_short_preamble; changed |= BSS_CHANGED_ERP_PREAMBLE; } if (params->use_short_slot_time >= 0) { - sdata->bss_conf.use_short_slot = + sdata->vif.bss_conf.use_short_slot = params->use_short_slot_time; changed |= BSS_CHANGED_ERP_SLOT; } + if (params->basic_rates) { + int i, j; + u32 rates = 0; + struct ieee80211_local *local = wiphy_priv(wiphy); + struct ieee80211_supported_band *sband = + wiphy->bands[local->oper_channel->band]; + + for (i = 0; i < params->basic_rates_len; i++) { + int rate = (params->basic_rates[i] & 0x7f) * 5; + for (j = 0; j < sband->n_bitrates; j++) { + if (sband->bitrates[j].bitrate == rate) + rates |= BIT(j); + } + } + sdata->vif.bss_conf.basic_rates = rates; + changed |= BSS_CHANGED_BASIC_RATES; + } + ieee80211_bss_info_change_notify(sdata, changed); return 0; } +static int ieee80211_set_txq_params(struct wiphy *wiphy, + struct ieee80211_txq_params *params) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + struct ieee80211_tx_queue_params p; + + if (!local->ops->conf_tx) + return -EOPNOTSUPP; + + memset(&p, 0, sizeof(p)); + p.aifs = params->aifs; + p.cw_max = params->cwmax; + p.cw_min = params->cwmin; + p.txop = params->txop; + if (local->ops->conf_tx(local_to_hw(local), params->queue, &p)) { + printk(KERN_DEBUG "%s: failed to set TX queue " + "parameters for queue %d\n", local->mdev->name, + params->queue); + return -EINVAL; + } + + return 0; +} + +static int ieee80211_set_channel(struct wiphy *wiphy, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + + local->oper_channel = chan; + local->oper_channel_type = channel_type; + + return ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); +} + struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -1013,6 +1163,10 @@ struct cfg80211_ops mac80211_config_ops = { .change_mpath = ieee80211_change_mpath, .get_mpath = ieee80211_get_mpath, .dump_mpath = ieee80211_dump_mpath, + .set_mesh_params = ieee80211_set_mesh_params, + .get_mesh_params = ieee80211_get_mesh_params, #endif .change_bss = ieee80211_change_bss, + .set_txq_params = ieee80211_set_txq_params, + .set_channel = ieee80211_set_channel, }; diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 24ce5446331..2697a2fe608 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -47,18 +47,14 @@ static const struct file_operations name## _ops = { \ DEBUGFS_READONLY_FILE(frequency, 20, "%d", local->hw.conf.channel->center_freq); -DEBUGFS_READONLY_FILE(antenna_sel_tx, 20, "%d", - local->hw.conf.antenna_sel_tx); -DEBUGFS_READONLY_FILE(antenna_sel_rx, 20, "%d", - local->hw.conf.antenna_sel_rx); DEBUGFS_READONLY_FILE(rts_threshold, 20, "%d", local->rts_threshold); DEBUGFS_READONLY_FILE(fragmentation_threshold, 20, "%d", local->fragmentation_threshold); DEBUGFS_READONLY_FILE(short_retry_limit, 20, "%d", - local->short_retry_limit); + local->hw.conf.short_frame_max_tx_count); DEBUGFS_READONLY_FILE(long_retry_limit, 20, "%d", - local->long_retry_limit); + local->hw.conf.long_frame_max_tx_count); DEBUGFS_READONLY_FILE(total_ps_buffered, 20, "%d", local->total_ps_buffered); DEBUGFS_READONLY_FILE(wep_iv, 20, "%#06x", @@ -202,8 +198,6 @@ void debugfs_hw_add(struct ieee80211_local *local) local->debugfs.keys = debugfs_create_dir("keys", phyd); DEBUGFS_ADD(frequency); - DEBUGFS_ADD(antenna_sel_tx); - DEBUGFS_ADD(antenna_sel_rx); DEBUGFS_ADD(rts_threshold); DEBUGFS_ADD(fragmentation_threshold); DEBUGFS_ADD(short_retry_limit); @@ -258,8 +252,6 @@ void debugfs_hw_add(struct ieee80211_local *local) void debugfs_hw_del(struct ieee80211_local *local) { DEBUGFS_DEL(frequency); - DEBUGFS_DEL(antenna_sel_tx); - DEBUGFS_DEL(antenna_sel_rx); DEBUGFS_DEL(rts_threshold); DEBUGFS_DEL(fragmentation_threshold); DEBUGFS_DEL(short_retry_limit); diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index a3294d10932..6424ac565ae 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -188,7 +188,6 @@ void ieee80211_debugfs_key_add(struct ieee80211_key *key) { static int keycount; char buf[50]; - DECLARE_MAC_BUF(mac); struct sta_info *sta; if (!key->local->debugfs.keys) @@ -206,8 +205,7 @@ void ieee80211_debugfs_key_add(struct ieee80211_key *key) rcu_read_lock(); sta = rcu_dereference(key->sta); if (sta) - sprintf(buf, "../../stations/%s", - print_mac(mac, sta->sta.addr)); + sprintf(buf, "../../stations/%pM", sta->sta.addr); rcu_read_unlock(); /* using sta as a boolean is fine outside RCU lock */ diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 2ad504fc341..c5421930172 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -41,29 +41,6 @@ static ssize_t ieee80211_if_read( return ret; } -#ifdef CONFIG_MAC80211_MESH -static ssize_t ieee80211_if_write( - struct ieee80211_sub_if_data *sdata, - char const __user *userbuf, - size_t count, loff_t *ppos, - int (*format)(struct ieee80211_sub_if_data *, char *)) -{ - char buf[10]; - int buf_size; - - memset(buf, 0x00, sizeof(buf)); - buf_size = min(count, (sizeof(buf)-1)); - if (copy_from_user(buf, userbuf, buf_size)) - return count; - read_lock(&dev_base_lock); - if (sdata->dev->reg_state == NETREG_REGISTERED) - (*format)(sdata, buf); - read_unlock(&dev_base_lock); - - return count; -} -#endif - #define IEEE80211_IF_FMT(name, field, format_string) \ static ssize_t ieee80211_if_fmt_##name( \ const struct ieee80211_sub_if_data *sdata, char *buf, \ @@ -71,19 +48,6 @@ static ssize_t ieee80211_if_fmt_##name( \ { \ return scnprintf(buf, buflen, format_string, sdata->field); \ } -#define IEEE80211_IF_WFMT(name, field, type) \ -static int ieee80211_if_wfmt_##name( \ - struct ieee80211_sub_if_data *sdata, char *buf) \ -{ \ - unsigned long tmp; \ - char *endp; \ - \ - tmp = simple_strtoul(buf, &endp, 0); \ - if ((endp == buf) || ((type)tmp != tmp)) \ - return -EINVAL; \ - sdata->field = tmp; \ - return 0; \ -} #define IEEE80211_IF_FMT_DEC(name, field) \ IEEE80211_IF_FMT(name, field, "%d\n") #define IEEE80211_IF_FMT_HEX(name, field) \ @@ -104,8 +68,7 @@ static ssize_t ieee80211_if_fmt_##name( \ const struct ieee80211_sub_if_data *sdata, char *buf, \ int buflen) \ { \ - DECLARE_MAC_BUF(mac); \ - return scnprintf(buf, buflen, "%s\n", print_mac(mac, sdata->field));\ + return scnprintf(buf, buflen, "%pM\n", sdata->field); \ } #define __IEEE80211_IF_FILE(name) \ @@ -126,34 +89,6 @@ static const struct file_operations name##_ops = { \ IEEE80211_IF_FMT_##format(name, field) \ __IEEE80211_IF_FILE(name) -#define __IEEE80211_IF_WFILE(name) \ -static ssize_t ieee80211_if_read_##name(struct file *file, \ - char __user *userbuf, \ - size_t count, loff_t *ppos) \ -{ \ - return ieee80211_if_read(file->private_data, \ - userbuf, count, ppos, \ - ieee80211_if_fmt_##name); \ -} \ -static ssize_t ieee80211_if_write_##name(struct file *file, \ - const char __user *userbuf, \ - size_t count, loff_t *ppos) \ -{ \ - return ieee80211_if_write(file->private_data, \ - userbuf, count, ppos, \ - ieee80211_if_wfmt_##name); \ -} \ -static const struct file_operations name##_ops = { \ - .read = ieee80211_if_read_##name, \ - .write = ieee80211_if_write_##name, \ - .open = mac80211_open_file_generic, \ -} - -#define IEEE80211_IF_WFILE(name, field, format, type) \ - IEEE80211_IF_FMT_##format(name, field) \ - IEEE80211_IF_WFMT(name, field, type) \ - __IEEE80211_IF_WFILE(name) - /* common attributes */ IEEE80211_IF_FILE(drop_unencrypted, drop_unencrypted, DEC); IEEE80211_IF_FILE(force_unicast_rateidx, force_unicast_rateidx, DEC); @@ -184,7 +119,7 @@ static ssize_t ieee80211_if_fmt_flags( sdata->u.sta.flags & IEEE80211_STA_AUTHENTICATED ? "AUTH\n" : "", sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED ? "ASSOC\n" : "", sdata->u.sta.flags & IEEE80211_STA_PROBEREQ_POLL ? "PROBEREQ POLL\n" : "", - sdata->bss_conf.use_cts_prot ? "CTS prot\n" : ""); + sdata->vif.bss_conf.use_cts_prot ? "CTS prot\n" : ""); } __IEEE80211_IF_FILE(flags); @@ -212,30 +147,30 @@ IEEE80211_IF_FILE(dropped_frames_no_route, IEEE80211_IF_FILE(estab_plinks, u.mesh.mshstats.estab_plinks, ATOMIC); /* Mesh parameters */ -IEEE80211_IF_WFILE(dot11MeshMaxRetries, - u.mesh.mshcfg.dot11MeshMaxRetries, DEC, u8); -IEEE80211_IF_WFILE(dot11MeshRetryTimeout, - u.mesh.mshcfg.dot11MeshRetryTimeout, DEC, u16); -IEEE80211_IF_WFILE(dot11MeshConfirmTimeout, - u.mesh.mshcfg.dot11MeshConfirmTimeout, DEC, u16); -IEEE80211_IF_WFILE(dot11MeshHoldingTimeout, - u.mesh.mshcfg.dot11MeshHoldingTimeout, DEC, u16); -IEEE80211_IF_WFILE(dot11MeshTTL, u.mesh.mshcfg.dot11MeshTTL, DEC, u8); -IEEE80211_IF_WFILE(auto_open_plinks, u.mesh.mshcfg.auto_open_plinks, DEC, u8); -IEEE80211_IF_WFILE(dot11MeshMaxPeerLinks, - u.mesh.mshcfg.dot11MeshMaxPeerLinks, DEC, u16); -IEEE80211_IF_WFILE(dot11MeshHWMPactivePathTimeout, - u.mesh.mshcfg.dot11MeshHWMPactivePathTimeout, DEC, u32); -IEEE80211_IF_WFILE(dot11MeshHWMPpreqMinInterval, - u.mesh.mshcfg.dot11MeshHWMPpreqMinInterval, DEC, u16); -IEEE80211_IF_WFILE(dot11MeshHWMPnetDiameterTraversalTime, - u.mesh.mshcfg.dot11MeshHWMPnetDiameterTraversalTime, DEC, u16); -IEEE80211_IF_WFILE(dot11MeshHWMPmaxPREQretries, - u.mesh.mshcfg.dot11MeshHWMPmaxPREQretries, DEC, u8); -IEEE80211_IF_WFILE(path_refresh_time, - u.mesh.mshcfg.path_refresh_time, DEC, u32); -IEEE80211_IF_WFILE(min_discovery_timeout, - u.mesh.mshcfg.min_discovery_timeout, DEC, u16); +IEEE80211_IF_FILE(dot11MeshMaxRetries, + u.mesh.mshcfg.dot11MeshMaxRetries, DEC); +IEEE80211_IF_FILE(dot11MeshRetryTimeout, + u.mesh.mshcfg.dot11MeshRetryTimeout, DEC); +IEEE80211_IF_FILE(dot11MeshConfirmTimeout, + u.mesh.mshcfg.dot11MeshConfirmTimeout, DEC); +IEEE80211_IF_FILE(dot11MeshHoldingTimeout, + u.mesh.mshcfg.dot11MeshHoldingTimeout, DEC); +IEEE80211_IF_FILE(dot11MeshTTL, u.mesh.mshcfg.dot11MeshTTL, DEC); +IEEE80211_IF_FILE(auto_open_plinks, u.mesh.mshcfg.auto_open_plinks, DEC); +IEEE80211_IF_FILE(dot11MeshMaxPeerLinks, + u.mesh.mshcfg.dot11MeshMaxPeerLinks, DEC); +IEEE80211_IF_FILE(dot11MeshHWMPactivePathTimeout, + u.mesh.mshcfg.dot11MeshHWMPactivePathTimeout, DEC); +IEEE80211_IF_FILE(dot11MeshHWMPpreqMinInterval, + u.mesh.mshcfg.dot11MeshHWMPpreqMinInterval, DEC); +IEEE80211_IF_FILE(dot11MeshHWMPnetDiameterTraversalTime, + u.mesh.mshcfg.dot11MeshHWMPnetDiameterTraversalTime, DEC); +IEEE80211_IF_FILE(dot11MeshHWMPmaxPREQretries, + u.mesh.mshcfg.dot11MeshHWMPmaxPREQretries, DEC); +IEEE80211_IF_FILE(path_refresh_time, + u.mesh.mshcfg.path_refresh_time, DEC); +IEEE80211_IF_FILE(min_discovery_timeout, + u.mesh.mshcfg.min_discovery_timeout, DEC); #endif diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index b85c4f27b36..a2fbe013131 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -39,13 +39,6 @@ static const struct file_operations sta_ ##name## _ops = { \ .open = mac80211_open_file_generic, \ } -#define STA_OPS_WR(name) \ -static const struct file_operations sta_ ##name## _ops = { \ - .read = sta_##name##_read, \ - .write = sta_##name##_write, \ - .open = mac80211_open_file_generic, \ -} - #define STA_FILE(name, field, format) \ STA_READ_##format(name, field) \ STA_OPS(name) @@ -144,7 +137,7 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, p += scnprintf(p, sizeof(buf)+buf-p, "\n DTKN:"); for (i = 0; i < STA_TID_NUM; i++) p += scnprintf(p, sizeof(buf)+buf-p, "%5d", - sta->ampdu_mlme.tid_state_rx[i]? + sta->ampdu_mlme.tid_state_rx[i] ? sta->ampdu_mlme.tid_rx[i]->dialog_token : 0); p += scnprintf(p, sizeof(buf)+buf-p, "\n TX :"); @@ -155,84 +148,20 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, p += scnprintf(p, sizeof(buf)+buf-p, "\n DTKN:"); for (i = 0; i < STA_TID_NUM; i++) p += scnprintf(p, sizeof(buf)+buf-p, "%5d", - sta->ampdu_mlme.tid_state_tx[i]? + sta->ampdu_mlme.tid_state_tx[i] ? sta->ampdu_mlme.tid_tx[i]->dialog_token : 0); p += scnprintf(p, sizeof(buf)+buf-p, "\n SSN :"); for (i = 0; i < STA_TID_NUM; i++) p += scnprintf(p, sizeof(buf)+buf-p, "%5d", - sta->ampdu_mlme.tid_state_tx[i]? + sta->ampdu_mlme.tid_state_tx[i] ? sta->ampdu_mlme.tid_tx[i]->ssn : 0); p += scnprintf(p, sizeof(buf)+buf-p, "\n"); return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); } - -static ssize_t sta_agg_status_write(struct file *file, - const char __user *user_buf, size_t count, loff_t *ppos) -{ - struct sta_info *sta = file->private_data; - struct ieee80211_local *local = sta->sdata->local; - struct ieee80211_hw *hw = &local->hw; - u8 *da = sta->sta.addr; - static int tid_static_tx[16] = {0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0}; - static int tid_static_rx[16] = {1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1}; - char *endp; - char buf[32]; - int buf_size, rs; - unsigned int tid_num; - char state[4]; - - memset(buf, 0x00, sizeof(buf)); - buf_size = min(count, (sizeof(buf)-1)); - if (copy_from_user(buf, user_buf, buf_size)) - return -EFAULT; - - tid_num = simple_strtoul(buf, &endp, 0); - if (endp == buf) - return -EINVAL; - - if ((tid_num >= 100) && (tid_num <= 115)) { - /* toggle Rx aggregation command */ - tid_num = tid_num - 100; - if (tid_static_rx[tid_num] == 1) { - strcpy(state, "off"); - ieee80211_sta_stop_rx_ba_session(sta->sdata, da, tid_num, 0, - WLAN_REASON_QSTA_REQUIRE_SETUP); - sta->ampdu_mlme.tid_state_rx[tid_num] |= - HT_AGG_STATE_DEBUGFS_CTL; - tid_static_rx[tid_num] = 0; - } else { - strcpy(state, "on "); - sta->ampdu_mlme.tid_state_rx[tid_num] &= - ~HT_AGG_STATE_DEBUGFS_CTL; - tid_static_rx[tid_num] = 1; - } - printk(KERN_DEBUG "debugfs - try switching tid %u %s\n", - tid_num, state); - } else if ((tid_num >= 0) && (tid_num <= 15)) { - /* toggle Tx aggregation command */ - if (tid_static_tx[tid_num] == 0) { - strcpy(state, "on "); - rs = ieee80211_start_tx_ba_session(hw, da, tid_num); - if (rs == 0) - tid_static_tx[tid_num] = 1; - } else { - strcpy(state, "off"); - rs = ieee80211_stop_tx_ba_session(hw, da, tid_num, 1); - if (rs == 0) - tid_static_tx[tid_num] = 0; - } - printk(KERN_DEBUG "debugfs - switching tid %u %s, return=%d\n", - tid_num, state, rs); - } - - return count; -} -STA_OPS_WR(agg_status); +STA_OPS(agg_status); #define DEBUGFS_ADD(name) \ sta->debugfs.name = debugfs_create_file(#name, 0400, \ @@ -246,15 +175,14 @@ STA_OPS_WR(agg_status); void ieee80211_sta_debugfs_add(struct sta_info *sta) { struct dentry *stations_dir = sta->local->debugfs.stations; - DECLARE_MAC_BUF(mbuf); - u8 *mac; + u8 mac[3*ETH_ALEN]; sta->debugfs.add_has_run = true; if (!stations_dir) return; - mac = print_mac(mbuf, sta->sta.addr); + snprintf(mac, sizeof(mac), "%pM", sta->sta.addr); /* * This might fail due to a race condition: diff --git a/net/mac80211/event.c b/net/mac80211/event.c index 8de60de70bc..0d95561c0ee 100644 --- a/net/mac80211/event.c +++ b/net/mac80211/event.c @@ -21,14 +21,13 @@ void mac80211_ev_michael_mic_failure(struct ieee80211_sub_if_data *sdata, int ke { union iwreq_data wrqu; char *buf = kmalloc(128, GFP_ATOMIC); - DECLARE_MAC_BUF(mac); if (buf) { /* TODO: needed parameters: count, key type, TSC */ sprintf(buf, "MLME-MICHAELMICFAILURE.indication(" - "keyid=%d %scast addr=%s)", + "keyid=%d %scast addr=%pM)", keyidx, hdr->addr1[0] & 0x01 ? "broad" : "uni", - print_mac(mac, hdr->addr2)); + hdr->addr2); memset(&wrqu, 0, sizeof(wrqu)); wrqu.data.length = strlen(buf); wireless_send_event(sdata->dev, IWEVCUSTOM, &wrqu, buf); diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index dc7d9a3d70d..5f510a13b9f 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -20,50 +20,138 @@ #include "sta_info.h" #include "wme.h" -int ieee80211_ht_cap_ie_to_ht_info(struct ieee80211_ht_cap *ht_cap_ie, - struct ieee80211_ht_info *ht_info) +void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband, + struct ieee80211_ht_cap *ht_cap_ie, + struct ieee80211_sta_ht_cap *ht_cap) { + u8 ampdu_info, tx_mcs_set_cap; + int i, max_tx_streams; - if (ht_info == NULL) - return -EINVAL; + BUG_ON(!ht_cap); + + memset(ht_cap, 0, sizeof(*ht_cap)); + + if (!ht_cap_ie) + return; + + ht_cap->ht_supported = true; - memset(ht_info, 0, sizeof(*ht_info)); + ht_cap->cap = le16_to_cpu(ht_cap_ie->cap_info) & sband->ht_cap.cap; + ht_cap->cap &= ~IEEE80211_HT_CAP_SM_PS; + ht_cap->cap |= sband->ht_cap.cap & IEEE80211_HT_CAP_SM_PS; - if (ht_cap_ie) { - u8 ampdu_info = ht_cap_ie->ampdu_params_info; + ampdu_info = ht_cap_ie->ampdu_params_info; + ht_cap->ampdu_factor = + ampdu_info & IEEE80211_HT_AMPDU_PARM_FACTOR; + ht_cap->ampdu_density = + (ampdu_info & IEEE80211_HT_AMPDU_PARM_DENSITY) >> 2; - ht_info->ht_supported = 1; - ht_info->cap = le16_to_cpu(ht_cap_ie->cap_info); - ht_info->ampdu_factor = - ampdu_info & IEEE80211_HT_CAP_AMPDU_FACTOR; - ht_info->ampdu_density = - (ampdu_info & IEEE80211_HT_CAP_AMPDU_DENSITY) >> 2; - memcpy(ht_info->supp_mcs_set, ht_cap_ie->supp_mcs_set, 16); - } else - ht_info->ht_supported = 0; + /* own MCS TX capabilities */ + tx_mcs_set_cap = sband->ht_cap.mcs.tx_params; - return 0; + /* can we TX with MCS rates? */ + if (!(tx_mcs_set_cap & IEEE80211_HT_MCS_TX_DEFINED)) + return; + + /* Counting from 0, therefore +1 */ + if (tx_mcs_set_cap & IEEE80211_HT_MCS_TX_RX_DIFF) + max_tx_streams = + ((tx_mcs_set_cap & IEEE80211_HT_MCS_TX_MAX_STREAMS_MASK) + >> IEEE80211_HT_MCS_TX_MAX_STREAMS_SHIFT) + 1; + else + max_tx_streams = IEEE80211_HT_MCS_TX_MAX_STREAMS; + + /* + * 802.11n D5.0 20.3.5 / 20.6 says: + * - indices 0 to 7 and 32 are single spatial stream + * - 8 to 31 are multiple spatial streams using equal modulation + * [8..15 for two streams, 16..23 for three and 24..31 for four] + * - remainder are multiple spatial streams using unequal modulation + */ + for (i = 0; i < max_tx_streams; i++) + ht_cap->mcs.rx_mask[i] = + sband->ht_cap.mcs.rx_mask[i] & ht_cap_ie->mcs.rx_mask[i]; + + if (tx_mcs_set_cap & IEEE80211_HT_MCS_TX_UNEQUAL_MODULATION) + for (i = IEEE80211_HT_MCS_UNEQUAL_MODULATION_START_BYTE; + i < IEEE80211_HT_MCS_MASK_LEN; i++) + ht_cap->mcs.rx_mask[i] = + sband->ht_cap.mcs.rx_mask[i] & + ht_cap_ie->mcs.rx_mask[i]; + + /* handle MCS rate 32 too */ + if (sband->ht_cap.mcs.rx_mask[32/8] & ht_cap_ie->mcs.rx_mask[32/8] & 1) + ht_cap->mcs.rx_mask[32/8] |= 1; } -int ieee80211_ht_addt_info_ie_to_ht_bss_info( - struct ieee80211_ht_addt_info *ht_add_info_ie, - struct ieee80211_ht_bss_info *bss_info) +/* + * ieee80211_enable_ht should be called only after the operating band + * has been determined as ht configuration depends on the hw's + * HT abilities for a specific band. + */ +u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, + struct ieee80211_ht_info *hti, + u16 ap_ht_cap_flags) { - if (bss_info == NULL) - return -EINVAL; + struct ieee80211_local *local = sdata->local; + struct ieee80211_supported_band *sband; + struct ieee80211_bss_ht_conf ht; + u32 changed = 0; + bool enable_ht = true, ht_changed; + enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; + + sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + + memset(&ht, 0, sizeof(ht)); + + /* HT is not supported */ + if (!sband->ht_cap.ht_supported) + enable_ht = false; + + /* check that channel matches the right operating channel */ + if (local->hw.conf.channel->center_freq != + ieee80211_channel_to_frequency(hti->control_chan)) + enable_ht = false; + + if (enable_ht) { + channel_type = NL80211_CHAN_HT20; + + if (!(ap_ht_cap_flags & IEEE80211_HT_CAP_40MHZ_INTOLERANT) && + (sband->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) && + (hti->ht_param & IEEE80211_HT_PARAM_CHAN_WIDTH_ANY)) { + switch(hti->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) { + case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: + channel_type = NL80211_CHAN_HT40PLUS; + break; + case IEEE80211_HT_PARAM_CHA_SEC_BELOW: + channel_type = NL80211_CHAN_HT40MINUS; + break; + } + } + } + + ht_changed = local->hw.conf.ht.enabled != enable_ht || + channel_type != local->hw.conf.ht.channel_type; + + local->oper_channel_type = channel_type; + local->hw.conf.ht.enabled = enable_ht; - memset(bss_info, 0, sizeof(*bss_info)); + if (ht_changed) + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_HT); - if (ht_add_info_ie) { - u16 op_mode; - op_mode = le16_to_cpu(ht_add_info_ie->operation_mode); + /* disable HT */ + if (!enable_ht) + return 0; - bss_info->primary_channel = ht_add_info_ie->control_chan; - bss_info->bss_cap = ht_add_info_ie->ht_param; - bss_info->bss_op_mode = (u8)(op_mode & 0xff); + ht.operation_mode = le16_to_cpu(hti->operation_mode); + + /* if bss configuration changed store the new one */ + if (memcmp(&sdata->vif.bss_conf.ht, &ht, sizeof(ht))) { + changed |= BSS_CHANGED_HT; + sdata->vif.bss_conf.ht = ht; } - return 0; + return changed; } static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, @@ -241,7 +329,6 @@ void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *r struct ieee80211_hw *hw = &local->hw; struct sta_info *sta; int ret, i; - DECLARE_MAC_BUF(mac); rcu_read_lock(); @@ -269,8 +356,8 @@ void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *r BUG_ON(!local->ops->ampdu_action); #ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Rx BA session stop requested for %s tid %u\n", - print_mac(mac, ra), tid); + printk(KERN_DEBUG "Rx BA session stop requested for %pM tid %u\n", + ra, tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_STOP, @@ -383,14 +470,13 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) u16 start_seq_num; u8 *state; int ret; - DECLARE_MAC_BUF(mac); - if (tid >= STA_TID_NUM) + if ((tid >= STA_TID_NUM) || !(hw->flags & IEEE80211_HW_AMPDU_AGGREGATION)) return -EINVAL; #ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Open BA session requested for %s tid %u\n", - print_mac(mac, ra), tid); + printk(KERN_DEBUG "Open BA session requested for %pM tid %u\n", + ra, tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ rcu_read_lock(); @@ -442,17 +528,19 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) (unsigned long)&sta->timer_to_tid[tid]; init_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); - /* create a new queue for this aggregation */ - ret = ieee80211_ht_agg_queue_add(local, sta, tid); + if (hw->ampdu_queues) { + /* create a new queue for this aggregation */ + ret = ieee80211_ht_agg_queue_add(local, sta, tid); - /* case no queue is available to aggregation - * don't switch to aggregation */ - if (ret) { + /* case no queue is available to aggregation + * don't switch to aggregation */ + if (ret) { #ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "BA request denied - queue unavailable for" - " tid %d\n", tid); + printk(KERN_DEBUG "BA request denied - " + "queue unavailable for tid %d\n", tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ - goto err_unlock_queue; + goto err_unlock_queue; + } } sdata = sta->sdata; @@ -471,7 +559,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) /* No need to requeue the packets in the agg queue, since we * held the tx lock: no packet could be enqueued to the newly * allocated queue */ - ieee80211_ht_agg_queue_remove(local, sta, tid, 0); + if (hw->ampdu_queues) + ieee80211_ht_agg_queue_remove(local, sta, tid, 0); #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "BA request denied - HW unavailable for" " tid %d\n", tid); @@ -481,7 +570,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) } /* Will put all the packets in the new SW queue */ - ieee80211_requeue(local, ieee802_1d_to_ac[tid]); + if (hw->ampdu_queues) + ieee80211_requeue(local, ieee802_1d_to_ac[tid]); spin_unlock_bh(&sta->lock); /* send an addBA request */ @@ -524,7 +614,6 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_hw *hw, struct sta_info *sta; u8 *state; int ret = 0; - DECLARE_MAC_BUF(mac); if (tid >= STA_TID_NUM) return -EINVAL; @@ -546,11 +635,12 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_hw *hw, } #ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Tx BA session stop requested for %s tid %u\n", - print_mac(mac, ra), tid); + printk(KERN_DEBUG "Tx BA session stop requested for %pM tid %u\n", + ra, tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ - ieee80211_stop_queue(hw, sta->tid_to_tx_q[tid]); + if (hw->ampdu_queues) + ieee80211_stop_queue(hw, sta->tid_to_tx_q[tid]); *state = HT_AGG_STATE_REQ_STOP_BA_MSK | (initiator << HT_AGG_STATE_INITIATOR_SHIFT); @@ -563,7 +653,8 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_hw *hw, if (ret) { WARN_ON(ret != -EBUSY); *state = HT_AGG_STATE_OPERATIONAL; - ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); + if (hw->ampdu_queues) + ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); goto stop_BA_exit; } @@ -579,7 +670,6 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid) struct ieee80211_local *local = hw_to_local(hw); struct sta_info *sta; u8 *state; - DECLARE_MAC_BUF(mac); if (tid >= STA_TID_NUM) { #ifdef CONFIG_MAC80211_HT_DEBUG @@ -594,8 +684,7 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid) if (!sta) { rcu_read_unlock(); #ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Could not find station: %s\n", - print_mac(mac, ra)); + printk(KERN_DEBUG "Could not find station: %pM\n", ra); #endif return; } @@ -621,7 +710,8 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid) #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Aggregation is on for tid %d \n", tid); #endif - ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); + if (hw->ampdu_queues) + ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); } spin_unlock_bh(&sta->lock); rcu_read_unlock(); @@ -634,7 +724,6 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid) struct sta_info *sta; u8 *state; int agg_queue; - DECLARE_MAC_BUF(mac); if (tid >= STA_TID_NUM) { #ifdef CONFIG_MAC80211_HT_DEBUG @@ -645,16 +734,15 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid) } #ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Stopping Tx BA session for %s tid %d\n", - print_mac(mac, ra), tid); + printk(KERN_DEBUG "Stopping Tx BA session for %pM tid %d\n", + ra, tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ rcu_read_lock(); sta = sta_info_get(local, ra); if (!sta) { #ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Could not find station: %s\n", - print_mac(mac, ra)); + printk(KERN_DEBUG "Could not find station: %pM\n", ra); #endif rcu_read_unlock(); return; @@ -677,16 +765,18 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid) ieee80211_send_delba(sta->sdata, ra, tid, WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE); - agg_queue = sta->tid_to_tx_q[tid]; - - ieee80211_ht_agg_queue_remove(local, sta, tid, 1); - - /* We just requeued the all the frames that were in the - * removed queue, and since we might miss a softirq we do - * netif_schedule_queue. ieee80211_wake_queue is not used - * here as this queue is not necessarily stopped - */ - netif_schedule_queue(netdev_get_tx_queue(local->mdev, agg_queue)); + if (hw->ampdu_queues) { + agg_queue = sta->tid_to_tx_q[tid]; + ieee80211_ht_agg_queue_remove(local, sta, tid, 1); + + /* We just requeued the all the frames that were in the + * removed queue, and since we might miss a softirq we do + * netif_schedule_queue. ieee80211_wake_queue is not used + * here as this queue is not necessarily stopped + */ + netif_schedule_queue(netdev_get_tx_queue(local->mdev, + agg_queue)); + } spin_lock_bh(&sta->lock); *state = HT_AGG_STATE_IDLE; sta->ampdu_mlme.addba_req_num[tid] = 0; @@ -783,7 +873,6 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num, status; u8 dialog_token; int ret = -EOPNOTSUPP; - DECLARE_MAC_BUF(mac); /* extract session parameters from addba request frame */ dialog_token = mgmt->u.action.u.addba_req.dialog_token; @@ -801,15 +890,16 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, /* sanity check for incoming parameters: * check if configuration can support the BA policy * and if buffer size does not exceeds max value */ + /* XXX: check own ht delayed BA capability?? */ if (((ba_policy != 1) - && (!(conf->ht_conf.cap & IEEE80211_HT_CAP_DELAY_BA))) + && (!(sta->sta.ht_cap.cap & IEEE80211_HT_CAP_DELAY_BA))) || (buf_size > IEEE80211_MAX_AMPDU_BUF)) { status = WLAN_STATUS_INVALID_QOS_PARAM; #ifdef CONFIG_MAC80211_HT_DEBUG if (net_ratelimit()) printk(KERN_DEBUG "AddBA Req with bad params from " - "%s on tid %u. policy %d, buffer size %d\n", - print_mac(mac, mgmt->sa), tid, ba_policy, + "%pM on tid %u. policy %d, buffer size %d\n", + mgmt->sa, tid, ba_policy, buf_size); #endif /* CONFIG_MAC80211_HT_DEBUG */ goto end_no_lock; @@ -820,7 +910,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, sband = local->hw.wiphy->bands[conf->channel->band]; buf_size = IEEE80211_MIN_AMPDU_BUF; - buf_size = buf_size << sband->ht_info.ampdu_factor; + buf_size = buf_size << sband->ht_cap.ampdu_factor; } @@ -831,8 +921,8 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, #ifdef CONFIG_MAC80211_HT_DEBUG if (net_ratelimit()) printk(KERN_DEBUG "unexpected AddBA Req from " - "%s on tid %u\n", - print_mac(mac, mgmt->sa), tid); + "%pM on tid %u\n", + mgmt->sa, tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ goto end; } @@ -910,7 +1000,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, { struct ieee80211_hw *hw = &local->hw; u16 capab; - u16 tid; + u16 tid, start_seq_num; u8 *state; capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab); @@ -943,9 +1033,18 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, *state |= HT_ADDBA_RECEIVED_MSK; sta->ampdu_mlme.addba_req_num[tid] = 0; - if (*state == HT_AGG_STATE_OPERATIONAL) + if (*state == HT_AGG_STATE_OPERATIONAL && + local->hw.ampdu_queues) ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); + if (local->ops->ampdu_action) { + (void)local->ops->ampdu_action(hw, + IEEE80211_AMPDU_TX_RESUME, + &sta->sta, tid, &start_seq_num); + } +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Resuming TX aggregation for tid %d\n", tid); +#endif /* CONFIG_MAC80211_HT_DEBUG */ spin_unlock_bh(&sta->lock); } else { sta->ampdu_mlme.addba_req_num[tid]++; @@ -964,7 +1063,6 @@ void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; u16 tid, params; u16 initiator; - DECLARE_MAC_BUF(mac); params = le16_to_cpu(mgmt->u.action.u.delba.params); tid = (params & IEEE80211_DELBA_PARAM_TID_MASK) >> 12; @@ -972,9 +1070,8 @@ void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, #ifdef CONFIG_MAC80211_HT_DEBUG if (net_ratelimit()) - printk(KERN_DEBUG "delba from %s (%s) tid %d reason code %d\n", - print_mac(mac, mgmt->sa), - initiator ? "initiator" : "recipient", tid, + printk(KERN_DEBUG "delba from %pM (%s) tid %d reason code %d\n", + mgmt->sa, initiator ? "initiator" : "recipient", tid, mgmt->u.action.u.delba.reason_code); #endif /* CONFIG_MAC80211_HT_DEBUG */ diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 156e42a003a..f3eec989662 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -23,6 +23,7 @@ #include <linux/types.h> #include <linux/spinlock.h> #include <linux/etherdevice.h> +#include <net/cfg80211.h> #include <net/wireless.h> #include <net/iw_handler.h> #include <net/mac80211.h> @@ -142,7 +143,6 @@ typedef unsigned __bitwise__ ieee80211_tx_result; #define IEEE80211_TX_FRAGMENTED BIT(0) #define IEEE80211_TX_UNICAST BIT(1) #define IEEE80211_TX_PS_BUFFERED BIT(2) -#define IEEE80211_TX_PROBE_LAST_FRAG BIT(3) struct ieee80211_tx_data { struct sk_buff *skb; @@ -153,11 +153,6 @@ struct ieee80211_tx_data { struct ieee80211_key *key; struct ieee80211_channel *channel; - s8 rate_idx; - /* use this rate (if set) for last fragment; rate can - * be set to lower rate for the first fragments, e.g., - * when using CTS protection with IEEE 802.11g. */ - s8 last_frag_rate_idx; /* Extra fragments (in addition to the first fragment * in skb) */ @@ -192,7 +187,6 @@ struct ieee80211_rx_data { struct ieee80211_rx_status *status; struct ieee80211_rate *rate; - u16 ethertype; unsigned int flags; int sent_ps_buffered; int queue; @@ -203,9 +197,7 @@ struct ieee80211_rx_data { struct ieee80211_tx_stored_packet { struct sk_buff *skb; struct sk_buff **extra_frag; - s8 last_frag_rate_idx; int num_extra_frag; - bool last_frag_rate_ctrl_probe; }; struct beacon_data { @@ -219,9 +211,6 @@ struct ieee80211_if_ap { struct list_head vlans; - u8 ssid[IEEE80211_MAX_SSID_LEN]; - size_t ssid_len; - /* yes, this looks ugly, but guarantees that we can later use * bitmap_empty :) * NB: don't touch this bitmap, use sta_info_{set,clear}_tim_bit */ @@ -255,26 +244,6 @@ struct mesh_preq_queue { u8 flags; }; -struct mesh_config { - /* Timeouts in ms */ - /* Mesh plink management parameters */ - u16 dot11MeshRetryTimeout; - u16 dot11MeshConfirmTimeout; - u16 dot11MeshHoldingTimeout; - u16 dot11MeshMaxPeerLinks; - u8 dot11MeshMaxRetries; - u8 dot11MeshTTL; - bool auto_open_plinks; - /* HWMP parameters */ - u8 dot11MeshHWMPmaxPREQretries; - u32 path_refresh_time; - u16 min_discovery_timeout; - u32 dot11MeshHWMPactivePathTimeout; - u16 dot11MeshHWMPpreqMinInterval; - u16 dot11MeshHWMPnetDiameterTraversalTime; -}; - - /* flags used in struct ieee80211_if_sta.flags */ #define IEEE80211_STA_SSID_SET BIT(0) #define IEEE80211_STA_BSSID_SET BIT(1) @@ -438,8 +407,7 @@ struct ieee80211_sub_if_data { struct ieee80211_key *keys[NUM_DEFAULT_KEYS]; struct ieee80211_key *default_key; - /* BSS configuration for this interface. */ - struct ieee80211_bss_conf bss_conf; + u16 sequence_number; /* * AP this belongs to: self in AP mode and @@ -570,6 +538,11 @@ enum { IEEE80211_ADDBA_MSG = 4, }; +enum queue_stop_reason { + IEEE80211_QUEUE_STOP_REASON_DRIVER, + IEEE80211_QUEUE_STOP_REASON_PS, +}; + /* maximum number of hardware queues we support. */ #define QD_MAX_QUEUES (IEEE80211_MAX_AMPDU_QUEUES + IEEE80211_MAX_QUEUES) @@ -586,7 +559,8 @@ struct ieee80211_local { const struct ieee80211_ops *ops; unsigned long queue_pool[BITS_TO_LONGS(QD_MAX_QUEUES)]; - + unsigned long queue_stop_reasons[IEEE80211_MAX_QUEUES]; + spinlock_t queue_stop_reason_lock; struct net_device *mdev; /* wmaster# - "master" 802.11 device */ int open_count; int monitors, cooked_mntrs; @@ -633,8 +607,6 @@ struct ieee80211_local { int rts_threshold; int fragmentation_threshold; - int short_retry_limit; /* dot11ShortRetryLimit */ - int long_retry_limit; /* dot11LongRetryLimit */ struct crypto_blkcipher *wep_tx_tfm; struct crypto_blkcipher *wep_rx_tfm; @@ -659,6 +631,7 @@ struct ieee80211_local { struct delayed_work scan_work; struct ieee80211_sub_if_data *scan_sdata; struct ieee80211_channel *oper_channel, *scan_channel; + enum nl80211_channel_type oper_channel_type; u8 scan_ssid[IEEE80211_MAX_SSID_LEN]; size_t scan_ssid_len; struct list_head bss_list; @@ -722,13 +695,17 @@ struct ieee80211_local { int wifi_wme_noack_test; unsigned int wmm_acm; /* bit field of ACM bits (BIT(802.1D tag)) */ + bool powersave; + int dynamic_ps_timeout; + struct work_struct dynamic_ps_enable_work; + struct work_struct dynamic_ps_disable_work; + struct timer_list dynamic_ps_timer; + #ifdef CONFIG_MAC80211_DEBUGFS struct local_debugfsdentries { struct dentry *rcdir; struct dentry *rcname; struct dentry *frequency; - struct dentry *antenna_sel_tx; - struct dentry *antenna_sel_rx; struct dentry *rts_threshold; struct dentry *fragmentation_threshold; struct dentry *short_retry_limit; @@ -817,7 +794,7 @@ struct ieee802_11_elems { u8 *wmm_info; u8 *wmm_param; struct ieee80211_ht_cap *ht_cap_elem; - struct ieee80211_ht_addt_info *ht_info_elem; + struct ieee80211_ht_info *ht_info_elem; u8 *mesh_config; u8 *mesh_id; u8 *peer_link; @@ -869,11 +846,6 @@ static inline struct ieee80211_hw *local_to_hw( return &local->hw; } -struct sta_attribute { - struct attribute attr; - ssize_t (*show)(const struct sta_info *, char *buf); - ssize_t (*store)(struct sta_info *, const char *buf, size_t count); -}; static inline int ieee80211_bssid_match(const u8 *raddr, const u8 *addr) { @@ -882,12 +854,9 @@ static inline int ieee80211_bssid_match(const u8 *raddr, const u8 *addr) } -int ieee80211_hw_config(struct ieee80211_local *local); +int ieee80211_hw_config(struct ieee80211_local *local, u32 changed); int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed); void ieee80211_tx_set_protected(struct ieee80211_tx_data *tx); -u32 ieee80211_handle_ht(struct ieee80211_local *local, int enable_ht, - struct ieee80211_ht_info *req_ht_cap, - struct ieee80211_ht_bss_info *req_bss_cap); void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, u32 changed); void ieee80211_configure_filter(struct ieee80211_local *local); @@ -906,8 +875,7 @@ int ieee80211_sta_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid); void ieee80211_sta_req_auth(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta); struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb, u8 *bssid, - u8 *addr, u64 supp_rates); + u8 *bssid, u8 *addr, u64 supp_rates); int ieee80211_sta_deauthenticate(struct ieee80211_sub_if_data *sdata, u16 reason); int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason); u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata); @@ -968,11 +936,12 @@ int ieee80211_monitor_start_xmit(struct sk_buff *skb, struct net_device *dev); int ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev); /* HT */ -int ieee80211_ht_cap_ie_to_ht_info(struct ieee80211_ht_cap *ht_cap_ie, - struct ieee80211_ht_info *ht_info); -int ieee80211_ht_addt_info_ie_to_ht_bss_info( - struct ieee80211_ht_addt_info *ht_add_info_ie, - struct ieee80211_ht_bss_info *bss_info); +void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband, + struct ieee80211_ht_cap *ht_cap_ie, + struct ieee80211_sta_ht_cap *ht_cap); +u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, + struct ieee80211_ht_info *hti, + u16 ap_ht_cap_flags); void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u16 ssn); void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *da, @@ -1014,6 +983,15 @@ int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freq); u64 ieee80211_mandatory_rates(struct ieee80211_local *local, enum ieee80211_band band); +void ieee80211_dynamic_ps_enable_work(struct work_struct *work); +void ieee80211_dynamic_ps_disable_work(struct work_struct *work); +void ieee80211_dynamic_ps_timer(unsigned long data); + +void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, + enum queue_stop_reason reason); +void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw, + enum queue_stop_reason reason); + #ifdef CONFIG_MAC80211_NOINLINE #define debug_noinline noinline #else diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 8336fee68d3..5abbc3f07dd 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -65,7 +65,7 @@ static int ieee80211_open(struct net_device *dev) struct ieee80211_if_init_conf conf; u32 changed = 0; int res; - bool need_hw_reconfig = 0; + u32 hw_reconf_flags = 0; u8 null_addr[ETH_ALEN] = {0}; /* fail early if user set an invalid address */ @@ -152,7 +152,8 @@ static int ieee80211_open(struct net_device *dev) res = local->ops->start(local_to_hw(local)); if (res) goto err_del_bss; - need_hw_reconfig = 1; + /* we're brought up, everything changes */ + hw_reconf_flags = ~0; ieee80211_led_radio(local, local->hw.conf.radio_enabled); } @@ -198,8 +199,10 @@ static int ieee80211_open(struct net_device *dev) /* must be before the call to ieee80211_configure_filter */ local->monitors++; - if (local->monitors == 1) + if (local->monitors == 1) { local->hw.conf.flags |= IEEE80211_CONF_RADIOTAP; + hw_reconf_flags |= IEEE80211_CONF_CHANGE_RADIOTAP; + } if (sdata->u.mntr_flags & MONITOR_FLAG_FCSFAIL) local->fif_fcsfail++; @@ -226,8 +229,14 @@ static int ieee80211_open(struct net_device *dev) if (res) goto err_stop; - if (ieee80211_vif_is_mesh(&sdata->vif)) + if (ieee80211_vif_is_mesh(&sdata->vif)) { + local->fif_other_bss++; + netif_addr_lock_bh(local->mdev); + ieee80211_configure_filter(local); + netif_addr_unlock_bh(local->mdev); + ieee80211_start_mesh(sdata); + } changed |= ieee80211_reset_erp_info(sdata); ieee80211_bss_info_change_notify(sdata, changed); ieee80211_enable_keys(sdata); @@ -279,8 +288,8 @@ static int ieee80211_open(struct net_device *dev) atomic_inc(&local->iff_promiscs); local->open_count++; - if (need_hw_reconfig) { - ieee80211_hw_config(local); + if (hw_reconf_flags) { + ieee80211_hw_config(local, hw_reconf_flags); /* * set default queue parameters so drivers don't * need to initialise the hardware if the hardware @@ -322,6 +331,7 @@ static int ieee80211_stop(struct net_device *dev) struct ieee80211_local *local = sdata->local; struct ieee80211_if_init_conf conf; struct sta_info *sta; + u32 hw_reconf_flags = 0; /* * Stop TX on this interface first. @@ -405,8 +415,10 @@ static int ieee80211_stop(struct net_device *dev) } local->monitors--; - if (local->monitors == 0) + if (local->monitors == 0) { local->hw.conf.flags &= ~IEEE80211_CONF_RADIOTAP; + hw_reconf_flags |= IEEE80211_CONF_CHANGE_RADIOTAP; + } if (sdata->u.mntr_flags & MONITOR_FLAG_FCSFAIL) local->fif_fcsfail--; @@ -423,7 +435,11 @@ static int ieee80211_stop(struct net_device *dev) break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_ADHOC: - sdata->u.sta.state = IEEE80211_STA_MLME_DISABLED; + /* Announce that we are leaving the network. */ + if (sdata->u.sta.state != IEEE80211_STA_MLME_DISABLED) + ieee80211_sta_deauthenticate(sdata, + WLAN_REASON_DEAUTH_LEAVING); + memset(sdata->u.sta.bssid, 0, ETH_ALEN); del_timer_sync(&sdata->u.sta.timer); /* @@ -450,8 +466,15 @@ static int ieee80211_stop(struct net_device *dev) /* fall through */ case NL80211_IFTYPE_MESH_POINT: if (ieee80211_vif_is_mesh(&sdata->vif)) { - /* allmulti is always set on mesh ifaces */ + /* other_bss and allmulti are always set on mesh + * ifaces */ + local->fif_other_bss--; atomic_dec(&local->iff_allmultis); + + netif_addr_lock_bh(local->mdev); + ieee80211_configure_filter(local); + netif_addr_unlock_bh(local->mdev); + ieee80211_stop_mesh(sdata); } /* fall through */ @@ -504,8 +527,15 @@ static int ieee80211_stop(struct net_device *dev) tasklet_disable(&local->tx_pending_tasklet); tasklet_disable(&local->tasklet); + + /* no reconfiguring after stop! */ + hw_reconf_flags = 0; } + /* do after stop to avoid reconfiguring when we stop anyway */ + if (hw_reconf_flags) + ieee80211_hw_config(local, hw_reconf_flags); + return 0; } @@ -668,6 +698,10 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, if (type == sdata->vif.type) return 0; + /* Setting ad-hoc mode on non-IBSS channel is not supported. */ + if (sdata->local->oper_channel->flags & IEEE80211_CHAN_NO_IBSS) + return -EOPNOTSUPP; + /* * We could, here, on changes between IBSS/STA/MESH modes, * invoke an MLME function instead that disassociates etc. @@ -682,7 +716,7 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, ieee80211_setup_sdata(sdata, type); /* reset some values that shouldn't be kept across type changes */ - sdata->bss_conf.basic_rates = + sdata->vif.bss_conf.basic_rates = ieee80211_mandatory_rates(sdata->local, sdata->local->hw.conf.channel->band); sdata->drop_unencrypted = 0; diff --git a/net/mac80211/key.c b/net/mac80211/key.c index a5b06fe7198..999f7aa4232 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -132,7 +132,6 @@ static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key) { const u8 *addr; int ret; - DECLARE_MAC_BUF(mac); assert_key_lock(); might_sleep(); @@ -154,16 +153,15 @@ static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key) if (ret && ret != -ENOSPC && ret != -EOPNOTSUPP) printk(KERN_ERR "mac80211-%s: failed to set key " - "(%d, %s) to hardware (%d)\n", + "(%d, %pM) to hardware (%d)\n", wiphy_name(key->local->hw.wiphy), - key->conf.keyidx, print_mac(mac, addr), ret); + key->conf.keyidx, addr, ret); } static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) { const u8 *addr; int ret; - DECLARE_MAC_BUF(mac); assert_key_lock(); might_sleep(); @@ -186,9 +184,9 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) if (ret) printk(KERN_ERR "mac80211-%s: failed to remove key " - "(%d, %s) from hardware (%d)\n", + "(%d, %pM) from hardware (%d)\n", wiphy_name(key->local->hw.wiphy), - key->conf.keyidx, print_mac(mac, addr), ret); + key->conf.keyidx, addr, ret); spin_lock(&todo_lock); key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index ae62ad40ad6..24b14363d6e 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -41,6 +41,8 @@ */ struct ieee80211_tx_status_rtap_hdr { struct ieee80211_radiotap_header hdr; + u8 rate; + u8 padding_for_rate; __le16 tx_flags; u8 data_retries; } __attribute__ ((packed)); @@ -169,19 +171,13 @@ int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed) conf.changed = changed; if (sdata->vif.type == NL80211_IFTYPE_STATION || - sdata->vif.type == NL80211_IFTYPE_ADHOC) { + sdata->vif.type == NL80211_IFTYPE_ADHOC) conf.bssid = sdata->u.sta.bssid; - conf.ssid = sdata->u.sta.ssid; - conf.ssid_len = sdata->u.sta.ssid_len; - } else if (sdata->vif.type == NL80211_IFTYPE_AP) { + else if (sdata->vif.type == NL80211_IFTYPE_AP) conf.bssid = sdata->dev->dev_addr; - conf.ssid = sdata->u.ap.ssid; - conf.ssid_len = sdata->u.ap.ssid_len; - } else if (ieee80211_vif_is_mesh(&sdata->vif)) { + else if (ieee80211_vif_is_mesh(&sdata->vif)) { u8 zero[ETH_ALEN] = { 0 }; conf.bssid = zero; - conf.ssid = zero; - conf.ssid_len = 0; } else { WARN_ON(1); return -EINVAL; @@ -190,136 +186,73 @@ int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed) if (WARN_ON(!conf.bssid && (changed & IEEE80211_IFCC_BSSID))) return -EINVAL; - if (WARN_ON(!conf.ssid && (changed & IEEE80211_IFCC_SSID))) - return -EINVAL; - return local->ops->config_interface(local_to_hw(local), &sdata->vif, &conf); } -int ieee80211_hw_config(struct ieee80211_local *local) +int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) { struct ieee80211_channel *chan; int ret = 0; + int power; + enum nl80211_channel_type channel_type; + + might_sleep(); - if (local->sw_scanning) + if (local->sw_scanning) { chan = local->scan_channel; - else + channel_type = NL80211_CHAN_NO_HT; + } else { chan = local->oper_channel; + channel_type = local->oper_channel_type; + } - local->hw.conf.channel = chan; + if (chan != local->hw.conf.channel || + channel_type != local->hw.conf.ht.channel_type) { + local->hw.conf.channel = chan; + local->hw.conf.ht.channel_type = channel_type; + switch (channel_type) { + case NL80211_CHAN_NO_HT: + local->hw.conf.ht.enabled = false; + break; + case NL80211_CHAN_HT20: + case NL80211_CHAN_HT40MINUS: + case NL80211_CHAN_HT40PLUS: + local->hw.conf.ht.enabled = true; + break; + } + changed |= IEEE80211_CONF_CHANGE_CHANNEL; + } if (!local->hw.conf.power_level) - local->hw.conf.power_level = chan->max_power; + power = chan->max_power; else - local->hw.conf.power_level = min(chan->max_power, - local->hw.conf.power_level); - - local->hw.conf.max_antenna_gain = chan->max_antenna_gain; - -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - printk(KERN_DEBUG "%s: HW CONFIG: freq=%d\n", - wiphy_name(local->hw.wiphy), chan->center_freq); -#endif - - if (local->open_count) - ret = local->ops->config(local_to_hw(local), &local->hw.conf); - - return ret; -} - -/** - * ieee80211_handle_ht should be used only after legacy configuration - * has been determined namely band, as ht configuration depends upon - * the hardware's HT abilities for a _specific_ band. - */ -u32 ieee80211_handle_ht(struct ieee80211_local *local, int enable_ht, - struct ieee80211_ht_info *req_ht_cap, - struct ieee80211_ht_bss_info *req_bss_cap) -{ - struct ieee80211_conf *conf = &local->hw.conf; - struct ieee80211_supported_band *sband; - struct ieee80211_ht_info ht_conf; - struct ieee80211_ht_bss_info ht_bss_conf; - u32 changed = 0; - int i; - u8 max_tx_streams = IEEE80211_HT_CAP_MAX_STREAMS; - u8 tx_mcs_set_cap; - - sband = local->hw.wiphy->bands[conf->channel->band]; - - memset(&ht_conf, 0, sizeof(struct ieee80211_ht_info)); - memset(&ht_bss_conf, 0, sizeof(struct ieee80211_ht_bss_info)); - - /* HT is not supported */ - if (!sband->ht_info.ht_supported) { - conf->flags &= ~IEEE80211_CONF_SUPPORT_HT_MODE; - goto out; + power = min(chan->max_power, local->hw.conf.power_level); + if (local->hw.conf.power_level != power) { + changed |= IEEE80211_CONF_CHANGE_POWER; + local->hw.conf.power_level = power; } - /* disable HT */ - if (!enable_ht) { - if (conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) - changed |= BSS_CHANGED_HT; - conf->flags &= ~IEEE80211_CONF_SUPPORT_HT_MODE; - conf->ht_conf.ht_supported = 0; - goto out; + if (changed && local->open_count) { + ret = local->ops->config(local_to_hw(local), changed); + /* + * Goal: + * HW reconfiguration should never fail, the driver has told + * us what it can support so it should live up to that promise. + * + * Current status: + * rfkill is not integrated with mac80211 and a + * configuration command can thus fail if hardware rfkill + * is enabled + * + * FIXME: integrate rfkill with mac80211 and then add this + * WARN_ON() back + * + */ + /* WARN_ON(ret); */ } - - if (!(conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE)) - changed |= BSS_CHANGED_HT; - - conf->flags |= IEEE80211_CONF_SUPPORT_HT_MODE; - ht_conf.ht_supported = 1; - - ht_conf.cap = req_ht_cap->cap & sband->ht_info.cap; - ht_conf.cap &= ~(IEEE80211_HT_CAP_SM_PS); - ht_conf.cap |= sband->ht_info.cap & IEEE80211_HT_CAP_SM_PS; - ht_bss_conf.primary_channel = req_bss_cap->primary_channel; - ht_bss_conf.bss_cap = req_bss_cap->bss_cap; - ht_bss_conf.bss_op_mode = req_bss_cap->bss_op_mode; - - ht_conf.ampdu_factor = req_ht_cap->ampdu_factor; - ht_conf.ampdu_density = req_ht_cap->ampdu_density; - - /* Bits 96-100 */ - tx_mcs_set_cap = sband->ht_info.supp_mcs_set[12]; - - /* configure suppoerted Tx MCS according to requested MCS - * (based in most cases on Rx capabilities of peer) and self - * Tx MCS capabilities (as defined by low level driver HW - * Tx capabilities) */ - if (!(tx_mcs_set_cap & IEEE80211_HT_CAP_MCS_TX_DEFINED)) - goto check_changed; - - /* Counting from 0 therfore + 1 */ - if (tx_mcs_set_cap & IEEE80211_HT_CAP_MCS_TX_RX_DIFF) - max_tx_streams = ((tx_mcs_set_cap & - IEEE80211_HT_CAP_MCS_TX_STREAMS) >> 2) + 1; - - for (i = 0; i < max_tx_streams; i++) - ht_conf.supp_mcs_set[i] = - sband->ht_info.supp_mcs_set[i] & - req_ht_cap->supp_mcs_set[i]; - - if (tx_mcs_set_cap & IEEE80211_HT_CAP_MCS_TX_UEQM) - for (i = IEEE80211_SUPP_MCS_SET_UEQM; - i < IEEE80211_SUPP_MCS_SET_LEN; i++) - ht_conf.supp_mcs_set[i] = - sband->ht_info.supp_mcs_set[i] & - req_ht_cap->supp_mcs_set[i]; - -check_changed: - /* if bss configuration changed store the new one */ - if (memcmp(&conf->ht_conf, &ht_conf, sizeof(ht_conf)) || - memcmp(&conf->ht_bss_conf, &ht_bss_conf, sizeof(ht_bss_conf))) { - changed |= BSS_CHANGED_HT; - memcpy(&conf->ht_conf, &ht_conf, sizeof(ht_conf)); - memcpy(&conf->ht_bss_conf, &ht_bss_conf, sizeof(ht_bss_conf)); - } -out: - return changed; + return ret; } void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, @@ -336,15 +269,18 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, if (local->ops->bss_info_changed) local->ops->bss_info_changed(local_to_hw(local), &sdata->vif, - &sdata->bss_conf, + &sdata->vif.bss_conf, changed); } u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata) { - sdata->bss_conf.use_cts_prot = 0; - sdata->bss_conf.use_short_preamble = 0; - return BSS_CHANGED_ERP_CTS_PROT | BSS_CHANGED_ERP_PREAMBLE; + sdata->vif.bss_conf.use_cts_prot = false; + sdata->vif.bss_conf.use_short_preamble = false; + sdata->vif.bss_conf.use_short_slot = false; + return BSS_CHANGED_ERP_CTS_PROT | + BSS_CHANGED_ERP_PREAMBLE | + BSS_CHANGED_ERP_SLOT; } void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw, @@ -405,7 +341,8 @@ static void ieee80211_tasklet_handler(unsigned long data) dev_kfree_skb(skb); break ; default: - WARN_ON(1); + WARN(1, "mac80211: Packet is of unknown type %d\n", + skb->pkt_type); dev_kfree_skb(skb); break; } @@ -466,8 +403,6 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, struct sta_info *sta, struct sk_buff *skb) { - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - sta->tx_filtered_count++; /* @@ -514,10 +449,9 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, return; } - if (!test_sta_flags(sta, WLAN_STA_PS) && - !(info->flags & IEEE80211_TX_CTL_REQUEUE)) { + if (!test_sta_flags(sta, WLAN_STA_PS) && !skb->requeue) { /* Software retry the packet once */ - info->flags |= IEEE80211_TX_CTL_REQUEUE; + skb->requeue = 1; ieee80211_remove_tx_extra(local, sta->key, skb); dev_queue_xmit(skb); return; @@ -547,13 +481,28 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) struct ieee80211_sub_if_data *sdata; struct net_device *prev_dev = NULL; struct sta_info *sta; + int retry_count = -1, i; + + for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { + /* the HW cannot have attempted that rate */ + if (i >= hw->max_rates) { + info->status.rates[i].idx = -1; + info->status.rates[i].count = 0; + } + + retry_count += info->status.rates[i].count; + } + if (retry_count < 0) + retry_count = 0; rcu_read_lock(); + sband = local->hw.wiphy->bands[info->band]; + sta = sta_info_get(local, hdr->addr1); if (sta) { - if (info->status.excessive_retries && + if (!(info->flags & IEEE80211_TX_STAT_ACK) && test_sta_flags(sta, WLAN_STA_PS)) { /* * The STA is in power save mode, so assume @@ -584,12 +533,11 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) rcu_read_unlock(); return; } else { - if (info->status.excessive_retries) + if (!(info->flags & IEEE80211_TX_STAT_ACK)) sta->tx_retry_failed++; - sta->tx_retry_count += info->status.retry_count; + sta->tx_retry_count += retry_count; } - sband = local->hw.wiphy->bands[info->band]; rate_control_tx_status(local, sband, sta, skb); } @@ -610,9 +558,9 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) local->dot11TransmittedFrameCount++; if (is_multicast_ether_addr(hdr->addr1)) local->dot11MulticastTransmittedFrameCount++; - if (info->status.retry_count > 0) + if (retry_count > 0) local->dot11RetryCount++; - if (info->status.retry_count > 1) + if (retry_count > 1) local->dot11MultipleRetryCount++; } @@ -656,19 +604,30 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) rthdr->hdr.it_len = cpu_to_le16(sizeof(*rthdr)); rthdr->hdr.it_present = cpu_to_le32((1 << IEEE80211_RADIOTAP_TX_FLAGS) | - (1 << IEEE80211_RADIOTAP_DATA_RETRIES)); + (1 << IEEE80211_RADIOTAP_DATA_RETRIES) | + (1 << IEEE80211_RADIOTAP_RATE)); if (!(info->flags & IEEE80211_TX_STAT_ACK) && !is_multicast_ether_addr(hdr->addr1)) rthdr->tx_flags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_FAIL); - if ((info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) && - (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT)) + /* + * XXX: Once radiotap gets the bitmap reset thing the vendor + * extensions proposal contains, we can actually report + * the whole set of tries we did. + */ + if ((info->status.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) || + (info->status.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT)) rthdr->tx_flags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_CTS); - else if (info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) + else if (info->status.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) rthdr->tx_flags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_RTS); + if (info->status.rates[0].idx >= 0 && + !(info->status.rates[0].flags & IEEE80211_TX_RC_MCS)) + rthdr->rate = sband->bitrates[ + info->status.rates[0].idx].bitrate / 5; - rthdr->data_retries = info->status.retry_count; + /* for now report the total retry_count */ + rthdr->data_retries = retry_count; /* XXX: is this sufficient for BPF? */ skb_set_mac_header(skb, 0); @@ -753,20 +712,30 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, BUG_ON(!ops->configure_filter); local->ops = ops; - local->hw.queues = 1; /* default */ - + /* set up some defaults */ + local->hw.queues = 1; + local->hw.max_rates = 1; local->rts_threshold = IEEE80211_MAX_RTS_THRESHOLD; local->fragmentation_threshold = IEEE80211_MAX_FRAG_THRESHOLD; - local->short_retry_limit = 7; - local->long_retry_limit = 4; - local->hw.conf.radio_enabled = 1; + local->hw.conf.long_frame_max_tx_count = 4; + local->hw.conf.short_frame_max_tx_count = 7; + local->hw.conf.radio_enabled = true; INIT_LIST_HEAD(&local->interfaces); spin_lock_init(&local->key_lock); + spin_lock_init(&local->queue_stop_reason_lock); + INIT_DELAYED_WORK(&local->scan_work, ieee80211_scan_work); + INIT_WORK(&local->dynamic_ps_enable_work, + ieee80211_dynamic_ps_enable_work); + INIT_WORK(&local->dynamic_ps_disable_work, + ieee80211_dynamic_ps_disable_work); + setup_timer(&local->dynamic_ps_timer, + ieee80211_dynamic_ps_timer, (unsigned long) local); + sta_info_init(local); tasklet_init(&local->tx_pending_tasklet, ieee80211_tx_pending, @@ -788,7 +757,6 @@ EXPORT_SYMBOL(ieee80211_alloc_hw); int ieee80211_register_hw(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); - const char *name; int result; enum ieee80211_band band; struct net_device *mdev; @@ -853,8 +821,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) mdev->header_ops = &ieee80211_header_ops; mdev->set_multicast_list = ieee80211_master_set_multicast_list; - name = wiphy_dev(local->hw.wiphy)->driver->name; - local->hw.workqueue = create_freezeable_workqueue(name); + local->hw.workqueue = + create_freezeable_workqueue(wiphy_name(local->hw.wiphy)); if (!local->hw.workqueue) { result = -ENOMEM; goto fail_workqueue; @@ -921,12 +889,14 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) local->mdev->select_queue = ieee80211_select_queue; - /* add one default STA interface */ - result = ieee80211_if_add(local, "wlan%d", NULL, - NL80211_IFTYPE_STATION, NULL); - if (result) - printk(KERN_WARNING "%s: Failed to add default virtual iface\n", - wiphy_name(local->hw.wiphy)); + /* add one default STA interface if supported */ + if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_STATION)) { + result = ieee80211_if_add(local, "wlan%d", NULL, + NL80211_IFTYPE_STATION, NULL); + if (result) + printk(KERN_WARNING "%s: Failed to add default virtual iface\n", + wiphy_name(local->hw.wiphy)); + } rtnl_unlock(); @@ -1013,7 +983,7 @@ static int __init ieee80211_init(void) BUILD_BUG_ON(sizeof(struct ieee80211_tx_info) > sizeof(skb->cb)); BUILD_BUG_ON(offsetof(struct ieee80211_tx_info, driver_data) + - IEEE80211_TX_INFO_DRIVER_DATA_SIZE > sizeof(skb->cb)); + IEEE80211_TX_INFO_DRIVER_DATA_SIZE > sizeof(skb->cb)); ret = rc80211_minstrel_init(); if (ret) diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 8013277924f..82f568e9436 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -238,7 +238,7 @@ void mesh_mgmt_ies_add(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) pos = skb_put(skb, 21); *pos++ = WLAN_EID_MESH_CONFIG; - *pos++ = MESH_CFG_LEN; + *pos++ = IEEE80211_MESH_CONFIG_LEN; /* Version */ *pos++ = 1; @@ -473,7 +473,7 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, size_t len, struct ieee80211_rx_status *rx_status) { - struct ieee80211_local *local= sdata->local; + struct ieee80211_local *local = sdata->local; struct ieee802_11_elems elems; struct ieee80211_channel *channel; u64 supp_rates = 0; diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index e10471c6ba4..c197ab545e5 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -145,9 +145,6 @@ struct mesh_rmc { }; -/* Mesh IEs constants */ -#define MESH_CFG_LEN 19 - /* * MESH_CFG_COMP_LEN Includes: * - Active path selection protocol ID. @@ -157,7 +154,7 @@ struct mesh_rmc { * Does not include mesh capabilities, which may vary across nodes in the same * mesh */ -#define MESH_CFG_CMP_LEN 17 +#define MESH_CFG_CMP_LEN (IEEE80211_MESH_CONFIG_LEN - 2) /* Default values, timeouts in ms */ #define MESH_TTL 5 diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 501c7831adb..71fe6096123 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -218,12 +218,16 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local, if (sta->fail_avg >= 100) return MAX_METRIC; + + if (sta->last_tx_rate.flags & IEEE80211_TX_RC_MCS) + return MAX_METRIC; + err = (sta->fail_avg << ARITH_SHIFT) / 100; /* bitrate is in units of 100 Kbps, while we need rate in units of * 1Mbps. This will be corrected on tx_time computation. */ - rate = sband->bitrates[sta->last_txrate_idx].bitrate; + rate = sband->bitrates[sta->last_tx_rate.idx].bitrate; tx_time = (device_constant + 10 * test_frame_len / rate); estimated_retx = ((1 << (2 * ARITH_SHIFT)) / (s_unit - err)); result = (tx_time * estimated_retx) >> (2 * ARITH_SHIFT) ; @@ -759,7 +763,6 @@ enddiscovery: * * @skb: 802.11 frame to be sent * @sdata: network subif the frame will be sent through - * @fwd_frame: true if this frame was originally from a different host * * Returns: 0 if the next hop was found. Nonzero otherwise. If no next hop is * found, the function will start a path discovery and queue the frame so it is diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index faac101c0f8..929ba542fd7 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -257,9 +257,6 @@ static void mesh_plink_timer(unsigned long data) struct sta_info *sta; __le16 llid, plid, reason; struct ieee80211_sub_if_data *sdata; -#ifdef CONFIG_MAC80211_VERBOSE_MPL_DEBUG - DECLARE_MAC_BUF(mac); -#endif /* * This STA is valid because sta_info_destroy() will @@ -274,8 +271,8 @@ static void mesh_plink_timer(unsigned long data) spin_unlock_bh(&sta->lock); return; } - mpl_dbg("Mesh plink timer for %s fired on state %d\n", - print_mac(mac, sta->sta.addr), sta->plink_state); + mpl_dbg("Mesh plink timer for %pM fired on state %d\n", + sta->sta.addr, sta->plink_state); reason = 0; llid = sta->llid; plid = sta->plid; @@ -287,9 +284,9 @@ static void mesh_plink_timer(unsigned long data) /* retry timer */ if (sta->plink_retries < dot11MeshMaxRetries(sdata)) { u32 rand; - mpl_dbg("Mesh plink for %s (retry, timeout): %d %d\n", - print_mac(mac, sta->sta.addr), - sta->plink_retries, sta->plink_timeout); + mpl_dbg("Mesh plink for %pM (retry, timeout): %d %d\n", + sta->sta.addr, sta->plink_retries, + sta->plink_timeout); get_random_bytes(&rand, sizeof(u32)); sta->plink_timeout = sta->plink_timeout + rand % sta->plink_timeout; @@ -337,9 +334,6 @@ int mesh_plink_open(struct sta_info *sta) { __le16 llid; struct ieee80211_sub_if_data *sdata = sta->sdata; -#ifdef CONFIG_MAC80211_VERBOSE_MPL_DEBUG - DECLARE_MAC_BUF(mac); -#endif spin_lock_bh(&sta->lock); get_random_bytes(&llid, 2); @@ -351,8 +345,8 @@ int mesh_plink_open(struct sta_info *sta) sta->plink_state = PLINK_OPN_SNT; mesh_plink_timer_set(sta, dot11MeshRetryTimeout(sdata)); spin_unlock_bh(&sta->lock); - mpl_dbg("Mesh plink: starting establishment with %s\n", - print_mac(mac, sta->sta.addr)); + mpl_dbg("Mesh plink: starting establishment with %pM\n", + sta->sta.addr); return mesh_plink_frame_tx(sdata, PLINK_OPEN, sta->sta.addr, llid, 0, 0); @@ -360,10 +354,6 @@ int mesh_plink_open(struct sta_info *sta) void mesh_plink_block(struct sta_info *sta) { -#ifdef CONFIG_MAC80211_VERBOSE_MPL_DEBUG - DECLARE_MAC_BUF(mac); -#endif - spin_lock_bh(&sta->lock); __mesh_plink_deactivate(sta); sta->plink_state = PLINK_BLOCKED; @@ -374,12 +364,8 @@ int mesh_plink_close(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; __le16 llid, plid, reason; -#ifdef CONFIG_MAC80211_VERBOSE_MPL_DEBUG - DECLARE_MAC_BUF(mac); -#endif - mpl_dbg("Mesh plink: closing link with %s\n", - print_mac(mac, sta->sta.addr)); + mpl_dbg("Mesh plink: closing link with %pM\n", sta->sta.addr); spin_lock_bh(&sta->lock); sta->reason = cpu_to_le16(MESH_LINK_CANCELLED); reason = sta->reason; @@ -417,9 +403,6 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m u8 ie_len; u8 *baseaddr; __le16 plid, llid, reason; -#ifdef CONFIG_MAC80211_VERBOSE_MPL_DEBUG - DECLARE_MAC_BUF(mac); -#endif /* need action_code, aux */ if (len < IEEE80211_MIN_ACTION_SIZE + 3) @@ -557,10 +540,10 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m } } - mpl_dbg("Mesh plink (peer, state, llid, plid, event): %s %d %d %d %d\n", - print_mac(mac, mgmt->sa), sta->plink_state, - le16_to_cpu(sta->llid), le16_to_cpu(sta->plid), - event); + mpl_dbg("Mesh plink (peer, state, llid, plid, event): %pM %d %d %d %d\n", + mgmt->sa, sta->plink_state, + le16_to_cpu(sta->llid), le16_to_cpu(sta->plid), + event); reason = 0; switch (sta->plink_state) { /* spin_unlock as soon as state is updated at each case */ @@ -660,8 +643,8 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m sta->plink_state = PLINK_ESTAB; mesh_plink_inc_estab_count(sdata); spin_unlock_bh(&sta->lock); - mpl_dbg("Mesh plink with %s ESTABLISHED\n", - print_mac(mac, sta->sta.addr)); + mpl_dbg("Mesh plink with %pM ESTABLISHED\n", + sta->sta.addr); break; default: spin_unlock_bh(&sta->lock); @@ -693,8 +676,8 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m sta->plink_state = PLINK_ESTAB; mesh_plink_inc_estab_count(sdata); spin_unlock_bh(&sta->lock); - mpl_dbg("Mesh plink with %s ESTABLISHED\n", - print_mac(mac, sta->sta.addr)); + mpl_dbg("Mesh plink with %pM ESTABLISHED\n", + sta->sta.addr); mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->sta.addr, llid, plid, 0); break; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 409bb771623..5ba721b6a39 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -14,7 +14,6 @@ #include <linux/delay.h> #include <linux/if_ether.h> #include <linux/skbuff.h> -#include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/wireless.h> #include <linux/random.h> @@ -236,7 +235,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; - u8 *pos, *ies, *ht_add_ie; + u8 *pos, *ies, *ht_ie; int i, len, count, rates_len, supp_rates_len; u16 capab; struct ieee80211_bss *bss; @@ -310,7 +309,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ASSOC_REQ); mgmt->u.assoc_req.capab_info = cpu_to_le16(capab); - mgmt->u.reassoc_req.listen_interval = + mgmt->u.assoc_req.listen_interval = cpu_to_le16(local->hw.conf.listen_interval); } @@ -393,24 +392,25 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, /* wmm support is a must to HT */ if (wmm && (ifsta->flags & IEEE80211_STA_WMM_ENABLED) && - sband->ht_info.ht_supported && - (ht_add_ie = ieee80211_bss_get_ie(bss, WLAN_EID_HT_EXTRA_INFO))) { - struct ieee80211_ht_addt_info *ht_add_info = - (struct ieee80211_ht_addt_info *)ht_add_ie; - u16 cap = sband->ht_info.cap; + sband->ht_cap.ht_supported && + (ht_ie = ieee80211_bss_get_ie(bss, WLAN_EID_HT_INFORMATION)) && + ht_ie[1] >= sizeof(struct ieee80211_ht_info)) { + struct ieee80211_ht_info *ht_info = + (struct ieee80211_ht_info *)(ht_ie + 2); + u16 cap = sband->ht_cap.cap; __le16 tmp; u32 flags = local->hw.conf.channel->flags; - switch (ht_add_info->ht_param & IEEE80211_HT_IE_CHA_SEC_OFFSET) { - case IEEE80211_HT_IE_CHA_SEC_ABOVE: + switch (ht_info->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) { + case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: if (flags & IEEE80211_CHAN_NO_FAT_ABOVE) { - cap &= ~IEEE80211_HT_CAP_SUP_WIDTH; + cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; cap &= ~IEEE80211_HT_CAP_SGI_40; } break; - case IEEE80211_HT_IE_CHA_SEC_BELOW: + case IEEE80211_HT_PARAM_CHA_SEC_BELOW: if (flags & IEEE80211_CHAN_NO_FAT_BELOW) { - cap &= ~IEEE80211_HT_CAP_SUP_WIDTH; + cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; cap &= ~IEEE80211_HT_CAP_SGI_40; } break; @@ -424,9 +424,9 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, memcpy(pos, &tmp, sizeof(u16)); pos += sizeof(u16); /* TODO: needs a define here for << 2 */ - *pos++ = sband->ht_info.ampdu_factor | - (sband->ht_info.ampdu_density << 2); - memcpy(pos, sband->ht_info.supp_mcs_set, 16); + *pos++ = sband->ht_cap.ampdu_factor | + (sband->ht_cap.ampdu_density << 2); + memcpy(pos, &sband->ht_cap.mcs, sizeof(sband->ht_cap.mcs)); } kfree(ifsta->assocreq_ies); @@ -568,25 +568,35 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, } } -static u32 ieee80211_handle_protect_preamb(struct ieee80211_sub_if_data *sdata, - bool use_protection, - bool use_short_preamble) +static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, + u16 capab, bool erp_valid, u8 erp) { - struct ieee80211_bss_conf *bss_conf = &sdata->bss_conf; + struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; #ifdef CONFIG_MAC80211_VERBOSE_DEBUG struct ieee80211_if_sta *ifsta = &sdata->u.sta; - DECLARE_MAC_BUF(mac); #endif u32 changed = 0; + bool use_protection; + bool use_short_preamble; + bool use_short_slot; + + if (erp_valid) { + use_protection = (erp & WLAN_ERP_USE_PROTECTION) != 0; + use_short_preamble = (erp & WLAN_ERP_BARKER_PREAMBLE) == 0; + } else { + use_protection = false; + use_short_preamble = !!(capab & WLAN_CAPABILITY_SHORT_PREAMBLE); + } + + use_short_slot = !!(capab & WLAN_CAPABILITY_SHORT_SLOT_TIME); if (use_protection != bss_conf->use_cts_prot) { #ifdef CONFIG_MAC80211_VERBOSE_DEBUG if (net_ratelimit()) { - printk(KERN_DEBUG "%s: CTS protection %s (BSSID=" - "%s)\n", + printk(KERN_DEBUG "%s: CTS protection %s (BSSID=%pM)\n", sdata->dev->name, use_protection ? "enabled" : "disabled", - print_mac(mac, ifsta->bssid)); + ifsta->bssid); } #endif bss_conf->use_cts_prot = use_protection; @@ -597,40 +607,28 @@ static u32 ieee80211_handle_protect_preamb(struct ieee80211_sub_if_data *sdata, #ifdef CONFIG_MAC80211_VERBOSE_DEBUG if (net_ratelimit()) { printk(KERN_DEBUG "%s: switched to %s barker preamble" - " (BSSID=%s)\n", + " (BSSID=%pM)\n", sdata->dev->name, use_short_preamble ? "short" : "long", - print_mac(mac, ifsta->bssid)); + ifsta->bssid); } #endif bss_conf->use_short_preamble = use_short_preamble; changed |= BSS_CHANGED_ERP_PREAMBLE; } - return changed; -} - -static u32 ieee80211_handle_erp_ie(struct ieee80211_sub_if_data *sdata, - u8 erp_value) -{ - bool use_protection = (erp_value & WLAN_ERP_USE_PROTECTION) != 0; - bool use_short_preamble = (erp_value & WLAN_ERP_BARKER_PREAMBLE) == 0; - - return ieee80211_handle_protect_preamb(sdata, - use_protection, use_short_preamble); -} - -static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, - struct ieee80211_bss *bss) -{ - u32 changed = 0; - - if (bss->has_erp_value) - changed |= ieee80211_handle_erp_ie(sdata, bss->erp_value); - else { - u16 capab = bss->capability; - changed |= ieee80211_handle_protect_preamb(sdata, false, - (capab & WLAN_CAPABILITY_SHORT_PREAMBLE) != 0); + if (use_short_slot != bss_conf->use_short_slot) { +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + if (net_ratelimit()) { + printk(KERN_DEBUG "%s: switched to %s slot" + " (BSSID=%s)\n", + sdata->dev->name, + use_short_slot ? "short" : "long", + ifsta->bssid); + } +#endif + bss_conf->use_short_slot = use_short_slot; + changed |= BSS_CHANGED_ERP_SLOT; } return changed; @@ -701,14 +699,15 @@ static void ieee80211_sta_send_associnfo(struct ieee80211_sub_if_data *sdata, static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, - struct ieee80211_if_sta *ifsta) + struct ieee80211_if_sta *ifsta, + u32 bss_info_changed) { struct ieee80211_local *local = sdata->local; struct ieee80211_conf *conf = &local_to_hw(local)->conf; - u32 changed = BSS_CHANGED_ASSOC; struct ieee80211_bss *bss; + bss_info_changed |= BSS_CHANGED_ASSOC; ifsta->flags |= IEEE80211_STA_ASSOCIATED; if (sdata->vif.type != NL80211_IFTYPE_STATION) @@ -719,22 +718,16 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, ifsta->ssid, ifsta->ssid_len); if (bss) { /* set timing information */ - sdata->bss_conf.beacon_int = bss->beacon_int; - sdata->bss_conf.timestamp = bss->timestamp; - sdata->bss_conf.dtim_period = bss->dtim_period; + sdata->vif.bss_conf.beacon_int = bss->beacon_int; + sdata->vif.bss_conf.timestamp = bss->timestamp; + sdata->vif.bss_conf.dtim_period = bss->dtim_period; - changed |= ieee80211_handle_bss_capability(sdata, bss); + bss_info_changed |= ieee80211_handle_bss_capability(sdata, + bss->capability, bss->has_erp_value, bss->erp_value); ieee80211_rx_bss_put(local, bss); } - if (conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) { - changed |= BSS_CHANGED_HT; - sdata->bss_conf.assoc_ht = 1; - sdata->bss_conf.ht_conf = &conf->ht_conf; - sdata->bss_conf.ht_bss_conf = &conf->ht_bss_conf; - } - ifsta->flags |= IEEE80211_STA_PREV_BSSID_SET; memcpy(ifsta->prev_bssid, sdata->u.sta.bssid, ETH_ALEN); ieee80211_sta_send_associnfo(sdata, ifsta); @@ -742,14 +735,25 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, ifsta->last_probe = jiffies; ieee80211_led_assoc(local, 1); - sdata->bss_conf.assoc = 1; + sdata->vif.bss_conf.assoc = 1; /* * For now just always ask the driver to update the basic rateset * when we have associated, we aren't checking whether it actually * changed or not. */ - changed |= BSS_CHANGED_BASIC_RATES; - ieee80211_bss_info_change_notify(sdata, changed); + bss_info_changed |= BSS_CHANGED_BASIC_RATES; + ieee80211_bss_info_change_notify(sdata, bss_info_changed); + + if (local->powersave) { + if (local->dynamic_ps_timeout > 0) + mod_timer(&local->dynamic_ps_timer, jiffies + + msecs_to_jiffies(local->dynamic_ps_timeout)); + else { + conf->flags |= IEEE80211_CONF_PS; + ieee80211_hw_config(local, + IEEE80211_CONF_CHANGE_PS); + } + } netif_tx_start_all_queues(sdata->dev); netif_carrier_on(sdata->dev); @@ -760,18 +764,17 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, static void ieee80211_direct_probe(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta) { - DECLARE_MAC_BUF(mac); - ifsta->direct_probe_tries++; if (ifsta->direct_probe_tries > IEEE80211_AUTH_MAX_TRIES) { - printk(KERN_DEBUG "%s: direct probe to AP %s timed out\n", - sdata->dev->name, print_mac(mac, ifsta->bssid)); + printk(KERN_DEBUG "%s: direct probe to AP %pM timed out\n", + sdata->dev->name, ifsta->bssid); ifsta->state = IEEE80211_STA_MLME_DISABLED; + ieee80211_sta_send_apinfo(sdata, ifsta); return; } - printk(KERN_DEBUG "%s: direct probe to AP %s try %d\n", - sdata->dev->name, print_mac(mac, ifsta->bssid), + printk(KERN_DEBUG "%s: direct probe to AP %pM try %d\n", + sdata->dev->name, ifsta->bssid, ifsta->direct_probe_tries); ifsta->state = IEEE80211_STA_MLME_DIRECT_PROBE; @@ -791,33 +794,36 @@ static void ieee80211_direct_probe(struct ieee80211_sub_if_data *sdata, static void ieee80211_authenticate(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta) { - DECLARE_MAC_BUF(mac); - ifsta->auth_tries++; if (ifsta->auth_tries > IEEE80211_AUTH_MAX_TRIES) { - printk(KERN_DEBUG "%s: authentication with AP %s" + printk(KERN_DEBUG "%s: authentication with AP %pM" " timed out\n", - sdata->dev->name, print_mac(mac, ifsta->bssid)); + sdata->dev->name, ifsta->bssid); ifsta->state = IEEE80211_STA_MLME_DISABLED; + ieee80211_sta_send_apinfo(sdata, ifsta); return; } ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE; - printk(KERN_DEBUG "%s: authenticate with AP %s\n", - sdata->dev->name, print_mac(mac, ifsta->bssid)); + printk(KERN_DEBUG "%s: authenticate with AP %pM\n", + sdata->dev->name, ifsta->bssid); ieee80211_send_auth(sdata, ifsta, 1, NULL, 0, 0); mod_timer(&ifsta->timer, jiffies + IEEE80211_AUTH_TIMEOUT); } +/* + * The disassoc 'reason' argument can be either our own reason + * if self disconnected or a reason code from the AP. + */ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta, bool deauth, bool self_disconnected, u16 reason) { struct ieee80211_local *local = sdata->local; struct sta_info *sta; - u32 changed = BSS_CHANGED_ASSOC; + u32 changed = 0, config_changed = 0; rcu_read_lock(); @@ -851,21 +857,40 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, ifsta->flags &= ~IEEE80211_STA_ASSOCIATED; changed |= ieee80211_reset_erp_info(sdata); - if (sdata->bss_conf.assoc_ht) - changed |= BSS_CHANGED_HT; - - sdata->bss_conf.assoc_ht = 0; - sdata->bss_conf.ht_conf = NULL; - sdata->bss_conf.ht_bss_conf = NULL; - ieee80211_led_assoc(local, 0); - sdata->bss_conf.assoc = 0; + changed |= BSS_CHANGED_ASSOC; + sdata->vif.bss_conf.assoc = false; ieee80211_sta_send_apinfo(sdata, ifsta); - if (self_disconnected) + if (self_disconnected || reason == WLAN_REASON_DISASSOC_STA_HAS_LEFT) ifsta->state = IEEE80211_STA_MLME_DISABLED; + rcu_read_unlock(); + + local->hw.conf.ht.enabled = false; + local->oper_channel_type = NL80211_CHAN_NO_HT; + config_changed |= IEEE80211_CONF_CHANGE_HT; + + del_timer_sync(&local->dynamic_ps_timer); + cancel_work_sync(&local->dynamic_ps_enable_work); + + if (local->hw.conf.flags & IEEE80211_CONF_PS) { + local->hw.conf.flags &= ~IEEE80211_CONF_PS; + config_changed |= IEEE80211_CONF_CHANGE_PS; + } + + ieee80211_hw_config(local, config_changed); + ieee80211_bss_info_change_notify(sdata, changed); + + rcu_read_lock(); + + sta = sta_info_get(local, ifsta->bssid); + if (!sta) { + rcu_read_unlock(); + return; + } + sta_info_unlink(&sta); rcu_read_unlock(); @@ -914,20 +939,19 @@ static int ieee80211_privacy_mismatch(struct ieee80211_sub_if_data *sdata, static void ieee80211_associate(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta) { - DECLARE_MAC_BUF(mac); - ifsta->assoc_tries++; if (ifsta->assoc_tries > IEEE80211_ASSOC_MAX_TRIES) { - printk(KERN_DEBUG "%s: association with AP %s" + printk(KERN_DEBUG "%s: association with AP %pM" " timed out\n", - sdata->dev->name, print_mac(mac, ifsta->bssid)); + sdata->dev->name, ifsta->bssid); ifsta->state = IEEE80211_STA_MLME_DISABLED; + ieee80211_sta_send_apinfo(sdata, ifsta); return; } ifsta->state = IEEE80211_STA_MLME_ASSOCIATE; - printk(KERN_DEBUG "%s: associate with AP %s\n", - sdata->dev->name, print_mac(mac, ifsta->bssid)); + printk(KERN_DEBUG "%s: associate with AP %pM\n", + sdata->dev->name, ifsta->bssid); if (ieee80211_privacy_mismatch(sdata, ifsta)) { printk(KERN_DEBUG "%s: mismatch in privacy configuration and " "mixed-cell disabled - abort association\n", sdata->dev->name); @@ -947,7 +971,6 @@ static void ieee80211_associated(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct sta_info *sta; int disassoc; - DECLARE_MAC_BUF(mac); /* TODO: start monitoring current AP signal quality and number of * missed beacons. Scan other channels every now and then and search @@ -960,8 +983,8 @@ static void ieee80211_associated(struct ieee80211_sub_if_data *sdata, sta = sta_info_get(local, ifsta->bssid); if (!sta) { - printk(KERN_DEBUG "%s: No STA entry for own AP %s\n", - sdata->dev->name, print_mac(mac, ifsta->bssid)); + printk(KERN_DEBUG "%s: No STA entry for own AP %pM\n", + sdata->dev->name, ifsta->bssid); disassoc = 1; } else { disassoc = 0; @@ -969,9 +992,9 @@ static void ieee80211_associated(struct ieee80211_sub_if_data *sdata, sta->last_rx + IEEE80211_MONITORING_INTERVAL)) { if (ifsta->flags & IEEE80211_STA_PROBEREQ_POLL) { printk(KERN_DEBUG "%s: No ProbeResp from " - "current AP %s - assume out of " + "current AP %pM - assume out of " "range\n", - sdata->dev->name, print_mac(mac, ifsta->bssid)); + sdata->dev->name, ifsta->bssid); disassoc = 1; } else ieee80211_send_probe_req(sdata, ifsta->bssid, @@ -1032,7 +1055,6 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, size_t len) { u16 auth_alg, auth_transaction, status_code; - DECLARE_MAC_BUF(mac); if (ifsta->state != IEEE80211_STA_MLME_AUTHENTICATE && sdata->vif.type != NL80211_IFTYPE_ADHOC) @@ -1125,7 +1147,6 @@ static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, size_t len) { u16 reason_code; - DECLARE_MAC_BUF(mac); if (len < 24 + 2) return; @@ -1136,7 +1157,8 @@ static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, reason_code = le16_to_cpu(mgmt->u.deauth.reason_code); if (ifsta->flags & IEEE80211_STA_AUTHENTICATED) - printk(KERN_DEBUG "%s: deauthenticated\n", sdata->dev->name); + printk(KERN_DEBUG "%s: deauthenticated (Reason: %u)\n", + sdata->dev->name, reason_code); if (ifsta->state == IEEE80211_STA_MLME_AUTHENTICATE || ifsta->state == IEEE80211_STA_MLME_ASSOCIATE || @@ -1157,7 +1179,6 @@ static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, size_t len) { u16 reason_code; - DECLARE_MAC_BUF(mac); if (len < 24 + 2) return; @@ -1168,7 +1189,8 @@ static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code); if (ifsta->flags & IEEE80211_STA_ASSOCIATED) - printk(KERN_DEBUG "%s: disassociated\n", sdata->dev->name); + printk(KERN_DEBUG "%s: disassociated (Reason: %u)\n", + sdata->dev->name, reason_code); if (ifsta->state == IEEE80211_STA_MLME_ASSOCIATED) { ifsta->state = IEEE80211_STA_MLME_ASSOCIATE; @@ -1176,7 +1198,7 @@ static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, IEEE80211_RETRY_AUTH_INTERVAL); } - ieee80211_set_disassoc(sdata, ifsta, false, false, 0); + ieee80211_set_disassoc(sdata, ifsta, false, false, reason_code); } @@ -1192,11 +1214,12 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, u64 rates, basic_rates; u16 capab_info, status_code, aid; struct ieee802_11_elems elems; - struct ieee80211_bss_conf *bss_conf = &sdata->bss_conf; + struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; u8 *pos; + u32 changed = 0; int i, j; - DECLARE_MAC_BUF(mac); - bool have_higher_than_11mbit = false; + bool have_higher_than_11mbit = false, newsta = false; + u16 ap_ht_cap_flags; /* AssocResp and ReassocResp have identical structure, so process both * of them in this function. */ @@ -1214,9 +1237,9 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, status_code = le16_to_cpu(mgmt->u.assoc_resp.status_code); aid = le16_to_cpu(mgmt->u.assoc_resp.aid); - printk(KERN_DEBUG "%s: RX %sssocResp from %s (capab=0x%x " + printk(KERN_DEBUG "%s: RX %sssocResp from %pM (capab=0x%x " "status=%d aid=%d)\n", - sdata->dev->name, reassoc ? "Rea" : "A", print_mac(mac, mgmt->sa), + sdata->dev->name, reassoc ? "Rea" : "A", mgmt->sa, capab_info, status_code, (u16)(aid & ~(BIT(15) | BIT(14)))); if (status_code != WLAN_STATUS_SUCCESS) { @@ -1259,7 +1282,8 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, sta = sta_info_get(local, ifsta->bssid); if (!sta) { struct ieee80211_bss *bss; - int err; + + newsta = true; sta = sta_info_alloc(sdata, ifsta->bssid, GFP_ATOMIC); if (!sta) { @@ -1278,13 +1302,6 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, ieee80211_rx_bss_put(local, bss); } - err = sta_info_insert(sta); - if (err) { - printk(KERN_DEBUG "%s: failed to insert STA entry for" - " the AP (error %d)\n", sdata->dev->name, err); - rcu_read_unlock(); - return; - } /* update new sta with its last rx activity */ sta->last_rx = jiffies; } @@ -1308,34 +1325,40 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, for (i = 0; i < elems.supp_rates_len; i++) { int rate = (elems.supp_rates[i] & 0x7f) * 5; + bool is_basic = !!(elems.supp_rates[i] & 0x80); if (rate > 110) have_higher_than_11mbit = true; for (j = 0; j < sband->n_bitrates; j++) { - if (sband->bitrates[j].bitrate == rate) + if (sband->bitrates[j].bitrate == rate) { rates |= BIT(j); - if (elems.supp_rates[i] & 0x80) - basic_rates |= BIT(j); + if (is_basic) + basic_rates |= BIT(j); + break; + } } } for (i = 0; i < elems.ext_supp_rates_len; i++) { int rate = (elems.ext_supp_rates[i] & 0x7f) * 5; + bool is_basic = !!(elems.supp_rates[i] & 0x80); if (rate > 110) have_higher_than_11mbit = true; for (j = 0; j < sband->n_bitrates; j++) { - if (sband->bitrates[j].bitrate == rate) + if (sband->bitrates[j].bitrate == rate) { rates |= BIT(j); - if (elems.ext_supp_rates[i] & 0x80) - basic_rates |= BIT(j); + if (is_basic) + basic_rates |= BIT(j); + break; + } } } sta->sta.supp_rates[local->hw.conf.channel->band] = rates; - sdata->bss_conf.basic_rates = basic_rates; + sdata->vif.bss_conf.basic_rates = basic_rates; /* cf. IEEE 802.11 9.2.12 */ if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ && @@ -1344,31 +1367,43 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, else sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE; - if (elems.ht_cap_elem && elems.ht_info_elem && elems.wmm_param && - (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) { - struct ieee80211_ht_bss_info bss_info; - ieee80211_ht_cap_ie_to_ht_info( - elems.ht_cap_elem, &sta->sta.ht_info); - ieee80211_ht_addt_info_ie_to_ht_bss_info( - elems.ht_info_elem, &bss_info); - ieee80211_handle_ht(local, 1, &sta->sta.ht_info, &bss_info); - } + if (elems.ht_cap_elem) + ieee80211_ht_cap_ie_to_sta_ht_cap(sband, + elems.ht_cap_elem, &sta->sta.ht_cap); + + ap_ht_cap_flags = sta->sta.ht_cap.cap; rate_control_rate_init(sta); - if (elems.wmm_param) { + if (elems.wmm_param) set_sta_flags(sta, WLAN_STA_WME); - rcu_read_unlock(); + + if (newsta) { + int err = sta_info_insert(sta); + if (err) { + printk(KERN_DEBUG "%s: failed to insert STA entry for" + " the AP (error %d)\n", sdata->dev->name, err); + rcu_read_unlock(); + return; + } + } + + rcu_read_unlock(); + + if (elems.wmm_param) ieee80211_sta_wmm_params(local, ifsta, elems.wmm_param, elems.wmm_param_len); - } else - rcu_read_unlock(); + + if (elems.ht_info_elem && elems.wmm_param && + (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) + changed |= ieee80211_enable_ht(sdata, elems.ht_info_elem, + ap_ht_cap_flags); /* set AID and assoc capability, * ieee80211_set_associated() will tell the driver */ bss_conf->aid = aid; bss_conf->assoc_capability = capab_info; - ieee80211_set_associated(sdata, ifsta); + ieee80211_set_associated(sdata, ifsta, changed); ieee80211_associated(sdata, ifsta); } @@ -1386,6 +1421,13 @@ static int ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband; union iwreq_data wrqu; + skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400); + if (!skb) { + printk(KERN_DEBUG "%s: failed to allocate buffer for probe " + "response\n", sdata->dev->name); + return -ENOMEM; + } + sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; /* Remove possible STA entries from other IBSS networks. */ @@ -1411,63 +1453,62 @@ static int ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, return res; /* Build IBSS probe response */ - skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400); - if (skb) { - skb_reserve(skb, local->hw.extra_tx_headroom); - mgmt = (struct ieee80211_mgmt *) - skb_put(skb, 24 + sizeof(mgmt->u.beacon)); - memset(mgmt, 0, 24 + sizeof(mgmt->u.beacon)); - mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | - IEEE80211_STYPE_PROBE_RESP); - memset(mgmt->da, 0xff, ETH_ALEN); - memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); - memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); - mgmt->u.beacon.beacon_int = - cpu_to_le16(local->hw.conf.beacon_int); - mgmt->u.beacon.timestamp = cpu_to_le64(bss->timestamp); - mgmt->u.beacon.capab_info = cpu_to_le16(bss->capability); - - pos = skb_put(skb, 2 + ifsta->ssid_len); - *pos++ = WLAN_EID_SSID; - *pos++ = ifsta->ssid_len; - memcpy(pos, ifsta->ssid, ifsta->ssid_len); - - rates = bss->supp_rates_len; - if (rates > 8) - rates = 8; - pos = skb_put(skb, 2 + rates); - *pos++ = WLAN_EID_SUPP_RATES; - *pos++ = rates; - memcpy(pos, bss->supp_rates, rates); + skb_reserve(skb, local->hw.extra_tx_headroom); - if (bss->band == IEEE80211_BAND_2GHZ) { - pos = skb_put(skb, 2 + 1); - *pos++ = WLAN_EID_DS_PARAMS; - *pos++ = 1; - *pos++ = ieee80211_frequency_to_channel(bss->freq); - } + mgmt = (struct ieee80211_mgmt *) + skb_put(skb, 24 + sizeof(mgmt->u.beacon)); + memset(mgmt, 0, 24 + sizeof(mgmt->u.beacon)); + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_PROBE_RESP); + memset(mgmt->da, 0xff, ETH_ALEN); + memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); + memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); + mgmt->u.beacon.beacon_int = + cpu_to_le16(local->hw.conf.beacon_int); + mgmt->u.beacon.timestamp = cpu_to_le64(bss->timestamp); + mgmt->u.beacon.capab_info = cpu_to_le16(bss->capability); - pos = skb_put(skb, 2 + 2); - *pos++ = WLAN_EID_IBSS_PARAMS; - *pos++ = 2; - /* FIX: set ATIM window based on scan results */ - *pos++ = 0; - *pos++ = 0; + pos = skb_put(skb, 2 + ifsta->ssid_len); + *pos++ = WLAN_EID_SSID; + *pos++ = ifsta->ssid_len; + memcpy(pos, ifsta->ssid, ifsta->ssid_len); - if (bss->supp_rates_len > 8) { - rates = bss->supp_rates_len - 8; - pos = skb_put(skb, 2 + rates); - *pos++ = WLAN_EID_EXT_SUPP_RATES; - *pos++ = rates; - memcpy(pos, &bss->supp_rates[8], rates); - } + rates = bss->supp_rates_len; + if (rates > 8) + rates = 8; + pos = skb_put(skb, 2 + rates); + *pos++ = WLAN_EID_SUPP_RATES; + *pos++ = rates; + memcpy(pos, bss->supp_rates, rates); - ifsta->probe_resp = skb; + if (bss->band == IEEE80211_BAND_2GHZ) { + pos = skb_put(skb, 2 + 1); + *pos++ = WLAN_EID_DS_PARAMS; + *pos++ = 1; + *pos++ = ieee80211_frequency_to_channel(bss->freq); + } + + pos = skb_put(skb, 2 + 2); + *pos++ = WLAN_EID_IBSS_PARAMS; + *pos++ = 2; + /* FIX: set ATIM window based on scan results */ + *pos++ = 0; + *pos++ = 0; - ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON); + if (bss->supp_rates_len > 8) { + rates = bss->supp_rates_len - 8; + pos = skb_put(skb, 2 + rates); + *pos++ = WLAN_EID_EXT_SUPP_RATES; + *pos++ = rates; + memcpy(pos, &bss->supp_rates[8], rates); } + ifsta->probe_resp = skb; + + ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON); + + rates = 0; sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; for (i = 0; i < bss->supp_rates_len; i++) { @@ -1507,8 +1548,6 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, u64 beacon_timestamp, rx_timestamp; u64 supp_rates = 0; enum ieee80211_band band = rx_status->band; - DECLARE_MAC_BUF(mac); - DECLARE_MAC_BUF(mac2); if (elems->ds_params && elems->ds_params_len == 1) freq = ieee80211_channel_to_frequency(elems->ds_params[0]); @@ -1538,17 +1577,16 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, #ifdef CONFIG_MAC80211_IBSS_DEBUG if (sta->sta.supp_rates[band] != prev_rates) printk(KERN_DEBUG "%s: updated supp_rates set " - "for %s based on beacon info (0x%llx | " + "for %pM based on beacon info (0x%llx | " "0x%llx -> 0x%llx)\n", sdata->dev->name, - print_mac(mac, sta->sta.addr), + sta->sta.addr, (unsigned long long) prev_rates, (unsigned long long) supp_rates, (unsigned long long) sta->sta.supp_rates[band]); #endif } else { - ieee80211_ibss_add_sta(sdata, NULL, mgmt->bssid, - mgmt->sa, supp_rates); + ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, supp_rates); } rcu_read_unlock(); @@ -1595,8 +1633,13 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, * e.g: at 1 MBit that means mactime is 192 usec earlier * (=24 bytes * 8 usecs/byte) than the beacon timestamp. */ - int rate = local->hw.wiphy->bands[band]-> + int rate; + if (rx_status->flag & RX_FLAG_HT) { + rate = 65; /* TODO: HT rates */ + } else { + rate = local->hw.wiphy->bands[band]-> bitrates[rx_status->rate_idx].bitrate; + } rx_timestamp = rx_status->mactime + (24 * 8 * 10 / rate); } else if (local && local->ops && local->ops->get_tsf) /* second best option: get current TSF */ @@ -1605,10 +1648,9 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, /* can't merge without knowing the TSF */ rx_timestamp = -1LLU; #ifdef CONFIG_MAC80211_IBSS_DEBUG - printk(KERN_DEBUG "RX beacon SA=%s BSSID=" - "%s TSF=0x%llx BCN=0x%llx diff=%lld @%lu\n", - print_mac(mac, mgmt->sa), - print_mac(mac2, mgmt->bssid), + printk(KERN_DEBUG "RX beacon SA=%pM BSSID=" + "%pM TSF=0x%llx BCN=0x%llx diff=%lld @%lu\n", + mgmt->sa, mgmt->bssid, (unsigned long long)rx_timestamp, (unsigned long long)beacon_timestamp, (unsigned long long)(rx_timestamp - beacon_timestamp), @@ -1617,13 +1659,11 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, if (beacon_timestamp > rx_timestamp) { #ifdef CONFIG_MAC80211_IBSS_DEBUG printk(KERN_DEBUG "%s: beacon TSF higher than " - "local TSF - IBSS merge with BSSID %s\n", - sdata->dev->name, print_mac(mac, mgmt->bssid)); + "local TSF - IBSS merge with BSSID %pM\n", + sdata->dev->name, mgmt->bssid); #endif ieee80211_sta_join_ibss(sdata, &sdata->u.sta, bss); - ieee80211_ibss_add_sta(sdata, NULL, - mgmt->bssid, mgmt->sa, - supp_rates); + ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, supp_rates); } } @@ -1671,8 +1711,9 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, size_t baselen; struct ieee802_11_elems elems; struct ieee80211_local *local = sdata->local; - struct ieee80211_conf *conf = &local->hw.conf; u32 changed = 0; + bool erp_valid; + u8 erp_value = 0; /* Process beacon from the current BSS */ baselen = (u8 *) mgmt->u.beacon.variable - (u8 *) mgmt; @@ -1694,22 +1735,49 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ieee80211_sta_wmm_params(local, ifsta, elems.wmm_param, elems.wmm_param_len); - if (elems.erp_info && elems.erp_info_len >= 1) - changed |= ieee80211_handle_erp_ie(sdata, elems.erp_info[0]); - else { - u16 capab = le16_to_cpu(mgmt->u.beacon.capab_info); - changed |= ieee80211_handle_protect_preamb(sdata, false, - (capab & WLAN_CAPABILITY_SHORT_PREAMBLE) != 0); + + if (elems.erp_info && elems.erp_info_len >= 1) { + erp_valid = true; + erp_value = elems.erp_info[0]; + } else { + erp_valid = false; } + changed |= ieee80211_handle_bss_capability(sdata, + le16_to_cpu(mgmt->u.beacon.capab_info), + erp_valid, erp_value); + + + if (elems.ht_cap_elem && elems.ht_info_elem && elems.wmm_param) { + struct sta_info *sta; + struct ieee80211_supported_band *sband; + u16 ap_ht_cap_flags; + + rcu_read_lock(); - if (elems.ht_cap_elem && elems.ht_info_elem && - elems.wmm_param && conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) { - struct ieee80211_ht_bss_info bss_info; + sta = sta_info_get(local, ifsta->bssid); + if (!sta) { + rcu_read_unlock(); + return; + } + + sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - ieee80211_ht_addt_info_ie_to_ht_bss_info( - elems.ht_info_elem, &bss_info); - changed |= ieee80211_handle_ht(local, 1, &conf->ht_conf, - &bss_info); + ieee80211_ht_cap_ie_to_sta_ht_cap(sband, + elems.ht_cap_elem, &sta->sta.ht_cap); + + ap_ht_cap_flags = sta->sta.ht_cap.cap; + + rcu_read_unlock(); + + changed |= ieee80211_enable_ht(sdata, elems.ht_info_elem, + ap_ht_cap_flags); + } + + if (elems.country_elem) { + /* Note we are only reviewing this on beacons + * for the BSSID we are associated to */ + regulatory_hint_11d(local->hw.wiphy, + elems.country_elem, elems.country_elem_len); } ieee80211_bss_info_change_notify(sdata, changed); @@ -1727,11 +1795,6 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb; struct ieee80211_mgmt *resp; u8 *pos, *end; - DECLARE_MAC_BUF(mac); -#ifdef CONFIG_MAC80211_IBSS_DEBUG - DECLARE_MAC_BUF(mac2); - DECLARE_MAC_BUF(mac3); -#endif if (sdata->vif.type != NL80211_IFTYPE_ADHOC || ifsta->state != IEEE80211_STA_MLME_IBSS_JOINED || @@ -1744,10 +1807,10 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, tx_last_beacon = 1; #ifdef CONFIG_MAC80211_IBSS_DEBUG - printk(KERN_DEBUG "%s: RX ProbeReq SA=%s DA=%s BSSID=" - "%s (tx_last_beacon=%d)\n", - sdata->dev->name, print_mac(mac, mgmt->sa), print_mac(mac2, mgmt->da), - print_mac(mac3, mgmt->bssid), tx_last_beacon); + printk(KERN_DEBUG "%s: RX ProbeReq SA=%pM DA=%pM BSSID=%pM" + " (tx_last_beacon=%d)\n", + sdata->dev->name, mgmt->sa, mgmt->da, + mgmt->bssid, tx_last_beacon); #endif /* CONFIG_MAC80211_IBSS_DEBUG */ if (!tx_last_beacon) @@ -1763,8 +1826,8 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, pos + 2 + pos[1] > end) { #ifdef CONFIG_MAC80211_IBSS_DEBUG printk(KERN_DEBUG "%s: Invalid SSID IE in ProbeReq " - "from %s\n", - sdata->dev->name, print_mac(mac, mgmt->sa)); + "from %pM\n", + sdata->dev->name, mgmt->sa); #endif return; } @@ -1783,8 +1846,8 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, resp = (struct ieee80211_mgmt *) skb->data; memcpy(resp->da, mgmt->sa, ETH_ALEN); #ifdef CONFIG_MAC80211_IBSS_DEBUG - printk(KERN_DEBUG "%s: Sending ProbeResp to %s\n", - sdata->dev->name, print_mac(mac, resp->da)); + printk(KERN_DEBUG "%s: Sending ProbeResp to %pM\n", + sdata->dev->name, resp->da); #endif /* CONFIG_MAC80211_IBSS_DEBUG */ ieee80211_tx_skb(sdata, skb, 0); } @@ -1972,7 +2035,7 @@ static int ieee80211_sta_match_ssid(struct ieee80211_if_sta *ifsta, } } - if (hidden_ssid && ifsta->ssid_len == ssid_len) + if (hidden_ssid && (ifsta->ssid_len == ssid_len || ssid_len == 0)) return 1; if (ssid_len == 1 && ssid[0] == ' ') @@ -1990,7 +2053,6 @@ static int ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata, u8 bssid[ETH_ALEN], *pos; int i; int ret; - DECLARE_MAC_BUF(mac); #if 0 /* Easier testing, use fixed BSSID. */ @@ -2006,8 +2068,8 @@ static int ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata, bssid[0] |= 0x02; #endif - printk(KERN_DEBUG "%s: Creating new IBSS network, BSSID %s\n", - sdata->dev->name, print_mac(mac, bssid)); + printk(KERN_DEBUG "%s: Creating new IBSS network, BSSID %pM\n", + sdata->dev->name, bssid); bss = ieee80211_rx_bss_add(local, bssid, local->hw.conf.channel->center_freq, @@ -2050,8 +2112,6 @@ static int ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata, int found = 0; u8 bssid[ETH_ALEN]; int active_ibss; - DECLARE_MAC_BUF(mac); - DECLARE_MAC_BUF(mac2); if (ifsta->ssid_len == 0) return -EINVAL; @@ -2068,8 +2128,7 @@ static int ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata, || !(bss->capability & WLAN_CAPABILITY_IBSS)) continue; #ifdef CONFIG_MAC80211_IBSS_DEBUG - printk(KERN_DEBUG " bssid=%s found\n", - print_mac(mac, bss->bssid)); + printk(KERN_DEBUG " bssid=%pM found\n", bss->bssid); #endif /* CONFIG_MAC80211_IBSS_DEBUG */ memcpy(bssid, bss->bssid, ETH_ALEN); found = 1; @@ -2080,9 +2139,8 @@ static int ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata, #ifdef CONFIG_MAC80211_IBSS_DEBUG if (found) - printk(KERN_DEBUG " sta_find_ibss: selected %s current " - "%s\n", print_mac(mac, bssid), - print_mac(mac2, ifsta->bssid)); + printk(KERN_DEBUG " sta_find_ibss: selected %pM current " + "%pM\n", bssid, ifsta->bssid); #endif /* CONFIG_MAC80211_IBSS_DEBUG */ if (found && memcmp(ifsta->bssid, bssid, ETH_ALEN) != 0) { @@ -2099,9 +2157,9 @@ static int ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata, if (!bss) goto dont_join; - printk(KERN_DEBUG "%s: Selected IBSS BSSID %s" + printk(KERN_DEBUG "%s: Selected IBSS BSSID %pM" " based on configured SSID\n", - sdata->dev->name, print_mac(mac, bssid)); + sdata->dev->name, bssid); ret = ieee80211_sta_join_ibss(sdata, ifsta, bss); ieee80211_rx_bss_put(local, bss); return ret; @@ -2338,12 +2396,10 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) * must be callable in atomic context. */ struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb, u8 *bssid, - u8 *addr, u64 supp_rates) + u8 *bssid,u8 *addr, u64 supp_rates) { struct ieee80211_local *local = sdata->local; struct sta_info *sta; - DECLARE_MAC_BUF(mac); int band = local->hw.conf.channel->band; /* TODO: Could consider removing the least recently used entry and @@ -2351,7 +2407,7 @@ struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, if (local->num_sta >= IEEE80211_IBSS_MAX_STA_ENTRIES) { if (net_ratelimit()) { printk(KERN_DEBUG "%s: No room for a new IBSS STA " - "entry %s\n", sdata->dev->name, print_mac(mac, addr)); + "entry %pM\n", sdata->dev->name, addr); } return NULL; } @@ -2360,8 +2416,8 @@ struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, return NULL; #ifdef CONFIG_MAC80211_VERBOSE_DEBUG - printk(KERN_DEBUG "%s: Adding new IBSS station %s (dev=%s)\n", - wiphy_name(local->hw.wiphy), print_mac(mac, addr), sdata->dev->name); + printk(KERN_DEBUG "%s: Adding new IBSS station %pM (dev=%s)\n", + wiphy_name(local->hw.wiphy), addr, sdata->dev->name); #endif sta = sta_info_alloc(sdata, addr, GFP_ATOMIC); @@ -2408,7 +2464,6 @@ void ieee80211_sta_req_auth(struct ieee80211_sub_if_data *sdata, int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len) { struct ieee80211_if_sta *ifsta; - int res; if (len > IEEE80211_MAX_SSID_LEN) return -EINVAL; @@ -2420,19 +2475,6 @@ int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size memcpy(ifsta->ssid, ssid, len); ifsta->ssid_len = len; ifsta->flags &= ~IEEE80211_STA_PREV_BSSID_SET; - - res = 0; - /* - * Hack! MLME code needs to be cleaned up to have different - * entry points for configuration and internal selection change - */ - if (netif_running(sdata->dev)) - res = ieee80211_if_config(sdata, IEEE80211_IFCC_SSID); - if (res) { - printk(KERN_DEBUG "%s: Failed to config new SSID to " - "the low-level driver\n", sdata->dev->name); - return res; - } } if (len) @@ -2560,3 +2602,39 @@ void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local) ieee80211_restart_sta_timer(sdata); rcu_read_unlock(); } + +void ieee80211_dynamic_ps_disable_work(struct work_struct *work) +{ + struct ieee80211_local *local = + container_of(work, struct ieee80211_local, + dynamic_ps_disable_work); + + if (local->hw.conf.flags & IEEE80211_CONF_PS) { + local->hw.conf.flags &= ~IEEE80211_CONF_PS; + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); + } + + ieee80211_wake_queues_by_reason(&local->hw, + IEEE80211_QUEUE_STOP_REASON_PS); +} + +void ieee80211_dynamic_ps_enable_work(struct work_struct *work) +{ + struct ieee80211_local *local = + container_of(work, struct ieee80211_local, + dynamic_ps_enable_work); + + if (local->hw.conf.flags & IEEE80211_CONF_PS) + return; + + local->hw.conf.flags |= IEEE80211_CONF_PS; + + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); +} + +void ieee80211_dynamic_ps_timer(unsigned long data) +{ + struct ieee80211_local *local = (void *) data; + + queue_work(local->hw.workqueue, &local->dynamic_ps_enable_work); +} diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 5d786720d93..3fa7ab28506 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -199,48 +199,44 @@ static void rate_control_release(struct kref *kref) } void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, - struct ieee80211_supported_band *sband, - struct sta_info *sta, struct sk_buff *skb, - struct rate_selection *sel) + struct sta_info *sta, + struct ieee80211_tx_rate_control *txrc) { struct rate_control_ref *ref = sdata->local->rate_ctrl; void *priv_sta = NULL; struct ieee80211_sta *ista = NULL; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb); int i; - sel->rate_idx = -1; - sel->nonerp_idx = -1; - sel->probe_idx = -1; - sel->max_rate_idx = sdata->max_ratectrl_rateidx; - if (sta) { ista = &sta->sta; priv_sta = sta->rate_ctrl_priv; } + for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { + info->control.rates[i].idx = -1; + info->control.rates[i].flags = 0; + info->control.rates[i].count = 1; + } + if (sta && sdata->force_unicast_rateidx > -1) - sel->rate_idx = sdata->force_unicast_rateidx; + info->control.rates[0].idx = sdata->force_unicast_rateidx; else - ref->ops->get_rate(ref->priv, sband, ista, priv_sta, skb, sel); - - if (sdata->max_ratectrl_rateidx > -1 && - sel->rate_idx > sdata->max_ratectrl_rateidx) - sel->rate_idx = sdata->max_ratectrl_rateidx; - - BUG_ON(sel->rate_idx < 0); - - /* Select a non-ERP backup rate. */ - if (sel->nonerp_idx < 0) { - for (i = 0; i < sband->n_bitrates; i++) { - struct ieee80211_rate *rate = &sband->bitrates[i]; - if (sband->bitrates[sel->rate_idx].bitrate < rate->bitrate) - break; - - if (rate_supported(ista, sband->band, i) && - !(rate->flags & IEEE80211_RATE_ERP_G)) - sel->nonerp_idx = i; - } + ref->ops->get_rate(ref->priv, ista, priv_sta, txrc); + + /* + * try to enforce the maximum rate the user wanted + */ + if (sdata->max_ratectrl_rateidx > -1) + for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { + if (info->control.rates[i].flags & IEEE80211_TX_RC_MCS) + continue; + info->control.rates[i].idx = + min_t(s8, info->control.rates[i].idx, + sdata->max_ratectrl_rateidx); } + + BUG_ON(info->control.rates[0].idx < 0); } struct rate_control_ref *rate_control_get(struct rate_control_ref *ref) diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index d0092f847f8..928da625e28 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -31,9 +31,8 @@ struct rate_control_ref { struct rate_control_ref *rate_control_alloc(const char *name, struct ieee80211_local *local); void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, - struct ieee80211_supported_band *sband, - struct sta_info *sta, struct sk_buff *skb, - struct rate_selection *sel); + struct sta_info *sta, + struct ieee80211_tx_rate_control *txrc); struct rate_control_ref *rate_control_get(struct rate_control_ref *ref); void rate_control_put(struct rate_control_ref *ref); @@ -64,12 +63,6 @@ static inline void rate_control_rate_init(struct sta_info *sta) } -static inline void rate_control_clear(struct ieee80211_local *local) -{ - struct rate_control_ref *ref = local->rate_ctrl; - ref->ops->clear(ref->priv); -} - static inline void *rate_control_alloc_sta(struct rate_control_ref *ref, struct ieee80211_sta *sta, gfp_t gfp) diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index f6d69dab07a..2b3b490a607 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -126,7 +126,9 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi) mr->adjusted_retry_count = mr->retry_count >> 1; if (mr->adjusted_retry_count > 2) mr->adjusted_retry_count = 2; + mr->sample_limit = 4; } else { + mr->sample_limit = -1; mr->adjusted_retry_count = mr->retry_count; } if (!mr->adjusted_retry_count) @@ -169,30 +171,20 @@ minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband, { struct minstrel_sta_info *mi = priv_sta; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - struct ieee80211_tx_altrate *ar = info->status.retries; - struct minstrel_priv *mp = priv; - int i, ndx, tries; - int success = 0; - - if (!info->status.excessive_retries) - success = 1; + struct ieee80211_tx_rate *ar = info->status.rates; + int i, ndx; + int success; - if (!mp->has_mrr || (ar[0].rate_idx < 0)) { - ndx = rix_to_ndx(mi, info->tx_rate_idx); - tries = info->status.retry_count + 1; - mi->r[ndx].success += success; - mi->r[ndx].attempts += tries; - return; - } + success = !!(info->flags & IEEE80211_TX_STAT_ACK); - for (i = 0; i < 4; i++) { - if (ar[i].rate_idx < 0) + for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { + if (ar[i].idx < 0) break; - ndx = rix_to_ndx(mi, ar[i].rate_idx); - mi->r[ndx].attempts += ar[i].limit + 1; + ndx = rix_to_ndx(mi, ar[i].idx); + mi->r[ndx].attempts += ar[i].count; - if ((i != 3) && (ar[i + 1].rate_idx < 0)) + if ((i != IEEE80211_TX_MAX_RATES - 1) && (ar[i + 1].idx < 0)) mi->r[ndx].success += success; } @@ -210,9 +202,9 @@ minstrel_get_retry_count(struct minstrel_rate *mr, { unsigned int retry = mr->adjusted_retry_count; - if (info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) + if (info->control.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) retry = max(2U, min(mr->retry_count_rtscts, retry)); - else if (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT) + else if (info->control.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT) retry = max(2U, min(mr->retry_count_cts, retry)); return retry; } @@ -233,15 +225,16 @@ minstrel_get_next_sample(struct minstrel_sta_info *mi) return sample_ndx; } -void -minstrel_get_rate(void *priv, struct ieee80211_supported_band *sband, - struct ieee80211_sta *sta, void *priv_sta, - struct sk_buff *skb, struct rate_selection *sel) +static void +minstrel_get_rate(void *priv, struct ieee80211_sta *sta, + void *priv_sta, struct ieee80211_tx_rate_control *txrc) { + struct sk_buff *skb = txrc->skb; + struct ieee80211_supported_band *sband = txrc->sband; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct minstrel_sta_info *mi = priv_sta; struct minstrel_priv *mp = priv; - struct ieee80211_tx_altrate *ar = info->control.retries; + struct ieee80211_tx_rate *ar = info->control.rates; unsigned int ndx, sample_ndx = 0; bool mrr; bool sample_slower = false; @@ -251,16 +244,12 @@ minstrel_get_rate(void *priv, struct ieee80211_supported_band *sband, int sample_rate; if (!sta || !mi || use_low_rate(skb)) { - sel->rate_idx = rate_lowest_index(sband, sta); + ar[0].idx = rate_lowest_index(sband, sta); + ar[0].count = mp->max_retry; return; } - mrr = mp->has_mrr; - - /* mac80211 does not allow mrr for RTS/CTS */ - if ((info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) || - (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT)) - mrr = false; + mrr = mp->has_mrr && !txrc->rts && !txrc->bss_conf->use_cts_prot; if (time_after(jiffies, mi->stats_update + (mp->update_interval * HZ) / 1000)) @@ -278,7 +267,8 @@ minstrel_get_rate(void *priv, struct ieee80211_supported_band *sband, (mi->sample_count + mi->sample_deferred / 2); /* delta > 0: sampling required */ - if (delta > 0) { + if ((delta > 0) && (mrr || !mi->prev_sample)) { + struct minstrel_rate *msr; if (mi->packet_count >= 10000) { mi->sample_deferred = 0; mi->sample_count = 0; @@ -297,13 +287,20 @@ minstrel_get_rate(void *priv, struct ieee80211_supported_band *sband, } sample_ndx = minstrel_get_next_sample(mi); + msr = &mi->r[sample_ndx]; sample = true; - sample_slower = mrr && (mi->r[sample_ndx].perfect_tx_time > + sample_slower = mrr && (msr->perfect_tx_time > mi->r[ndx].perfect_tx_time); if (!sample_slower) { - ndx = sample_ndx; - mi->sample_count++; + if (msr->sample_limit != 0) { + ndx = sample_ndx; + mi->sample_count++; + if (msr->sample_limit > 0) + msr->sample_limit--; + } else { + sample = false; + } } else { /* Only use IEEE80211_TX_CTL_RATE_CTRL_PROBE to mark * packets that have the sampling rate deferred to the @@ -315,13 +312,22 @@ minstrel_get_rate(void *priv, struct ieee80211_supported_band *sband, mi->sample_deferred++; } } - sel->rate_idx = mi->r[ndx].rix; - info->control.retry_limit = minstrel_get_retry_count(&mi->r[ndx], info); + mi->prev_sample = sample; + + /* If we're not using MRR and the sampling rate already + * has a probability of >95%, we shouldn't be attempting + * to use it, as this only wastes precious airtime */ + if (!mrr && sample && (mi->r[ndx].probability > 17100)) + ndx = mi->max_tp_rate; + + ar[0].idx = mi->r[ndx].rix; + ar[0].count = minstrel_get_retry_count(&mi->r[ndx], info); if (!mrr) { - ar[0].rate_idx = mi->lowest_rix; - ar[0].limit = mp->max_retry; - ar[1].rate_idx = -1; + if (!sample) + ar[0].count = mp->max_retry; + ar[1].idx = mi->lowest_rix; + ar[1].count = mp->max_retry; return; } @@ -336,9 +342,9 @@ minstrel_get_rate(void *priv, struct ieee80211_supported_band *sband, } mrr_ndx[1] = mi->max_prob_rate; mrr_ndx[2] = 0; - for (i = 0; i < 3; i++) { - ar[i].rate_idx = mi->r[mrr_ndx[i]].rix; - ar[i].limit = mi->r[mrr_ndx[i]].adjusted_retry_count; + for (i = 1; i < 4; i++) { + ar[i].idx = mi->r[mrr_ndx[i - 1]].rix; + ar[i].count = mi->r[mrr_ndx[i - 1]].adjusted_retry_count; } } @@ -415,6 +421,7 @@ minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband, /* calculate maximum number of retransmissions before * fallback (based on maximum segment size) */ + mr->sample_limit = -1; mr->retry_count = 1; mr->retry_count_cts = 1; mr->retry_count_rtscts = 1; @@ -500,11 +507,6 @@ minstrel_free_sta(void *priv, struct ieee80211_sta *sta, void *priv_sta) kfree(mi); } -static void -minstrel_clear(void *priv) -{ -} - static void * minstrel_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) { @@ -532,13 +534,13 @@ minstrel_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) /* maximum time that the hw is allowed to stay in one MRR segment */ mp->segment_size = 6000; - if (hw->max_altrate_tries > 0) - mp->max_retry = hw->max_altrate_tries; + if (hw->max_rate_tries > 0) + mp->max_retry = hw->max_rate_tries; else /* safe default, does not necessarily have to match hw properties */ mp->max_retry = 7; - if (hw->max_altrates >= 3) + if (hw->max_rates >= 4) mp->has_mrr = true; mp->hw = hw; @@ -558,7 +560,6 @@ static struct rate_control_ops mac80211_minstrel = { .tx_status = minstrel_tx_status, .get_rate = minstrel_get_rate, .rate_init = minstrel_rate_init, - .clear = minstrel_clear, .alloc = minstrel_alloc, .free = minstrel_free, .alloc_sta = minstrel_alloc_sta, diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h index 9a90a6aee04..869fe0ef951 100644 --- a/net/mac80211/rc80211_minstrel.h +++ b/net/mac80211/rc80211_minstrel.h @@ -16,6 +16,7 @@ struct minstrel_rate { unsigned int perfect_tx_time; unsigned int ack_time; + int sample_limit; unsigned int retry_count; unsigned int retry_count_cts; unsigned int retry_count_rtscts; @@ -57,6 +58,7 @@ struct minstrel_sta_info { int n_rates; struct minstrel_rate *r; + bool prev_sample; /* sampling table */ u8 *sample_table; diff --git a/net/mac80211/rc80211_pid.h b/net/mac80211/rc80211_pid.h index 01d64d53f3b..1a873f00691 100644 --- a/net/mac80211/rc80211_pid.h +++ b/net/mac80211/rc80211_pid.h @@ -49,7 +49,7 @@ /* Arithmetic right shift for positive and negative values for ISO C. */ #define RC_PID_DO_ARITH_RIGHT_SHIFT(x, y) \ - (x) < 0 ? -((-(x)) >> (y)) : (x) >> (y) + ((x) < 0 ? -((-(x)) >> (y)) : (x) >> (y)) enum rc_pid_event_type { RC_PID_EVENT_TYPE_TX_STATUS, @@ -61,6 +61,7 @@ enum rc_pid_event_type { union rc_pid_event_data { /* RC_PID_EVENT_TX_STATUS */ struct { + u32 flags; struct ieee80211_tx_info tx_status; }; /* RC_PID_EVENT_TYPE_RATE_CHANGE */ diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c index 86eb374e3b8..b16801cde06 100644 --- a/net/mac80211/rc80211_pid_algo.c +++ b/net/mac80211/rc80211_pid_algo.c @@ -241,7 +241,7 @@ static void rate_control_pid_tx_status(void *priv, struct ieee80211_supported_ba /* Ignore all frames that were sent with a different rate than the rate * we currently advise mac80211 to use. */ - if (info->tx_rate_idx != spinfo->txrate_idx) + if (info->status.rates[0].idx != spinfo->txrate_idx) return; spinfo->tx_num_xmit++; @@ -253,10 +253,10 @@ static void rate_control_pid_tx_status(void *priv, struct ieee80211_supported_ba /* We count frames that totally failed to be transmitted as two bad * frames, those that made it out but had some retries as one good and * one bad frame. */ - if (info->status.excessive_retries) { + if (!(info->flags & IEEE80211_TX_STAT_ACK)) { spinfo->tx_num_failed += 2; spinfo->tx_num_xmit++; - } else if (info->status.retry_count) { + } else if (info->status.rates[0].count > 1) { spinfo->tx_num_failed++; spinfo->tx_num_xmit++; } @@ -270,23 +270,32 @@ static void rate_control_pid_tx_status(void *priv, struct ieee80211_supported_ba } static void -rate_control_pid_get_rate(void *priv, struct ieee80211_supported_band *sband, - struct ieee80211_sta *sta, void *priv_sta, - struct sk_buff *skb, - struct rate_selection *sel) +rate_control_pid_get_rate(void *priv, struct ieee80211_sta *sta, + void *priv_sta, + struct ieee80211_tx_rate_control *txrc) { + struct sk_buff *skb = txrc->skb; + struct ieee80211_supported_band *sband = txrc->sband; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct rc_pid_sta_info *spinfo = priv_sta; int rateidx; u16 fc; + if (txrc->rts) + info->control.rates[0].count = + txrc->hw->conf.long_frame_max_tx_count; + else + info->control.rates[0].count = + txrc->hw->conf.short_frame_max_tx_count; + /* Send management frames and broadcast/multicast data using lowest * rate. */ fc = le16_to_cpu(hdr->frame_control); if (!sta || !spinfo || (fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA || is_multicast_ether_addr(hdr->addr1)) { - sel->rate_idx = rate_lowest_index(sband, sta); + info->control.rates[0].idx = rate_lowest_index(sband, sta); return; } @@ -295,7 +304,7 @@ rate_control_pid_get_rate(void *priv, struct ieee80211_supported_band *sband, if (rateidx >= sband->n_bitrates) rateidx = sband->n_bitrates - 1; - sel->rate_idx = rateidx; + info->control.rates[0].idx = rateidx; #ifdef CONFIG_MAC80211_DEBUGFS rate_control_pid_event_tx_rate(&spinfo->events, @@ -394,11 +403,11 @@ static void *rate_control_pid_alloc(struct ieee80211_hw *hw, S_IRUSR | S_IWUSR, debugfsdir, &pinfo->sampling_period); de->coeff_p = debugfs_create_u32("coeff_p", S_IRUSR | S_IWUSR, - debugfsdir, &pinfo->coeff_p); + debugfsdir, (u32 *)&pinfo->coeff_p); de->coeff_i = debugfs_create_u32("coeff_i", S_IRUSR | S_IWUSR, - debugfsdir, &pinfo->coeff_i); + debugfsdir, (u32 *)&pinfo->coeff_i); de->coeff_d = debugfs_create_u32("coeff_d", S_IRUSR | S_IWUSR, - debugfsdir, &pinfo->coeff_d); + debugfsdir, (u32 *)&pinfo->coeff_d); de->smoothing_shift = debugfs_create_u32("smoothing_shift", S_IRUSR | S_IWUSR, debugfsdir, &pinfo->smoothing_shift); @@ -437,10 +446,6 @@ static void rate_control_pid_free(void *priv) kfree(pinfo); } -static void rate_control_pid_clear(void *priv) -{ -} - static void *rate_control_pid_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp) { @@ -471,7 +476,6 @@ static struct rate_control_ops mac80211_rcpid = { .tx_status = rate_control_pid_tx_status, .get_rate = rate_control_pid_get_rate, .rate_init = rate_control_pid_rate_init, - .clear = rate_control_pid_clear, .alloc = rate_control_pid_alloc, .free = rate_control_pid_free, .alloc_sta = rate_control_pid_alloc_sta, diff --git a/net/mac80211/rc80211_pid_debugfs.c b/net/mac80211/rc80211_pid_debugfs.c index 8121d3bc683..a08a9b53034 100644 --- a/net/mac80211/rc80211_pid_debugfs.c +++ b/net/mac80211/rc80211_pid_debugfs.c @@ -43,6 +43,7 @@ void rate_control_pid_event_tx_status(struct rc_pid_event_buffer *buf, { union rc_pid_event_data evd; + evd.flags = stat->flags; memcpy(&evd.tx_status, stat, sizeof(struct ieee80211_tx_info)); rate_control_pid_event(buf, RC_PID_EVENT_TYPE_TX_STATUS, &evd); } @@ -167,8 +168,8 @@ static ssize_t rate_control_pid_events_read(struct file *file, char __user *buf, switch (ev->type) { case RC_PID_EVENT_TYPE_TX_STATUS: p += snprintf(pb + p, length - p, "tx_status %u %u", - ev->data.tx_status.status.excessive_retries, - ev->data.tx_status.status.retry_count); + !(ev->data.flags & IEEE80211_TX_STAT_ACK), + ev->data.tx_status.status.rates[0].idx); break; case RC_PID_EVENT_TYPE_RATE_CHANGE: p += snprintf(pb + p, length - p, "rate_change %d %d", diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index cf6b121e1bb..7175ae80c36 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -26,10 +26,11 @@ #include "tkip.h" #include "wme.h" -u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, - struct tid_ampdu_rx *tid_agg_rx, - struct sk_buff *skb, u16 mpdu_seq_num, - int bar_req); +static u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, + struct tid_ampdu_rx *tid_agg_rx, + struct sk_buff *skb, + u16 mpdu_seq_num, + int bar_req); /* * monitor mode reception * @@ -122,7 +123,6 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, /* radiotap header, set always present flags */ rthdr->it_present = cpu_to_le32((1 << IEEE80211_RADIOTAP_FLAGS) | - (1 << IEEE80211_RADIOTAP_RATE) | (1 << IEEE80211_RADIOTAP_CHANNEL) | (1 << IEEE80211_RADIOTAP_ANTENNA) | (1 << IEEE80211_RADIOTAP_RX_FLAGS)); @@ -148,7 +148,19 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, pos++; /* IEEE80211_RADIOTAP_RATE */ - *pos = rate->bitrate / 5; + if (status->flag & RX_FLAG_HT) { + /* + * TODO: add following information into radiotap header once + * suitable fields are defined for it: + * - MCS index (status->rate_idx) + * - HT40 (status->flag & RX_FLAG_40MHZ) + * - short-GI (status->flag & RX_FLAG_SHORT_GI) + */ + *pos = 0; + } else { + rthdr->it_present |= (1 << IEEE80211_RADIOTAP_RATE); + *pos = rate->bitrate / 5; + } pos++; /* IEEE80211_RADIOTAP_CHANNEL */ @@ -653,13 +665,16 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) static void ap_sta_ps_start(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; - DECLARE_MAC_BUF(mac); + struct ieee80211_local *local = sdata->local; atomic_inc(&sdata->bss->num_sta_ps); set_and_clear_sta_flags(sta, WLAN_STA_PS, WLAN_STA_PSPOLL); + if (local->ops->sta_notify) + local->ops->sta_notify(local_to_hw(local), &sdata->vif, + STA_NOTIFY_SLEEP, &sta->sta); #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - printk(KERN_DEBUG "%s: STA %s aid %d enters power save mode\n", - sdata->dev->name, print_mac(mac, sta->sta.addr), sta->sta.aid); + printk(KERN_DEBUG "%s: STA %pM aid %d enters power save mode\n", + sdata->dev->name, sta->sta.addr, sta->sta.aid); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ } @@ -669,38 +684,37 @@ static int ap_sta_ps_end(struct sta_info *sta) struct ieee80211_local *local = sdata->local; struct sk_buff *skb; int sent = 0; - struct ieee80211_tx_info *info; - DECLARE_MAC_BUF(mac); atomic_dec(&sdata->bss->num_sta_ps); clear_sta_flags(sta, WLAN_STA_PS | WLAN_STA_PSPOLL); + if (local->ops->sta_notify) + local->ops->sta_notify(local_to_hw(local), &sdata->vif, + STA_NOTIFY_AWAKE, &sta->sta); if (!skb_queue_empty(&sta->ps_tx_buf)) sta_info_clear_tim_bit(sta); #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - printk(KERN_DEBUG "%s: STA %s aid %d exits power save mode\n", - sdata->dev->name, print_mac(mac, sta->sta.addr), sta->sta.aid); + printk(KERN_DEBUG "%s: STA %pM aid %d exits power save mode\n", + sdata->dev->name, sta->sta.addr, sta->sta.aid); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ /* Send all buffered frames to the station */ while ((skb = skb_dequeue(&sta->tx_filtered)) != NULL) { - info = IEEE80211_SKB_CB(skb); sent++; - info->flags |= IEEE80211_TX_CTL_REQUEUE; + skb->requeue = 1; dev_queue_xmit(skb); } while ((skb = skb_dequeue(&sta->ps_tx_buf)) != NULL) { - info = IEEE80211_SKB_CB(skb); local->total_ps_buffered--; sent++; #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - printk(KERN_DEBUG "%s: STA %s aid %d send PS frame " + printk(KERN_DEBUG "%s: STA %pM aid %d send PS frame " "since STA not sleeping anymore\n", sdata->dev->name, - print_mac(mac, sta->sta.addr), sta->sta.aid); + sta->sta.addr, sta->sta.aid); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ - info->flags |= IEEE80211_TX_CTL_REQUEUE; + skb->requeue = 1; dev_queue_xmit(skb); } @@ -745,17 +759,29 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) sta->last_qual = rx->status->qual; sta->last_noise = rx->status->noise; + /* + * Change STA power saving mode only at the end of a frame + * exchange sequence. + */ if (!ieee80211_has_morefrags(hdr->frame_control) && (rx->sdata->vif.type == NL80211_IFTYPE_AP || rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) { - /* Change STA power saving mode only in the end of a frame - * exchange sequence */ - if (test_sta_flags(sta, WLAN_STA_PS) && - !ieee80211_has_pm(hdr->frame_control)) - rx->sent_ps_buffered += ap_sta_ps_end(sta); - else if (!test_sta_flags(sta, WLAN_STA_PS) && - ieee80211_has_pm(hdr->frame_control)) - ap_sta_ps_start(sta); + if (test_sta_flags(sta, WLAN_STA_PS)) { + /* + * Ignore doze->wake transitions that are + * indicated by non-data frames, the standard + * is unclear here, but for example going to + * PS mode and then scanning would cause a + * doze->wake transition for the probe request, + * and that is clearly undesirable. + */ + if (ieee80211_is_data(hdr->frame_control) && + !ieee80211_has_pm(hdr->frame_control)) + rx->sent_ps_buffered += ap_sta_ps_end(sta); + } else { + if (ieee80211_has_pm(hdr->frame_control)) + ap_sta_ps_start(sta); + } } /* Drop data::nullfunc frames silently, since they are used only to @@ -789,15 +815,12 @@ ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata, #ifdef CONFIG_MAC80211_VERBOSE_DEBUG struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) entry->skb_list.next->data; - DECLARE_MAC_BUF(mac); - DECLARE_MAC_BUF(mac2); printk(KERN_DEBUG "%s: RX reassembly removed oldest " "fragment entry (idx=%d age=%lu seq=%d last_frag=%d " - "addr1=%s addr2=%s\n", + "addr1=%pM addr2=%pM\n", sdata->dev->name, idx, jiffies - entry->first_frag_time, entry->seq, - entry->last_frag, print_mac(mac, hdr->addr1), - print_mac(mac2, hdr->addr2)); + entry->last_frag, hdr->addr1, hdr->addr2); #endif __skb_queue_purge(&entry->skb_list); } @@ -866,7 +889,6 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) unsigned int frag, seq; struct ieee80211_fragment_entry *entry; struct sk_buff *skb; - DECLARE_MAC_BUF(mac); hdr = (struct ieee80211_hdr *)rx->skb->data; fc = hdr->frame_control; @@ -970,7 +992,6 @@ ieee80211_rx_h_ps_poll(struct ieee80211_rx_data *rx) struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev); struct sk_buff *skb; int no_pending_pkts; - DECLARE_MAC_BUF(mac); __le16 fc = ((struct ieee80211_hdr *)rx->skb->data)->frame_control; if (likely(!rx->sta || !ieee80211_is_pspoll(fc) || @@ -1001,8 +1022,8 @@ ieee80211_rx_h_ps_poll(struct ieee80211_rx_data *rx) set_sta_flags(rx->sta, WLAN_STA_PSPOLL); #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - printk(KERN_DEBUG "STA %s aid %d: PS Poll (entries after %d)\n", - print_mac(mac, rx->sta->sta.addr), rx->sta->sta.aid, + printk(KERN_DEBUG "STA %pM aid %d: PS Poll (entries after %d)\n", + rx->sta->sta.addr, rx->sta->sta.aid, skb_queue_len(&rx->sta->ps_tx_buf)); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ @@ -1025,9 +1046,9 @@ ieee80211_rx_h_ps_poll(struct ieee80211_rx_data *rx) * Should we send it a null-func frame indicating we * have nothing buffered for it? */ - printk(KERN_DEBUG "%s: STA %s sent PS Poll even " + printk(KERN_DEBUG "%s: STA %pM sent PS Poll even " "though there are no buffered frames for it\n", - rx->dev->name, print_mac(mac, rx->sta->sta.addr)); + rx->dev->name, rx->sta->sta.addr); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ } @@ -1097,10 +1118,6 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx) u8 src[ETH_ALEN] __aligned(2); struct sk_buff *skb = rx->skb; struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - DECLARE_MAC_BUF(mac); - DECLARE_MAC_BUF(mac2); - DECLARE_MAC_BUF(mac3); - DECLARE_MAC_BUF(mac4); if (unlikely(!ieee80211_is_data_present(hdr->frame_control))) return -1; @@ -1279,7 +1296,6 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) int remaining, err; u8 dst[ETH_ALEN]; u8 src[ETH_ALEN]; - DECLARE_MAC_BUF(mac); if (unlikely(!ieee80211_is_data(fc))) return RX_CONTINUE; @@ -1552,14 +1568,6 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) if (len < IEEE80211_MIN_ACTION_SIZE + 1) return RX_DROP_MONITOR; - /* - * FIXME: revisit this, I'm sure we should handle most - * of these frames in other modes as well! - */ - if (sdata->vif.type != NL80211_IFTYPE_STATION && - sdata->vif.type != NL80211_IFTYPE_ADHOC) - return RX_CONTINUE; - switch (mgmt->u.action.category) { case WLAN_CATEGORY_BACK: switch (mgmt->u.action.u.addba_req.action_code) { @@ -1632,8 +1640,6 @@ static void ieee80211_rx_michael_mic_report(struct net_device *dev, { int keyidx; unsigned int hdrlen; - DECLARE_MAC_BUF(mac); - DECLARE_MAC_BUF(mac2); hdrlen = ieee80211_hdrlen(hdr->frame_control); if (rx->skb->len >= hdrlen + 4) @@ -1854,10 +1860,15 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, if (!(sdata->dev->flags & IFF_PROMISC)) return 0; rx->flags &= ~IEEE80211_RX_RA_MATCH; - } else if (!rx->sta) - rx->sta = ieee80211_ibss_add_sta(sdata, rx->skb, - bssid, hdr->addr2, - BIT(rx->status->rate_idx)); + } else if (!rx->sta) { + int rate_idx; + if (rx->status->flag & RX_FLAG_HT) + rate_idx = 0; /* TODO: HT rates */ + else + rate_idx = rx->status->rate_idx; + rx->sta = ieee80211_ibss_add_sta(sdata, bssid, hdr->addr2, + BIT(rate_idx)); + } break; case NL80211_IFTYPE_MESH_POINT: if (!multicast && @@ -2002,17 +2013,17 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, static inline int seq_less(u16 sq1, u16 sq2) { - return (((sq1 - sq2) & SEQ_MASK) > (SEQ_MODULO >> 1)); + return ((sq1 - sq2) & SEQ_MASK) > (SEQ_MODULO >> 1); } static inline u16 seq_inc(u16 sq) { - return ((sq + 1) & SEQ_MASK); + return (sq + 1) & SEQ_MASK; } static inline u16 seq_sub(u16 sq1, u16 sq2) { - return ((sq1 - sq2) & SEQ_MASK); + return (sq1 - sq2) & SEQ_MASK; } @@ -2020,10 +2031,11 @@ static inline u16 seq_sub(u16 sq1, u16 sq2) * As it function blongs to Rx path it must be called with * the proper rcu_read_lock protection for its flow. */ -u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, - struct tid_ampdu_rx *tid_agg_rx, - struct sk_buff *skb, u16 mpdu_seq_num, - int bar_req) +static u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, + struct tid_ampdu_rx *tid_agg_rx, + struct sk_buff *skb, + u16 mpdu_seq_num, + int bar_req) { struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_rx_status status; @@ -2062,7 +2074,13 @@ u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, tid_agg_rx->reorder_buf[index]->cb, sizeof(status)); sband = local->hw.wiphy->bands[status.band]; - rate = &sband->bitrates[status.rate_idx]; + if (status.flag & RX_FLAG_HT) { + /* TODO: HT rates */ + rate = sband->bitrates; + } else { + rate = &sband->bitrates + [status.rate_idx]; + } __ieee80211_rx_handle_packet(hw, tid_agg_rx->reorder_buf[index], &status, rate); @@ -2106,7 +2124,10 @@ u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, memcpy(&status, tid_agg_rx->reorder_buf[index]->cb, sizeof(status)); sband = local->hw.wiphy->bands[status.band]; - rate = &sband->bitrates[status.rate_idx]; + if (status.flag & RX_FLAG_HT) + rate = sband->bitrates; /* TODO: HT rates */ + else + rate = &sband->bitrates[status.rate_idx]; __ieee80211_rx_handle_packet(hw, tid_agg_rx->reorder_buf[index], &status, rate); tid_agg_rx->stored_mpdu_num--; @@ -2194,15 +2215,26 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb, } sband = local->hw.wiphy->bands[status->band]; - - if (!sband || - status->rate_idx < 0 || - status->rate_idx >= sband->n_bitrates) { + if (!sband) { WARN_ON(1); return; } - rate = &sband->bitrates[status->rate_idx]; + if (status->flag & RX_FLAG_HT) { + /* rate_idx is MCS index */ + if (WARN_ON(status->rate_idx < 0 || + status->rate_idx >= 76)) + return; + /* HT rates are not in the table - use the highest legacy rate + * for now since other parts of mac80211 may not yet be fully + * MCS aware. */ + rate = &sband->bitrates[sband->n_bitrates - 1]; + } else { + if (WARN_ON(status->rate_idx < 0 || + status->rate_idx >= sband->n_bitrates)) + return; + rate = &sband->bitrates[status->rate_idx]; + } /* * key references and virtual interfaces are protected using RCU diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 416bb41099f..f5c7c337192 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -159,7 +159,7 @@ ieee80211_rx_mesh_bss_add(struct ieee80211_local *local, u8 *mesh_id, int mesh_i { struct ieee80211_bss *bss; - if (mesh_config_len != MESH_CFG_LEN) + if (mesh_config_len != IEEE80211_MESH_CONFIG_LEN) return NULL; bss = kzalloc(sizeof(*bss), GFP_ATOMIC); @@ -448,18 +448,17 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw) if (local->hw_scanning) { local->hw_scanning = false; - if (ieee80211_hw_config(local)) - printk(KERN_DEBUG "%s: failed to restore operational " - "channel after scan\n", wiphy_name(local->hw.wiphy)); - + /* + * Somebody might have requested channel change during scan + * that we won't have acted upon, try now. ieee80211_hw_config + * will set the flag based on actual changes. + */ + ieee80211_hw_config(local, 0); goto done; } local->sw_scanning = false; - if (ieee80211_hw_config(local)) - printk(KERN_DEBUG "%s: failed to restore operational " - "channel after scan\n", wiphy_name(local->hw.wiphy)); - + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); netif_tx_lock_bh(local->mdev); netif_addr_lock(local->mdev); @@ -546,12 +545,9 @@ void ieee80211_scan_work(struct work_struct *work) if (!skip) { local->scan_channel = chan; - if (ieee80211_hw_config(local)) { - printk(KERN_DEBUG "%s: failed to set freq to " - "%d MHz for scan\n", wiphy_name(local->hw.wiphy), - chan->center_freq); + if (ieee80211_hw_config(local, + IEEE80211_CONF_CHANGE_CHANNEL)) skip = 1; - } } /* advance state machine to next channel/band */ diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index d254446b85b..10c5539c20a 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -137,14 +137,12 @@ struct sta_info *sta_info_get_by_idx(struct ieee80211_local *local, int idx, static void __sta_info_free(struct ieee80211_local *local, struct sta_info *sta) { - DECLARE_MAC_BUF(mbuf); - rate_control_free_sta(sta); rate_control_put(sta->rate_ctrl); #ifdef CONFIG_MAC80211_VERBOSE_DEBUG - printk(KERN_DEBUG "%s: Destroyed STA %s\n", - wiphy_name(local->hw.wiphy), print_mac(mbuf, sta->sta.addr)); + printk(KERN_DEBUG "%s: Destroyed STA %pM\n", + wiphy_name(local->hw.wiphy), sta->sta.addr); #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ kfree(sta); @@ -222,7 +220,6 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct sta_info *sta; int i; - DECLARE_MAC_BUF(mbuf); sta = kzalloc(sizeof(*sta) + local->hw.sta_data_size, gfp); if (!sta) @@ -263,8 +260,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, skb_queue_head_init(&sta->tx_filtered); #ifdef CONFIG_MAC80211_VERBOSE_DEBUG - printk(KERN_DEBUG "%s: Allocated STA %s\n", - wiphy_name(local->hw.wiphy), print_mac(mbuf, sta->sta.addr)); + printk(KERN_DEBUG "%s: Allocated STA %pM\n", + wiphy_name(local->hw.wiphy), sta->sta.addr); #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ #ifdef CONFIG_MAC80211_MESH @@ -281,7 +278,6 @@ int sta_info_insert(struct sta_info *sta) struct ieee80211_sub_if_data *sdata = sta->sdata; unsigned long flags; int err = 0; - DECLARE_MAC_BUF(mac); /* * Can't be a WARN_ON because it can be triggered through a race: @@ -294,7 +290,7 @@ int sta_info_insert(struct sta_info *sta) } if (WARN_ON(compare_ether_addr(sta->sta.addr, sdata->dev->dev_addr) == 0 || - is_multicast_ether_addr(sta->sta.addr))) { + is_multicast_ether_addr(sta->sta.addr))) { err = -EINVAL; goto out_free; } @@ -322,8 +318,8 @@ int sta_info_insert(struct sta_info *sta) } #ifdef CONFIG_MAC80211_VERBOSE_DEBUG - printk(KERN_DEBUG "%s: Inserted STA %s\n", - wiphy_name(local->hw.wiphy), print_mac(mac, sta->sta.addr)); + printk(KERN_DEBUG "%s: Inserted STA %pM\n", + wiphy_name(local->hw.wiphy), sta->sta.addr); #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ spin_unlock_irqrestore(&local->sta_lock, flags); @@ -423,9 +419,6 @@ static void __sta_info_unlink(struct sta_info **sta) { struct ieee80211_local *local = (*sta)->local; struct ieee80211_sub_if_data *sdata = (*sta)->sdata; -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - DECLARE_MAC_BUF(mbuf); -#endif /* * pull caller's reference if we're already gone. */ @@ -468,8 +461,8 @@ static void __sta_info_unlink(struct sta_info **sta) } #ifdef CONFIG_MAC80211_VERBOSE_DEBUG - printk(KERN_DEBUG "%s: Removed STA %s\n", - wiphy_name(local->hw.wiphy), print_mac(mbuf, (*sta)->sta.addr)); + printk(KERN_DEBUG "%s: Removed STA %pM\n", + wiphy_name(local->hw.wiphy), (*sta)->sta.addr); #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ /* @@ -544,7 +537,6 @@ static void sta_info_cleanup_expire_buffered(struct ieee80211_local *local, unsigned long flags; struct sk_buff *skb; struct ieee80211_sub_if_data *sdata; - DECLARE_MAC_BUF(mac); if (skb_queue_empty(&sta->ps_tx_buf)) return; @@ -564,8 +556,8 @@ static void sta_info_cleanup_expire_buffered(struct ieee80211_local *local, sdata = sta->sdata; local->total_ps_buffered--; #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - printk(KERN_DEBUG "Buffered frame expired (STA " - "%s)\n", print_mac(mac, sta->sta.addr)); + printk(KERN_DEBUG "Buffered frame expired (STA %pM)\n", + sta->sta.addr); #endif dev_kfree_skb(skb); @@ -809,15 +801,14 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct sta_info *sta, *tmp; LIST_HEAD(tmp_list); - DECLARE_MAC_BUF(mac); unsigned long flags; spin_lock_irqsave(&local->sta_lock, flags); list_for_each_entry_safe(sta, tmp, &local->sta_list, list) if (time_after(jiffies, sta->last_rx + exp_time)) { #ifdef CONFIG_MAC80211_IBSS_DEBUG - printk(KERN_DEBUG "%s: expiring inactive STA %s\n", - sdata->dev->name, print_mac(mac, sta->sta.addr)); + printk(KERN_DEBUG "%s: expiring inactive STA %pM\n", + sdata->dev->name, sta->sta.addr); #endif __sta_info_unlink(&sta); if (sta) @@ -830,7 +821,7 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, } struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_hw *hw, - const u8 *addr) + const u8 *addr) { struct sta_info *sta = sta_info_get(hw_to_local(hw), addr); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 168a39a298b..dc2606d0ae7 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -160,18 +160,17 @@ struct sta_ampdu_mlme { * @list: global linked list entry * @hnext: hash table linked list pointer * @local: pointer to the global information - * @sdata: TBD - * @key: TBD - * @rate_ctrl: TBD - * @rate_ctrl_priv: TBD + * @sdata: virtual interface this station belongs to + * @key: peer key negotiated with this station, if any + * @rate_ctrl: rate control algorithm reference + * @rate_ctrl_priv: rate control private per-STA pointer + * @last_tx_rate: rate used for last transmit, to report to userspace as + * "the" transmit rate * @lock: used for locking all fields that require locking, see comments * in the header file. * @flaglock: spinlock for flags accesses - * @addr: MAC address of this STA - * @aid: STA's unique AID (1..2007, 0 = not assigned yet), - * only used in AP (and IBSS?) mode - * @listen_interval: TBD - * @pin_status: TBD + * @listen_interval: listen interval of this station, when we're acting as AP + * @pin_status: used internally for pinning a STA struct into memory * @flags: STA flags, see &enum ieee80211_sta_info_flags * @ps_tx_buf: buffer of frames to transmit to this station * when it leaves power saving state @@ -180,8 +179,8 @@ struct sta_ampdu_mlme { * power saving state * @rx_packets: Number of MSDUs received from this STA * @rx_bytes: Number of bytes received from this STA - * @wep_weak_iv_count: TBD - * @last_rx: TBD + * @wep_weak_iv_count: number of weak WEP IVs received from this station + * @last_rx: time (in jiffies) when last frame was received from this STA * @num_duplicates: number of duplicate frames received from this STA * @rx_fragments: number of received MPDUs * @rx_dropped: number of dropped MPDUs from this STA @@ -189,26 +188,26 @@ struct sta_ampdu_mlme { * @last_qual: qual of last received frame from this STA * @last_noise: noise of last received frame from this STA * @last_seq_ctrl: last received seq/frag number from this STA (per RX queue) - * @tx_filtered_count: TBD - * @tx_retry_failed: TBD - * @tx_retry_count: TBD + * @tx_filtered_count: number of frames the hardware filtered for this STA + * @tx_retry_failed: number of frames that failed retry + * @tx_retry_count: total number of retries for frames to this STA * @fail_avg: moving percentage of failed MSDUs * @tx_packets: number of RX/TX MSDUs - * @tx_bytes: TBD + * @tx_bytes: number of bytes transmitted to this STA * @tx_fragments: number of transmitted MPDUs - * @last_txrate_idx: Index of the last used transmit rate - * @tid_seq: TBD - * @ampdu_mlme: TBD + * @last_txrate: description of the last used transmit rate + * @tid_seq: per-TID sequence numbers for sending to this STA + * @ampdu_mlme: A-MPDU state machine state * @timer_to_tid: identity mapping to ID timers * @tid_to_tx_q: map tid to tx queue * @llid: Local link ID * @plid: Peer link ID * @reason: Cancel reason on PLINK_HOLDING state * @plink_retries: Retries in establishment - * @ignore_plink_timer: TBD - * @plink_state plink_state: TBD - * @plink_timeout: TBD - * @plink_timer: TBD + * @ignore_plink_timer: ignore the peer-link timer (used internally) + * @plink_state: peer link state + * @plink_timeout: timeout of peer link + * @plink_timer: peer link watch timer * @debugfs: debug filesystem info * @sta: station information we share with the driver */ @@ -267,7 +266,7 @@ struct sta_info { unsigned long tx_packets; unsigned long tx_bytes; unsigned long tx_fragments; - unsigned int last_txrate_idx; + struct ieee80211_tx_rate last_tx_rate; u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1]; /* diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c index 34b32bc8f60..38fa111d2dc 100644 --- a/net/mac80211/tkip.c +++ b/net/mac80211/tkip.c @@ -263,10 +263,9 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm, (iv32 == key->u.tkip.rx[queue].iv32 && iv16 <= key->u.tkip.rx[queue].iv16))) { #ifdef CONFIG_MAC80211_TKIP_DEBUG - DECLARE_MAC_BUF(mac); printk(KERN_DEBUG "TKIP replay detected for RX frame from " - "%s (RX IV (%04x,%02x) <= prev. IV (%04x,%02x)\n", - print_mac(mac, ta), + "%pM (RX IV (%04x,%02x) <= prev. IV (%04x,%02x)\n", + ta, iv32, iv16, key->u.tkip.rx[queue].iv32, key->u.tkip.rx[queue].iv16); #endif @@ -287,9 +286,8 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm, { int i; u8 key_offset = NL80211_TKIP_DATA_OFFSET_ENCR_KEY; - DECLARE_MAC_BUF(mac); - printk(KERN_DEBUG "TKIP decrypt: Phase1 TA=%s" - " TK=", print_mac(mac, ta)); + printk(KERN_DEBUG "TKIP decrypt: Phase1 TA=%pM" + " TK=", ta); for (i = 0; i < 16; i++) printk("%02x ", key->conf.key[key_offset + i]); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 1460537faf3..a4af3a124cc 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -46,13 +46,20 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr, struct ieee80211_local *local = tx->local; struct ieee80211_supported_band *sband; struct ieee80211_hdr *hdr; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); + + /* assume HW handles this */ + if (info->control.rates[0].flags & IEEE80211_TX_RC_MCS) + return 0; + + /* uh huh? */ + if (WARN_ON_ONCE(info->control.rates[0].idx < 0)) + return 0; sband = local->hw.wiphy->bands[tx->channel->band]; - txrate = &sband->bitrates[tx->rate_idx]; + txrate = &sband->bitrates[info->control.rates[0].idx]; - erp = 0; - if (tx->sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) - erp = txrate->flags & IEEE80211_RATE_ERP_G; + erp = txrate->flags & IEEE80211_RATE_ERP_G; /* * data and mgmt (except PS Poll): @@ -116,7 +123,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr, if (r->bitrate > txrate->bitrate) break; - if (tx->sdata->bss_conf.basic_rates & BIT(i)) + if (tx->sdata->vif.bss_conf.basic_rates & BIT(i)) rate = r->bitrate; switch (sband->band) { @@ -150,7 +157,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr, * to closest integer */ dur = ieee80211_frame_duration(local, 10, rate, erp, - tx->sdata->bss_conf.use_short_preamble); + tx->sdata->vif.bss_conf.use_short_preamble); if (next_frag_len) { /* Frame is fragmented: duration increases with time needed to @@ -159,7 +166,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr, /* next fragment */ dur += ieee80211_frame_duration(local, next_frag_len, txrate->bitrate, erp, - tx->sdata->bss_conf.use_short_preamble); + tx->sdata->vif.bss_conf.use_short_preamble); } return cpu_to_le16(dur); @@ -201,10 +208,9 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx) tx->sdata->vif.type != NL80211_IFTYPE_ADHOC && ieee80211_is_data(hdr->frame_control))) { #ifdef CONFIG_MAC80211_VERBOSE_DEBUG - DECLARE_MAC_BUF(mac); printk(KERN_DEBUG "%s: dropped data frame to not " - "associated station %s\n", - tx->dev->name, print_mac(mac, hdr->addr1)); + "associated station %pM\n", + tx->dev->name, hdr->addr1); #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ I802_DEBUG_INC(tx->local->tx_handlers_drop_not_assoc); return TX_DROP; @@ -331,7 +337,6 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; u32 staflags; - DECLARE_MAC_BUF(mac); if (unlikely(!sta || ieee80211_is_probe_resp(hdr->frame_control))) return TX_CONTINUE; @@ -341,9 +346,9 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) if (unlikely((staflags & WLAN_STA_PS) && !(staflags & WLAN_STA_PSPOLL))) { #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - printk(KERN_DEBUG "STA %s aid %d: PS buffer (entries " + printk(KERN_DEBUG "STA %pM aid %d: PS buffer (entries " "before %d)\n", - print_mac(mac, sta->sta.addr), sta->sta.aid, + sta->sta.addr, sta->sta.aid, skb_queue_len(&sta->ps_tx_buf)); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER) @@ -352,9 +357,9 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) struct sk_buff *old = skb_dequeue(&sta->ps_tx_buf); #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG if (net_ratelimit()) { - printk(KERN_DEBUG "%s: STA %s TX " + printk(KERN_DEBUG "%s: STA %pM TX " "buffer full - dropping oldest frame\n", - tx->dev->name, print_mac(mac, sta->sta.addr)); + tx->dev->name, sta->sta.addr); } #endif dev_kfree_skb(old); @@ -371,9 +376,9 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) } #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG else if (unlikely(test_sta_flags(sta, WLAN_STA_PS))) { - printk(KERN_DEBUG "%s: STA %s in PS mode, but pspoll " + printk(KERN_DEBUG "%s: STA %pM in PS mode, but pspoll " "set -> send frame\n", tx->dev->name, - print_mac(mac, sta->sta.addr)); + sta->sta.addr); } #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ clear_sta_flags(sta, WLAN_STA_PSPOLL); @@ -439,140 +444,154 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) static ieee80211_tx_result debug_noinline ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) { - struct rate_selection rsel; - struct ieee80211_supported_band *sband; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); + struct ieee80211_hdr *hdr = (void *)tx->skb->data; + struct ieee80211_supported_band *sband; + struct ieee80211_rate *rate; + int i, len; + bool inval = false, rts = false, short_preamble = false; + struct ieee80211_tx_rate_control txrc; - sband = tx->local->hw.wiphy->bands[tx->channel->band]; + memset(&txrc, 0, sizeof(txrc)); - if (likely(tx->rate_idx < 0)) { - rate_control_get_rate(tx->sdata, sband, tx->sta, - tx->skb, &rsel); - if (tx->sta) - tx->sta->last_txrate_idx = rsel.rate_idx; - tx->rate_idx = rsel.rate_idx; - if (unlikely(rsel.probe_idx >= 0)) { - info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; - tx->flags |= IEEE80211_TX_PROBE_LAST_FRAG; - info->control.retries[0].rate_idx = tx->rate_idx; - info->control.retries[0].limit = tx->local->hw.max_altrate_tries; - tx->rate_idx = rsel.probe_idx; - } else if (info->control.retries[0].limit == 0) - info->control.retries[0].rate_idx = -1; - - if (unlikely(tx->rate_idx < 0)) - return TX_DROP; - } else - info->control.retries[0].rate_idx = -1; + sband = tx->local->hw.wiphy->bands[tx->channel->band]; - if (tx->sdata->bss_conf.use_cts_prot && - (tx->flags & IEEE80211_TX_FRAGMENTED) && (rsel.nonerp_idx >= 0)) { - tx->last_frag_rate_idx = tx->rate_idx; - if (rsel.probe_idx >= 0) - tx->flags &= ~IEEE80211_TX_PROBE_LAST_FRAG; - else - tx->flags |= IEEE80211_TX_PROBE_LAST_FRAG; - tx->rate_idx = rsel.nonerp_idx; - info->tx_rate_idx = rsel.nonerp_idx; - info->flags &= ~IEEE80211_TX_CTL_RATE_CTRL_PROBE; - } else { - tx->last_frag_rate_idx = tx->rate_idx; - info->tx_rate_idx = tx->rate_idx; + len = min_t(int, tx->skb->len + FCS_LEN, + tx->local->fragmentation_threshold); + + /* set up the tx rate control struct we give the RC algo */ + txrc.hw = local_to_hw(tx->local); + txrc.sband = sband; + txrc.bss_conf = &tx->sdata->vif.bss_conf; + txrc.skb = tx->skb; + txrc.reported_rate.idx = -1; + txrc.max_rate_idx = tx->sdata->max_ratectrl_rateidx; + + /* set up RTS protection if desired */ + if (tx->local->rts_threshold < IEEE80211_MAX_RTS_THRESHOLD && + len > tx->local->rts_threshold) { + txrc.rts = rts = true; } - info->tx_rate_idx = tx->rate_idx; - return TX_CONTINUE; -} + /* + * Use short preamble if the BSS can handle it, but not for + * management frames unless we know the receiver can handle + * that -- the management frame might be to a station that + * just wants a probe response. + */ + if (tx->sdata->vif.bss_conf.use_short_preamble && + (ieee80211_is_data(hdr->frame_control) || + (tx->sta && test_sta_flags(tx->sta, WLAN_STA_SHORT_PREAMBLE)))) + txrc.short_preamble = short_preamble = true; -static ieee80211_tx_result debug_noinline -ieee80211_tx_h_misc(struct ieee80211_tx_data *tx) -{ - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); - struct ieee80211_supported_band *sband; - sband = tx->local->hw.wiphy->bands[tx->channel->band]; + rate_control_get_rate(tx->sdata, tx->sta, &txrc); + + if (unlikely(info->control.rates[0].idx < 0)) + return TX_DROP; + + if (txrc.reported_rate.idx < 0) + txrc.reported_rate = info->control.rates[0]; if (tx->sta) - info->control.sta = &tx->sta->sta; + tx->sta->last_tx_rate = txrc.reported_rate; - if (!info->control.retry_limit) { - if (!is_multicast_ether_addr(hdr->addr1)) { - int len = min_t(int, tx->skb->len + FCS_LEN, - tx->local->fragmentation_threshold); - if (len > tx->local->rts_threshold - && tx->local->rts_threshold < - IEEE80211_MAX_RTS_THRESHOLD) { - info->flags |= IEEE80211_TX_CTL_USE_RTS_CTS; - info->flags |= - IEEE80211_TX_CTL_LONG_RETRY_LIMIT; - info->control.retry_limit = - tx->local->long_retry_limit; - } else { - info->control.retry_limit = - tx->local->short_retry_limit; - } - } else { - info->control.retry_limit = 1; - } - } + if (unlikely(!info->control.rates[0].count)) + info->control.rates[0].count = 1; - if (tx->flags & IEEE80211_TX_FRAGMENTED) { - /* Do not use multiple retry rates when sending fragmented - * frames. - * TODO: The last fragment could still use multiple retry - * rates. */ - info->control.retries[0].rate_idx = -1; + if (is_multicast_ether_addr(hdr->addr1)) { + /* + * XXX: verify the rate is in the basic rateset + */ + return TX_CONTINUE; } - /* Use CTS protection for unicast frames sent using extended rates if - * there are associated non-ERP stations and RTS/CTS is not configured - * for the frame. */ - if ((tx->sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) && - (sband->bitrates[tx->rate_idx].flags & IEEE80211_RATE_ERP_G) && - (tx->flags & IEEE80211_TX_UNICAST) && - tx->sdata->bss_conf.use_cts_prot && - !(info->flags & IEEE80211_TX_CTL_USE_RTS_CTS)) - info->flags |= IEEE80211_TX_CTL_USE_CTS_PROTECT; - - /* Transmit data frames using short preambles if the driver supports - * short preambles at the selected rate and short preambles are - * available on the network at the current point in time. */ - if (ieee80211_is_data(hdr->frame_control) && - (sband->bitrates[tx->rate_idx].flags & IEEE80211_RATE_SHORT_PREAMBLE) && - tx->sdata->bss_conf.use_short_preamble && - (!tx->sta || test_sta_flags(tx->sta, WLAN_STA_SHORT_PREAMBLE))) { - info->flags |= IEEE80211_TX_CTL_SHORT_PREAMBLE; + /* + * set up the RTS/CTS rate as the fastest basic rate + * that is not faster than the data rate + * + * XXX: Should this check all retry rates? + */ + if (!(info->control.rates[0].flags & IEEE80211_TX_RC_MCS)) { + s8 baserate = 0; + + rate = &sband->bitrates[info->control.rates[0].idx]; + + for (i = 0; i < sband->n_bitrates; i++) { + /* must be a basic rate */ + if (!(tx->sdata->vif.bss_conf.basic_rates & BIT(i))) + continue; + /* must not be faster than the data rate */ + if (sband->bitrates[i].bitrate > rate->bitrate) + continue; + /* maximum */ + if (sband->bitrates[baserate].bitrate < + sband->bitrates[i].bitrate) + baserate = i; + } + + info->control.rts_cts_rate_idx = baserate; } - if ((info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) || - (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT)) { - struct ieee80211_rate *rate; - s8 baserate = -1; - int idx; + for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { + /* + * make sure there's no valid rate following + * an invalid one, just in case drivers don't + * take the API seriously to stop at -1. + */ + if (inval) { + info->control.rates[i].idx = -1; + continue; + } + if (info->control.rates[i].idx < 0) { + inval = true; + continue; + } - /* Do not use multiple retry rates when using RTS/CTS */ - info->control.retries[0].rate_idx = -1; + /* + * For now assume MCS is already set up correctly, this + * needs to be fixed. + */ + if (info->control.rates[i].flags & IEEE80211_TX_RC_MCS) { + WARN_ON(info->control.rates[i].idx > 76); + continue; + } - /* Use min(data rate, max base rate) as CTS/RTS rate */ - rate = &sband->bitrates[tx->rate_idx]; + /* set up RTS protection if desired */ + if (rts) + info->control.rates[i].flags |= + IEEE80211_TX_RC_USE_RTS_CTS; - for (idx = 0; idx < sband->n_bitrates; idx++) { - if (sband->bitrates[idx].bitrate > rate->bitrate) - continue; - if (tx->sdata->bss_conf.basic_rates & BIT(idx) && - (baserate < 0 || - (sband->bitrates[baserate].bitrate - < sband->bitrates[idx].bitrate))) - baserate = idx; + /* RC is busted */ + if (WARN_ON_ONCE(info->control.rates[i].idx >= + sband->n_bitrates)) { + info->control.rates[i].idx = -1; + continue; } - if (baserate >= 0) - info->control.rts_cts_rate_idx = baserate; - else - info->control.rts_cts_rate_idx = 0; + rate = &sband->bitrates[info->control.rates[i].idx]; + + /* set up short preamble */ + if (short_preamble && + rate->flags & IEEE80211_RATE_SHORT_PREAMBLE) + info->control.rates[i].flags |= + IEEE80211_TX_RC_USE_SHORT_PREAMBLE; + + /* set up G protection */ + if (!rts && tx->sdata->vif.bss_conf.use_cts_prot && + rate->flags & IEEE80211_RATE_ERP_G) + info->control.rates[i].flags |= + IEEE80211_TX_RC_USE_CTS_PROTECT; } + return TX_CONTINUE; +} + +static ieee80211_tx_result debug_noinline +ieee80211_tx_h_misc(struct ieee80211_tx_data *tx) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); + if (tx->sta) info->control.sta = &tx->sta->sta; @@ -602,8 +621,18 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) if (ieee80211_hdrlen(hdr->frame_control) < 24) return TX_CONTINUE; + /* + * Anything but QoS data that has a sequence number field + * (is long enough) gets a sequence number from the global + * counter. + */ if (!ieee80211_is_data_qos(hdr->frame_control)) { + /* driver should assign sequence number */ info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ; + /* for pure STA mode without beacons, we can do it */ + hdr->seq_ctrl = cpu_to_le16(tx->sdata->sequence_number); + tx->sdata->sequence_number += 0x10; + tx->sdata->sequence_number &= IEEE80211_SCTL_SEQ; return TX_CONTINUE; } @@ -632,6 +661,7 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) static ieee80211_tx_result debug_noinline ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) { + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; size_t hdrlen, per_fragm, num_fragm, payload_len, left; struct sk_buff **frags, *first, *frag; @@ -648,9 +678,7 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) * This scenario is handled in __ieee80211_tx_prepare but extra * caution taken here as fragmented ampdu may cause Tx stop. */ - if (WARN_ON(tx->flags & IEEE80211_TX_CTL_AMPDU || - skb_get_queue_mapping(tx->skb) >= - ieee80211_num_regular_queues(&tx->local->hw))) + if (WARN_ON(info->flags & IEEE80211_TX_CTL_AMPDU)) return TX_DROP; first = tx->skb; @@ -684,20 +712,45 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) IEEE80211_ENCRYPT_TAILROOM); if (!frag) goto fail; + /* Make sure that all fragments use the same priority so * that they end up using the same TX queue */ frag->priority = first->priority; + skb_reserve(frag, tx->local->tx_headroom + IEEE80211_ENCRYPT_HEADROOM); + + /* copy TX information */ + info = IEEE80211_SKB_CB(frag); + memcpy(info, first->cb, sizeof(frag->cb)); + + /* copy/fill in 802.11 header */ fhdr = (struct ieee80211_hdr *) skb_put(frag, hdrlen); memcpy(fhdr, first->data, hdrlen); - if (i == num_fragm - 2) - fhdr->frame_control &= cpu_to_le16(~IEEE80211_FCTL_MOREFRAGS); fhdr->seq_ctrl = cpu_to_le16(seq | ((i + 1) & IEEE80211_SCTL_FRAG)); + + if (i == num_fragm - 2) { + /* clear MOREFRAGS bit for the last fragment */ + fhdr->frame_control &= cpu_to_le16(~IEEE80211_FCTL_MOREFRAGS); + } else { + /* + * No multi-rate retries for fragmented frames, that + * would completely throw off the NAV at other STAs. + */ + info->control.rates[1].idx = -1; + info->control.rates[2].idx = -1; + info->control.rates[3].idx = -1; + info->control.rates[4].idx = -1; + BUILD_BUG_ON(IEEE80211_TX_MAX_RATES != 5); + info->flags &= ~IEEE80211_TX_CTL_RATE_CTRL_PROBE; + } + + /* copy data */ copylen = left > per_fragm ? per_fragm : left; memcpy(skb_put(frag, copylen), pos, copylen); - memcpy(frag->cb, first->cb, sizeof(frag->cb)); + skb_copy_queue_mapping(frag, first); + frag->do_not_encrypt = first->do_not_encrypt; pos += copylen; @@ -757,12 +810,10 @@ ieee80211_tx_h_calculate_duration(struct ieee80211_tx_data *tx) tx->extra_frag[0]->len); for (i = 0; i < tx->num_extra_frag; i++) { - if (i + 1 < tx->num_extra_frag) { + if (i + 1 < tx->num_extra_frag) next_len = tx->extra_frag[i + 1]->len; - } else { + else next_len = 0; - tx->rate_idx = tx->last_frag_rate_idx; - } hdr = (struct ieee80211_hdr *)tx->extra_frag[i]->data; hdr->duration_id = ieee80211_duration(tx, 0, next_len); @@ -815,7 +866,6 @@ __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, (struct ieee80211_radiotap_header *) skb->data; struct ieee80211_supported_band *sband; int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len); - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); sband = tx->local->hw.wiphy->bands[tx->channel->band]; @@ -829,8 +879,6 @@ __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, */ while (!ret) { - int i, target_rate; - ret = ieee80211_radiotap_iterator_next(&iterator); if (ret) @@ -844,38 +892,6 @@ __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, * get_unaligned((type *)iterator.this_arg) to dereference * iterator.this_arg for type "type" safely on all arches. */ - case IEEE80211_RADIOTAP_RATE: - /* - * radiotap rate u8 is in 500kbps units eg, 0x02=1Mbps - * ieee80211 rate int is in 100kbps units eg, 0x0a=1Mbps - */ - target_rate = (*iterator.this_arg) * 5; - for (i = 0; i < sband->n_bitrates; i++) { - struct ieee80211_rate *r; - - r = &sband->bitrates[i]; - - if (r->bitrate == target_rate) { - tx->rate_idx = i; - break; - } - } - break; - - case IEEE80211_RADIOTAP_ANTENNA: - /* - * radiotap uses 0 for 1st ant, mac80211 is 1 for - * 1st ant - */ - info->antenna_sel_tx = (*iterator.this_arg) + 1; - break; - -#if 0 - case IEEE80211_RADIOTAP_DBM_TX_POWER: - control->power_level = *iterator.this_arg; - break; -#endif - case IEEE80211_RADIOTAP_FLAGS: if (*iterator.this_arg & IEEE80211_RADIOTAP_F_FCS) { /* @@ -933,7 +949,8 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx, struct ieee80211_sub_if_data *sdata; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - int hdrlen; + int hdrlen, tid; + u8 *qc, *state; memset(tx, 0, sizeof(*tx)); tx->skb = skb; @@ -941,8 +958,6 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx, tx->local = local; tx->sdata = IEEE80211_DEV_TO_SUB_IF(dev); tx->channel = local->hw.conf.channel; - tx->rate_idx = -1; - tx->last_frag_rate_idx = -1; /* * Set this flag (used below to indicate "automatic fragmentation"), * it will be cleared/left by radiotap as desired. @@ -966,6 +981,15 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx, tx->sta = sta_info_get(local, hdr->addr1); + if (tx->sta && ieee80211_is_data_qos(hdr->frame_control)) { + qc = ieee80211_get_qos_ctl(hdr); + tid = *qc & IEEE80211_QOS_CTL_TID_MASK; + + state = &tx->sta->ampdu_mlme.tid_state_tx[tid]; + if (*state == HT_AGG_STATE_OPERATIONAL) + info->flags |= IEEE80211_TX_CTL_AMPDU; + } + if (is_multicast_ether_addr(hdr->addr1)) { tx->flags &= ~IEEE80211_TX_UNICAST; info->flags |= IEEE80211_TX_CTL_NO_ACK; @@ -977,7 +1001,6 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx, if (tx->flags & IEEE80211_TX_FRAGMENTED) { if ((tx->flags & IEEE80211_TX_UNICAST) && skb->len + FCS_LEN > local->fragmentation_threshold && - !local->ops->set_frag_threshold && !(info->flags & IEEE80211_TX_CTL_AMPDU)) tx->flags |= IEEE80211_TX_FRAGMENTED; else @@ -1043,23 +1066,11 @@ static int __ieee80211_tx(struct ieee80211_local *local, struct sk_buff *skb, if (!tx->extra_frag[i]) continue; info = IEEE80211_SKB_CB(tx->extra_frag[i]); - info->flags &= ~(IEEE80211_TX_CTL_USE_RTS_CTS | - IEEE80211_TX_CTL_USE_CTS_PROTECT | - IEEE80211_TX_CTL_CLEAR_PS_FILT | + info->flags &= ~(IEEE80211_TX_CTL_CLEAR_PS_FILT | IEEE80211_TX_CTL_FIRST_FRAGMENT); if (netif_subqueue_stopped(local->mdev, tx->extra_frag[i])) return IEEE80211_TX_FRAG_AGAIN; - if (i == tx->num_extra_frag) { - info->tx_rate_idx = tx->last_frag_rate_idx; - - if (tx->flags & IEEE80211_TX_PROBE_LAST_FRAG) - info->flags |= - IEEE80211_TX_CTL_RATE_CTRL_PROBE; - else - info->flags &= - ~IEEE80211_TX_CTL_RATE_CTRL_PROBE; - } ret = local->ops->tx(local_to_hw(local), tx->extra_frag[i]); @@ -1168,7 +1179,7 @@ retry: * queues, there's no reason for a driver to reject * a frame there, warn and drop it. */ - if (WARN_ON(queue >= ieee80211_num_regular_queues(&local->hw))) + if (WARN_ON(info->flags & IEEE80211_TX_CTL_AMPDU)) goto drop; store = &local->pending_packet[queue]; @@ -1196,9 +1207,6 @@ retry: store->skb = skb; store->extra_frag = tx.extra_frag; store->num_extra_frag = tx.num_extra_frag; - store->last_frag_rate_idx = tx.last_frag_rate_idx; - store->last_frag_rate_ctrl_probe = - !!(tx.flags & IEEE80211_TX_PROBE_LAST_FRAG); } out: rcu_read_unlock(); @@ -1465,6 +1473,19 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, goto fail; } + if (!(local->hw.flags & IEEE80211_HW_NO_STACK_DYNAMIC_PS) && + local->dynamic_ps_timeout > 0) { + if (local->hw.conf.flags & IEEE80211_CONF_PS) { + ieee80211_stop_queues_by_reason(&local->hw, + IEEE80211_QUEUE_STOP_REASON_PS); + queue_work(local->hw.workqueue, + &local->dynamic_ps_disable_work); + } + + mod_timer(&local->dynamic_ps_timer, jiffies + + msecs_to_jiffies(local->dynamic_ps_timeout)); + } + nh_pos = skb_network_header(skb) - skb->data; h_pos = skb_transport_header(skb) - skb->data; @@ -1593,12 +1614,10 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, compare_ether_addr(dev->dev_addr, skb->data + ETH_ALEN) == 0))) { #ifdef CONFIG_MAC80211_VERBOSE_DEBUG - DECLARE_MAC_BUF(mac); - if (net_ratelimit()) - printk(KERN_DEBUG "%s: dropped frame to %s" + printk(KERN_DEBUG "%s: dropped frame to %pM" " (unauthorized port)\n", dev->name, - print_mac(mac, hdr.addr1)); + hdr.addr1); #endif I802_DEBUG_INC(local->tx_handlers_drop_unauth_port); @@ -1757,10 +1776,7 @@ void ieee80211_tx_pending(unsigned long data) store = &local->pending_packet[i]; tx.extra_frag = store->extra_frag; tx.num_extra_frag = store->num_extra_frag; - tx.last_frag_rate_idx = store->last_frag_rate_idx; tx.flags = 0; - if (store->last_frag_rate_ctrl_probe) - tx.flags |= IEEE80211_TX_PROBE_LAST_FRAG; ret = __ieee80211_tx(local, store->skb, &tx); if (ret) { if (ret == IEEE80211_TX_FRAG_AGAIN) @@ -1775,8 +1791,7 @@ void ieee80211_tx_pending(unsigned long data) /* functions for drivers to get certain frames */ -static void ieee80211_beacon_add_tim(struct ieee80211_local *local, - struct ieee80211_if_ap *bss, +static void ieee80211_beacon_add_tim(struct ieee80211_if_ap *bss, struct sk_buff *skb, struct beacon_data *beacon) { @@ -1844,11 +1859,9 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, struct ieee80211_local *local = hw_to_local(hw); struct sk_buff *skb = NULL; struct ieee80211_tx_info *info; - struct net_device *bdev; struct ieee80211_sub_if_data *sdata = NULL; struct ieee80211_if_ap *ap = NULL; struct ieee80211_if_sta *ifsta = NULL; - struct rate_selection rsel; struct beacon_data *beacon; struct ieee80211_supported_band *sband; enum ieee80211_band band = local->hw.conf.channel->band; @@ -1858,7 +1871,6 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, rcu_read_lock(); sdata = vif_to_sdata(vif); - bdev = sdata->dev; if (sdata->vif.type == NL80211_IFTYPE_AP) { ap = &sdata->u.ap; @@ -1886,12 +1898,12 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, * of the tim bitmap in mac80211 and the driver. */ if (local->tim_in_locked_section) { - ieee80211_beacon_add_tim(local, ap, skb, beacon); + ieee80211_beacon_add_tim(ap, skb, beacon); } else { unsigned long flags; spin_lock_irqsave(&local->sta_lock, flags); - ieee80211_beacon_add_tim(local, ap, skb, beacon); + ieee80211_beacon_add_tim(ap, skb, beacon); spin_unlock_irqrestore(&local->sta_lock, flags); } @@ -1952,33 +1964,23 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, skb->do_not_encrypt = 1; info->band = band; - rate_control_get_rate(sdata, sband, NULL, skb, &rsel); - - if (unlikely(rsel.rate_idx < 0)) { - if (net_ratelimit()) { - printk(KERN_DEBUG "%s: ieee80211_beacon_get: " - "no rate found\n", - wiphy_name(local->hw.wiphy)); - } - dev_kfree_skb_any(skb); - skb = NULL; - goto out; - } + /* + * XXX: For now, always use the lowest rate + */ + info->control.rates[0].idx = 0; + info->control.rates[0].count = 1; + info->control.rates[1].idx = -1; + info->control.rates[2].idx = -1; + info->control.rates[3].idx = -1; + info->control.rates[4].idx = -1; + BUILD_BUG_ON(IEEE80211_TX_MAX_RATES != 5); info->control.vif = vif; - info->tx_rate_idx = rsel.rate_idx; info->flags |= IEEE80211_TX_CTL_NO_ACK; info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT; info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ; - if (sdata->bss_conf.use_short_preamble && - sband->bitrates[rsel.rate_idx].flags & IEEE80211_RATE_SHORT_PREAMBLE) - info->flags |= IEEE80211_TX_CTL_SHORT_PREAMBLE; - - info->antenna_sel_tx = local->hw.conf.antenna_sel_tx; - info->control.retry_limit = 1; - -out: + out: rcu_read_unlock(); return skb; } @@ -2023,14 +2025,12 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, struct sk_buff *skb = NULL; struct sta_info *sta; struct ieee80211_tx_data tx; - struct net_device *bdev; struct ieee80211_sub_if_data *sdata; struct ieee80211_if_ap *bss = NULL; struct beacon_data *beacon; struct ieee80211_tx_info *info; sdata = vif_to_sdata(vif); - bdev = sdata->dev; bss = &sdata->u.ap; if (!bss) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index cee4884b9d0..fb89e1d0aa0 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -239,7 +239,7 @@ __le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw, erp = 0; if (vif) { sdata = vif_to_sdata(vif); - short_preamble = sdata->bss_conf.use_short_preamble; + short_preamble = sdata->vif.bss_conf.use_short_preamble; if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) erp = rate->flags & IEEE80211_RATE_ERP_G; } @@ -272,7 +272,7 @@ __le16 ieee80211_rts_duration(struct ieee80211_hw *hw, erp = 0; if (vif) { sdata = vif_to_sdata(vif); - short_preamble = sdata->bss_conf.use_short_preamble; + short_preamble = sdata->vif.bss_conf.use_short_preamble; if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) erp = rate->flags & IEEE80211_RATE_ERP_G; } @@ -312,7 +312,7 @@ __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw, erp = 0; if (vif) { sdata = vif_to_sdata(vif); - short_preamble = sdata->bss_conf.use_short_preamble; + short_preamble = sdata->vif.bss_conf.use_short_preamble; if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) erp = rate->flags & IEEE80211_RATE_ERP_G; } @@ -330,10 +330,20 @@ __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw, } EXPORT_SYMBOL(ieee80211_ctstoself_duration); -void ieee80211_wake_queue(struct ieee80211_hw *hw, int queue) +static void __ieee80211_wake_queue(struct ieee80211_hw *hw, int queue, + enum queue_stop_reason reason) { struct ieee80211_local *local = hw_to_local(hw); + /* we don't need to track ampdu queues */ + if (queue < ieee80211_num_regular_queues(hw)) { + __clear_bit(reason, &local->queue_stop_reasons[queue]); + + if (local->queue_stop_reasons[queue] != 0) + /* someone still has this queue stopped */ + return; + } + if (test_bit(queue, local->queues_pending)) { set_bit(queue, local->queues_pending_run); tasklet_schedule(&local->tx_pending_tasklet); @@ -341,22 +351,74 @@ void ieee80211_wake_queue(struct ieee80211_hw *hw, int queue) netif_wake_subqueue(local->mdev, queue); } } + +void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue, + enum queue_stop_reason reason) +{ + struct ieee80211_local *local = hw_to_local(hw); + unsigned long flags; + + spin_lock_irqsave(&local->queue_stop_reason_lock, flags); + __ieee80211_wake_queue(hw, queue, reason); + spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); +} + +void ieee80211_wake_queue(struct ieee80211_hw *hw, int queue) +{ + ieee80211_wake_queue_by_reason(hw, queue, + IEEE80211_QUEUE_STOP_REASON_DRIVER); +} EXPORT_SYMBOL(ieee80211_wake_queue); -void ieee80211_stop_queue(struct ieee80211_hw *hw, int queue) +static void __ieee80211_stop_queue(struct ieee80211_hw *hw, int queue, + enum queue_stop_reason reason) { struct ieee80211_local *local = hw_to_local(hw); + /* we don't need to track ampdu queues */ + if (queue < ieee80211_num_regular_queues(hw)) + __set_bit(reason, &local->queue_stop_reasons[queue]); + netif_stop_subqueue(local->mdev, queue); } + +void ieee80211_stop_queue_by_reason(struct ieee80211_hw *hw, int queue, + enum queue_stop_reason reason) +{ + struct ieee80211_local *local = hw_to_local(hw); + unsigned long flags; + + spin_lock_irqsave(&local->queue_stop_reason_lock, flags); + __ieee80211_stop_queue(hw, queue, reason); + spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); +} + +void ieee80211_stop_queue(struct ieee80211_hw *hw, int queue) +{ + ieee80211_stop_queue_by_reason(hw, queue, + IEEE80211_QUEUE_STOP_REASON_DRIVER); +} EXPORT_SYMBOL(ieee80211_stop_queue); -void ieee80211_stop_queues(struct ieee80211_hw *hw) +void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw, + enum queue_stop_reason reason) { + struct ieee80211_local *local = hw_to_local(hw); + unsigned long flags; int i; + spin_lock_irqsave(&local->queue_stop_reason_lock, flags); + for (i = 0; i < ieee80211_num_queues(hw); i++) - ieee80211_stop_queue(hw, i); + __ieee80211_stop_queue(hw, i, reason); + + spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); +} + +void ieee80211_stop_queues(struct ieee80211_hw *hw) +{ + ieee80211_stop_queues_by_reason(hw, + IEEE80211_QUEUE_STOP_REASON_DRIVER); } EXPORT_SYMBOL(ieee80211_stop_queues); @@ -367,12 +429,24 @@ int ieee80211_queue_stopped(struct ieee80211_hw *hw, int queue) } EXPORT_SYMBOL(ieee80211_queue_stopped); -void ieee80211_wake_queues(struct ieee80211_hw *hw) +void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, + enum queue_stop_reason reason) { + struct ieee80211_local *local = hw_to_local(hw); + unsigned long flags; int i; + spin_lock_irqsave(&local->queue_stop_reason_lock, flags); + for (i = 0; i < hw->queues + hw->ampdu_queues; i++) - ieee80211_wake_queue(hw, i); + __ieee80211_wake_queue(hw, i, reason); + + spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); +} + +void ieee80211_wake_queues(struct ieee80211_hw *hw) +{ + ieee80211_wake_queues_by_reason(hw, IEEE80211_QUEUE_STOP_REASON_DRIVER); } EXPORT_SYMBOL(ieee80211_wake_queues); @@ -532,8 +606,8 @@ void ieee802_11_parse_elems(u8 *start, size_t len, if (elen >= sizeof(struct ieee80211_ht_cap)) elems->ht_cap_elem = (void *)pos; break; - case WLAN_EID_HT_EXTRA_INFO: - if (elen >= sizeof(struct ieee80211_ht_addt_info)) + case WLAN_EID_HT_INFORMATION: + if (elen >= sizeof(struct ieee80211_ht_info)) elems->ht_info_elem = (void *)pos; break; case WLAN_EID_MESH_ID: @@ -638,19 +712,16 @@ int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freqMHz) if (chan && !(chan->flags & IEEE80211_CHAN_DISABLED)) { if (sdata->vif.type == NL80211_IFTYPE_ADHOC && - chan->flags & IEEE80211_CHAN_NO_IBSS) { - printk(KERN_DEBUG "%s: IBSS not allowed on frequency " - "%d MHz\n", sdata->dev->name, chan->center_freq); + chan->flags & IEEE80211_CHAN_NO_IBSS) return ret; - } local->oper_channel = chan; + local->oper_channel_type = NL80211_CHAN_NO_HT; if (local->sw_scanning || local->hw_scanning) ret = 0; else - ret = ieee80211_hw_config(local); - - rate_control_clear(local); + ret = ieee80211_hw_config( + local, IEEE80211_CONF_CHANGE_CHANNEL); } return ret; diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index f0e2d3ecb5c..7043ddc7549 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -17,6 +17,7 @@ #include <linux/err.h> #include <linux/mm.h> #include <linux/scatterlist.h> +#include <asm/unaligned.h> #include <net/mac80211.h> #include "ieee80211_i.h" @@ -49,17 +50,19 @@ void ieee80211_wep_free(struct ieee80211_local *local) crypto_free_blkcipher(local->wep_rx_tfm); } -static inline int ieee80211_wep_weak_iv(u32 iv, int keylen) +static inline bool ieee80211_wep_weak_iv(u32 iv, int keylen) { - /* Fluhrer, Mantin, and Shamir have reported weaknesses in the + /* + * Fluhrer, Mantin, and Shamir have reported weaknesses in the * key scheduling algorithm of RC4. At least IVs (KeyByte + 3, - * 0xff, N) can be used to speedup attacks, so avoid using them. */ + * 0xff, N) can be used to speedup attacks, so avoid using them. + */ if ((iv & 0xff00) == 0xff00) { u8 B = (iv >> 16) & 0xff; if (B >= 3 && B < 3 + keylen) - return 1; + return true; } - return 0; + return false; } @@ -123,10 +126,10 @@ void ieee80211_wep_encrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key, { struct blkcipher_desc desc = { .tfm = tfm }; struct scatterlist sg; - __le32 *icv; + __le32 icv; - icv = (__le32 *)(data + data_len); - *icv = cpu_to_le32(~crc32_le(~0, data, data_len)); + icv = cpu_to_le32(~crc32_le(~0, data, data_len)); + put_unaligned(icv, (__le32 *)(data + data_len)); crypto_blkcipher_setkey(tfm, rc4key, klen); sg_init_one(&sg, data, data_len + WEP_ICV_LEN); @@ -268,7 +271,7 @@ int ieee80211_wep_decrypt(struct ieee80211_local *local, struct sk_buff *skb, } -u8 * ieee80211_wep_is_weak_iv(struct sk_buff *skb, struct ieee80211_key *key) +bool ieee80211_wep_is_weak_iv(struct sk_buff *skb, struct ieee80211_key *key) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; unsigned int hdrlen; @@ -276,16 +279,13 @@ u8 * ieee80211_wep_is_weak_iv(struct sk_buff *skb, struct ieee80211_key *key) u32 iv; if (!ieee80211_has_protected(hdr->frame_control)) - return NULL; + return false; hdrlen = ieee80211_hdrlen(hdr->frame_control); ivpos = skb->data + hdrlen; iv = (ivpos[0] << 16) | (ivpos[1] << 8) | ivpos[2]; - if (ieee80211_wep_weak_iv(iv, key->conf.keylen)) - return ivpos; - - return NULL; + return ieee80211_wep_weak_iv(iv, key->conf.keylen); } ieee80211_rx_result @@ -329,6 +329,8 @@ static int wep_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) ieee80211_tx_result ieee80211_crypto_wep_encrypt(struct ieee80211_tx_data *tx) { + int i; + ieee80211_tx_set_protected(tx); if (wep_encrypt_skb(tx, tx->skb) < 0) { @@ -337,9 +339,8 @@ ieee80211_crypto_wep_encrypt(struct ieee80211_tx_data *tx) } if (tx->extra_frag) { - int i; for (i = 0; i < tx->num_extra_frag; i++) { - if (wep_encrypt_skb(tx, tx->extra_frag[i]) < 0) { + if (wep_encrypt_skb(tx, tx->extra_frag[i])) { I802_DEBUG_INC(tx->local-> tx_handlers_drop_wep); return TX_DROP; diff --git a/net/mac80211/wep.h b/net/mac80211/wep.h index e587172115b..d3f0db48314 100644 --- a/net/mac80211/wep.h +++ b/net/mac80211/wep.h @@ -26,7 +26,7 @@ int ieee80211_wep_encrypt(struct ieee80211_local *local, struct sk_buff *skb, struct ieee80211_key *key); int ieee80211_wep_decrypt(struct ieee80211_local *local, struct sk_buff *skb, struct ieee80211_key *key); -u8 *ieee80211_wep_is_weak_iv(struct sk_buff *skb, struct ieee80211_key *key); +bool ieee80211_wep_is_weak_iv(struct sk_buff *skb, struct ieee80211_key *key); ieee80211_rx_result ieee80211_crypto_wep_decrypt(struct ieee80211_rx_data *rx); diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index ab4ddba874b..7162d5816f3 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -135,48 +135,6 @@ static int ieee80211_ioctl_siwgenie(struct net_device *dev, return -EOPNOTSUPP; } -static int ieee80211_ioctl_giwname(struct net_device *dev, - struct iw_request_info *info, - char *name, char *extra) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_supported_band *sband; - u8 is_ht = 0, is_a = 0, is_b = 0, is_g = 0; - - - sband = local->hw.wiphy->bands[IEEE80211_BAND_5GHZ]; - if (sband) { - is_a = 1; - is_ht |= sband->ht_info.ht_supported; - } - - sband = local->hw.wiphy->bands[IEEE80211_BAND_2GHZ]; - if (sband) { - int i; - /* Check for mandatory rates */ - for (i = 0; i < sband->n_bitrates; i++) { - if (sband->bitrates[i].bitrate == 10) - is_b = 1; - if (sband->bitrates[i].bitrate == 60) - is_g = 1; - } - is_ht |= sband->ht_info.ht_supported; - } - - strcpy(name, "IEEE 802.11"); - if (is_a) - strcat(name, "a"); - if (is_b) - strcat(name, "b"); - if (is_g) - strcat(name, "g"); - if (is_ht) - strcat(name, "n"); - - return 0; -} - - static int ieee80211_ioctl_giwrange(struct net_device *dev, struct iw_request_info *info, struct iw_point *data, char *extra) @@ -266,78 +224,6 @@ static int ieee80211_ioctl_giwrange(struct net_device *dev, } -static int ieee80211_ioctl_siwmode(struct net_device *dev, - struct iw_request_info *info, - __u32 *mode, char *extra) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_local *local = sdata->local; - int type; - - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) - return -EOPNOTSUPP; - - switch (*mode) { - case IW_MODE_INFRA: - type = NL80211_IFTYPE_STATION; - break; - case IW_MODE_ADHOC: - /* Setting ad-hoc mode on non ibss channel is not - * supported. - */ - if (local->oper_channel && - (local->oper_channel->flags & IEEE80211_CHAN_NO_IBSS)) - return -EOPNOTSUPP; - - type = NL80211_IFTYPE_ADHOC; - break; - case IW_MODE_REPEAT: - type = NL80211_IFTYPE_WDS; - break; - case IW_MODE_MONITOR: - type = NL80211_IFTYPE_MONITOR; - break; - default: - return -EINVAL; - } - - return ieee80211_if_change_type(sdata, type); -} - - -static int ieee80211_ioctl_giwmode(struct net_device *dev, - struct iw_request_info *info, - __u32 *mode, char *extra) -{ - struct ieee80211_sub_if_data *sdata; - - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - switch (sdata->vif.type) { - case NL80211_IFTYPE_AP: - *mode = IW_MODE_MASTER; - break; - case NL80211_IFTYPE_STATION: - *mode = IW_MODE_INFRA; - break; - case NL80211_IFTYPE_ADHOC: - *mode = IW_MODE_ADHOC; - break; - case NL80211_IFTYPE_MONITOR: - *mode = IW_MODE_MONITOR; - break; - case NL80211_IFTYPE_WDS: - *mode = IW_MODE_REPEAT; - break; - case NL80211_IFTYPE_AP_VLAN: - *mode = IW_MODE_SECOND; /* FIXME */ - break; - default: - *mode = IW_MODE_AUTO; - break; - } - return 0; -} - static int ieee80211_ioctl_siwfreq(struct net_device *dev, struct iw_request_info *info, struct iw_freq *freq, char *extra) @@ -415,13 +301,6 @@ static int ieee80211_ioctl_siwessid(struct net_device *dev, return 0; } - if (sdata->vif.type == NL80211_IFTYPE_AP) { - memcpy(sdata->u.ap.ssid, ssid, len); - memset(sdata->u.ap.ssid + len, 0, - IEEE80211_MAX_SSID_LEN - len); - sdata->u.ap.ssid_len = len; - return ieee80211_if_config(sdata, IEEE80211_IFCC_SSID); - } return -EOPNOTSUPP; } @@ -445,15 +324,6 @@ static int ieee80211_ioctl_giwessid(struct net_device *dev, return res; } - if (sdata->vif.type == NL80211_IFTYPE_AP) { - len = sdata->u.ap.ssid_len; - if (len > IW_ESSID_MAX_SIZE) - len = IW_ESSID_MAX_SIZE; - memcpy(ssid, sdata->u.ap.ssid, len); - data->length = len; - data->flags = 1; - return 0; - } return -EOPNOTSUPP; } @@ -548,8 +418,7 @@ static int ieee80211_ioctl_siwscan(struct net_device *dev, if (sdata->vif.type != NL80211_IFTYPE_STATION && sdata->vif.type != NL80211_IFTYPE_ADHOC && - sdata->vif.type != NL80211_IFTYPE_MESH_POINT && - sdata->vif.type != NL80211_IFTYPE_AP) + sdata->vif.type != NL80211_IFTYPE_MESH_POINT) return -EOPNOTSUPP; /* if SSID was specified explicitly then use that */ @@ -644,8 +513,8 @@ static int ieee80211_ioctl_giwrate(struct net_device *dev, sta = sta_info_get(local, sdata->u.sta.bssid); - if (sta && sta->last_txrate_idx < sband->n_bitrates) - rate->value = sband->bitrates[sta->last_txrate_idx].bitrate; + if (sta && !(sta->last_tx_rate.flags & IEEE80211_TX_RC_MCS)) + rate->value = sband->bitrates[sta->last_tx_rate.idx].bitrate; else rate->value = 0; @@ -664,45 +533,35 @@ static int ieee80211_ioctl_siwtxpower(struct net_device *dev, union iwreq_data *data, char *extra) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - bool need_reconfig = 0; + struct ieee80211_channel* chan = local->hw.conf.channel; + u32 reconf_flags = 0; int new_power_level; if ((data->txpower.flags & IW_TXPOW_TYPE) != IW_TXPOW_DBM) return -EINVAL; if (data->txpower.flags & IW_TXPOW_RANGE) return -EINVAL; + if (!chan) + return -EINVAL; - if (data->txpower.fixed) { - new_power_level = data->txpower.value; - } else { - /* - * Automatic power level. Use maximum power for the current - * channel. Should be part of rate control. - */ - struct ieee80211_channel* chan = local->hw.conf.channel; - if (!chan) - return -EINVAL; - + if (data->txpower.fixed) + new_power_level = min(data->txpower.value, chan->max_power); + else /* Automatic power level setting */ new_power_level = chan->max_power; - } if (local->hw.conf.power_level != new_power_level) { local->hw.conf.power_level = new_power_level; - need_reconfig = 1; + reconf_flags |= IEEE80211_CONF_CHANGE_POWER; } if (local->hw.conf.radio_enabled != !(data->txpower.disabled)) { local->hw.conf.radio_enabled = !(data->txpower.disabled); - need_reconfig = 1; + reconf_flags |= IEEE80211_CONF_CHANGE_RADIO_ENABLED; ieee80211_led_radio(local, local->hw.conf.radio_enabled); } - if (need_reconfig) { - ieee80211_hw_config(local); - /* The return value of hw_config is not of big interest here, - * as it doesn't say that it failed because of _this_ config - * change or something else. Ignore it. */ - } + if (reconf_flags) + ieee80211_hw_config(local, reconf_flags); return 0; } @@ -779,14 +638,6 @@ static int ieee80211_ioctl_siwfrag(struct net_device *dev, local->fragmentation_threshold = frag->value & ~0x1; } - /* If the wlan card performs fragmentation in hardware/firmware, - * configure it here */ - - if (local->ops->set_frag_threshold) - return local->ops->set_frag_threshold( - local_to_hw(local), - local->fragmentation_threshold); - return 0; } @@ -814,21 +665,16 @@ static int ieee80211_ioctl_siwretry(struct net_device *dev, (retry->flags & IW_RETRY_TYPE) != IW_RETRY_LIMIT) return -EINVAL; - if (retry->flags & IW_RETRY_MAX) - local->long_retry_limit = retry->value; - else if (retry->flags & IW_RETRY_MIN) - local->short_retry_limit = retry->value; - else { - local->long_retry_limit = retry->value; - local->short_retry_limit = retry->value; + if (retry->flags & IW_RETRY_MAX) { + local->hw.conf.long_frame_max_tx_count = retry->value; + } else if (retry->flags & IW_RETRY_MIN) { + local->hw.conf.short_frame_max_tx_count = retry->value; + } else { + local->hw.conf.long_frame_max_tx_count = retry->value; + local->hw.conf.short_frame_max_tx_count = retry->value; } - if (local->ops->set_retry_limit) { - return local->ops->set_retry_limit( - local_to_hw(local), - local->short_retry_limit, - local->long_retry_limit); - } + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_RETRY_LIMITS); return 0; } @@ -845,14 +691,15 @@ static int ieee80211_ioctl_giwretry(struct net_device *dev, /* first return min value, iwconfig will ask max value * later if needed */ retry->flags |= IW_RETRY_LIMIT; - retry->value = local->short_retry_limit; - if (local->long_retry_limit != local->short_retry_limit) + retry->value = local->hw.conf.short_frame_max_tx_count; + if (local->hw.conf.long_frame_max_tx_count != + local->hw.conf.short_frame_max_tx_count) retry->flags |= IW_RETRY_MIN; return 0; } if (retry->flags & IW_RETRY_MAX) { retry->flags = IW_RETRY_LIMIT | IW_RETRY_MAX; - retry->value = local->long_retry_limit; + retry->value = local->hw.conf.long_frame_max_tx_count; } return 0; @@ -983,25 +830,56 @@ static int ieee80211_ioctl_siwpower(struct net_device *dev, struct iw_param *wrq, char *extra) { + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_conf *conf = &local->hw.conf; + int ret = 0, timeout = 0; + bool ps; + + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return -EINVAL; if (wrq->disabled) { - conf->flags &= ~IEEE80211_CONF_PS; - return ieee80211_hw_config(local); + ps = false; + timeout = 0; + goto set; } switch (wrq->flags & IW_POWER_MODE) { case IW_POWER_ON: /* If not specified */ case IW_POWER_MODE: /* If set all mask */ case IW_POWER_ALL_R: /* If explicitely state all */ - conf->flags |= IEEE80211_CONF_PS; + ps = true; break; - default: /* Otherwise we don't support it */ - return -EINVAL; + default: /* Otherwise we ignore */ + break; + } + + if (wrq->flags & IW_POWER_TIMEOUT) + timeout = wrq->value / 1000; + +set: + if (ps == local->powersave && timeout == local->dynamic_ps_timeout) + return ret; + + local->powersave = ps; + local->dynamic_ps_timeout = timeout; + + if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) { + if (!(local->hw.flags & IEEE80211_HW_NO_STACK_DYNAMIC_PS) && + local->dynamic_ps_timeout > 0) + mod_timer(&local->dynamic_ps_timer, jiffies + + msecs_to_jiffies(local->dynamic_ps_timeout)); + else { + if (local->powersave) + conf->flags |= IEEE80211_CONF_PS; + else + conf->flags &= ~IEEE80211_CONF_PS; + } + ret = ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); } - return ieee80211_hw_config(local); + return ret; } static int ieee80211_ioctl_giwpower(struct net_device *dev, @@ -1010,9 +888,8 @@ static int ieee80211_ioctl_giwpower(struct net_device *dev, char *extra) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_conf *conf = &local->hw.conf; - wrqu->power.disabled = !(conf->flags & IEEE80211_CONF_PS); + wrqu->power.disabled = !local->powersave; return 0; } @@ -1176,13 +1053,13 @@ static int ieee80211_ioctl_siwencodeext(struct net_device *dev, static const iw_handler ieee80211_handler[] = { (iw_handler) NULL, /* SIOCSIWCOMMIT */ - (iw_handler) ieee80211_ioctl_giwname, /* SIOCGIWNAME */ + (iw_handler) cfg80211_wext_giwname, /* SIOCGIWNAME */ (iw_handler) NULL, /* SIOCSIWNWID */ (iw_handler) NULL, /* SIOCGIWNWID */ (iw_handler) ieee80211_ioctl_siwfreq, /* SIOCSIWFREQ */ (iw_handler) ieee80211_ioctl_giwfreq, /* SIOCGIWFREQ */ - (iw_handler) ieee80211_ioctl_siwmode, /* SIOCSIWMODE */ - (iw_handler) ieee80211_ioctl_giwmode, /* SIOCGIWMODE */ + (iw_handler) cfg80211_wext_siwmode, /* SIOCSIWMODE */ + (iw_handler) cfg80211_wext_giwmode, /* SIOCGIWMODE */ (iw_handler) NULL, /* SIOCSIWSENS */ (iw_handler) NULL, /* SIOCGIWSENS */ (iw_handler) NULL /* not used */, /* SIOCSIWRANGE */ diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 139b5f267b3..ac71b38f7cb 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -114,8 +114,8 @@ u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb) { struct ieee80211_master_priv *mpriv = netdev_priv(dev); struct ieee80211_local *local = mpriv->local; + struct ieee80211_hw *hw = &local->hw; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct sta_info *sta; u16 queue; u8 tid; @@ -124,21 +124,19 @@ u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb) if (unlikely(queue >= local->hw.queues)) queue = local->hw.queues - 1; - if (info->flags & IEEE80211_TX_CTL_REQUEUE) { + if (skb->requeue) { + if (!hw->ampdu_queues) + return queue; + rcu_read_lock(); sta = sta_info_get(local, hdr->addr1); tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; if (sta) { - struct ieee80211_hw *hw = &local->hw; int ampdu_queue = sta->tid_to_tx_q[tid]; if ((ampdu_queue < ieee80211_num_queues(hw)) && - test_bit(ampdu_queue, local->queue_pool)) { + test_bit(ampdu_queue, local->queue_pool)) queue = ampdu_queue; - info->flags |= IEEE80211_TX_CTL_AMPDU; - } else { - info->flags &= ~IEEE80211_TX_CTL_AMPDU; - } } rcu_read_unlock(); @@ -159,20 +157,18 @@ u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb) *p++ = ack_policy | tid; *p = 0; + if (!hw->ampdu_queues) + return queue; + rcu_read_lock(); sta = sta_info_get(local, hdr->addr1); if (sta) { int ampdu_queue = sta->tid_to_tx_q[tid]; - struct ieee80211_hw *hw = &local->hw; if ((ampdu_queue < ieee80211_num_queues(hw)) && - test_bit(ampdu_queue, local->queue_pool)) { + test_bit(ampdu_queue, local->queue_pool)) queue = ampdu_queue; - info->flags |= IEEE80211_TX_CTL_AMPDU; - } else { - info->flags &= ~IEEE80211_TX_CTL_AMPDU; - } } rcu_read_unlock(); @@ -206,13 +202,11 @@ int ieee80211_ht_agg_queue_add(struct ieee80211_local *local, * on the previous queue * since HT is strict in order */ #ifdef CONFIG_MAC80211_HT_DEBUG - if (net_ratelimit()) { - DECLARE_MAC_BUF(mac); + if (net_ratelimit()) printk(KERN_DEBUG "allocated aggregation queue" - " %d tid %d addr %s pool=0x%lX\n", - i, tid, print_mac(mac, sta->sta.addr), + " %d tid %d addr %pM pool=0x%lX\n", + i, tid, sta->sta.addr, local->queue_pool[0]); - } #endif /* CONFIG_MAC80211_HT_DEBUG */ return 0; } diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 6db649480e8..7aa63caf8d5 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -49,8 +49,7 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) !(tx->flags & IEEE80211_TX_FRAGMENTED) && !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) && !wpa_test) { - /* hwaccel - with no need for preallocated room for Michael MIC - */ + /* hwaccel - with no need for preallocated room for MMIC */ return TX_CONTINUE; } @@ -67,8 +66,6 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) #else authenticator = 1; #endif - /* At this point we know we're using ALG_TKIP. To get the MIC key - * we now will rely on the offset from the ieee80211_key_conf::key */ key_offset = authenticator ? NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY : NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY; @@ -90,11 +87,8 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx) u8 mic[MICHAEL_MIC_LEN]; struct sk_buff *skb = rx->skb; int authenticator = 1, wpa_test = 0; - DECLARE_MAC_BUF(mac); - /* - * No way to verify the MIC if the hardware stripped it - */ + /* No way to verify the MIC if the hardware stripped it */ if (rx->status->flag & RX_FLAG_MMIC_STRIPPED) return RX_CONTINUE; @@ -116,8 +110,6 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx) #else authenticator = 1; #endif - /* At this point we know we're using ALG_TKIP. To get the MIC key - * we now will rely on the offset from the ieee80211_key_conf::key */ key_offset = authenticator ? NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY : NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY; @@ -202,6 +194,7 @@ ieee80211_tx_result ieee80211_crypto_tkip_encrypt(struct ieee80211_tx_data *tx) { struct sk_buff *skb = tx->skb; + int i; ieee80211_tx_set_protected(tx); @@ -209,9 +202,8 @@ ieee80211_crypto_tkip_encrypt(struct ieee80211_tx_data *tx) return TX_DROP; if (tx->extra_frag) { - int i; for (i = 0; i < tx->num_extra_frag; i++) { - if (tkip_encrypt_skb(tx, tx->extra_frag[i]) < 0) + if (tkip_encrypt_skb(tx, tx->extra_frag[i])) return TX_DROP; } } @@ -227,7 +219,6 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx) int hdrlen, res, hwaccel = 0, wpa_test = 0; struct ieee80211_key *key = rx->key; struct sk_buff *skb = rx->skb; - DECLARE_MAC_BUF(mac); hdrlen = ieee80211_hdrlen(hdr->frame_control); @@ -350,7 +341,7 @@ static inline void ccmp_pn2hdr(u8 *hdr, u8 *pn, int key_id) } -static inline int ccmp_hdr2pn(u8 *pn, u8 *hdr) +static inline void ccmp_hdr2pn(u8 *pn, u8 *hdr) { pn[0] = hdr[7]; pn[1] = hdr[6]; @@ -358,7 +349,6 @@ static inline int ccmp_hdr2pn(u8 *pn, u8 *hdr) pn[3] = hdr[4]; pn[4] = hdr[1]; pn[5] = hdr[0]; - return (hdr[3] >> 6) & 0x03; } @@ -373,7 +363,7 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) if ((tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) && !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { - /* hwaccel - with no need for preallocated room for CCMP " + /* hwaccel - with no need for preallocated room for CCMP * header or MIC fields */ info->control.hw_key = &tx->key->conf; return 0; @@ -426,6 +416,7 @@ ieee80211_tx_result ieee80211_crypto_ccmp_encrypt(struct ieee80211_tx_data *tx) { struct sk_buff *skb = tx->skb; + int i; ieee80211_tx_set_protected(tx); @@ -433,9 +424,8 @@ ieee80211_crypto_ccmp_encrypt(struct ieee80211_tx_data *tx) return TX_DROP; if (tx->extra_frag) { - int i; for (i = 0; i < tx->num_extra_frag; i++) { - if (ccmp_encrypt_skb(tx, tx->extra_frag[i]) < 0) + if (ccmp_encrypt_skb(tx, tx->extra_frag[i])) return TX_DROP; } } @@ -453,7 +443,6 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) struct sk_buff *skb = rx->skb; u8 pn[CCMP_PN_LEN]; int data_len; - DECLARE_MAC_BUF(mac); hdrlen = ieee80211_hdrlen(hdr->frame_control); @@ -468,7 +457,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) (rx->status->flag & RX_FLAG_IV_STRIPPED)) return RX_CONTINUE; - (void) ccmp_hdr2pn(pn, skb->data + hdrlen); + ccmp_hdr2pn(pn, skb->data + hdrlen); if (memcmp(pn, key->u.ccmp.rx_pn[rx->queue], CCMP_PN_LEN) <= 0) { key->u.ccmp.replays++; @@ -483,9 +472,8 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) key->u.ccmp.tfm, key->u.ccmp.rx_crypto_buf, skb->data + hdrlen + CCMP_HDR_LEN, data_len, skb->data + skb->len - CCMP_MIC_LEN, - skb->data + hdrlen + CCMP_HDR_LEN)) { + skb->data + hdrlen + CCMP_HDR_LEN)) return RX_DROP_UNUSABLE; - } } memcpy(key->u.ccmp.rx_pn[rx->queue], pn, CCMP_PN_LEN); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 25dcef9f219..c2bac9cd0ca 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -373,11 +373,10 @@ config NETFILTER_XT_TARGET_MARK config NETFILTER_XT_TARGET_NFLOG tristate '"NFLOG" target support' default m if NETFILTER_ADVANCED=n + select NETFILTER_NETLINK_LOG help This option enables the NFLOG target, which allows to LOG - messages through the netfilter logging API, which can use - either the old LOG target, the old ULOG target or nfnetlink_log - as backend. + messages through nfnetlink_log. To compile it as a module, choose M here. If unsure, say N. diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 9a24332fbed..60aba45023f 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -820,13 +820,11 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v) #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6) - seq_printf(seq, - "%-3s " NIP6_FMT " %04X " NIP6_FMT - " %04X " NIP6_FMT " %04X %-11s %7lu\n", + seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X %pI6 %04X %-11s %7lu\n", ip_vs_proto_name(cp->protocol), - NIP6(cp->caddr.in6), ntohs(cp->cport), - NIP6(cp->vaddr.in6), ntohs(cp->vport), - NIP6(cp->daddr.in6), ntohs(cp->dport), + &cp->caddr.in6, ntohs(cp->cport), + &cp->vaddr.in6, ntohs(cp->vport), + &cp->daddr.in6, ntohs(cp->dport), ip_vs_state_name(cp->protocol, cp->state), (cp->timer.expires-jiffies)/HZ); else @@ -883,13 +881,11 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v) #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6) - seq_printf(seq, - "%-3s " NIP6_FMT " %04X " NIP6_FMT - " %04X " NIP6_FMT " %04X %-11s %-6s %7lu\n", + seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X %pI6 %04X %-11s %-6s %7lu\n", ip_vs_proto_name(cp->protocol), - NIP6(cp->caddr.in6), ntohs(cp->cport), - NIP6(cp->vaddr.in6), ntohs(cp->vport), - NIP6(cp->daddr.in6), ntohs(cp->dport), + &cp->caddr.in6, ntohs(cp->cport), + &cp->vaddr.in6, ntohs(cp->vport), + &cp->daddr.in6, ntohs(cp->dport), ip_vs_state_name(cp->protocol, cp->state), ip_vs_origin_name(cp->flags), (cp->timer.expires-jiffies)/HZ); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 958abf3e5f8..cb3e031335e 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -730,9 +730,9 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related) if (ic == NULL) return NF_DROP; - IP_VS_DBG(12, "Outgoing ICMP (%d,%d) %u.%u.%u.%u->%u.%u.%u.%u\n", + IP_VS_DBG(12, "Outgoing ICMP (%d,%d) %pI4->%pI4\n", ic->type, ntohs(icmp_id(ic)), - NIPQUAD(iph->saddr), NIPQUAD(iph->daddr)); + &iph->saddr, &iph->daddr); /* * Work through seeing if this is for us. @@ -805,9 +805,9 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related) if (ic == NULL) return NF_DROP; - IP_VS_DBG(12, "Outgoing ICMPv6 (%d,%d) " NIP6_FMT "->" NIP6_FMT "\n", + IP_VS_DBG(12, "Outgoing ICMPv6 (%d,%d) %pI6->%pI6\n", ic->icmp6_type, ntohs(icmpv6_id(ic)), - NIP6(iph->saddr), NIP6(iph->daddr)); + &iph->saddr, &iph->daddr); /* * Work through seeing if this is for us. @@ -1070,9 +1070,9 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) if (ic == NULL) return NF_DROP; - IP_VS_DBG(12, "Incoming ICMP (%d,%d) %u.%u.%u.%u->%u.%u.%u.%u\n", + IP_VS_DBG(12, "Incoming ICMP (%d,%d) %pI4->%pI4\n", ic->type, ntohs(icmp_id(ic)), - NIPQUAD(iph->saddr), NIPQUAD(iph->daddr)); + &iph->saddr, &iph->daddr); /* * Work through seeing if this is for us. @@ -1127,8 +1127,8 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) /* Ensure the checksum is correct */ if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) { /* Failed checksum! */ - IP_VS_DBG(1, "Incoming ICMP: failed checksum from %d.%d.%d.%d!\n", - NIPQUAD(iph->saddr)); + IP_VS_DBG(1, "Incoming ICMP: failed checksum from %pI4!\n", + &iph->saddr); goto out; } @@ -1175,9 +1175,9 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) if (ic == NULL) return NF_DROP; - IP_VS_DBG(12, "Incoming ICMPv6 (%d,%d) " NIP6_FMT "->" NIP6_FMT "\n", + IP_VS_DBG(12, "Incoming ICMPv6 (%d,%d) %pI6->%pI6\n", ic->icmp6_type, ntohs(icmpv6_id(ic)), - NIP6(iph->saddr), NIP6(iph->daddr)); + &iph->saddr, &iph->daddr); /* * Work through seeing if this is for us. diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 0302cf3e503..e01061f49cd 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1168,15 +1168,9 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u, } #ifdef CONFIG_IP_VS_IPV6 - if (u->af == AF_INET6) { - if (!sched->supports_ipv6) { - ret = -EAFNOSUPPORT; - goto out_err; - } - if ((u->netmask < 1) || (u->netmask > 128)) { - ret = -EINVAL; - goto out_err; - } + if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) { + ret = -EINVAL; + goto out_err; } #endif @@ -1272,15 +1266,9 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) old_sched = sched; #ifdef CONFIG_IP_VS_IPV6 - if (u->af == AF_INET6) { - if (!sched->supports_ipv6) { - ret = -EAFNOSUPPORT; - goto out; - } - if ((u->netmask < 1) || (u->netmask > 128)) { - ret = -EINVAL; - goto out; - } + if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) { + ret = -EINVAL; + goto out; } #endif @@ -1557,7 +1545,7 @@ static struct ctl_table vs_vars[] = { .data = &sysctl_ip_vs_amemthresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, #ifdef CONFIG_IP_VS_DEBUG { @@ -1565,7 +1553,7 @@ static struct ctl_table vs_vars[] = { .data = &sysctl_ip_vs_debug_level, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, #endif { @@ -1573,28 +1561,28 @@ static struct ctl_table vs_vars[] = { .data = &sysctl_ip_vs_am_droprate, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .procname = "drop_entry", .data = &sysctl_ip_vs_drop_entry, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_do_defense_mode, + .proc_handler = proc_do_defense_mode, }, { .procname = "drop_packet", .data = &sysctl_ip_vs_drop_packet, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_do_defense_mode, + .proc_handler = proc_do_defense_mode, }, { .procname = "secure_tcp", .data = &sysctl_ip_vs_secure_tcp, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_do_defense_mode, + .proc_handler = proc_do_defense_mode, }, #if 0 { @@ -1602,84 +1590,84 @@ static struct ctl_table vs_vars[] = { .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED], .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "timeout_synsent", .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT], .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "timeout_synrecv", .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV], .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "timeout_finwait", .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT], .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "timeout_timewait", .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT], .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "timeout_close", .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE], .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "timeout_closewait", .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT], .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "timeout_lastack", .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK], .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "timeout_listen", .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN], .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "timeout_synack", .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK], .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "timeout_udp", .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP], .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "timeout_icmp", .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP], .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, #endif { @@ -1687,35 +1675,35 @@ static struct ctl_table vs_vars[] = { .data = &sysctl_ip_vs_cache_bypass, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .procname = "expire_nodest_conn", .data = &sysctl_ip_vs_expire_nodest_conn, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .procname = "expire_quiescent_template", .data = &sysctl_ip_vs_expire_quiescent_template, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .procname = "sync_threshold", .data = &sysctl_ip_vs_sync_threshold, .maxlen = sizeof(sysctl_ip_vs_sync_threshold), .mode = 0644, - .proc_handler = &proc_do_sync_threshold, + .proc_handler = proc_do_sync_threshold, }, { .procname = "nat_icmp_send", .data = &sysctl_ip_vs_nat_icmp_send, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = 0 } }; @@ -1867,9 +1855,9 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) if (iter->table == ip_vs_svc_table) { #ifdef CONFIG_IP_VS_IPV6 if (svc->af == AF_INET6) - seq_printf(seq, "%s [" NIP6_FMT "]:%04X %s ", + seq_printf(seq, "%s [%pI6]:%04X %s ", ip_vs_proto_name(svc->protocol), - NIP6(svc->addr.in6), + &svc->addr.in6, ntohs(svc->port), svc->scheduler->name); else @@ -1895,9 +1883,9 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) #ifdef CONFIG_IP_VS_IPV6 if (dest->af == AF_INET6) seq_printf(seq, - " -> [" NIP6_FMT "]:%04X" + " -> [%pI6]:%04X" " %-7s %-6d %-10d %-10d\n", - NIP6(dest->addr.in6), + &dest->addr.in6, ntohs(dest->port), ip_vs_fwd_name(atomic_read(&dest->conn_flags)), atomic_read(&dest->weight), @@ -2141,8 +2129,8 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */ if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) { - IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n", - usvc.protocol, NIPQUAD(usvc.addr.ip), + IP_VS_ERR("set_ctl: invalid protocol: %d %pI4:%d %s\n", + usvc.protocol, &usvc.addr.ip, ntohs(usvc.port), usvc.sched_name); ret = -EFAULT; goto out_unlock; diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c index a16943fd72f..a9dac74bb13 100644 --- a/net/netfilter/ipvs/ip_vs_dh.c +++ b/net/netfilter/ipvs/ip_vs_dh.c @@ -64,9 +64,16 @@ struct ip_vs_dh_bucket { /* * Returns hash value for IPVS DH entry */ -static inline unsigned ip_vs_dh_hashkey(__be32 addr) +static inline unsigned ip_vs_dh_hashkey(int af, const union nf_inet_addr *addr) { - return (ntohl(addr)*2654435761UL) & IP_VS_DH_TAB_MASK; + __be32 addr_fold = addr->ip; + +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + addr_fold = addr->ip6[0]^addr->ip6[1]^ + addr->ip6[2]^addr->ip6[3]; +#endif + return (ntohl(addr_fold)*2654435761UL) & IP_VS_DH_TAB_MASK; } @@ -74,9 +81,10 @@ static inline unsigned ip_vs_dh_hashkey(__be32 addr) * Get ip_vs_dest associated with supplied parameters. */ static inline struct ip_vs_dest * -ip_vs_dh_get(struct ip_vs_dh_bucket *tbl, __be32 addr) +ip_vs_dh_get(int af, struct ip_vs_dh_bucket *tbl, + const union nf_inet_addr *addr) { - return (tbl[ip_vs_dh_hashkey(addr)]).dest; + return (tbl[ip_vs_dh_hashkey(af, addr)]).dest; } @@ -202,12 +210,14 @@ ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) { struct ip_vs_dest *dest; struct ip_vs_dh_bucket *tbl; - struct iphdr *iph = ip_hdr(skb); + struct ip_vs_iphdr iph; + + ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); IP_VS_DBG(6, "ip_vs_dh_schedule(): Scheduling...\n"); tbl = (struct ip_vs_dh_bucket *)svc->sched_data; - dest = ip_vs_dh_get(tbl, iph->daddr); + dest = ip_vs_dh_get(svc->af, tbl, &iph.daddr); if (!dest || !(dest->flags & IP_VS_DEST_F_AVAILABLE) || atomic_read(&dest->weight) <= 0 @@ -215,11 +225,10 @@ ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) return NULL; } - IP_VS_DBG(6, "DH: destination IP address %u.%u.%u.%u " - "--> server %u.%u.%u.%u:%d\n", - NIPQUAD(iph->daddr), - NIPQUAD(dest->addr.ip), - ntohs(dest->port)); + IP_VS_DBG_BUF(6, "DH: destination IP address %s --> server %s:%d\n", + IP_VS_DBG_ADDR(svc->af, &iph.daddr), + IP_VS_DBG_ADDR(svc->af, &dest->addr), + ntohs(dest->port)); return dest; } @@ -234,9 +243,6 @@ static struct ip_vs_scheduler ip_vs_dh_scheduler = .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list), -#ifdef CONFIG_IP_VS_IPV6 - .supports_ipv6 = 0, -#endif .init_service = ip_vs_dh_init_svc, .done_service = ip_vs_dh_done_svc, .update_service = ip_vs_dh_update_svc, diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 2e7dbd8b73a..428edbf481c 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -178,10 +178,8 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, &start, &end) != 1) return 1; - IP_VS_DBG(7, "PASV response (%u.%u.%u.%u:%d) -> " - "%u.%u.%u.%u:%d detected\n", - NIPQUAD(from.ip), ntohs(port), - NIPQUAD(cp->caddr.ip), 0); + IP_VS_DBG(7, "PASV response (%pI4:%d) -> %pI4:%d detected\n", + &from.ip, ntohs(port), &cp->caddr.ip, 0); /* * Now update or create an connection entry for it @@ -312,8 +310,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, &start, &end) != 1) return 1; - IP_VS_DBG(7, "PORT %u.%u.%u.%u:%d detected\n", - NIPQUAD(to.ip), ntohs(port)); + IP_VS_DBG(7, "PORT %pI4:%d detected\n", &to.ip, ntohs(port)); /* Passive mode off */ cp->app_data = NULL; @@ -321,9 +318,9 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, /* * Now update or create a connection entry for it */ - IP_VS_DBG(7, "protocol %s %u.%u.%u.%u:%d %u.%u.%u.%u:%d\n", + IP_VS_DBG(7, "protocol %s %pI4:%d %pI4:%d\n", ip_vs_proto_name(iph->protocol), - NIPQUAD(to.ip), ntohs(port), NIPQUAD(cp->vaddr.ip), 0); + &to.ip, ntohs(port), &cp->vaddr.ip, 0); n_cp = ip_vs_conn_in_get(AF_INET, iph->protocol, &to, port, diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 6ecef3518ca..9394f539966 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -86,7 +86,8 @@ static int sysctl_ip_vs_lblc_expiration = 24*60*60*HZ; */ struct ip_vs_lblc_entry { struct list_head list; - __be32 addr; /* destination IP address */ + int af; /* address family */ + union nf_inet_addr addr; /* destination IP address */ struct ip_vs_dest *dest; /* real server (cache) */ unsigned long lastuse; /* last used time */ }; @@ -115,7 +116,7 @@ static ctl_table vs_vars_table[] = { .data = &sysctl_ip_vs_lblc_expiration, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = 0 } }; @@ -137,9 +138,17 @@ static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) /* * Returns hash value for IPVS LBLC entry */ -static inline unsigned ip_vs_lblc_hashkey(__be32 addr) +static inline unsigned +ip_vs_lblc_hashkey(int af, const union nf_inet_addr *addr) { - return (ntohl(addr)*2654435761UL) & IP_VS_LBLC_TAB_MASK; + __be32 addr_fold = addr->ip; + +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + addr_fold = addr->ip6[0]^addr->ip6[1]^ + addr->ip6[2]^addr->ip6[3]; +#endif + return (ntohl(addr_fold)*2654435761UL) & IP_VS_LBLC_TAB_MASK; } @@ -150,7 +159,7 @@ static inline unsigned ip_vs_lblc_hashkey(__be32 addr) static void ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en) { - unsigned hash = ip_vs_lblc_hashkey(en->addr); + unsigned hash = ip_vs_lblc_hashkey(en->af, &en->addr); list_add(&en->list, &tbl->bucket[hash]); atomic_inc(&tbl->entries); @@ -162,13 +171,14 @@ ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en) * lock */ static inline struct ip_vs_lblc_entry * -ip_vs_lblc_get(struct ip_vs_lblc_table *tbl, __be32 addr) +ip_vs_lblc_get(int af, struct ip_vs_lblc_table *tbl, + const union nf_inet_addr *addr) { - unsigned hash = ip_vs_lblc_hashkey(addr); + unsigned hash = ip_vs_lblc_hashkey(af, addr); struct ip_vs_lblc_entry *en; list_for_each_entry(en, &tbl->bucket[hash], list) - if (en->addr == addr) + if (ip_vs_addr_equal(af, &en->addr, addr)) return en; return NULL; @@ -180,12 +190,12 @@ ip_vs_lblc_get(struct ip_vs_lblc_table *tbl, __be32 addr) * address to a server. Called under write lock. */ static inline struct ip_vs_lblc_entry * -ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, __be32 daddr, +ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr, struct ip_vs_dest *dest) { struct ip_vs_lblc_entry *en; - en = ip_vs_lblc_get(tbl, daddr); + en = ip_vs_lblc_get(dest->af, tbl, daddr); if (!en) { en = kmalloc(sizeof(*en), GFP_ATOMIC); if (!en) { @@ -193,7 +203,8 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, __be32 daddr, return NULL; } - en->addr = daddr; + en->af = dest->af; + ip_vs_addr_copy(dest->af, &en->addr, daddr); en->lastuse = jiffies; atomic_inc(&dest->refcnt); @@ -369,7 +380,7 @@ static int ip_vs_lblc_done_svc(struct ip_vs_service *svc) static inline struct ip_vs_dest * -__ip_vs_lblc_schedule(struct ip_vs_service *svc, struct iphdr *iph) +__ip_vs_lblc_schedule(struct ip_vs_service *svc) { struct ip_vs_dest *dest, *least; int loh, doh; @@ -420,12 +431,13 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc, struct iphdr *iph) } } - IP_VS_DBG(6, "LBLC: server %d.%d.%d.%d:%d " - "activeconns %d refcnt %d weight %d overhead %d\n", - NIPQUAD(least->addr.ip), ntohs(least->port), - atomic_read(&least->activeconns), - atomic_read(&least->refcnt), - atomic_read(&least->weight), loh); + IP_VS_DBG_BUF(6, "LBLC: server %s:%d " + "activeconns %d refcnt %d weight %d overhead %d\n", + IP_VS_DBG_ADDR(least->af, &least->addr), + ntohs(least->port), + atomic_read(&least->activeconns), + atomic_read(&least->refcnt), + atomic_read(&least->weight), loh); return least; } @@ -459,15 +471,17 @@ static struct ip_vs_dest * ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) { struct ip_vs_lblc_table *tbl = svc->sched_data; - struct iphdr *iph = ip_hdr(skb); + struct ip_vs_iphdr iph; struct ip_vs_dest *dest = NULL; struct ip_vs_lblc_entry *en; + ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); + IP_VS_DBG(6, "ip_vs_lblc_schedule(): Scheduling...\n"); /* First look in our cache */ read_lock(&svc->sched_lock); - en = ip_vs_lblc_get(tbl, iph->daddr); + en = ip_vs_lblc_get(svc->af, tbl, &iph.daddr); if (en) { /* We only hold a read lock, but this is atomic */ en->lastuse = jiffies; @@ -491,7 +505,7 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) goto out; /* No cache entry or it is invalid, time to schedule */ - dest = __ip_vs_lblc_schedule(svc, iph); + dest = __ip_vs_lblc_schedule(svc); if (!dest) { IP_VS_DBG(1, "no destination available\n"); return NULL; @@ -499,15 +513,13 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) /* If we fail to create a cache entry, we'll just use the valid dest */ write_lock(&svc->sched_lock); - ip_vs_lblc_new(tbl, iph->daddr, dest); + ip_vs_lblc_new(tbl, &iph.daddr, dest); write_unlock(&svc->sched_lock); out: - IP_VS_DBG(6, "LBLC: destination IP address %u.%u.%u.%u " - "--> server %u.%u.%u.%u:%d\n", - NIPQUAD(iph->daddr), - NIPQUAD(dest->addr.ip), - ntohs(dest->port)); + IP_VS_DBG_BUF(6, "LBLC: destination IP address %s --> server %s:%d\n", + IP_VS_DBG_ADDR(svc->af, &iph.daddr), + IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port)); return dest; } @@ -522,9 +534,6 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler = .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_lblc_scheduler.n_list), -#ifdef CONFIG_IP_VS_IPV6 - .supports_ipv6 = 0, -#endif .init_service = ip_vs_lblc_init_svc, .done_service = ip_vs_lblc_done_svc, .schedule = ip_vs_lblc_schedule, diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 1f75ea83bcf..92dc76a6842 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -202,12 +202,13 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) } } - IP_VS_DBG(6, "ip_vs_dest_set_min: server %d.%d.%d.%d:%d " - "activeconns %d refcnt %d weight %d overhead %d\n", - NIPQUAD(least->addr.ip), ntohs(least->port), - atomic_read(&least->activeconns), - atomic_read(&least->refcnt), - atomic_read(&least->weight), loh); + IP_VS_DBG_BUF(6, "ip_vs_dest_set_min: server %s:%d " + "activeconns %d refcnt %d weight %d overhead %d\n", + IP_VS_DBG_ADDR(least->af, &least->addr), + ntohs(least->port), + atomic_read(&least->activeconns), + atomic_read(&least->refcnt), + atomic_read(&least->weight), loh); return least; } @@ -248,12 +249,12 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) } } - IP_VS_DBG(6, "ip_vs_dest_set_max: server %d.%d.%d.%d:%d " - "activeconns %d refcnt %d weight %d overhead %d\n", - NIPQUAD(most->addr.ip), ntohs(most->port), - atomic_read(&most->activeconns), - atomic_read(&most->refcnt), - atomic_read(&most->weight), moh); + IP_VS_DBG_BUF(6, "ip_vs_dest_set_max: server %s:%d " + "activeconns %d refcnt %d weight %d overhead %d\n", + IP_VS_DBG_ADDR(most->af, &most->addr), ntohs(most->port), + atomic_read(&most->activeconns), + atomic_read(&most->refcnt), + atomic_read(&most->weight), moh); return most; } @@ -264,7 +265,8 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) */ struct ip_vs_lblcr_entry { struct list_head list; - __be32 addr; /* destination IP address */ + int af; /* address family */ + union nf_inet_addr addr; /* destination IP address */ struct ip_vs_dest_set set; /* destination server set */ unsigned long lastuse; /* last used time */ }; @@ -293,7 +295,7 @@ static ctl_table vs_vars_table[] = { .data = &sysctl_ip_vs_lblcr_expiration, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = 0 } }; @@ -311,9 +313,17 @@ static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en) /* * Returns hash value for IPVS LBLCR entry */ -static inline unsigned ip_vs_lblcr_hashkey(__be32 addr) +static inline unsigned +ip_vs_lblcr_hashkey(int af, const union nf_inet_addr *addr) { - return (ntohl(addr)*2654435761UL) & IP_VS_LBLCR_TAB_MASK; + __be32 addr_fold = addr->ip; + +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + addr_fold = addr->ip6[0]^addr->ip6[1]^ + addr->ip6[2]^addr->ip6[3]; +#endif + return (ntohl(addr_fold)*2654435761UL) & IP_VS_LBLCR_TAB_MASK; } @@ -324,7 +334,7 @@ static inline unsigned ip_vs_lblcr_hashkey(__be32 addr) static void ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en) { - unsigned hash = ip_vs_lblcr_hashkey(en->addr); + unsigned hash = ip_vs_lblcr_hashkey(en->af, &en->addr); list_add(&en->list, &tbl->bucket[hash]); atomic_inc(&tbl->entries); @@ -336,13 +346,14 @@ ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en) * read lock. */ static inline struct ip_vs_lblcr_entry * -ip_vs_lblcr_get(struct ip_vs_lblcr_table *tbl, __be32 addr) +ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl, + const union nf_inet_addr *addr) { - unsigned hash = ip_vs_lblcr_hashkey(addr); + unsigned hash = ip_vs_lblcr_hashkey(af, addr); struct ip_vs_lblcr_entry *en; list_for_each_entry(en, &tbl->bucket[hash], list) - if (en->addr == addr) + if (ip_vs_addr_equal(af, &en->addr, addr)) return en; return NULL; @@ -354,12 +365,12 @@ ip_vs_lblcr_get(struct ip_vs_lblcr_table *tbl, __be32 addr) * IP address to a server. Called under write lock. */ static inline struct ip_vs_lblcr_entry * -ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, __be32 daddr, +ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr, struct ip_vs_dest *dest) { struct ip_vs_lblcr_entry *en; - en = ip_vs_lblcr_get(tbl, daddr); + en = ip_vs_lblcr_get(dest->af, tbl, daddr); if (!en) { en = kmalloc(sizeof(*en), GFP_ATOMIC); if (!en) { @@ -367,7 +378,8 @@ ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, __be32 daddr, return NULL; } - en->addr = daddr; + en->af = dest->af; + ip_vs_addr_copy(dest->af, &en->addr, daddr); en->lastuse = jiffies; /* initilize its dest set */ @@ -544,7 +556,7 @@ static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc) static inline struct ip_vs_dest * -__ip_vs_lblcr_schedule(struct ip_vs_service *svc, struct iphdr *iph) +__ip_vs_lblcr_schedule(struct ip_vs_service *svc) { struct ip_vs_dest *dest, *least; int loh, doh; @@ -596,12 +608,13 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc, struct iphdr *iph) } } - IP_VS_DBG(6, "LBLCR: server %d.%d.%d.%d:%d " - "activeconns %d refcnt %d weight %d overhead %d\n", - NIPQUAD(least->addr.ip), ntohs(least->port), - atomic_read(&least->activeconns), - atomic_read(&least->refcnt), - atomic_read(&least->weight), loh); + IP_VS_DBG_BUF(6, "LBLCR: server %s:%d " + "activeconns %d refcnt %d weight %d overhead %d\n", + IP_VS_DBG_ADDR(least->af, &least->addr), + ntohs(least->port), + atomic_read(&least->activeconns), + atomic_read(&least->refcnt), + atomic_read(&least->weight), loh); return least; } @@ -635,15 +648,17 @@ static struct ip_vs_dest * ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) { struct ip_vs_lblcr_table *tbl = svc->sched_data; - struct iphdr *iph = ip_hdr(skb); + struct ip_vs_iphdr iph; struct ip_vs_dest *dest = NULL; struct ip_vs_lblcr_entry *en; + ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); + IP_VS_DBG(6, "ip_vs_lblcr_schedule(): Scheduling...\n"); /* First look in our cache */ read_lock(&svc->sched_lock); - en = ip_vs_lblcr_get(tbl, iph->daddr); + en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr); if (en) { /* We only hold a read lock, but this is atomic */ en->lastuse = jiffies; @@ -673,7 +688,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) } /* The cache entry is invalid, time to schedule */ - dest = __ip_vs_lblcr_schedule(svc, iph); + dest = __ip_vs_lblcr_schedule(svc); if (!dest) { IP_VS_DBG(1, "no destination available\n"); read_unlock(&svc->sched_lock); @@ -691,7 +706,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) goto out; /* No cache entry, time to schedule */ - dest = __ip_vs_lblcr_schedule(svc, iph); + dest = __ip_vs_lblcr_schedule(svc); if (!dest) { IP_VS_DBG(1, "no destination available\n"); return NULL; @@ -699,15 +714,13 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) /* If we fail to create a cache entry, we'll just use the valid dest */ write_lock(&svc->sched_lock); - ip_vs_lblcr_new(tbl, iph->daddr, dest); + ip_vs_lblcr_new(tbl, &iph.daddr, dest); write_unlock(&svc->sched_lock); out: - IP_VS_DBG(6, "LBLCR: destination IP address %u.%u.%u.%u " - "--> server %u.%u.%u.%u:%d\n", - NIPQUAD(iph->daddr), - NIPQUAD(dest->addr.ip), - ntohs(dest->port)); + IP_VS_DBG_BUF(6, "LBLCR: destination IP address %s --> server %s:%d\n", + IP_VS_DBG_ADDR(svc->af, &iph.daddr), + IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port)); return dest; } @@ -722,9 +735,6 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler = .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_lblcr_scheduler.n_list), -#ifdef CONFIG_IP_VS_IPV6 - .supports_ipv6 = 0, -#endif .init_service = ip_vs_lblcr_init_svc, .done_service = ip_vs_lblcr_done_svc, .schedule = ip_vs_lblcr_schedule, diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c index b69f808ac46..51912cab777 100644 --- a/net/netfilter/ipvs/ip_vs_lc.c +++ b/net/netfilter/ipvs/ip_vs_lc.c @@ -81,9 +81,6 @@ static struct ip_vs_scheduler ip_vs_lc_scheduler = { .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_lc_scheduler.n_list), -#ifdef CONFIG_IP_VS_IPV6 - .supports_ipv6 = 1, -#endif .schedule = ip_vs_lc_schedule, }; diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c index 9a2d8033f08..6758ad2ceaa 100644 --- a/net/netfilter/ipvs/ip_vs_nq.c +++ b/net/netfilter/ipvs/ip_vs_nq.c @@ -116,9 +116,6 @@ static struct ip_vs_scheduler ip_vs_nq_scheduler = .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_nq_scheduler.n_list), -#ifdef CONFIG_IP_VS_IPV6 - .supports_ipv6 = 1, -#endif .schedule = ip_vs_nq_schedule, }; diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 0791f9e08fe..a01520e3d6b 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -164,26 +164,21 @@ ip_vs_tcpudp_debug_packet_v4(struct ip_vs_protocol *pp, if (ih == NULL) sprintf(buf, "%s TRUNCATED", pp->name); else if (ih->frag_off & htons(IP_OFFSET)) - sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u frag", - pp->name, NIPQUAD(ih->saddr), - NIPQUAD(ih->daddr)); + sprintf(buf, "%s %pI4->%pI4 frag", + pp->name, &ih->saddr, &ih->daddr); else { __be16 _ports[2], *pptr ; pptr = skb_header_pointer(skb, offset + ih->ihl*4, sizeof(_ports), _ports); if (pptr == NULL) - sprintf(buf, "%s TRUNCATED %u.%u.%u.%u->%u.%u.%u.%u", - pp->name, - NIPQUAD(ih->saddr), - NIPQUAD(ih->daddr)); + sprintf(buf, "%s TRUNCATED %pI4->%pI4", + pp->name, &ih->saddr, &ih->daddr); else - sprintf(buf, "%s %u.%u.%u.%u:%u->%u.%u.%u.%u:%u", + sprintf(buf, "%s %pI4:%u->%pI4:%u", pp->name, - NIPQUAD(ih->saddr), - ntohs(pptr[0]), - NIPQUAD(ih->daddr), - ntohs(pptr[1])); + &ih->saddr, ntohs(pptr[0]), + &ih->daddr, ntohs(pptr[1])); } printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); @@ -203,26 +198,21 @@ ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp, if (ih == NULL) sprintf(buf, "%s TRUNCATED", pp->name); else if (ih->nexthdr == IPPROTO_FRAGMENT) - sprintf(buf, "%s " NIP6_FMT "->" NIP6_FMT " frag", - pp->name, NIP6(ih->saddr), - NIP6(ih->daddr)); + sprintf(buf, "%s %pI6->%pI6 frag", + pp->name, &ih->saddr, &ih->daddr); else { __be16 _ports[2], *pptr; pptr = skb_header_pointer(skb, offset + sizeof(struct ipv6hdr), sizeof(_ports), _ports); if (pptr == NULL) - sprintf(buf, "%s TRUNCATED " NIP6_FMT "->" NIP6_FMT, - pp->name, - NIP6(ih->saddr), - NIP6(ih->daddr)); + sprintf(buf, "%s TRUNCATED %pI6->%pI6", + pp->name, &ih->saddr, &ih->daddr); else - sprintf(buf, "%s " NIP6_FMT ":%u->" NIP6_FMT ":%u", + sprintf(buf, "%s %pI6:%u->%pI6:%u", pp->name, - NIP6(ih->saddr), - ntohs(pptr[0]), - NIP6(ih->daddr), - ntohs(pptr[1])); + &ih->saddr, ntohs(pptr[0]), + &ih->daddr, ntohs(pptr[1])); } printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c index 80ab0c8e5b4..79f56c1e7c1 100644 --- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c +++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c @@ -135,9 +135,8 @@ ah_esp_debug_packet_v4(struct ip_vs_protocol *pp, const struct sk_buff *skb, if (ih == NULL) sprintf(buf, "%s TRUNCATED", pp->name); else - sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u", - pp->name, NIPQUAD(ih->saddr), - NIPQUAD(ih->daddr)); + sprintf(buf, "%s %pI4->%pI4", + pp->name, &ih->saddr, &ih->daddr); printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); } @@ -154,9 +153,8 @@ ah_esp_debug_packet_v6(struct ip_vs_protocol *pp, const struct sk_buff *skb, if (ih == NULL) sprintf(buf, "%s TRUNCATED", pp->name); else - sprintf(buf, "%s " NIP6_FMT "->" NIP6_FMT, - pp->name, NIP6(ih->saddr), - NIP6(ih->daddr)); + sprintf(buf, "%s %pI6->%pI6", + pp->name, &ih->saddr, &ih->daddr); printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); } diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index dd4566ea2bf..8cba4180285 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -192,8 +192,8 @@ tcp_snat_handler(struct sk_buff *skb, /* Adjust TCP checksums */ if (skb->ip_summed == CHECKSUM_PARTIAL) { tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr, - htonl(oldlen), - htonl(skb->len - tcphoff)); + htons(oldlen), + htons(skb->len - tcphoff)); } else if (!cp->app) { /* Only port and addr are changed, do fast csum update */ tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr, @@ -267,8 +267,8 @@ tcp_dnat_handler(struct sk_buff *skb, */ if (skb->ip_summed == CHECKSUM_PARTIAL) { tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr, - htonl(oldlen), - htonl(skb->len - tcphoff)); + htons(oldlen), + htons(skb->len - tcphoff)); } else if (!cp->app) { /* Only port and addr are changed, do fast csum update */ tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr, diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 6eb6039d634..d2930a71084 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -203,8 +203,8 @@ udp_snat_handler(struct sk_buff *skb, */ if (skb->ip_summed == CHECKSUM_PARTIAL) { udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, - htonl(oldlen), - htonl(skb->len - udphoff)); + htons(oldlen), + htons(skb->len - udphoff)); } else if (!cp->app && (udph->check != 0)) { /* Only port and addr are changed, do fast csum update */ udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, @@ -279,8 +279,8 @@ udp_dnat_handler(struct sk_buff *skb, */ if (skb->ip_summed == CHECKSUM_PARTIAL) { udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, - htonl(oldlen), - htonl(skb->len - udphoff)); + htons(oldlen), + htons(skb->len - udphoff)); } else if (!cp->app && (udph->check != 0)) { /* Only port and addr are changed, do fast csum update */ udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr, diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c index a22195f68ac..8fb51c169eb 100644 --- a/net/netfilter/ipvs/ip_vs_rr.c +++ b/net/netfilter/ipvs/ip_vs_rr.c @@ -89,9 +89,6 @@ static struct ip_vs_scheduler ip_vs_rr_scheduler = { .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list), -#ifdef CONFIG_IP_VS_IPV6 - .supports_ipv6 = 1, -#endif .init_service = ip_vs_rr_init_svc, .update_service = ip_vs_rr_update_svc, .schedule = ip_vs_rr_schedule, diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c index 7d2f22f04b8..691a6a0086e 100644 --- a/net/netfilter/ipvs/ip_vs_sed.c +++ b/net/netfilter/ipvs/ip_vs_sed.c @@ -118,9 +118,6 @@ static struct ip_vs_scheduler ip_vs_sed_scheduler = .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_sed_scheduler.n_list), -#ifdef CONFIG_IP_VS_IPV6 - .supports_ipv6 = 1, -#endif .schedule = ip_vs_sed_schedule, }; diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index 1d96de27fef..0e53955ef13 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -61,9 +61,16 @@ struct ip_vs_sh_bucket { /* * Returns hash value for IPVS SH entry */ -static inline unsigned ip_vs_sh_hashkey(__be32 addr) +static inline unsigned ip_vs_sh_hashkey(int af, const union nf_inet_addr *addr) { - return (ntohl(addr)*2654435761UL) & IP_VS_SH_TAB_MASK; + __be32 addr_fold = addr->ip; + +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + addr_fold = addr->ip6[0]^addr->ip6[1]^ + addr->ip6[2]^addr->ip6[3]; +#endif + return (ntohl(addr_fold)*2654435761UL) & IP_VS_SH_TAB_MASK; } @@ -71,9 +78,10 @@ static inline unsigned ip_vs_sh_hashkey(__be32 addr) * Get ip_vs_dest associated with supplied parameters. */ static inline struct ip_vs_dest * -ip_vs_sh_get(struct ip_vs_sh_bucket *tbl, __be32 addr) +ip_vs_sh_get(int af, struct ip_vs_sh_bucket *tbl, + const union nf_inet_addr *addr) { - return (tbl[ip_vs_sh_hashkey(addr)]).dest; + return (tbl[ip_vs_sh_hashkey(af, addr)]).dest; } @@ -199,12 +207,14 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) { struct ip_vs_dest *dest; struct ip_vs_sh_bucket *tbl; - struct iphdr *iph = ip_hdr(skb); + struct ip_vs_iphdr iph; + + ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); tbl = (struct ip_vs_sh_bucket *)svc->sched_data; - dest = ip_vs_sh_get(tbl, iph->saddr); + dest = ip_vs_sh_get(svc->af, tbl, &iph.saddr); if (!dest || !(dest->flags & IP_VS_DEST_F_AVAILABLE) || atomic_read(&dest->weight) <= 0 @@ -212,11 +222,10 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) return NULL; } - IP_VS_DBG(6, "SH: source IP address %u.%u.%u.%u " - "--> server %u.%u.%u.%u:%d\n", - NIPQUAD(iph->saddr), - NIPQUAD(dest->addr.ip), - ntohs(dest->port)); + IP_VS_DBG_BUF(6, "SH: source IP address %s --> server %s:%d\n", + IP_VS_DBG_ADDR(svc->af, &iph.saddr), + IP_VS_DBG_ADDR(svc->af, &dest->addr), + ntohs(dest->port)); return dest; } @@ -231,9 +240,6 @@ static struct ip_vs_scheduler ip_vs_sh_scheduler = .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list), -#ifdef CONFIG_IP_VS_IPV6 - .supports_ipv6 = 0, -#endif .init_service = ip_vs_sh_init_svc, .done_service = ip_vs_sh_done_svc, .update_service = ip_vs_sh_update_svc, diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index de5e7e118ee..6be5d4efa51 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -580,8 +580,8 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname) IP_VS_ERR("You probably need to specify IP address on " "multicast interface.\n"); - IP_VS_DBG(7, "binding socket with (%s) %u.%u.%u.%u\n", - ifname, NIPQUAD(addr)); + IP_VS_DBG(7, "binding socket with (%s) %pI4\n", + ifname, &addr); /* Now bind the socket with the address of multicast interface */ sin.sin_family = AF_INET; diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c index 8c596e71259..57b452bbb4e 100644 --- a/net/netfilter/ipvs/ip_vs_wlc.c +++ b/net/netfilter/ipvs/ip_vs_wlc.c @@ -106,9 +106,6 @@ static struct ip_vs_scheduler ip_vs_wlc_scheduler = .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_wlc_scheduler.n_list), -#ifdef CONFIG_IP_VS_IPV6 - .supports_ipv6 = 1, -#endif .schedule = ip_vs_wlc_schedule, }; diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c index 7ea92fed50b..2f618dc29c5 100644 --- a/net/netfilter/ipvs/ip_vs_wrr.c +++ b/net/netfilter/ipvs/ip_vs_wrr.c @@ -213,9 +213,6 @@ static struct ip_vs_scheduler ip_vs_wrr_scheduler = { .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_wrr_scheduler.n_list), -#ifdef CONFIG_IP_VS_IPV6 - .supports_ipv6 = 1, -#endif .init_service = ip_vs_wrr_init_svc, .done_service = ip_vs_wrr_done_svc, .update_service = ip_vs_wrr_update_svc, diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index e90d52f199b..425ab144f15 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -82,14 +82,13 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos) if (ip_route_output_key(&init_net, &rt, &fl)) { spin_unlock(&dest->dst_lock); - IP_VS_DBG_RL("ip_route_output error, " - "dest: %u.%u.%u.%u\n", - NIPQUAD(dest->addr.ip)); + IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", + &dest->addr.ip); return NULL; } __ip_vs_dst_set(dest, rtos, dst_clone(&rt->u.dst)); - IP_VS_DBG(10, "new dst %u.%u.%u.%u, refcnt=%d, rtos=%X\n", - NIPQUAD(dest->addr.ip), + IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n", + &dest->addr.ip, atomic_read(&rt->u.dst.__refcnt), rtos); } spin_unlock(&dest->dst_lock); @@ -104,8 +103,8 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos) }; if (ip_route_output_key(&init_net, &rt, &fl)) { - IP_VS_DBG_RL("ip_route_output error, dest: " - "%u.%u.%u.%u\n", NIPQUAD(cp->daddr.ip)); + IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", + &cp->daddr.ip); return NULL; } } @@ -141,14 +140,13 @@ __ip_vs_get_out_rt_v6(struct ip_vs_conn *cp) NULL, &fl); if (!rt) { spin_unlock(&dest->dst_lock); - IP_VS_DBG_RL("ip6_route_output error, " - "dest: " NIP6_FMT "\n", - NIP6(dest->addr.in6)); + IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", + &dest->addr.in6); return NULL; } __ip_vs_dst_set(dest, 0, dst_clone(&rt->u.dst)); - IP_VS_DBG(10, "new dst " NIP6_FMT ", refcnt=%d\n", - NIP6(dest->addr.in6), + IP_VS_DBG(10, "new dst %pI6, refcnt=%d\n", + &dest->addr.in6, atomic_read(&rt->u.dst.__refcnt)); } spin_unlock(&dest->dst_lock); @@ -167,8 +165,8 @@ __ip_vs_get_out_rt_v6(struct ip_vs_conn *cp) rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl); if (!rt) { - IP_VS_DBG_RL("ip6_route_output error, dest: " - NIP6_FMT "\n", NIP6(cp->daddr.in6)); + IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", + &cp->daddr.in6); return NULL; } } @@ -237,8 +235,8 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); if (ip_route_output_key(&init_net, &rt, &fl)) { - IP_VS_DBG_RL("ip_vs_bypass_xmit(): ip_route_output error, " - "dest: %u.%u.%u.%u\n", NIPQUAD(iph->daddr)); + IP_VS_DBG_RL("ip_vs_bypass_xmit(): ip_route_output error, dest: %pI4\n", + &iph->daddr); goto tx_error_icmp; } @@ -301,8 +299,8 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl); if (!rt) { - IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): ip6_route_output error, " - "dest: " NIP6_FMT "\n", NIP6(iph->daddr)); + IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): ip6_route_output error, dest: %pI6\n", + &iph->daddr); goto tx_error_icmp; } diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c index b92df5c1dfc..9fe8982bd7c 100644 --- a/net/netfilter/nf_conntrack_acct.c +++ b/net/netfilter/nf_conntrack_acct.c @@ -35,7 +35,7 @@ static struct ctl_table acct_sysctl_table[] = { .data = &init_net.ct.sysctl_acct, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, {} }; diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c index 38aedeeaf4e..4f8fcf49854 100644 --- a/net/netfilter/nf_conntrack_amanda.c +++ b/net/netfilter/nf_conntrack_amanda.c @@ -30,6 +30,7 @@ MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>"); MODULE_DESCRIPTION("Amanda connection tracking module"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ip_conntrack_amanda"); +MODULE_ALIAS_NFCT_HELPER("amanda"); module_param(master_timeout, uint, 0600); MODULE_PARM_DESC(master_timeout, "timeout for the master connection"); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 233fdd2d7d2..7e83f74cd5d 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -39,13 +39,13 @@ #include <net/netfilter/nf_conntrack_extend.h> #include <net/netfilter/nf_conntrack_acct.h> #include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_nat_core.h> #define NF_CONNTRACK_VERSION "0.5.0" -unsigned int -(*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct, - enum nf_nat_manip_type manip, - struct nlattr *attr) __read_mostly; +int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct, + enum nf_nat_manip_type manip, + struct nlattr *attr) __read_mostly; EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook); DEFINE_SPINLOCK(nf_conntrack_lock); @@ -181,7 +181,8 @@ destroy_conntrack(struct nf_conntrack *nfct) NF_CT_ASSERT(atomic_read(&nfct->use) == 0); NF_CT_ASSERT(!timer_pending(&ct->timeout)); - nf_conntrack_event(IPCT_DESTROY, ct); + if (!test_bit(IPS_DYING_BIT, &ct->status)) + nf_conntrack_event(IPCT_DESTROY, ct); set_bit(IPS_DYING_BIT, &ct->status); /* To make sure we don't get any weird locking issues here: @@ -586,14 +587,7 @@ init_conntrack(struct net *net, nf_conntrack_get(&ct->master->ct_general); NF_CT_STAT_INC(net, expect_new); } else { - struct nf_conntrack_helper *helper; - - helper = __nf_ct_helper_find(&repl_tuple); - if (helper) { - help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); - if (help) - rcu_assign_pointer(help->helper, helper); - } + __nf_ct_try_assign_helper(ct, GFP_ATOMIC); NF_CT_STAT_INC(net, new); } @@ -770,7 +764,6 @@ void nf_conntrack_alter_reply(struct nf_conn *ct, const struct nf_conntrack_tuple *newreply) { struct nf_conn_help *help = nfct_help(ct); - struct nf_conntrack_helper *helper; /* Should be unconfirmed, so not in hash table yet */ NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); @@ -783,23 +776,7 @@ void nf_conntrack_alter_reply(struct nf_conn *ct, return; rcu_read_lock(); - helper = __nf_ct_helper_find(newreply); - if (helper == NULL) { - if (help) - rcu_assign_pointer(help->helper, NULL); - goto out; - } - - if (help == NULL) { - help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); - if (help == NULL) - goto out; - } else { - memset(&help->help, 0, sizeof(help->help)); - } - - rcu_assign_pointer(help->helper, helper); -out: + __nf_ct_try_assign_helper(ct, GFP_ATOMIC); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply); @@ -994,8 +971,20 @@ void nf_ct_iterate_cleanup(struct net *net, } EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup); +struct __nf_ct_flush_report { + u32 pid; + int report; +}; + static int kill_all(struct nf_conn *i, void *data) { + struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data; + + /* get_next_corpse sets the dying bit for us */ + nf_conntrack_event_report(IPCT_DESTROY, + i, + fr->pid, + fr->report); return 1; } @@ -1009,9 +998,13 @@ void nf_ct_free_hashtable(struct hlist_head *hash, int vmalloced, unsigned int s } EXPORT_SYMBOL_GPL(nf_ct_free_hashtable); -void nf_conntrack_flush(struct net *net) +void nf_conntrack_flush(struct net *net, u32 pid, int report) { - nf_ct_iterate_cleanup(net, kill_all, NULL); + struct __nf_ct_flush_report fr = { + .pid = pid, + .report = report, + }; + nf_ct_iterate_cleanup(net, kill_all, &fr); } EXPORT_SYMBOL_GPL(nf_conntrack_flush); @@ -1027,7 +1020,7 @@ static void nf_conntrack_cleanup_net(struct net *net) nf_ct_event_cache_flush(net); nf_conntrack_ecache_fini(net); i_see_dead_people: - nf_conntrack_flush(net); + nf_conntrack_flush(net, 0, 0); if (atomic_read(&net->ct.count) != 0) { schedule(); goto i_see_dead_people; diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index a5f5e2e65d1..dee4190209c 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -35,9 +35,17 @@ static inline void __nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache) { if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct) - && ecache->events) - atomic_notifier_call_chain(&nf_conntrack_chain, ecache->events, - ecache->ct); + && ecache->events) { + struct nf_ct_event item = { + .ct = ecache->ct, + .pid = 0, + .report = 0 + }; + + atomic_notifier_call_chain(&nf_conntrack_chain, + ecache->events, + &item); + } ecache->events = 0; nf_ct_put(ecache->ct); diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 37a703bc3b8..3a8a34a6d37 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -362,7 +362,7 @@ static inline int refresh_timer(struct nf_conntrack_expect *i) return 1; } -int nf_ct_expect_related(struct nf_conntrack_expect *expect) +static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) { const struct nf_conntrack_expect_policy *p; struct nf_conntrack_expect *i; @@ -371,11 +371,8 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect) struct net *net = nf_ct_exp_net(expect); struct hlist_node *n; unsigned int h; - int ret; - - NF_CT_ASSERT(master_help); + int ret = 0; - spin_lock_bh(&nf_conntrack_lock); if (!master_help->helper) { ret = -ESHUTDOWN; goto out; @@ -409,18 +406,50 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect) printk(KERN_WARNING "nf_conntrack: expectation table full\n"); ret = -EMFILE; - goto out; } +out: + return ret; +} + +int nf_ct_expect_related(struct nf_conntrack_expect *expect) +{ + int ret; + + spin_lock_bh(&nf_conntrack_lock); + ret = __nf_ct_expect_check(expect); + if (ret < 0) + goto out; nf_ct_expect_insert(expect); + atomic_inc(&expect->use); + spin_unlock_bh(&nf_conntrack_lock); nf_ct_expect_event(IPEXP_NEW, expect); - ret = 0; + nf_ct_expect_put(expect); + return ret; out: spin_unlock_bh(&nf_conntrack_lock); return ret; } EXPORT_SYMBOL_GPL(nf_ct_expect_related); +int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, + u32 pid, int report) +{ + int ret; + + spin_lock_bh(&nf_conntrack_lock); + ret = __nf_ct_expect_check(expect); + if (ret < 0) + goto out; + nf_ct_expect_insert(expect); +out: + spin_unlock_bh(&nf_conntrack_lock); + if (ret == 0) + nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report); + return ret; +} +EXPORT_SYMBOL_GPL(nf_ct_expect_related_report); + #ifdef CONFIG_PROC_FS struct ct_expect_iter_state { struct seq_net_private p; diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 4f7107107e9..00fecc385f9 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -29,6 +29,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>"); MODULE_DESCRIPTION("ftp connection tracking helper"); MODULE_ALIAS("ip_conntrack_ftp"); +MODULE_ALIAS_NFCT_HELPER("ftp"); /* This is slow, but it's simple. --RR */ static char *ftp_buffer; @@ -357,7 +358,7 @@ static int help(struct sk_buff *skb, int ret; u32 seq; int dir = CTINFO2DIR(ctinfo); - unsigned int matchlen, matchoff; + unsigned int uninitialized_var(matchlen), uninitialized_var(matchoff); struct nf_ct_ftp_master *ct_ftp_info = &nfct_help(ct)->help.ct_ftp_info; struct nf_conntrack_expect *exp; union nf_inet_addr *daddr; @@ -427,10 +428,8 @@ static int help(struct sk_buff *skb, connection tracking, not packet filtering. However, it is necessary for accurate tracking in this case. */ - if (net_ratelimit()) - printk("conntrack_ftp: partial %s %u+%u\n", - search[dir][i].pattern, - ntohl(th->seq), datalen); + pr_debug("conntrack_ftp: partial %s %u+%u\n", + search[dir][i].pattern, ntohl(th->seq), datalen); ret = NF_DROP; goto out; } else if (found == 0) { /* No match */ @@ -462,16 +461,13 @@ static int help(struct sk_buff *skb, different IP address. Simply don't record it for NAT. */ if (cmd.l3num == PF_INET) { - pr_debug("conntrack_ftp: NOT RECORDING: " NIPQUAD_FMT - " != " NIPQUAD_FMT "\n", - NIPQUAD(cmd.u3.ip), - NIPQUAD(ct->tuplehash[dir].tuple.src.u3.ip)); + pr_debug("conntrack_ftp: NOT RECORDING: %pI4 != %pI4\n", + &cmd.u3.ip, + &ct->tuplehash[dir].tuple.src.u3.ip); } else { - pr_debug("conntrack_ftp: NOT RECORDING: " NIP6_FMT - " != " NIP6_FMT "\n", - NIP6(*((struct in6_addr *)cmd.u3.ip6)), - NIP6(*((struct in6_addr *) - ct->tuplehash[dir].tuple.src.u3.ip6))); + pr_debug("conntrack_ftp: NOT RECORDING: %pI6 != %pI6\n", + cmd.u3.ip6, + ct->tuplehash[dir].tuple.src.u3.ip6); } /* Thanks to Cristiano Lincoln Mattos diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index c1504f71cdf..687bd633c3d 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -850,10 +850,8 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct, get_h225_addr(ct, *data, &setup->destCallSignalAddress, &addr, &port) && memcmp(&addr, &ct->tuplehash[!dir].tuple.src.u3, sizeof(addr))) { - pr_debug("nf_ct_q931: set destCallSignalAddress " - NIP6_FMT ":%hu->" NIP6_FMT ":%hu\n", - NIP6(*(struct in6_addr *)&addr), ntohs(port), - NIP6(*(struct in6_addr *)&ct->tuplehash[!dir].tuple.src.u3), + pr_debug("nf_ct_q931: set destCallSignalAddress %pI6:%hu->%pI6:%hu\n", + &addr, ntohs(port), &ct->tuplehash[!dir].tuple.src.u3, ntohs(ct->tuplehash[!dir].tuple.src.u.tcp.port)); ret = set_h225_addr(skb, data, dataoff, &setup->destCallSignalAddress, @@ -868,10 +866,8 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct, get_h225_addr(ct, *data, &setup->sourceCallSignalAddress, &addr, &port) && memcmp(&addr, &ct->tuplehash[!dir].tuple.dst.u3, sizeof(addr))) { - pr_debug("nf_ct_q931: set sourceCallSignalAddress " - NIP6_FMT ":%hu->" NIP6_FMT ":%hu\n", - NIP6(*(struct in6_addr *)&addr), ntohs(port), - NIP6(*(struct in6_addr *)&ct->tuplehash[!dir].tuple.dst.u3), + pr_debug("nf_ct_q931: set sourceCallSignalAddress %pI6:%hu->%pI6:%hu\n", + &addr, ntohs(port), &ct->tuplehash[!dir].tuple.dst.u3, ntohs(ct->tuplehash[!dir].tuple.dst.u.tcp.port)); ret = set_h225_addr(skb, data, dataoff, &setup->sourceCallSignalAddress, @@ -1831,3 +1827,4 @@ MODULE_AUTHOR("Jing Min Zhao <zhaojingmin@users.sourceforge.net>"); MODULE_DESCRIPTION("H.323 connection tracking helper"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ip_conntrack_h323"); +MODULE_ALIAS_NFCT_HELPER("h323"); diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index c39b6a99413..a51bdac9f3a 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -45,7 +45,7 @@ static unsigned int helper_hash(const struct nf_conntrack_tuple *tuple) (__force __u16)tuple->src.u.all) % nf_ct_helper_hsize; } -struct nf_conntrack_helper * +static struct nf_conntrack_helper * __nf_ct_helper_find(const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_helper *helper; @@ -63,7 +63,6 @@ __nf_ct_helper_find(const struct nf_conntrack_tuple *tuple) } return NULL; } -EXPORT_SYMBOL_GPL(__nf_ct_helper_find); struct nf_conntrack_helper * __nf_conntrack_helper_find_byname(const char *name) @@ -95,6 +94,35 @@ struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp) } EXPORT_SYMBOL_GPL(nf_ct_helper_ext_add); +int __nf_ct_try_assign_helper(struct nf_conn *ct, gfp_t flags) +{ + int ret = 0; + struct nf_conntrack_helper *helper; + struct nf_conn_help *help = nfct_help(ct); + + helper = __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); + if (helper == NULL) { + if (help) + rcu_assign_pointer(help->helper, NULL); + goto out; + } + + if (help == NULL) { + help = nf_ct_helper_ext_add(ct, flags); + if (help == NULL) { + ret = -ENOMEM; + goto out; + } + } else { + memset(&help->help, 0, sizeof(help->help)); + } + + rcu_assign_pointer(help->helper, helper); +out: + return ret; +} +EXPORT_SYMBOL_GPL(__nf_ct_try_assign_helper); + static inline int unhelp(struct nf_conntrack_tuple_hash *i, const struct nf_conntrack_helper *me) { diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 20633fdf7e6..409c8be58e7 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -41,6 +41,7 @@ MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_DESCRIPTION("IRC (DCC) connection tracking helper"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ip_conntrack_irc"); +MODULE_ALIAS_NFCT_HELPER("irc"); module_param_array(ports, ushort, &ports_c, 0400); MODULE_PARM_DESC(ports, "port numbers of IRC servers"); @@ -156,9 +157,9 @@ static int help(struct sk_buff *skb, unsigned int protoff, /* we have at least (19+MINMATCHLEN)-5 bytes valid data left */ iph = ip_hdr(skb); - pr_debug("DCC found in master %u.%u.%u.%u:%u %u.%u.%u.%u:%u\n", - NIPQUAD(iph->saddr), ntohs(th->source), - NIPQUAD(iph->daddr), ntohs(th->dest)); + pr_debug("DCC found in master %pI4:%u %pI4:%u\n", + &iph->saddr, ntohs(th->source), + &iph->daddr, ntohs(th->dest)); for (i = 0; i < ARRAY_SIZE(dccprotos); i++) { if (memcmp(data, dccprotos[i], strlen(dccprotos[i]))) { @@ -185,10 +186,9 @@ static int help(struct sk_buff *skb, unsigned int protoff, tuple->dst.u3.ip != htonl(dcc_ip)) { if (net_ratelimit()) printk(KERN_WARNING - "Forged DCC command from " - "%u.%u.%u.%u: %u.%u.%u.%u:%u\n", - NIPQUAD(tuple->src.u3.ip), - HIPQUAD(dcc_ip), dcc_port); + "Forged DCC command from %pI4: %pI4:%u\n", + &tuple->src.u3.ip, + &dcc_ip, dcc_port); continue; } diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c index 08404e6755f..5af4273b466 100644 --- a/net/netfilter/nf_conntrack_netbios_ns.c +++ b/net/netfilter/nf_conntrack_netbios_ns.c @@ -37,6 +37,7 @@ MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_DESCRIPTION("NetBIOS name service broadcast connection tracking helper"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ip_conntrack_netbios_ns"); +MODULE_ALIAS_NFCT_HELPER("netbios_ns"); static unsigned int timeout __read_mostly = 3; module_param(timeout, uint, 0400); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 5f4a6516b3b..00e8c27130f 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -105,16 +105,14 @@ ctnetlink_dump_tuples(struct sk_buff *skb, struct nf_conntrack_l3proto *l3proto; struct nf_conntrack_l4proto *l4proto; - l3proto = nf_ct_l3proto_find_get(tuple->src.l3num); + l3proto = __nf_ct_l3proto_find(tuple->src.l3num); ret = ctnetlink_dump_tuples_ip(skb, tuple, l3proto); - nf_ct_l3proto_put(l3proto); if (unlikely(ret < 0)) return ret; - l4proto = nf_ct_l4proto_find_get(tuple->src.l3num, tuple->dst.protonum); + l4proto = __nf_ct_l4proto_find(tuple->src.l3num, tuple->dst.protonum); ret = ctnetlink_dump_tuples_proto(skb, tuple, l4proto); - nf_ct_l4proto_put(l4proto); return ret; } @@ -151,11 +149,9 @@ ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct nf_conn *ct) struct nlattr *nest_proto; int ret; - l4proto = nf_ct_l4proto_find_get(nf_ct_l3num(ct), nf_ct_protonum(ct)); - if (!l4proto->to_nlattr) { - nf_ct_l4proto_put(l4proto); + l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); + if (!l4proto->to_nlattr) return 0; - } nest_proto = nla_nest_start(skb, CTA_PROTOINFO | NLA_F_NESTED); if (!nest_proto) @@ -163,14 +159,11 @@ ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct nf_conn *ct) ret = l4proto->to_nlattr(skb, nest_proto, ct); - nf_ct_l4proto_put(l4proto); - nla_nest_end(skb, nest_proto); return ret; nla_put_failure: - nf_ct_l4proto_put(l4proto); return -1; } @@ -184,7 +177,6 @@ ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct nf_conn *ct) if (!help) return 0; - rcu_read_lock(); helper = rcu_dereference(help->helper); if (!helper) goto out; @@ -199,11 +191,9 @@ ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct nf_conn *ct) nla_nest_end(skb, nest_helper); out: - rcu_read_unlock(); return 0; nla_put_failure: - rcu_read_unlock(); return -1; } @@ -420,7 +410,8 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; struct nlattr *nest_parms; - struct nf_conn *ct = (struct nf_conn *)ptr; + struct nf_ct_event *item = (struct nf_ct_event *)ptr; + struct nf_conn *ct = item->ct; struct sk_buff *skb; unsigned int type; sk_buff_data_t b; @@ -453,7 +444,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, b = skb->tail; type |= NFNL_SUBSYS_CTNETLINK << 8; - nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg)); + nlh = NLMSG_PUT(skb, item->pid, 0, type, sizeof(struct nfgenmsg)); nfmsg = NLMSG_DATA(nlh); nlh->nlmsg_flags = flags; @@ -461,6 +452,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, nfmsg->version = NFNETLINK_V0; nfmsg->res_id = 0; + rcu_read_lock(); nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED); if (!nest_parms) goto nla_put_failure; @@ -517,13 +509,15 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, && ctnetlink_dump_mark(skb, ct) < 0) goto nla_put_failure; #endif + rcu_read_unlock(); nlh->nlmsg_len = skb->tail - b; - nfnetlink_send(skb, 0, group, 0); + nfnetlink_send(skb, item->pid, group, item->report); return NOTIFY_DONE; -nlmsg_failure: nla_put_failure: + rcu_read_unlock(); +nlmsg_failure: kfree_skb(skb); return NOTIFY_DONE; } @@ -729,7 +723,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3); else { /* Flush the whole table */ - nf_conntrack_flush(&init_net); + nf_conntrack_flush(&init_net, + NETLINK_CB(skb).pid, + nlmsg_report(nlh)); return 0; } @@ -750,6 +746,14 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, } } + nf_conntrack_event_report(IPCT_DESTROY, + ct, + NETLINK_CB(skb).pid, + nlmsg_report(nlh)); + + /* death_by_timeout would report the event again */ + set_bit(IPS_DYING_BIT, &ct->status); + nf_ct_kill(ct); nf_ct_put(ct); @@ -795,8 +799,10 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, return -ENOMEM; } + rcu_read_lock(); err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, IPCTNL_MSG_CT_NEW, 1, ct); + rcu_read_unlock(); nf_ct_put(ct); if (err <= 0) goto free; @@ -922,8 +928,22 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nlattr *cda[]) } helper = __nf_conntrack_helper_find_byname(helpname); - if (helper == NULL) + if (helper == NULL) { +#ifdef CONFIG_MODULES + spin_unlock_bh(&nf_conntrack_lock); + + if (request_module("nfct-helper-%s", helpname) < 0) { + spin_lock_bh(&nf_conntrack_lock); + return -EOPNOTSUPP; + } + + spin_lock_bh(&nf_conntrack_lock); + helper = __nf_conntrack_helper_find_byname(helpname); + if (helper) + return -EAGAIN; +#endif return -EOPNOTSUPP; + } if (help) { if (help->helper == helper) @@ -1079,15 +1099,38 @@ ctnetlink_change_conntrack(struct nf_conn *ct, struct nlattr *cda[]) return 0; } +static inline void +ctnetlink_event_report(struct nf_conn *ct, u32 pid, int report) +{ + unsigned int events = 0; + + if (test_bit(IPS_EXPECTED_BIT, &ct->status)) + events |= IPCT_RELATED; + else + events |= IPCT_NEW; + + nf_conntrack_event_report(IPCT_STATUS | + IPCT_HELPER | + IPCT_REFRESH | + IPCT_PROTOINFO | + IPCT_NATSEQADJ | + IPCT_MARK | + events, + ct, + pid, + report); +} + static int ctnetlink_create_conntrack(struct nlattr *cda[], struct nf_conntrack_tuple *otuple, struct nf_conntrack_tuple *rtuple, - struct nf_conn *master_ct) + struct nf_conn *master_ct, + u32 pid, + int report) { struct nf_conn *ct; int err = -EINVAL; - struct nf_conn_help *help; struct nf_conntrack_helper *helper; ct = nf_conntrack_alloc(&init_net, otuple, rtuple, GFP_ATOMIC); @@ -1102,16 +1145,55 @@ ctnetlink_create_conntrack(struct nlattr *cda[], ct->status |= IPS_CONFIRMED; rcu_read_lock(); - helper = __nf_ct_helper_find(rtuple); - if (helper) { - help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); - if (help == NULL) { + if (cda[CTA_HELP]) { + char *helpname; + + err = ctnetlink_parse_help(cda[CTA_HELP], &helpname); + if (err < 0) { + rcu_read_unlock(); + goto err; + } + + helper = __nf_conntrack_helper_find_byname(helpname); + if (helper == NULL) { + rcu_read_unlock(); +#ifdef CONFIG_MODULES + if (request_module("nfct-helper-%s", helpname) < 0) { + err = -EOPNOTSUPP; + goto err; + } + + rcu_read_lock(); + helper = __nf_conntrack_helper_find_byname(helpname); + if (helper) { + rcu_read_unlock(); + err = -EAGAIN; + goto err; + } + rcu_read_unlock(); +#endif + err = -EOPNOTSUPP; + goto err; + } else { + struct nf_conn_help *help; + + help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); + if (help == NULL) { + rcu_read_unlock(); + err = -ENOMEM; + goto err; + } + + /* not in hash table yet so not strictly necessary */ + rcu_assign_pointer(help->helper, helper); + } + } else { + /* try an implicit helper assignation */ + err = __nf_ct_try_assign_helper(ct, GFP_ATOMIC); + if (err < 0) { rcu_read_unlock(); - err = -ENOMEM; goto err; } - /* not in hash table yet so not strictly necessary */ - rcu_assign_pointer(help->helper, helper); } if (cda[CTA_STATUS]) { @@ -1151,9 +1233,12 @@ ctnetlink_create_conntrack(struct nlattr *cda[], ct->master = master_ct; } + nf_conntrack_get(&ct->ct_general); add_timer(&ct->timeout); nf_conntrack_hash_insert(ct); rcu_read_unlock(); + ctnetlink_event_report(ct, pid, report); + nf_ct_put(ct); return 0; @@ -1209,7 +1294,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, goto out_unlock; } master_ct = nf_ct_tuplehash_to_ctrack(master_h); - atomic_inc(&master_ct->ct_general.use); + nf_conntrack_get(&master_ct->ct_general); } err = -ENOENT; @@ -1217,9 +1302,10 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, err = ctnetlink_create_conntrack(cda, &otuple, &rtuple, - master_ct); + master_ct, + NETLINK_CB(skb).pid, + nlmsg_report(nlh)); spin_unlock_bh(&nf_conntrack_lock); - if (err < 0 && master_ct) nf_ct_put(master_ct); @@ -1231,6 +1317,8 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, * so there's no need to increase the refcount */ err = -EEXIST; if (!(nlh->nlmsg_flags & NLM_F_EXCL)) { + struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); + /* we only allow nat config for new conntracks */ if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) { err = -EOPNOTSUPP; @@ -1241,8 +1329,19 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, err = -EOPNOTSUPP; goto out_unlock; } - err = ctnetlink_change_conntrack(nf_ct_tuplehash_to_ctrack(h), - cda); + + err = ctnetlink_change_conntrack(ct, cda); + if (err == 0) { + nf_conntrack_get(&ct->ct_general); + spin_unlock_bh(&nf_conntrack_lock); + ctnetlink_event_report(ct, + NETLINK_CB(skb).pid, + nlmsg_report(nlh)); + nf_ct_put(ct); + } else + spin_unlock_bh(&nf_conntrack_lock); + + return err; } out_unlock: @@ -1293,16 +1392,14 @@ ctnetlink_exp_dump_mask(struct sk_buff *skb, if (!nest_parms) goto nla_put_failure; - l3proto = nf_ct_l3proto_find_get(tuple->src.l3num); + l3proto = __nf_ct_l3proto_find(tuple->src.l3num); ret = ctnetlink_dump_tuples_ip(skb, &m, l3proto); - nf_ct_l3proto_put(l3proto); if (unlikely(ret < 0)) goto nla_put_failure; - l4proto = nf_ct_l4proto_find_get(tuple->src.l3num, tuple->dst.protonum); + l4proto = __nf_ct_l4proto_find(tuple->src.l3num, tuple->dst.protonum); ret = ctnetlink_dump_tuples_proto(skb, &m, l4proto); - nf_ct_l4proto_put(l4proto); if (unlikely(ret < 0)) goto nla_put_failure; @@ -1379,7 +1476,8 @@ static int ctnetlink_expect_event(struct notifier_block *this, { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - struct nf_conntrack_expect *exp = (struct nf_conntrack_expect *)ptr; + struct nf_exp_event *item = (struct nf_exp_event *)ptr; + struct nf_conntrack_expect *exp = item->exp; struct sk_buff *skb; unsigned int type; sk_buff_data_t b; @@ -1401,7 +1499,7 @@ static int ctnetlink_expect_event(struct notifier_block *this, b = skb->tail; type |= NFNL_SUBSYS_CTNETLINK_EXP << 8; - nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg)); + nlh = NLMSG_PUT(skb, item->pid, 0, type, sizeof(struct nfgenmsg)); nfmsg = NLMSG_DATA(nlh); nlh->nlmsg_flags = flags; @@ -1409,15 +1507,18 @@ static int ctnetlink_expect_event(struct notifier_block *this, nfmsg->version = NFNETLINK_V0; nfmsg->res_id = 0; + rcu_read_lock(); if (ctnetlink_exp_dump_expect(skb, exp) < 0) goto nla_put_failure; + rcu_read_unlock(); nlh->nlmsg_len = skb->tail - b; - nfnetlink_send(skb, 0, NFNLGRP_CONNTRACK_EXP_NEW, 0); + nfnetlink_send(skb, item->pid, NFNLGRP_CONNTRACK_EXP_NEW, item->report); return NOTIFY_DONE; -nlmsg_failure: nla_put_failure: + rcu_read_unlock(); +nlmsg_failure: kfree_skb(skb); return NOTIFY_DONE; } @@ -1521,9 +1622,11 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, if (!skb2) goto out; + rcu_read_lock(); err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, 1, exp); + rcu_read_unlock(); if (err <= 0) goto free; @@ -1624,7 +1727,7 @@ ctnetlink_change_expect(struct nf_conntrack_expect *x, struct nlattr *cda[]) } static int -ctnetlink_create_expect(struct nlattr *cda[], u_int8_t u3) +ctnetlink_create_expect(struct nlattr *cda[], u_int8_t u3, u32 pid, int report) { struct nf_conntrack_tuple tuple, mask, master_tuple; struct nf_conntrack_tuple_hash *h = NULL; @@ -1653,7 +1756,7 @@ ctnetlink_create_expect(struct nlattr *cda[], u_int8_t u3) if (!help || !help->helper) { /* such conntrack hasn't got any helper, abort */ - err = -EINVAL; + err = -EOPNOTSUPP; goto out; } @@ -1671,7 +1774,7 @@ ctnetlink_create_expect(struct nlattr *cda[], u_int8_t u3) memcpy(&exp->mask.src.u3, &mask.src.u3, sizeof(exp->mask.src.u3)); exp->mask.src.u.all = mask.src.u.all; - err = nf_ct_expect_related(exp); + err = nf_ct_expect_related_report(exp, pid, report); nf_ct_expect_put(exp); out: @@ -1704,8 +1807,12 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, if (!exp) { spin_unlock_bh(&nf_conntrack_lock); err = -ENOENT; - if (nlh->nlmsg_flags & NLM_F_CREATE) - err = ctnetlink_create_expect(cda, u3); + if (nlh->nlmsg_flags & NLM_F_CREATE) { + err = ctnetlink_create_expect(cda, + u3, + NETLINK_CB(skb).pid, + nlmsg_report(nlh)); + } return err; } diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 1bc3001d182..9e169ef2e85 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -37,6 +37,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); MODULE_DESCRIPTION("Netfilter connection tracking helper module for PPTP"); MODULE_ALIAS("ip_conntrack_pptp"); +MODULE_ALIAS_NFCT_HELPER("pptp"); static DEFINE_SPINLOCK(nf_pptp_lock); diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index dbe680af85d..4be80d7b879 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -67,7 +67,7 @@ static struct ctl_table generic_sysctl_table[] = { .data = &nf_ct_generic_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = 0 @@ -80,7 +80,7 @@ static struct ctl_table generic_compat_sysctl_table[] = { .data = &nf_ct_generic_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = 0 diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 4ab62ad85dd..1b279f9d6bf 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -341,7 +341,7 @@ static int __init nf_ct_proto_gre_init(void) return rv; } -static void nf_ct_proto_gre_fini(void) +static void __exit nf_ct_proto_gre_fini(void) { nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_gre4); unregister_pernet_gen_subsys(proto_gre_net_id, &proto_gre_net_ops); diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index ae8c2609e23..74e03790119 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -317,7 +317,7 @@ static int sctp_packet(struct nf_conn *ct, goto out; } - old_state = new_state = SCTP_CONNTRACK_MAX; + old_state = new_state = SCTP_CONNTRACK_NONE; write_lock_bh(&sctp_lock); for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { /* Special cases of Verification tag check (Sec 8.5.1) */ @@ -548,49 +548,49 @@ static struct ctl_table sctp_sysctl_table[] = { .data = &sctp_timeouts[SCTP_CONNTRACK_CLOSED], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_sctp_timeout_cookie_wait", .data = &sctp_timeouts[SCTP_CONNTRACK_COOKIE_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_sctp_timeout_cookie_echoed", .data = &sctp_timeouts[SCTP_CONNTRACK_COOKIE_ECHOED], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_sctp_timeout_established", .data = &sctp_timeouts[SCTP_CONNTRACK_ESTABLISHED], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_sctp_timeout_shutdown_sent", .data = &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_SENT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_sctp_timeout_shutdown_recd", .data = &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_RECD], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_sctp_timeout_shutdown_ack_sent", .data = &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = 0 @@ -604,49 +604,49 @@ static struct ctl_table sctp_compat_sysctl_table[] = { .data = &sctp_timeouts[SCTP_CONNTRACK_CLOSED], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_sctp_timeout_cookie_wait", .data = &sctp_timeouts[SCTP_CONNTRACK_COOKIE_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_sctp_timeout_cookie_echoed", .data = &sctp_timeouts[SCTP_CONNTRACK_COOKIE_ECHOED], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_sctp_timeout_established", .data = &sctp_timeouts[SCTP_CONNTRACK_ESTABLISHED], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_sctp_timeout_shutdown_sent", .data = &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_SENT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_sctp_timeout_shutdown_recd", .data = &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_RECD], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_sctp_timeout_shutdown_ack_sent", .data = &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = 0 diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index f947ec41e39..a1edb9c1ade 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1192,70 +1192,70 @@ static struct ctl_table tcp_sysctl_table[] = { .data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_syn_recv", .data = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_established", .data = &tcp_timeouts[TCP_CONNTRACK_ESTABLISHED], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_fin_wait", .data = &tcp_timeouts[TCP_CONNTRACK_FIN_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_close_wait", .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_last_ack", .data = &tcp_timeouts[TCP_CONNTRACK_LAST_ACK], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_time_wait", .data = &tcp_timeouts[TCP_CONNTRACK_TIME_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_close", .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_max_retrans", .data = &nf_ct_tcp_timeout_max_retrans, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_unacknowledged", .data = &nf_ct_tcp_timeout_unacknowledged, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = NET_NF_CONNTRACK_TCP_LOOSE, @@ -1263,7 +1263,7 @@ static struct ctl_table tcp_sysctl_table[] = { .data = &nf_ct_tcp_loose, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_NF_CONNTRACK_TCP_BE_LIBERAL, @@ -1271,7 +1271,7 @@ static struct ctl_table tcp_sysctl_table[] = { .data = &nf_ct_tcp_be_liberal, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_NF_CONNTRACK_TCP_MAX_RETRANS, @@ -1279,7 +1279,7 @@ static struct ctl_table tcp_sysctl_table[] = { .data = &nf_ct_tcp_max_retrans, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = 0 @@ -1293,63 +1293,63 @@ static struct ctl_table tcp_compat_sysctl_table[] = { .data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_syn_recv", .data = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_established", .data = &tcp_timeouts[TCP_CONNTRACK_ESTABLISHED], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_fin_wait", .data = &tcp_timeouts[TCP_CONNTRACK_FIN_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_close_wait", .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_last_ack", .data = &tcp_timeouts[TCP_CONNTRACK_LAST_ACK], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_time_wait", .data = &tcp_timeouts[TCP_CONNTRACK_TIME_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_close", .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE], .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_max_retrans", .data = &nf_ct_tcp_timeout_max_retrans, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_LOOSE, @@ -1357,7 +1357,7 @@ static struct ctl_table tcp_compat_sysctl_table[] = { .data = &nf_ct_tcp_loose, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL, @@ -1365,7 +1365,7 @@ static struct ctl_table tcp_compat_sysctl_table[] = { .data = &nf_ct_tcp_be_liberal, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS, @@ -1373,7 +1373,7 @@ static struct ctl_table tcp_compat_sysctl_table[] = { .data = &nf_ct_tcp_max_retrans, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = 0 diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 7c2ca48698b..2b8b1f579f9 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -143,14 +143,14 @@ static struct ctl_table udp_sysctl_table[] = { .data = &nf_ct_udp_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_udp_timeout_stream", .data = &nf_ct_udp_timeout_stream, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = 0 @@ -163,14 +163,14 @@ static struct ctl_table udp_compat_sysctl_table[] = { .data = &nf_ct_udp_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_udp_timeout_stream", .data = &nf_ct_udp_timeout_stream, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = 0 diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index d22d839e4f9..4579d8de13b 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -151,7 +151,7 @@ static struct ctl_table udplite_sysctl_table[] = { .data = &nf_ct_udplite_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = CTL_UNNUMBERED, @@ -159,7 +159,7 @@ static struct ctl_table udplite_sysctl_table[] = { .data = &nf_ct_udplite_timeout_stream, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec_jiffies, }, { .ctl_name = 0 diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index a94294b2b23..dcfecbb81c4 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -30,6 +30,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Michal Schmidt <mschmidt@redhat.com>"); MODULE_DESCRIPTION("SANE connection tracking helper"); +MODULE_ALIAS_NFCT_HELPER("sane"); static char *sane_buffer; diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 6813f1c8863..4b572163784 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -28,6 +28,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>"); MODULE_DESCRIPTION("SIP connection tracking helper"); MODULE_ALIAS("ip_conntrack_sip"); +MODULE_ALIAS_NFCT_HELPER("sip"); #define MAX_PORTS 8 static unsigned short ports[MAX_PORTS]; diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 98106d4e89f..f37b9b74c6a 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -336,7 +336,7 @@ static ctl_table nf_ct_sysctl_table[] = { .data = &nf_conntrack_max, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_NF_CONNTRACK_COUNT, @@ -344,7 +344,7 @@ static ctl_table nf_ct_sysctl_table[] = { .data = &init_net.ct.count, .maxlen = sizeof(int), .mode = 0444, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_NF_CONNTRACK_BUCKETS, @@ -352,7 +352,7 @@ static ctl_table nf_ct_sysctl_table[] = { .data = &nf_conntrack_htable_size, .maxlen = sizeof(unsigned int), .mode = 0444, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_NF_CONNTRACK_CHECKSUM, @@ -360,7 +360,7 @@ static ctl_table nf_ct_sysctl_table[] = { .data = &init_net.ct.sysctl_checksum, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = NET_NF_CONNTRACK_LOG_INVALID, @@ -368,8 +368,8 @@ static ctl_table nf_ct_sysctl_table[] = { .data = &init_net.ct.sysctl_log_invalid, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &log_invalid_proto_min, .extra2 = &log_invalid_proto_max, }, @@ -379,7 +379,7 @@ static ctl_table nf_ct_sysctl_table[] = { .data = &nf_ct_expect_max, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = 0 } }; @@ -393,7 +393,7 @@ static ctl_table nf_ct_netfilter_table[] = { .data = &nf_conntrack_max, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = 0 } }; diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c index f57f6e7a71e..46e646b2e9b 100644 --- a/net/netfilter/nf_conntrack_tftp.c +++ b/net/netfilter/nf_conntrack_tftp.c @@ -22,6 +22,7 @@ MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>"); MODULE_DESCRIPTION("TFTP connection tracking helper"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ip_conntrack_tftp"); +MODULE_ALIAS_NFCT_HELPER("tftp"); #define MAX_PORTS 8 static unsigned short ports[MAX_PORTS]; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 41e0105d382..fa49dc7fe10 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -30,6 +30,7 @@ #include <linux/random.h> #include <net/sock.h> #include <net/netfilter/nf_log.h> +#include <net/netfilter/nfnetlink_log.h> #include <asm/atomic.h> @@ -474,8 +475,9 @@ __build_packet_message(struct nfulnl_instance *inst, if (skb->sk) { read_lock_bh(&skb->sk->sk_callback_lock); if (skb->sk->sk_socket && skb->sk->sk_socket->file) { - __be32 uid = htonl(skb->sk->sk_socket->file->f_uid); - __be32 gid = htonl(skb->sk->sk_socket->file->f_gid); + struct file *file = skb->sk->sk_socket->file; + __be32 uid = htonl(file->f_cred->fsuid); + __be32 gid = htonl(file->f_cred->fsgid); /* need to unlock here since NLA_PUT may goto */ read_unlock_bh(&skb->sk->sk_callback_lock); NLA_PUT_BE32(inst->skb, NFULA_UID, uid); @@ -533,7 +535,7 @@ static struct nf_loginfo default_loginfo = { }; /* log handler for internal netfilter logging api */ -static void +void nfulnl_log_packet(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, @@ -648,6 +650,7 @@ alloc_failure: /* FIXME: statistics */ goto unlock_and_release; } +EXPORT_SYMBOL_GPL(nfulnl_log_packet); static int nfulnl_rcv_nl_event(struct notifier_block *this, diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c index 50e3a52d3b3..a57c5cf018e 100644 --- a/net/netfilter/xt_NFLOG.c +++ b/net/netfilter/xt_NFLOG.c @@ -13,6 +13,7 @@ #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_NFLOG.h> #include <net/netfilter/nf_log.h> +#include <net/netfilter/nfnetlink_log.h> MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_DESCRIPTION("Xtables: packet logging to netlink using NFLOG"); @@ -31,8 +32,8 @@ nflog_tg(struct sk_buff *skb, const struct xt_target_param *par) li.u.ulog.group = info->group; li.u.ulog.qthreshold = info->threshold; - nf_log_packet(par->family, par->hooknum, skb, par->in, - par->out, &li, "%s", info->prefix); + nfulnl_log_packet(par->family, par->hooknum, skb, par->in, + par->out, &li, info->prefix); return XT_CONTINUE; } diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c index e5d3e867328..0989f29ade2 100644 --- a/net/netfilter/xt_dccp.c +++ b/net/netfilter/xt_dccp.c @@ -45,10 +45,8 @@ dccp_find_option(u_int8_t option, unsigned int optlen = dh->dccph_doff*4 - __dccp_hdr_len(dh); unsigned int i; - if (dh->dccph_doff * 4 < __dccp_hdr_len(dh)) { - *hotdrop = true; - return false; - } + if (dh->dccph_doff * 4 < __dccp_hdr_len(dh)) + goto invalid; if (!optlen) return false; @@ -57,9 +55,7 @@ dccp_find_option(u_int8_t option, op = skb_header_pointer(skb, protoff + optoff, optlen, dccp_optbuf); if (op == NULL) { /* If we don't have the whole header, drop packet. */ - spin_unlock_bh(&dccp_buflock); - *hotdrop = true; - return false; + goto partial; } for (i = 0; i < optlen; ) { @@ -76,6 +72,12 @@ dccp_find_option(u_int8_t option, spin_unlock_bh(&dccp_buflock); return false; + +partial: + spin_unlock_bh(&dccp_buflock); +invalid: + *hotdrop = true; + return false; } diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 6fc4292d46e..f97fded024c 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -893,23 +893,21 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, switch (family) { case NFPROTO_IPV4: - return seq_printf(s, "%ld %u.%u.%u.%u:%u->" - "%u.%u.%u.%u:%u %u %u %u\n", + return seq_printf(s, "%ld %pI4:%u->%pI4:%u %u %u %u\n", (long)(ent->expires - jiffies)/HZ, - NIPQUAD(ent->dst.ip.src), + &ent->dst.ip.src, ntohs(ent->dst.src_port), - NIPQUAD(ent->dst.ip.dst), + &ent->dst.ip.dst, ntohs(ent->dst.dst_port), ent->rateinfo.credit, ent->rateinfo.credit_cap, ent->rateinfo.cost); #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) case NFPROTO_IPV6: - return seq_printf(s, "%ld " NIP6_FMT ":%u->" - NIP6_FMT ":%u %u %u %u\n", + return seq_printf(s, "%ld %pI6:%u->%pI6:%u %u %u %u\n", (long)(ent->expires - jiffies)/HZ, - NIP6(*(struct in6_addr *)&ent->dst.ip6.src), + &ent->dst.ip6.src, ntohs(ent->dst.src_port), - NIP6(*(struct in6_addr *)&ent->dst.ip6.dst), + &ent->dst.ip6.dst, ntohs(ent->dst.dst_port), ent->rateinfo.credit, ent->rateinfo.credit_cap, ent->rateinfo.cost); diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c index 7ac54eab0b0..501f9b62318 100644 --- a/net/netfilter/xt_iprange.c +++ b/net/netfilter/xt_iprange.c @@ -26,12 +26,11 @@ iprange_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) if ((ntohl(iph->saddr) < ntohl(info->src.min_ip) || ntohl(iph->saddr) > ntohl(info->src.max_ip)) ^ !!(info->flags & IPRANGE_SRC_INV)) { - pr_debug("src IP %u.%u.%u.%u NOT in range %s" - "%u.%u.%u.%u-%u.%u.%u.%u\n", - NIPQUAD(iph->saddr), + pr_debug("src IP %pI4 NOT in range %s%pI4-%pI4\n", + &iph->saddr, info->flags & IPRANGE_SRC_INV ? "(INV) " : "", - NIPQUAD(info->src.min_ip), - NIPQUAD(info->src.max_ip)); + &info->src.min_ip, + &info->src.max_ip); return false; } } @@ -39,12 +38,11 @@ iprange_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) if ((ntohl(iph->daddr) < ntohl(info->dst.min_ip) || ntohl(iph->daddr) > ntohl(info->dst.max_ip)) ^ !!(info->flags & IPRANGE_DST_INV)) { - pr_debug("dst IP %u.%u.%u.%u NOT in range %s" - "%u.%u.%u.%u-%u.%u.%u.%u\n", - NIPQUAD(iph->daddr), + pr_debug("dst IP %pI4 NOT in range %s%pI4-%pI4\n", + &iph->daddr, info->flags & IPRANGE_DST_INV ? "(INV) " : "", - NIPQUAD(info->dst.min_ip), - NIPQUAD(info->dst.max_ip)); + &info->dst.min_ip, + &info->dst.max_ip); return false; } } @@ -63,12 +61,11 @@ iprange_mt4(const struct sk_buff *skb, const struct xt_match_param *par) m |= ntohl(iph->saddr) > ntohl(info->src_max.ip); m ^= !!(info->flags & IPRANGE_SRC_INV); if (m) { - pr_debug("src IP " NIPQUAD_FMT " NOT in range %s" - NIPQUAD_FMT "-" NIPQUAD_FMT "\n", - NIPQUAD(iph->saddr), + pr_debug("src IP %pI4 NOT in range %s%pI4-%pI4\n", + &iph->saddr, (info->flags & IPRANGE_SRC_INV) ? "(INV) " : "", - NIPQUAD(info->src_max.ip), - NIPQUAD(info->src_max.ip)); + &info->src_max.ip, + &info->src_max.ip); return false; } } @@ -77,12 +74,11 @@ iprange_mt4(const struct sk_buff *skb, const struct xt_match_param *par) m |= ntohl(iph->daddr) > ntohl(info->dst_max.ip); m ^= !!(info->flags & IPRANGE_DST_INV); if (m) { - pr_debug("dst IP " NIPQUAD_FMT " NOT in range %s" - NIPQUAD_FMT "-" NIPQUAD_FMT "\n", - NIPQUAD(iph->daddr), + pr_debug("dst IP %pI4 NOT in range %s%pI4-%pI4\n", + &iph->daddr, (info->flags & IPRANGE_DST_INV) ? "(INV) " : "", - NIPQUAD(info->dst_min.ip), - NIPQUAD(info->dst_max.ip)); + &info->dst_min.ip, + &info->dst_max.ip); return false; } } diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c index f19ebd9b78f..22b2a5e881e 100644 --- a/net/netfilter/xt_owner.c +++ b/net/netfilter/xt_owner.c @@ -34,12 +34,12 @@ owner_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) return false; if (info->match & IPT_OWNER_UID) - if ((filp->f_uid != info->uid) ^ + if ((filp->f_cred->fsuid != info->uid) ^ !!(info->invert & IPT_OWNER_UID)) return false; if (info->match & IPT_OWNER_GID) - if ((filp->f_gid != info->gid) ^ + if ((filp->f_cred->fsgid != info->gid) ^ !!(info->invert & IPT_OWNER_GID)) return false; @@ -60,12 +60,12 @@ owner_mt6_v0(const struct sk_buff *skb, const struct xt_match_param *par) return false; if (info->match & IP6T_OWNER_UID) - if ((filp->f_uid != info->uid) ^ + if ((filp->f_cred->fsuid != info->uid) ^ !!(info->invert & IP6T_OWNER_UID)) return false; if (info->match & IP6T_OWNER_GID) - if ((filp->f_gid != info->gid) ^ + if ((filp->f_cred->fsgid != info->gid) ^ !!(info->invert & IP6T_OWNER_GID)) return false; @@ -93,14 +93,14 @@ owner_mt(const struct sk_buff *skb, const struct xt_match_param *par) (XT_OWNER_UID | XT_OWNER_GID)) == 0; if (info->match & XT_OWNER_UID) - if ((filp->f_uid >= info->uid_min && - filp->f_uid <= info->uid_max) ^ + if ((filp->f_cred->fsuid >= info->uid_min && + filp->f_cred->fsuid <= info->uid_max) ^ !(info->invert & XT_OWNER_UID)) return false; if (info->match & XT_OWNER_GID) - if ((filp->f_gid >= info->gid_min && - filp->f_gid <= info->gid_max) ^ + if ((filp->f_cred->fsgid >= info->gid_min && + filp->f_cred->fsgid <= info->gid_max) ^ !(info->invert & XT_OWNER_GID)) return false; diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 280c471bcdf..fe80b614a40 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -72,9 +72,6 @@ struct recent_entry { struct recent_table { struct list_head list; char name[XT_RECENT_NAME_LEN]; -#ifdef CONFIG_PROC_FS - struct proc_dir_entry *proc_old, *proc; -#endif unsigned int refcnt; unsigned int entries; struct list_head lru_list; @@ -284,6 +281,9 @@ static bool recent_mt_check(const struct xt_mtchk_param *par) { const struct xt_recent_mtinfo *info = par->matchinfo; struct recent_table *t; +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *pde; +#endif unsigned i; bool ret = false; @@ -318,25 +318,25 @@ static bool recent_mt_check(const struct xt_mtchk_param *par) for (i = 0; i < ip_list_hash_size; i++) INIT_LIST_HEAD(&t->iphash[i]); #ifdef CONFIG_PROC_FS - t->proc = proc_create_data(t->name, ip_list_perms, recent_proc_dir, + pde = proc_create_data(t->name, ip_list_perms, recent_proc_dir, &recent_mt_fops, t); - if (t->proc == NULL) { + if (pde == NULL) { kfree(t); goto out; } + pde->uid = ip_list_uid; + pde->gid = ip_list_gid; #ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT - t->proc_old = proc_create_data(t->name, ip_list_perms, proc_old_dir, + pde = proc_create_data(t->name, ip_list_perms, proc_old_dir, &recent_old_fops, t); - if (t->proc_old == NULL) { + if (pde == NULL) { remove_proc_entry(t->name, proc_old_dir); kfree(t); goto out; } - t->proc_old->uid = ip_list_uid; - t->proc_old->gid = ip_list_gid; + pde->uid = ip_list_uid; + pde->gid = ip_list_gid; #endif - t->proc->uid = ip_list_uid; - t->proc->gid = ip_list_gid; #endif spin_lock_bh(&recent_lock); list_add_tail(&t->list, &tables); @@ -422,13 +422,11 @@ static int recent_seq_show(struct seq_file *seq, void *v) i = (e->index - 1) % ip_pkt_list_tot; if (e->family == NFPROTO_IPV4) - seq_printf(seq, "src=" NIPQUAD_FMT " ttl: %u last_seen: %lu " - "oldest_pkt: %u", NIPQUAD(e->addr.ip), e->ttl, - e->stamps[i], e->index); + seq_printf(seq, "src=%pI4 ttl: %u last_seen: %lu oldest_pkt: %u", + &e->addr.ip, e->ttl, e->stamps[i], e->index); else - seq_printf(seq, "src=" NIP6_FMT " ttl: %u last_seen: %lu " - "oldest_pkt: %u", NIP6(e->addr.in6), e->ttl, - e->stamps[i], e->index); + seq_printf(seq, "src=%pI6 ttl: %u last_seen: %lu oldest_pkt: %u", + &e->addr.in6, e->ttl, e->stamps[i], e->index); for (i = 0; i < e->nstamps; i++) seq_printf(seq, "%s %lu", i ? "," : "", e->stamps[i]); seq_printf(seq, "\n"); diff --git a/net/netlabel/netlabel_addrlist.c b/net/netlabel/netlabel_addrlist.c index 249f6b92f15..834c6eb7f48 100644 --- a/net/netlabel/netlabel_addrlist.c +++ b/net/netlabel/netlabel_addrlist.c @@ -337,7 +337,7 @@ void netlbl_af4list_audit_addr(struct audit_buffer *audit_buf, if (dev != NULL) audit_log_format(audit_buf, " netif=%s", dev); - audit_log_format(audit_buf, " %s=" NIPQUAD_FMT, dir, NIPQUAD(addr)); + audit_log_format(audit_buf, " %s=%pI4", dir, &addr); if (mask_val != 0xffffffff) { u32 mask_len = 0; while (mask_val > 0) { @@ -371,7 +371,7 @@ void netlbl_af6list_audit_addr(struct audit_buffer *audit_buf, if (dev != NULL) audit_log_format(audit_buf, " netif=%s", dev); - audit_log_format(audit_buf, " %s=" NIP6_FMT, dir, NIP6(*addr)); + audit_log_format(audit_buf, " %s=%pI6", dir, addr); if (ntohl(mask->s6_addr32[3]) != 0xffffffff) { u32 mask_len = 0; u32 mask_val; diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 0a0ef17b2a4..1821c5d50fb 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -596,7 +596,6 @@ listdef_failure: /** * netlbl_mgmt_protocols_cb - Write an individual PROTOCOL message response * @skb: the skb to write to - * @seq: the NETLINK sequence number * @cb: the NETLINK callback * @protocol: the NetLabel protocol to use in the message * diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 90c8506a0aa..8c030803217 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -562,7 +562,6 @@ static int netlbl_unlhsh_remove_addr4(struct net *net, const struct in_addr *mask, struct netlbl_audit *audit_info) { - int ret_val = 0; struct netlbl_af4list *list_entry; struct netlbl_unlhsh_addr4 *entry; struct audit_buffer *audit_buf; @@ -577,7 +576,7 @@ static int netlbl_unlhsh_remove_addr4(struct net *net, if (list_entry != NULL) entry = netlbl_unlhsh_addr4_entry(list_entry); else - ret_val = -ENOENT; + entry = NULL; audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL, audit_info); @@ -588,19 +587,21 @@ static int netlbl_unlhsh_remove_addr4(struct net *net, addr->s_addr, mask->s_addr); if (dev != NULL) dev_put(dev); - if (entry && security_secid_to_secctx(entry->secid, - &secctx, - &secctx_len) == 0) { + if (entry != NULL && + security_secid_to_secctx(entry->secid, + &secctx, &secctx_len) == 0) { audit_log_format(audit_buf, " sec_obj=%s", secctx); security_release_secctx(secctx, secctx_len); } - audit_log_format(audit_buf, " res=%u", ret_val == 0 ? 1 : 0); + audit_log_format(audit_buf, " res=%u", entry != NULL ? 1 : 0); audit_log_end(audit_buf); } - if (ret_val == 0) - call_rcu(&entry->rcu, netlbl_unlhsh_free_addr4); - return ret_val; + if (entry == NULL) + return -ENOENT; + + call_rcu(&entry->rcu, netlbl_unlhsh_free_addr4); + return 0; } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) @@ -624,7 +625,6 @@ static int netlbl_unlhsh_remove_addr6(struct net *net, const struct in6_addr *mask, struct netlbl_audit *audit_info) { - int ret_val = 0; struct netlbl_af6list *list_entry; struct netlbl_unlhsh_addr6 *entry; struct audit_buffer *audit_buf; @@ -638,7 +638,7 @@ static int netlbl_unlhsh_remove_addr6(struct net *net, if (list_entry != NULL) entry = netlbl_unlhsh_addr6_entry(list_entry); else - ret_val = -ENOENT; + entry = NULL; audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL, audit_info); @@ -649,19 +649,21 @@ static int netlbl_unlhsh_remove_addr6(struct net *net, addr, mask); if (dev != NULL) dev_put(dev); - if (entry && security_secid_to_secctx(entry->secid, - &secctx, - &secctx_len) == 0) { + if (entry != NULL && + security_secid_to_secctx(entry->secid, + &secctx, &secctx_len) == 0) { audit_log_format(audit_buf, " sec_obj=%s", secctx); security_release_secctx(secctx, secctx_len); } - audit_log_format(audit_buf, " res=%u", ret_val == 0 ? 1 : 0); + audit_log_format(audit_buf, " res=%u", entry != NULL ? 1 : 0); audit_log_end(audit_buf); } - if (ret_val == 0) - call_rcu(&entry->rcu, netlbl_unlhsh_free_addr6); - return ret_val; + if (entry == NULL) + return -ENOENT; + + call_rcu(&entry->rcu, netlbl_unlhsh_free_addr6); + return 0; } #endif /* IPv6 */ diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 480184a857d..9eb895c7a2a 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -452,6 +452,10 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol) if (err < 0) goto out_module; + local_bh_disable(); + sock_prot_inuse_add(net, &netlink_proto, 1); + local_bh_enable(); + nlk = nlk_sk(sock->sk); nlk->module = module; out: @@ -511,6 +515,9 @@ static int netlink_release(struct socket *sock) kfree(nlk->groups); nlk->groups = NULL; + local_bh_disable(); + sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1); + local_bh_enable(); sock_put(sk); return 0; } diff --git a/net/netlink/attr.c b/net/netlink/attr.c index 2d106cfe1d2..56c3ce7fe29 100644 --- a/net/netlink/attr.c +++ b/net/netlink/attr.c @@ -83,6 +83,12 @@ static int validate_nla(struct nlattr *nla, int maxtype, if (attrlen < NLA_ALIGN(pt->len) + NLA_HDRLEN + nla_len(nla)) return -ERANGE; break; + case NLA_NESTED: + /* a nested attributes is allowed to be empty; if its not, + * it must have a size of at least NLA_HDRLEN. + */ + if (attrlen == 0) + break; default: if (pt->len) minlen = pt->len; @@ -233,7 +239,7 @@ size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize) * * Returns the number of bytes copied. */ -int nla_memcpy(void *dest, struct nlattr *src, int count) +int nla_memcpy(void *dest, const struct nlattr *src, int count) { int minlen = min_t(int, count, nla_len(src)); diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 9f1ea4a27b3..e9c05b8f4f4 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -609,7 +609,7 @@ static int nr_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) } else { source = &addr->fsa_ax25.sax25_call; - user = ax25_findbyuid(current->euid); + user = ax25_findbyuid(current_euid()); if (user) { nr->user_addr = user->call; ax25_uid_put(user); @@ -683,7 +683,7 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr, } source = (ax25_address *)dev->dev_addr; - user = ax25_findbyuid(current->euid); + user = ax25_findbyuid(current_euid()); if (user) { nr->user_addr = user->call; ax25_uid_put(user); diff --git a/net/netrom/sysctl_net_netrom.c b/net/netrom/sysctl_net_netrom.c index 34c96c9674d..7b49591fe87 100644 --- a/net/netrom/sysctl_net_netrom.c +++ b/net/netrom/sysctl_net_netrom.c @@ -41,8 +41,8 @@ static ctl_table nr_table[] = { .data = &sysctl_netrom_default_path_quality, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_quality, .extra2 = &max_quality }, @@ -52,8 +52,8 @@ static ctl_table nr_table[] = { .data = &sysctl_netrom_obsolescence_count_initialiser, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_obs, .extra2 = &max_obs }, @@ -63,8 +63,8 @@ static ctl_table nr_table[] = { .data = &sysctl_netrom_network_ttl_initialiser, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_ttl, .extra2 = &max_ttl }, @@ -74,8 +74,8 @@ static ctl_table nr_table[] = { .data = &sysctl_netrom_transport_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_t1, .extra2 = &max_t1 }, @@ -85,8 +85,8 @@ static ctl_table nr_table[] = { .data = &sysctl_netrom_transport_maximum_tries, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_n2, .extra2 = &max_n2 }, @@ -96,8 +96,8 @@ static ctl_table nr_table[] = { .data = &sysctl_netrom_transport_acknowledge_delay, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_t2, .extra2 = &max_t2 }, @@ -107,8 +107,8 @@ static ctl_table nr_table[] = { .data = &sysctl_netrom_transport_busy_delay, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_t4, .extra2 = &max_t4 }, @@ -118,8 +118,8 @@ static ctl_table nr_table[] = { .data = &sysctl_netrom_transport_requested_window_size, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_window, .extra2 = &max_window }, @@ -129,8 +129,8 @@ static ctl_table nr_table[] = { .data = &sysctl_netrom_transport_no_activity_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_idle, .extra2 = &max_idle }, @@ -140,8 +140,8 @@ static ctl_table nr_table[] = { .data = &sysctl_netrom_routing_control, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_route, .extra2 = &max_route }, @@ -151,8 +151,8 @@ static ctl_table nr_table[] = { .data = &sysctl_netrom_link_fails_count, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_fails, .extra2 = &max_fails }, @@ -162,8 +162,8 @@ static ctl_table nr_table[] = { .data = &sysctl_netrom_reset_circuit, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_reset, .extra2 = &max_reset }, diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c718e7e3f7d..5f94db2f3e9 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -872,6 +872,7 @@ static int packet_release(struct socket *sock) write_lock_bh(&net->packet.sklist_lock); sk_del_node_init(sk); + sock_prot_inuse_add(net, sk->sk_prot, -1); write_unlock_bh(&net->packet.sklist_lock); /* @@ -1084,6 +1085,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol) write_lock_bh(&net->packet.sklist_lock); sk_add_node(sk, &net->packet.sklist); + sock_prot_inuse_add(net, &packet_proto, 1); write_unlock_bh(&net->packet.sklist_lock); return(0); out: diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 9d211f12582..13cb323f8c3 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -67,9 +67,6 @@ static int pn_socket_create(struct net *net, struct socket *sock, int protocol) struct phonet_protocol *pnp; int err; - if (net != &init_net) - return -EAFNOSUPPORT; - if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -352,9 +349,6 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev, struct sockaddr_pn sa; u16 len; - if (dev_net(dev) != &init_net) - goto out; - /* check we have at least a full Phonet header */ if (!pskb_pull(skb, sizeof(struct phonethdr))) goto out; @@ -373,7 +367,7 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev, if (pn_sockaddr_get_addr(&sa) == 0) goto out; /* currently, we cannot be device 0 */ - sk = pn_find_sock_by_sa(&sa); + sk = pn_find_sock_by_sa(dev_net(dev), &sa); if (sk == NULL) { if (can_respond(skb)) { send_obj_unreachable(skb); diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c index 9978afbd9f2..b0ceac2d6cd 100644 --- a/net/phonet/pep-gprs.c +++ b/net/phonet/pep-gprs.c @@ -40,23 +40,17 @@ struct gprs_dev { void (*old_data_ready)(struct sock *, int); void (*old_write_space)(struct sock *); - struct net_device *net; - struct net_device_stats stats; - - struct sk_buff_head tx_queue; - struct work_struct tx_work; - spinlock_t tx_lock; - unsigned tx_max; + struct net_device *dev; }; -static int gprs_type_trans(struct sk_buff *skb) +static __be16 gprs_type_trans(struct sk_buff *skb) { const u8 *pvfc; u8 buf; pvfc = skb_header_pointer(skb, 0, 1, &buf); if (!pvfc) - return 0; + return htons(0); /* Look at IP version field */ switch (*pvfc >> 4) { case 4: @@ -64,7 +58,15 @@ static int gprs_type_trans(struct sk_buff *skb) case 6: return htons(ETH_P_IPV6); } - return 0; + return htons(0); +} + +static void gprs_writeable(struct gprs_dev *gp) +{ + struct net_device *dev = gp->dev; + + if (pep_writeable(gp->sk)) + netif_wake_queue(dev); } /* @@ -73,18 +75,21 @@ static int gprs_type_trans(struct sk_buff *skb) static void gprs_state_change(struct sock *sk) { - struct gprs_dev *dev = sk->sk_user_data; + struct gprs_dev *gp = sk->sk_user_data; if (sk->sk_state == TCP_CLOSE_WAIT) { - netif_stop_queue(dev->net); - netif_carrier_off(dev->net); + struct net_device *dev = gp->dev; + + netif_stop_queue(dev); + netif_carrier_off(dev); } } -static int gprs_recv(struct gprs_dev *dev, struct sk_buff *skb) +static int gprs_recv(struct gprs_dev *gp, struct sk_buff *skb) { + struct net_device *dev = gp->dev; int err = 0; - u16 protocol = gprs_type_trans(skb); + __be16 protocol = gprs_type_trans(skb); if (!protocol) { err = -EINVAL; @@ -99,7 +104,7 @@ static int gprs_recv(struct gprs_dev *dev, struct sk_buff *skb) * so wrap the IP packet as a single fragment of an head-less * socket buffer. The network stack will pull what it needs, * but at least, the whole IP payload is not memcpy'd. */ - rskb = netdev_alloc_skb(dev->net, 0); + rskb = netdev_alloc_skb(dev, 0); if (!rskb) { err = -ENOBUFS; goto drop; @@ -123,9 +128,9 @@ static int gprs_recv(struct gprs_dev *dev, struct sk_buff *skb) skb->protocol = protocol; skb_reset_mac_header(skb); - skb->dev = dev->net; + skb->dev = dev; - if (likely(dev->net->flags & IFF_UP)) { + if (likely(dev->flags & IFF_UP)) { dev->stats.rx_packets++; dev->stats.rx_bytes += skb->len; netif_rx(skb); @@ -143,34 +148,46 @@ drop: static void gprs_data_ready(struct sock *sk, int len) { - struct gprs_dev *dev = sk->sk_user_data; + struct gprs_dev *gp = sk->sk_user_data; struct sk_buff *skb; while ((skb = pep_read(sk)) != NULL) { skb_orphan(skb); - gprs_recv(dev, skb); + gprs_recv(gp, skb); } } static void gprs_write_space(struct sock *sk) { - struct gprs_dev *dev = sk->sk_user_data; - unsigned credits = pep_writeable(sk); - - spin_lock_bh(&dev->tx_lock); - dev->tx_max = credits; - if (credits > skb_queue_len(&dev->tx_queue)) - netif_wake_queue(dev->net); - spin_unlock_bh(&dev->tx_lock); + struct gprs_dev *gp = sk->sk_user_data; + + if (netif_running(gp->dev)) + gprs_writeable(gp); } /* * Network device callbacks */ -static int gprs_xmit(struct sk_buff *skb, struct net_device *net) +static int gprs_open(struct net_device *dev) { - struct gprs_dev *dev = netdev_priv(net); + struct gprs_dev *gp = netdev_priv(dev); + + gprs_writeable(gp); + return 0; +} + +static int gprs_close(struct net_device *dev) +{ + netif_stop_queue(dev); + return 0; +} + +static int gprs_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct gprs_dev *gp = netdev_priv(dev); + struct sock *sk = gp->sk; + int len, err; switch (skb->protocol) { case htons(ETH_P_IP): @@ -181,82 +198,50 @@ static int gprs_xmit(struct sk_buff *skb, struct net_device *net) return 0; } - spin_lock(&dev->tx_lock); - if (likely(skb_queue_len(&dev->tx_queue) < dev->tx_max)) { - skb_queue_tail(&dev->tx_queue, skb); - skb = NULL; - } - if (skb_queue_len(&dev->tx_queue) >= dev->tx_max) - netif_stop_queue(net); - spin_unlock(&dev->tx_lock); - - schedule_work(&dev->tx_work); - if (unlikely(skb)) + skb_orphan(skb); + skb_set_owner_w(skb, sk); + len = skb->len; + err = pep_write(sk, skb); + if (err) { + LIMIT_NETDEBUG(KERN_WARNING"%s: TX error (%d)\n", + dev->name, err); + dev->stats.tx_aborted_errors++; + dev->stats.tx_errors++; dev_kfree_skb(skb); - return 0; -} - -static void gprs_tx(struct work_struct *work) -{ - struct gprs_dev *dev = container_of(work, struct gprs_dev, tx_work); - struct sock *sk = dev->sk; - struct sk_buff *skb; - - while ((skb = skb_dequeue(&dev->tx_queue)) != NULL) { - int err; - - dev->stats.tx_bytes += skb->len; + } else { dev->stats.tx_packets++; - - skb_orphan(skb); - skb_set_owner_w(skb, sk); - - lock_sock(sk); - err = pep_write(sk, skb); - if (err) { - LIMIT_NETDEBUG(KERN_WARNING"%s: TX error (%d)\n", - dev->net->name, err); - dev->stats.tx_aborted_errors++; - dev->stats.tx_errors++; - } - release_sock(sk); + dev->stats.tx_bytes += len; } - lock_sock(sk); - gprs_write_space(sk); - release_sock(sk); + if (!pep_writeable(sk)) + netif_stop_queue(dev); + return 0; } -static int gprs_set_mtu(struct net_device *net, int new_mtu) +static int gprs_set_mtu(struct net_device *dev, int new_mtu) { if ((new_mtu < 576) || (new_mtu > (PHONET_MAX_MTU - 11))) return -EINVAL; - net->mtu = new_mtu; + dev->mtu = new_mtu; return 0; } -static struct net_device_stats *gprs_get_stats(struct net_device *net) -{ - struct gprs_dev *dev = netdev_priv(net); - - return &dev->stats; -} - -static void gprs_setup(struct net_device *net) +static void gprs_setup(struct net_device *dev) { - net->features = NETIF_F_FRAGLIST; - net->type = ARPHRD_NONE; - net->flags = IFF_POINTOPOINT | IFF_NOARP; - net->mtu = GPRS_DEFAULT_MTU; - net->hard_header_len = 0; - net->addr_len = 0; - net->tx_queue_len = 10; - - net->destructor = free_netdev; - net->hard_start_xmit = gprs_xmit; /* mandatory */ - net->change_mtu = gprs_set_mtu; - net->get_stats = gprs_get_stats; + dev->features = NETIF_F_FRAGLIST; + dev->type = ARPHRD_PHONET_PIPE; + dev->flags = IFF_POINTOPOINT | IFF_NOARP; + dev->mtu = GPRS_DEFAULT_MTU; + dev->hard_header_len = 0; + dev->addr_len = 0; + dev->tx_queue_len = 10; + + dev->destructor = free_netdev; + dev->open = gprs_open; + dev->stop = gprs_close; + dev->hard_start_xmit = gprs_xmit; /* mandatory */ + dev->change_mtu = gprs_set_mtu; } /* @@ -270,28 +255,25 @@ static void gprs_setup(struct net_device *net) int gprs_attach(struct sock *sk) { static const char ifname[] = "gprs%d"; - struct gprs_dev *dev; - struct net_device *net; + struct gprs_dev *gp; + struct net_device *dev; int err; if (unlikely(sk->sk_type == SOCK_STREAM)) return -EINVAL; /* need packet boundaries */ /* Create net device */ - net = alloc_netdev(sizeof(*dev), ifname, gprs_setup); - if (!net) + dev = alloc_netdev(sizeof(*gp), ifname, gprs_setup); + if (!dev) return -ENOMEM; - dev = netdev_priv(net); - dev->net = net; - dev->tx_max = 0; - spin_lock_init(&dev->tx_lock); - skb_queue_head_init(&dev->tx_queue); - INIT_WORK(&dev->tx_work, gprs_tx); - - netif_stop_queue(net); - err = register_netdev(net); + gp = netdev_priv(dev); + gp->sk = sk; + gp->dev = dev; + + netif_stop_queue(dev); + err = register_netdev(dev); if (err) { - free_netdev(net); + free_netdev(dev); return err; } @@ -305,43 +287,38 @@ int gprs_attach(struct sock *sk) err = -EINVAL; goto out_rel; } - sk->sk_user_data = dev; - dev->old_state_change = sk->sk_state_change; - dev->old_data_ready = sk->sk_data_ready; - dev->old_write_space = sk->sk_write_space; + sk->sk_user_data = gp; + gp->old_state_change = sk->sk_state_change; + gp->old_data_ready = sk->sk_data_ready; + gp->old_write_space = sk->sk_write_space; sk->sk_state_change = gprs_state_change; sk->sk_data_ready = gprs_data_ready; sk->sk_write_space = gprs_write_space; release_sock(sk); - sock_hold(sk); - dev->sk = sk; - printk(KERN_DEBUG"%s: attached\n", net->name); - gprs_write_space(sk); /* kick off TX */ - return net->ifindex; + printk(KERN_DEBUG"%s: attached\n", dev->name); + return dev->ifindex; out_rel: release_sock(sk); - unregister_netdev(net); + unregister_netdev(dev); return err; } void gprs_detach(struct sock *sk) { - struct gprs_dev *dev = sk->sk_user_data; - struct net_device *net = dev->net; + struct gprs_dev *gp = sk->sk_user_data; + struct net_device *dev = gp->dev; lock_sock(sk); sk->sk_user_data = NULL; - sk->sk_state_change = dev->old_state_change; - sk->sk_data_ready = dev->old_data_ready; - sk->sk_write_space = dev->old_write_space; + sk->sk_state_change = gp->old_state_change; + sk->sk_data_ready = gp->old_data_ready; + sk->sk_write_space = gp->old_write_space; release_sock(sk); - printk(KERN_DEBUG"%s: detached\n", net->name); - unregister_netdev(net); - flush_scheduled_work(); + printk(KERN_DEBUG"%s: detached\n", dev->name); + unregister_netdev(dev); sock_put(sk); - skb_queue_purge(&dev->tx_queue); } diff --git a/net/phonet/pep.c b/net/phonet/pep.c index bc6d50f8324..bb3e67849b3 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -225,6 +225,7 @@ static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb) { struct pep_sock *pn = pep_sk(sk); struct pnpipehdr *hdr = pnp_hdr(skb); + int wake = 0; if (!pskb_may_pull(skb, sizeof(*hdr) + 4)) return -EINVAL; @@ -241,16 +242,16 @@ static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb) case PN_LEGACY_FLOW_CONTROL: switch (hdr->data[4]) { case PEP_IND_BUSY: - pn->tx_credits = 0; + atomic_set(&pn->tx_credits, 0); break; case PEP_IND_READY: - pn->tx_credits = 1; + atomic_set(&pn->tx_credits, wake = 1); break; } break; case PN_ONE_CREDIT_FLOW_CONTROL: if (hdr->data[4] == PEP_IND_READY) - pn->tx_credits = 1; + atomic_set(&pn->tx_credits, wake = 1); break; } break; @@ -258,10 +259,7 @@ static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb) case PN_PEP_IND_ID_MCFC_GRANT_CREDITS: if (pn->tx_fc != PN_MULTI_CREDIT_FLOW_CONTROL) break; - if (pn->tx_credits + hdr->data[4] > 0xff) - pn->tx_credits = 0xff; - else - pn->tx_credits += hdr->data[4]; + atomic_add(wake = hdr->data[4], &pn->tx_credits); break; default: @@ -269,7 +267,7 @@ static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb) (unsigned)hdr->data[1]); return -EOPNOTSUPP; } - if (pn->tx_credits) + if (wake) sk->sk_write_space(sk); return 0; } @@ -343,7 +341,7 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) } /* fall through */ case PNS_PEP_DISABLE_REQ: - pn->tx_credits = 0; + atomic_set(&pn->tx_credits, 0); pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); break; @@ -390,7 +388,7 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) /* fall through */ case PNS_PIPE_ENABLED_IND: if (!pn_flow_safe(pn->tx_fc)) { - pn->tx_credits = 1; + atomic_set(&pn->tx_credits, 1); sk->sk_write_space(sk); } if (sk->sk_state == TCP_ESTABLISHED) @@ -504,8 +502,9 @@ static int pep_connreq_rcv(struct sock *sk, struct sk_buff *skb) newpn->pn_sk.resource = pn->pn_sk.resource; skb_queue_head_init(&newpn->ctrlreq_queue); newpn->pipe_handle = pipe_handle; + atomic_set(&newpn->tx_credits, 0); newpn->peer_type = peer_type; - newpn->rx_credits = newpn->tx_credits = 0; + newpn->rx_credits = 0; newpn->rx_fc = newpn->tx_fc = PN_LEGACY_FLOW_CONTROL; newpn->init_enable = enabled; @@ -821,14 +820,18 @@ static int pipe_skb_send(struct sock *sk, struct sk_buff *skb) struct pep_sock *pn = pep_sk(sk); struct pnpipehdr *ph; + if (pn_flow_safe(pn->tx_fc) && + !atomic_add_unless(&pn->tx_credits, -1, 0)) { + kfree_skb(skb); + return -ENOBUFS; + } + skb_push(skb, 3); skb_reset_transport_header(skb); ph = pnp_hdr(skb); ph->utid = 0; ph->message_id = PNS_PIPE_DATA; ph->pipe_handle = pn->pipe_handle; - if (pn_flow_safe(pn->tx_fc) && pn->tx_credits) - pn->tx_credits--; return pn_skb_send(sk, skb, &pipe_srv); } @@ -866,7 +869,7 @@ disabled: BUG_ON(sk->sk_state != TCP_ESTABLISHED); /* Wait until flow control allows TX */ - done = pn->tx_credits > 0; + done = atomic_read(&pn->tx_credits); while (!done) { DEFINE_WAIT(wait); @@ -881,7 +884,7 @@ disabled: prepare_to_wait(&sk->sk_socket->wait, &wait, TASK_INTERRUPTIBLE); - done = sk_wait_event(sk, &timeo, pn->tx_credits > 0); + done = sk_wait_event(sk, &timeo, atomic_read(&pn->tx_credits)); finish_wait(&sk->sk_socket->wait, &wait); if (sk->sk_state != TCP_ESTABLISHED) @@ -895,7 +898,8 @@ disabled: goto out; skb_reserve(skb, MAX_PHONET_HEADER + 3); - if (sk->sk_state != TCP_ESTABLISHED || !pn->tx_credits) + if (sk->sk_state != TCP_ESTABLISHED || + !atomic_read(&pn->tx_credits)) goto disabled; /* sock_alloc_send_skb might sleep */ } @@ -917,7 +921,7 @@ int pep_writeable(struct sock *sk) { struct pep_sock *pn = pep_sk(sk); - return (sk->sk_state == TCP_ESTABLISHED) ? pn->tx_credits : 0; + return atomic_read(&pn->tx_credits); } int pep_write(struct sock *sk, struct sk_buff *skb) diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index f93ff8ef47d..5491bf5e354 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -76,7 +76,7 @@ struct net_device *phonet_device_get(struct net *net) dev = pnd->netdev; BUG_ON(!dev); - if (dev_net(dev) == net && + if (net_eq(dev_net(dev), net) && (dev->reg_state == NETREG_REGISTERED) && ((pnd->netdev->flags & IFF_UP)) == IFF_UP) break; @@ -140,12 +140,14 @@ u8 phonet_address_get(struct net_device *dev, u8 addr) return addr; } -int phonet_address_lookup(u8 addr) +int phonet_address_lookup(struct net *net, u8 addr) { struct phonet_device *pnd; spin_lock_bh(&pndevs.lock); list_for_each_entry(pnd, &pndevs.list, list) { + if (!net_eq(dev_net(pnd->netdev), net)) + continue; /* Don't allow unregistering devices! */ if ((pnd->netdev->reg_state != NETREG_REGISTERED) || ((pnd->netdev->flags & IFF_UP)) != IFF_UP) diff --git a/net/phonet/socket.c b/net/phonet/socket.c index d81740187fb..ada2a35bf7a 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -57,7 +57,7 @@ static struct { * Find address based on socket address, match only certain fields. * Also grab sock if it was found. Remember to sock_put it later. */ -struct sock *pn_find_sock_by_sa(const struct sockaddr_pn *spn) +struct sock *pn_find_sock_by_sa(struct net *net, const struct sockaddr_pn *spn) { struct hlist_node *node; struct sock *sknode; @@ -71,6 +71,8 @@ struct sock *pn_find_sock_by_sa(const struct sockaddr_pn *spn) struct pn_sock *pn = pn_sk(sknode); BUG_ON(!pn->sobject); /* unbound socket */ + if (!net_eq(sock_net(sknode), net)) + continue; if (pn_port(obj)) { /* Look up socket by port */ if (pn_port(pn->sobject) != pn_port(obj)) @@ -130,7 +132,7 @@ static int pn_socket_bind(struct socket *sock, struct sockaddr *addr, int len) handle = pn_sockaddr_get_object((struct sockaddr_pn *)addr); saddr = pn_addr(handle); - if (saddr && phonet_address_lookup(saddr)) + if (saddr && phonet_address_lookup(sock_net(sk), saddr)) return -EADDRNOTAVAIL; lock_sock(sk); @@ -225,7 +227,7 @@ static unsigned int pn_socket_poll(struct file *file, struct socket *sock, if (!mask && sk->sk_state == TCP_CLOSE_WAIT) return POLLHUP; - if (sk->sk_state == TCP_ESTABLISHED && pn->tx_credits) + if (sk->sk_state == TCP_ESTABLISHED && atomic_read(&pn->tx_credits)) mask |= POLLOUT | POLLWRNORM | POLLWRBAND; return mask; @@ -361,6 +363,7 @@ static DEFINE_MUTEX(port_mutex); int pn_sock_get_port(struct sock *sk, unsigned short sport) { static int port_cur; + struct net *net = sock_net(sk); struct pn_sock *pn = pn_sk(sk); struct sockaddr_pn try_sa; struct sock *tmpsk; @@ -381,7 +384,7 @@ int pn_sock_get_port(struct sock *sk, unsigned short sport) port_cur = pmin; pn_sockaddr_set_port(&try_sa, port_cur); - tmpsk = pn_find_sock_by_sa(&try_sa); + tmpsk = pn_find_sock_by_sa(net, &try_sa); if (tmpsk == NULL) { sport = port_cur; goto found; @@ -391,7 +394,7 @@ int pn_sock_get_port(struct sock *sk, unsigned short sport) } else { /* try to find specific port */ pn_sockaddr_set_port(&try_sa, sport); - tmpsk = pn_find_sock_by_sa(&try_sa); + tmpsk = pn_find_sock_by_sa(net, &try_sa); if (tmpsk == NULL) /* No sock there! We can use that port... */ goto found; diff --git a/net/phonet/sysctl.c b/net/phonet/sysctl.c index 600a4309b8c..7b5749ee276 100644 --- a/net/phonet/sysctl.c +++ b/net/phonet/sysctl.c @@ -89,13 +89,13 @@ static struct ctl_table phonet_table[] = { .data = &local_port_range, .maxlen = sizeof(local_port_range), .mode = 0644, - .proc_handler = &proc_local_port_range, + .proc_handler = proc_local_port_range, .strategy = NULL, }, { .ctl_name = 0 } }; -struct ctl_path phonet_ctl_path[] = { +static struct ctl_path phonet_ctl_path[] = { { .procname = "net", .ctl_name = CTL_NET, }, { .procname = "phonet", .ctl_name = CTL_UNNUMBERED, }, { }, diff --git a/net/rfkill/rfkill-input.c b/net/rfkill/rfkill-input.c index bfdade72e06..84efde97c5a 100644 --- a/net/rfkill/rfkill-input.c +++ b/net/rfkill/rfkill-input.c @@ -24,138 +24,318 @@ MODULE_AUTHOR("Dmitry Torokhov <dtor@mail.ru>"); MODULE_DESCRIPTION("Input layer to RF switch connector"); MODULE_LICENSE("GPL"); +enum rfkill_input_master_mode { + RFKILL_INPUT_MASTER_DONOTHING = 0, + RFKILL_INPUT_MASTER_RESTORE = 1, + RFKILL_INPUT_MASTER_UNBLOCKALL = 2, + RFKILL_INPUT_MASTER_MAX, /* marker */ +}; + +/* Delay (in ms) between consecutive switch ops */ +#define RFKILL_OPS_DELAY 200 + +static enum rfkill_input_master_mode rfkill_master_switch_mode = + RFKILL_INPUT_MASTER_UNBLOCKALL; +module_param_named(master_switch_mode, rfkill_master_switch_mode, uint, 0); +MODULE_PARM_DESC(master_switch_mode, + "SW_RFKILL_ALL ON should: 0=do nothing; 1=restore; 2=unblock all"); + +enum rfkill_global_sched_op { + RFKILL_GLOBAL_OP_EPO = 0, + RFKILL_GLOBAL_OP_RESTORE, + RFKILL_GLOBAL_OP_UNLOCK, + RFKILL_GLOBAL_OP_UNBLOCK, +}; + +/* + * Currently, the code marked with RFKILL_NEED_SWSET is inactive. + * If handling of EV_SW SW_WLAN/WWAN/BLUETOOTH/etc is needed in the + * future, when such events are added, that code will be necessary. + */ + struct rfkill_task { - struct work_struct work; - enum rfkill_type type; - struct mutex mutex; /* ensures that task is serialized */ - spinlock_t lock; /* for accessing last and desired state */ - unsigned long last; /* last schedule */ - enum rfkill_state desired_state; /* on/off */ + struct delayed_work dwork; + + /* ensures that task is serialized */ + struct mutex mutex; + + /* protects everything below */ + spinlock_t lock; + + /* pending regular switch operations (1=pending) */ + unsigned long sw_pending[BITS_TO_LONGS(RFKILL_TYPE_MAX)]; + +#ifdef RFKILL_NEED_SWSET + /* set operation pending (1=pending) */ + unsigned long sw_setpending[BITS_TO_LONGS(RFKILL_TYPE_MAX)]; + + /* desired state for pending set operation (1=unblock) */ + unsigned long sw_newstate[BITS_TO_LONGS(RFKILL_TYPE_MAX)]; +#endif + + /* should the state be complemented (1=yes) */ + unsigned long sw_togglestate[BITS_TO_LONGS(RFKILL_TYPE_MAX)]; + + bool global_op_pending; + enum rfkill_global_sched_op op; + + /* last time it was scheduled */ + unsigned long last_scheduled; }; +static void __rfkill_handle_global_op(enum rfkill_global_sched_op op) +{ + unsigned int i; + + switch (op) { + case RFKILL_GLOBAL_OP_EPO: + rfkill_epo(); + break; + case RFKILL_GLOBAL_OP_RESTORE: + rfkill_restore_states(); + break; + case RFKILL_GLOBAL_OP_UNLOCK: + rfkill_remove_epo_lock(); + break; + case RFKILL_GLOBAL_OP_UNBLOCK: + rfkill_remove_epo_lock(); + for (i = 0; i < RFKILL_TYPE_MAX; i++) + rfkill_switch_all(i, RFKILL_STATE_UNBLOCKED); + break; + default: + /* memory corruption or bug, fail safely */ + rfkill_epo(); + WARN(1, "Unknown requested operation %d! " + "rfkill Emergency Power Off activated\n", + op); + } +} + +#ifdef RFKILL_NEED_SWSET +static void __rfkill_handle_normal_op(const enum rfkill_type type, + const bool sp, const bool s, const bool c) +{ + enum rfkill_state state; + + if (sp) + state = (s) ? RFKILL_STATE_UNBLOCKED : + RFKILL_STATE_SOFT_BLOCKED; + else + state = rfkill_get_global_state(type); + + if (c) + state = rfkill_state_complement(state); + + rfkill_switch_all(type, state); +} +#else +static void __rfkill_handle_normal_op(const enum rfkill_type type, + const bool c) +{ + enum rfkill_state state; + + state = rfkill_get_global_state(type); + if (c) + state = rfkill_state_complement(state); + + rfkill_switch_all(type, state); +} +#endif + static void rfkill_task_handler(struct work_struct *work) { - struct rfkill_task *task = container_of(work, struct rfkill_task, work); + struct rfkill_task *task = container_of(work, + struct rfkill_task, dwork.work); + bool doit = true; mutex_lock(&task->mutex); - rfkill_switch_all(task->type, task->desired_state); + spin_lock_irq(&task->lock); + while (doit) { + if (task->global_op_pending) { + enum rfkill_global_sched_op op = task->op; + task->global_op_pending = false; + memset(task->sw_pending, 0, sizeof(task->sw_pending)); + spin_unlock_irq(&task->lock); + + __rfkill_handle_global_op(op); + + /* make sure we do at least one pass with + * !task->global_op_pending */ + spin_lock_irq(&task->lock); + continue; + } else if (!rfkill_is_epo_lock_active()) { + unsigned int i = 0; + + while (!task->global_op_pending && + i < RFKILL_TYPE_MAX) { + if (test_and_clear_bit(i, task->sw_pending)) { + bool c; +#ifdef RFKILL_NEED_SWSET + bool sp, s; + sp = test_and_clear_bit(i, + task->sw_setpending); + s = test_bit(i, task->sw_newstate); +#endif + c = test_and_clear_bit(i, + task->sw_togglestate); + spin_unlock_irq(&task->lock); + +#ifdef RFKILL_NEED_SWSET + __rfkill_handle_normal_op(i, sp, s, c); +#else + __rfkill_handle_normal_op(i, c); +#endif + + spin_lock_irq(&task->lock); + } + i++; + } + } + doit = task->global_op_pending; + } + spin_unlock_irq(&task->lock); mutex_unlock(&task->mutex); } -static void rfkill_task_epo_handler(struct work_struct *work) +static struct rfkill_task rfkill_task = { + .dwork = __DELAYED_WORK_INITIALIZER(rfkill_task.dwork, + rfkill_task_handler), + .mutex = __MUTEX_INITIALIZER(rfkill_task.mutex), + .lock = __SPIN_LOCK_UNLOCKED(rfkill_task.lock), +}; + +static unsigned long rfkill_ratelimit(const unsigned long last) { - rfkill_epo(); + const unsigned long delay = msecs_to_jiffies(RFKILL_OPS_DELAY); + return (time_after(jiffies, last + delay)) ? 0 : delay; } -static DECLARE_WORK(epo_work, rfkill_task_epo_handler); +static void rfkill_schedule_ratelimited(void) +{ + if (!delayed_work_pending(&rfkill_task.dwork)) { + schedule_delayed_work(&rfkill_task.dwork, + rfkill_ratelimit(rfkill_task.last_scheduled)); + rfkill_task.last_scheduled = jiffies; + } +} -static void rfkill_schedule_epo(void) +static void rfkill_schedule_global_op(enum rfkill_global_sched_op op) { - schedule_work(&epo_work); + unsigned long flags; + + spin_lock_irqsave(&rfkill_task.lock, flags); + rfkill_task.op = op; + rfkill_task.global_op_pending = true; + if (op == RFKILL_GLOBAL_OP_EPO && !rfkill_is_epo_lock_active()) { + /* bypass the limiter for EPO */ + cancel_delayed_work(&rfkill_task.dwork); + schedule_delayed_work(&rfkill_task.dwork, 0); + rfkill_task.last_scheduled = jiffies; + } else + rfkill_schedule_ratelimited(); + spin_unlock_irqrestore(&rfkill_task.lock, flags); } -static void rfkill_schedule_set(struct rfkill_task *task, +#ifdef RFKILL_NEED_SWSET +/* Use this if you need to add EV_SW SW_WLAN/WWAN/BLUETOOTH/etc handling */ + +static void rfkill_schedule_set(enum rfkill_type type, enum rfkill_state desired_state) { unsigned long flags; - if (unlikely(work_pending(&epo_work))) + if (rfkill_is_epo_lock_active()) return; - spin_lock_irqsave(&task->lock, flags); - - if (time_after(jiffies, task->last + msecs_to_jiffies(200))) { - task->desired_state = desired_state; - task->last = jiffies; - schedule_work(&task->work); + spin_lock_irqsave(&rfkill_task.lock, flags); + if (!rfkill_task.global_op_pending) { + set_bit(type, rfkill_task.sw_pending); + set_bit(type, rfkill_task.sw_setpending); + clear_bit(type, rfkill_task.sw_togglestate); + if (desired_state) + set_bit(type, rfkill_task.sw_newstate); + else + clear_bit(type, rfkill_task.sw_newstate); + rfkill_schedule_ratelimited(); } - - spin_unlock_irqrestore(&task->lock, flags); + spin_unlock_irqrestore(&rfkill_task.lock, flags); } +#endif -static void rfkill_schedule_toggle(struct rfkill_task *task) +static void rfkill_schedule_toggle(enum rfkill_type type) { unsigned long flags; - if (unlikely(work_pending(&epo_work))) + if (rfkill_is_epo_lock_active()) return; - spin_lock_irqsave(&task->lock, flags); - - if (time_after(jiffies, task->last + msecs_to_jiffies(200))) { - task->desired_state = - rfkill_state_complement(task->desired_state); - task->last = jiffies; - schedule_work(&task->work); + spin_lock_irqsave(&rfkill_task.lock, flags); + if (!rfkill_task.global_op_pending) { + set_bit(type, rfkill_task.sw_pending); + change_bit(type, rfkill_task.sw_togglestate); + rfkill_schedule_ratelimited(); } - - spin_unlock_irqrestore(&task->lock, flags); + spin_unlock_irqrestore(&rfkill_task.lock, flags); } -#define DEFINE_RFKILL_TASK(n, t) \ - struct rfkill_task n = { \ - .work = __WORK_INITIALIZER(n.work, \ - rfkill_task_handler), \ - .type = t, \ - .mutex = __MUTEX_INITIALIZER(n.mutex), \ - .lock = __SPIN_LOCK_UNLOCKED(n.lock), \ - .desired_state = RFKILL_STATE_UNBLOCKED, \ - } - -static DEFINE_RFKILL_TASK(rfkill_wlan, RFKILL_TYPE_WLAN); -static DEFINE_RFKILL_TASK(rfkill_bt, RFKILL_TYPE_BLUETOOTH); -static DEFINE_RFKILL_TASK(rfkill_uwb, RFKILL_TYPE_UWB); -static DEFINE_RFKILL_TASK(rfkill_wimax, RFKILL_TYPE_WIMAX); -static DEFINE_RFKILL_TASK(rfkill_wwan, RFKILL_TYPE_WWAN); - static void rfkill_schedule_evsw_rfkillall(int state) { - /* EVERY radio type. state != 0 means radios ON */ - /* handle EPO (emergency power off) through shortcut */ if (state) { - rfkill_schedule_set(&rfkill_wwan, - RFKILL_STATE_UNBLOCKED); - rfkill_schedule_set(&rfkill_wimax, - RFKILL_STATE_UNBLOCKED); - rfkill_schedule_set(&rfkill_uwb, - RFKILL_STATE_UNBLOCKED); - rfkill_schedule_set(&rfkill_bt, - RFKILL_STATE_UNBLOCKED); - rfkill_schedule_set(&rfkill_wlan, - RFKILL_STATE_UNBLOCKED); + switch (rfkill_master_switch_mode) { + case RFKILL_INPUT_MASTER_UNBLOCKALL: + rfkill_schedule_global_op(RFKILL_GLOBAL_OP_UNBLOCK); + break; + case RFKILL_INPUT_MASTER_RESTORE: + rfkill_schedule_global_op(RFKILL_GLOBAL_OP_RESTORE); + break; + case RFKILL_INPUT_MASTER_DONOTHING: + rfkill_schedule_global_op(RFKILL_GLOBAL_OP_UNLOCK); + break; + default: + /* memory corruption or driver bug! fail safely */ + rfkill_schedule_global_op(RFKILL_GLOBAL_OP_EPO); + WARN(1, "Unknown rfkill_master_switch_mode (%d), " + "driver bug or memory corruption detected!\n", + rfkill_master_switch_mode); + break; + } } else - rfkill_schedule_epo(); + rfkill_schedule_global_op(RFKILL_GLOBAL_OP_EPO); } static void rfkill_event(struct input_handle *handle, unsigned int type, unsigned int code, int data) { if (type == EV_KEY && data == 1) { + enum rfkill_type t; + switch (code) { case KEY_WLAN: - rfkill_schedule_toggle(&rfkill_wlan); + t = RFKILL_TYPE_WLAN; break; case KEY_BLUETOOTH: - rfkill_schedule_toggle(&rfkill_bt); + t = RFKILL_TYPE_BLUETOOTH; break; case KEY_UWB: - rfkill_schedule_toggle(&rfkill_uwb); + t = RFKILL_TYPE_UWB; break; case KEY_WIMAX: - rfkill_schedule_toggle(&rfkill_wimax); + t = RFKILL_TYPE_WIMAX; break; default: - break; + return; } + rfkill_schedule_toggle(t); + return; } else if (type == EV_SW) { switch (code) { case SW_RFKILL_ALL: rfkill_schedule_evsw_rfkillall(data); - break; + return; default: - break; + return; } } } @@ -256,18 +436,23 @@ static struct input_handler rfkill_handler = { static int __init rfkill_handler_init(void) { - unsigned long last_run = jiffies - msecs_to_jiffies(500); - rfkill_wlan.last = last_run; - rfkill_bt.last = last_run; - rfkill_uwb.last = last_run; - rfkill_wimax.last = last_run; + if (rfkill_master_switch_mode >= RFKILL_INPUT_MASTER_MAX) + return -EINVAL; + + /* + * The penalty to not doing this is a possible RFKILL_OPS_DELAY delay + * at the first use. Acceptable, but if we can avoid it, why not? + */ + rfkill_task.last_scheduled = + jiffies - msecs_to_jiffies(RFKILL_OPS_DELAY) - 1; return input_register_handler(&rfkill_handler); } static void __exit rfkill_handler_exit(void) { input_unregister_handler(&rfkill_handler); - flush_scheduled_work(); + cancel_delayed_work_sync(&rfkill_task.dwork); + rfkill_remove_epo_lock(); } module_init(rfkill_handler_init); diff --git a/net/rfkill/rfkill-input.h b/net/rfkill/rfkill-input.h index bbfa646157c..fe8df6b5b93 100644 --- a/net/rfkill/rfkill-input.h +++ b/net/rfkill/rfkill-input.h @@ -14,5 +14,8 @@ void rfkill_switch_all(enum rfkill_type type, enum rfkill_state state); void rfkill_epo(void); void rfkill_restore_states(void); +void rfkill_remove_epo_lock(void); +bool rfkill_is_epo_lock_active(void); +enum rfkill_state rfkill_get_global_state(const enum rfkill_type type); #endif /* __RFKILL_INPUT_H */ diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c index 25ba3bd57e6..3c94f76d552 100644 --- a/net/rfkill/rfkill.c +++ b/net/rfkill/rfkill.c @@ -51,51 +51,7 @@ struct rfkill_gsw_state { static struct rfkill_gsw_state rfkill_global_states[RFKILL_TYPE_MAX]; static unsigned long rfkill_states_lockdflt[BITS_TO_LONGS(RFKILL_TYPE_MAX)]; - -static BLOCKING_NOTIFIER_HEAD(rfkill_notifier_list); - - -/** - * register_rfkill_notifier - Add notifier to rfkill notifier chain - * @nb: pointer to the new entry to add to the chain - * - * See blocking_notifier_chain_register() for return value and further - * observations. - * - * Adds a notifier to the rfkill notifier chain. The chain will be - * called with a pointer to the relevant rfkill structure as a parameter, - * refer to include/linux/rfkill.h for the possible events. - * - * Notifiers added to this chain are to always return NOTIFY_DONE. This - * chain is a blocking notifier chain: notifiers can sleep. - * - * Calls to this chain may have been done through a workqueue. One must - * assume unordered asynchronous behaviour, there is no way to know if - * actions related to the event that generated the notification have been - * carried out already. - */ -int register_rfkill_notifier(struct notifier_block *nb) -{ - BUG_ON(!nb); - return blocking_notifier_chain_register(&rfkill_notifier_list, nb); -} -EXPORT_SYMBOL_GPL(register_rfkill_notifier); - -/** - * unregister_rfkill_notifier - remove notifier from rfkill notifier chain - * @nb: pointer to the entry to remove from the chain - * - * See blocking_notifier_chain_unregister() for return value and further - * observations. - * - * Removes a notifier from the rfkill notifier chain. - */ -int unregister_rfkill_notifier(struct notifier_block *nb) -{ - BUG_ON(!nb); - return blocking_notifier_chain_unregister(&rfkill_notifier_list, nb); -} -EXPORT_SYMBOL_GPL(unregister_rfkill_notifier); +static bool rfkill_epo_lock_active; static void rfkill_led_trigger(struct rfkill *rfkill, @@ -123,12 +79,9 @@ static void rfkill_led_trigger_activate(struct led_classdev *led) } #endif /* CONFIG_RFKILL_LEDS */ -static void notify_rfkill_state_change(struct rfkill *rfkill) +static void rfkill_uevent(struct rfkill *rfkill) { - rfkill_led_trigger(rfkill, rfkill->state); - blocking_notifier_call_chain(&rfkill_notifier_list, - RFKILL_STATE_CHANGED, - rfkill); + kobject_uevent(&rfkill->dev.kobj, KOBJ_CHANGE); } static void update_rfkill_state(struct rfkill *rfkill) @@ -141,7 +94,7 @@ static void update_rfkill_state(struct rfkill *rfkill) oldstate = rfkill->state; rfkill->state = newstate; if (oldstate != newstate) - notify_rfkill_state_change(rfkill); + rfkill_uevent(rfkill); } mutex_unlock(&rfkill->mutex); } @@ -219,7 +172,7 @@ static int rfkill_toggle_radio(struct rfkill *rfkill, } if (force || rfkill->state != oldstate) - notify_rfkill_state_change(rfkill); + rfkill_uevent(rfkill); return retval; } @@ -264,11 +217,14 @@ static void __rfkill_switch_all(const enum rfkill_type type, * * Acquires rfkill_global_mutex and calls __rfkill_switch_all(@type, @state). * Please refer to __rfkill_switch_all() for details. + * + * Does nothing if the EPO lock is active. */ void rfkill_switch_all(enum rfkill_type type, enum rfkill_state state) { mutex_lock(&rfkill_global_mutex); - __rfkill_switch_all(type, state); + if (!rfkill_epo_lock_active) + __rfkill_switch_all(type, state); mutex_unlock(&rfkill_global_mutex); } EXPORT_SYMBOL(rfkill_switch_all); @@ -289,6 +245,7 @@ void rfkill_epo(void) mutex_lock(&rfkill_global_mutex); + rfkill_epo_lock_active = true; list_for_each_entry(rfkill, &rfkill_list, node) { mutex_lock(&rfkill->mutex); rfkill_toggle_radio(rfkill, RFKILL_STATE_SOFT_BLOCKED, 1); @@ -317,6 +274,7 @@ void rfkill_restore_states(void) mutex_lock(&rfkill_global_mutex); + rfkill_epo_lock_active = false; for (i = 0; i < RFKILL_TYPE_MAX; i++) __rfkill_switch_all(i, rfkill_global_states[i].default_state); mutex_unlock(&rfkill_global_mutex); @@ -324,6 +282,48 @@ void rfkill_restore_states(void) EXPORT_SYMBOL_GPL(rfkill_restore_states); /** + * rfkill_remove_epo_lock - unlock state changes + * + * Used by rfkill-input manually unlock state changes, when + * the EPO switch is deactivated. + */ +void rfkill_remove_epo_lock(void) +{ + mutex_lock(&rfkill_global_mutex); + rfkill_epo_lock_active = false; + mutex_unlock(&rfkill_global_mutex); +} +EXPORT_SYMBOL_GPL(rfkill_remove_epo_lock); + +/** + * rfkill_is_epo_lock_active - returns true EPO is active + * + * Returns 0 (false) if there is NOT an active EPO contidion, + * and 1 (true) if there is an active EPO contition, which + * locks all radios in one of the BLOCKED states. + * + * Can be called in atomic context. + */ +bool rfkill_is_epo_lock_active(void) +{ + return rfkill_epo_lock_active; +} +EXPORT_SYMBOL_GPL(rfkill_is_epo_lock_active); + +/** + * rfkill_get_global_state - returns global state for a type + * @type: the type to get the global state of + * + * Returns the current global state for a given wireless + * device type. + */ +enum rfkill_state rfkill_get_global_state(const enum rfkill_type type) +{ + return rfkill_global_states[type].current_state; +} +EXPORT_SYMBOL_GPL(rfkill_get_global_state); + +/** * rfkill_force_state - Force the internal rfkill radio state * @rfkill: pointer to the rfkill class to modify. * @state: the current radio state the class should be forced to. @@ -357,7 +357,7 @@ int rfkill_force_state(struct rfkill *rfkill, enum rfkill_state state) rfkill->state = state; if (state != oldstate) - notify_rfkill_state_change(rfkill); + rfkill_uevent(rfkill); mutex_unlock(&rfkill->mutex); @@ -431,9 +431,15 @@ static ssize_t rfkill_state_store(struct device *dev, state != RFKILL_STATE_SOFT_BLOCKED) return -EINVAL; - if (mutex_lock_interruptible(&rfkill->mutex)) - return -ERESTARTSYS; - error = rfkill_toggle_radio(rfkill, state, 0); + error = mutex_lock_killable(&rfkill->mutex); + if (error) + return error; + + if (!rfkill_epo_lock_active) + error = rfkill_toggle_radio(rfkill, state, 0); + else + error = -EPERM; + mutex_unlock(&rfkill->mutex); return error ? error : count; @@ -472,12 +478,12 @@ static ssize_t rfkill_claim_store(struct device *dev, * Take the global lock to make sure the kernel is not in * the middle of rfkill_switch_all */ - error = mutex_lock_interruptible(&rfkill_global_mutex); + error = mutex_lock_killable(&rfkill_global_mutex); if (error) return error; if (rfkill->user_claim != claim) { - if (!claim) { + if (!claim && !rfkill_epo_lock_active) { mutex_lock(&rfkill->mutex); rfkill_toggle_radio(rfkill, rfkill_global_states[rfkill->type].current_state, @@ -511,24 +517,48 @@ static void rfkill_release(struct device *dev) #ifdef CONFIG_PM static int rfkill_suspend(struct device *dev, pm_message_t state) { + struct rfkill *rfkill = to_rfkill(dev); + /* mark class device as suspended */ if (dev->power.power_state.event != state.event) dev->power.power_state = state; + /* store state for the resume handler */ + rfkill->state_for_resume = rfkill->state; + return 0; } static int rfkill_resume(struct device *dev) { struct rfkill *rfkill = to_rfkill(dev); + enum rfkill_state newstate; if (dev->power.power_state.event != PM_EVENT_ON) { mutex_lock(&rfkill->mutex); dev->power.power_state.event = PM_EVENT_ON; - /* restore radio state AND notify everybody */ - rfkill_toggle_radio(rfkill, rfkill->state, 1); + /* + * rfkill->state could have been modified before we got + * called, and won't be updated by rfkill_toggle_radio() + * in force mode. Sync it FIRST. + */ + if (rfkill->get_state && + !rfkill->get_state(rfkill->data, &newstate)) + rfkill->state = newstate; + + /* + * If we are under EPO, kick transmitter offline, + * otherwise restore to pre-suspend state. + * + * Issue a notification in any case + */ + rfkill_toggle_radio(rfkill, + rfkill_epo_lock_active ? + RFKILL_STATE_SOFT_BLOCKED : + rfkill->state_for_resume, + 1); mutex_unlock(&rfkill->mutex); } @@ -540,28 +570,6 @@ static int rfkill_resume(struct device *dev) #define rfkill_resume NULL #endif -static int rfkill_blocking_uevent_notifier(struct notifier_block *nb, - unsigned long eventid, - void *data) -{ - struct rfkill *rfkill = (struct rfkill *)data; - - switch (eventid) { - case RFKILL_STATE_CHANGED: - kobject_uevent(&rfkill->dev.kobj, KOBJ_CHANGE); - break; - default: - break; - } - - return NOTIFY_DONE; -} - -static struct notifier_block rfkill_blocking_uevent_nb = { - .notifier_call = rfkill_blocking_uevent_notifier, - .priority = 0, -}; - static int rfkill_dev_uevent(struct device *dev, struct kobj_uevent_env *env) { struct rfkill *rfkill = to_rfkill(dev); @@ -711,7 +719,7 @@ static void rfkill_led_trigger_register(struct rfkill *rfkill) int error; if (!rfkill->led_trigger.name) - rfkill->led_trigger.name = rfkill->dev.bus_id; + rfkill->led_trigger.name = dev_name(&rfkill->dev); if (!rfkill->led_trigger.activate) rfkill->led_trigger.activate = rfkill_led_trigger_activate; error = led_trigger_register(&rfkill->led_trigger); @@ -752,8 +760,7 @@ int __must_check rfkill_register(struct rfkill *rfkill) "badly initialized rfkill struct\n")) return -EINVAL; - snprintf(dev->bus_id, sizeof(dev->bus_id), - "rfkill%ld", (long)atomic_inc_return(&rfkill_no) - 1); + dev_set_name(dev, "rfkill%ld", (long)atomic_inc_return(&rfkill_no) - 1); rfkill_led_trigger_register(rfkill); @@ -833,6 +840,7 @@ int rfkill_set_default(enum rfkill_type type, enum rfkill_state state) if (!test_and_set_bit(type, rfkill_states_lockdflt)) { rfkill_global_states[type].default_state = state; + rfkill_global_states[type].current_state = state; error = 0; } else error = -EPERM; @@ -864,14 +872,11 @@ static int __init rfkill_init(void) return error; } - register_rfkill_notifier(&rfkill_blocking_uevent_nb); - return 0; } static void __exit rfkill_exit(void) { - unregister_rfkill_notifier(&rfkill_blocking_uevent_nb); class_unregister(&rfkill_class); } diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 0c1cc761280..01392649b46 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -690,7 +690,7 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) source = &addr->srose_call; - user = ax25_findbyuid(current->euid); + user = ax25_findbyuid(current_euid()); if (user) { rose->source_call = user->call; ax25_uid_put(user); @@ -791,7 +791,7 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le goto out_release; } - user = ax25_findbyuid(current->euid); + user = ax25_findbyuid(current_euid()); if (!user) { err = -EINVAL; goto out_release; diff --git a/net/rose/sysctl_net_rose.c b/net/rose/sysctl_net_rose.c index 20be3485a97..3bfe504faf8 100644 --- a/net/rose/sysctl_net_rose.c +++ b/net/rose/sysctl_net_rose.c @@ -31,8 +31,8 @@ static ctl_table rose_table[] = { .data = &sysctl_rose_restart_request_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer }, @@ -42,8 +42,8 @@ static ctl_table rose_table[] = { .data = &sysctl_rose_call_request_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer }, @@ -53,8 +53,8 @@ static ctl_table rose_table[] = { .data = &sysctl_rose_reset_request_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer }, @@ -64,8 +64,8 @@ static ctl_table rose_table[] = { .data = &sysctl_rose_clear_request_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer }, @@ -75,8 +75,8 @@ static ctl_table rose_table[] = { .data = &sysctl_rose_no_activity_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_idle, .extra2 = &max_idle }, @@ -86,8 +86,8 @@ static ctl_table rose_table[] = { .data = &sysctl_rose_ack_hold_back_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer }, @@ -97,8 +97,8 @@ static ctl_table rose_table[] = { .data = &sysctl_rose_routing_control, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_route, .extra2 = &max_route }, @@ -108,8 +108,8 @@ static ctl_table rose_table[] = { .data = &sysctl_rose_link_fail_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_ftimer, .extra2 = &max_ftimer }, @@ -119,8 +119,8 @@ static ctl_table rose_table[] = { .data = &sysctl_rose_maximum_vcs, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_maxvcs, .extra2 = &max_maxvcs }, @@ -130,8 +130,8 @@ static ctl_table rose_table[] = { .data = &sysctl_rose_window_size, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_window, .extra2 = &max_window }, diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 32e489118be..d7d2bed7a69 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -96,9 +96,9 @@ static int rxrpc_validate_address(struct rxrpc_sock *rx, switch (srx->transport.family) { case AF_INET: - _debug("INET: %x @ %u.%u.%u.%u", + _debug("INET: %x @ %pI4", ntohs(srx->transport.sin.sin_port), - NIPQUAD(srx->transport.sin.sin_addr)); + &srx->transport.sin.sin_addr); if (srx->transport_len > 8) memset((void *)&srx->transport + 8, 0, srx->transport_len - 8); diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c index 3869a586675..0f1218b8d28 100644 --- a/net/rxrpc/ar-connection.c +++ b/net/rxrpc/ar-connection.c @@ -826,7 +826,7 @@ static void rxrpc_destroy_connection(struct rxrpc_connection *conn) /* * reap dead connections */ -void rxrpc_connection_reaper(struct work_struct *work) +static void rxrpc_connection_reaper(struct work_struct *work) { struct rxrpc_connection *conn, *_p; unsigned long now, earliest, reap_time; diff --git a/net/rxrpc/ar-connevent.c b/net/rxrpc/ar-connevent.c index 1ada43d5116..dc5cb1e1950 100644 --- a/net/rxrpc/ar-connevent.c +++ b/net/rxrpc/ar-connevent.c @@ -126,7 +126,7 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, * mark a call as being on a now-secured channel * - must be called with softirqs disabled */ -void rxrpc_call_is_secure(struct rxrpc_call *call) +static void rxrpc_call_is_secure(struct rxrpc_call *call) { _enter("%p", call); if (call) { diff --git a/net/rxrpc/ar-error.c b/net/rxrpc/ar-error.c index 6cb3e8890e7..d4d1ae26d29 100644 --- a/net/rxrpc/ar-error.c +++ b/net/rxrpc/ar-error.c @@ -49,8 +49,7 @@ void rxrpc_UDP_error_report(struct sock *sk) addr = *(__be32 *)(skb_network_header(skb) + serr->addr_offset); port = serr->port; - _net("Rx UDP Error from "NIPQUAD_FMT":%hu", - NIPQUAD(addr), ntohs(port)); + _net("Rx UDP Error from %pI4:%hu", &addr, ntohs(port)); _debug("Msg l:%d d:%d", skb->len, skb->data_len); peer = rxrpc_find_peer(local, addr, port); diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c index 9a8ff684da7..ad8c7a782da 100644 --- a/net/rxrpc/ar-key.c +++ b/net/rxrpc/ar-key.c @@ -287,6 +287,7 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *conn, time_t expiry, u32 kvno) { + const struct cred *cred = current_cred(); struct key *key; int ret; @@ -297,7 +298,7 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *conn, _enter(""); - key = key_alloc(&key_type_rxrpc, "x", 0, 0, current, 0, + key = key_alloc(&key_type_rxrpc, "x", 0, 0, cred, 0, KEY_ALLOC_NOT_IN_QUOTA); if (IS_ERR(key)) { _leave(" = -ENOMEM [alloc %ld]", PTR_ERR(key)); @@ -340,10 +341,11 @@ EXPORT_SYMBOL(rxrpc_get_server_data_key); */ struct key *rxrpc_get_null_key(const char *keyname) { + const struct cred *cred = current_cred(); struct key *key; int ret; - key = key_alloc(&key_type_rxrpc, keyname, 0, 0, current, + key = key_alloc(&key_type_rxrpc, keyname, 0, 0, cred, KEY_POS_SEARCH, KEY_ALLOC_NOT_IN_QUOTA); if (IS_ERR(key)) return key; diff --git a/net/rxrpc/ar-local.c b/net/rxrpc/ar-local.c index f3a2bd747a8..807535ff29b 100644 --- a/net/rxrpc/ar-local.c +++ b/net/rxrpc/ar-local.c @@ -131,10 +131,10 @@ struct rxrpc_local *rxrpc_lookup_local(struct sockaddr_rxrpc *srx) struct rxrpc_local *local; int ret; - _enter("{%d,%u,%u.%u.%u.%u+%hu}", + _enter("{%d,%u,%pI4+%hu}", srx->transport_type, srx->transport.family, - NIPQUAD(srx->transport.sin.sin_addr), + &srx->transport.sin.sin_addr, ntohs(srx->transport.sin.sin_port)); down_write(&rxrpc_local_sem); @@ -143,10 +143,10 @@ struct rxrpc_local *rxrpc_lookup_local(struct sockaddr_rxrpc *srx) read_lock_bh(&rxrpc_local_lock); list_for_each_entry(local, &rxrpc_locals, link) { - _debug("CMP {%d,%u,%u.%u.%u.%u+%hu}", + _debug("CMP {%d,%u,%pI4+%hu}", local->srx.transport_type, local->srx.transport.family, - NIPQUAD(local->srx.transport.sin.sin_addr), + &local->srx.transport.sin.sin_addr, ntohs(local->srx.transport.sin.sin_port)); if (local->srx.transport_type != srx->transport_type || @@ -188,11 +188,11 @@ struct rxrpc_local *rxrpc_lookup_local(struct sockaddr_rxrpc *srx) up_write(&rxrpc_local_sem); - _net("LOCAL new %d {%d,%u,%u.%u.%u.%u+%hu}", + _net("LOCAL new %d {%d,%u,%pI4+%hu}", local->debug_id, local->srx.transport_type, local->srx.transport.family, - NIPQUAD(local->srx.transport.sin.sin_addr), + &local->srx.transport.sin.sin_addr, ntohs(local->srx.transport.sin.sin_port)); _leave(" = %p [new]", local); @@ -203,11 +203,11 @@ found_local: read_unlock_bh(&rxrpc_local_lock); up_write(&rxrpc_local_sem); - _net("LOCAL old %d {%d,%u,%u.%u.%u.%u+%hu}", + _net("LOCAL old %d {%d,%u,%pI4+%hu}", local->debug_id, local->srx.transport_type, local->srx.transport.family, - NIPQUAD(local->srx.transport.sin.sin_addr), + &local->srx.transport.sin.sin_addr, ntohs(local->srx.transport.sin.sin_port)); _leave(" = %p [reuse]", local); diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c index 2abe2081a5e..edc026c1eb7 100644 --- a/net/rxrpc/ar-peer.c +++ b/net/rxrpc/ar-peer.c @@ -123,10 +123,10 @@ struct rxrpc_peer *rxrpc_get_peer(struct sockaddr_rxrpc *srx, gfp_t gfp) const char *new = "old"; int usage; - _enter("{%d,%d,%u.%u.%u.%u+%hu}", + _enter("{%d,%d,%pI4+%hu}", srx->transport_type, srx->transport_len, - NIPQUAD(srx->transport.sin.sin_addr), + &srx->transport.sin.sin_addr, ntohs(srx->transport.sin.sin_port)); /* search the peer list first */ @@ -177,12 +177,12 @@ struct rxrpc_peer *rxrpc_get_peer(struct sockaddr_rxrpc *srx, gfp_t gfp) new = "new"; success: - _net("PEER %s %d {%d,%u,%u.%u.%u.%u+%hu}", + _net("PEER %s %d {%d,%u,%pI4+%hu}", new, peer->debug_id, peer->srx.transport_type, peer->srx.transport.family, - NIPQUAD(peer->srx.transport.sin.sin_addr), + &peer->srx.transport.sin.sin_addr, ntohs(peer->srx.transport.sin.sin_port)); _leave(" = %p {u=%d}", peer, atomic_read(&peer->usage)); diff --git a/net/rxrpc/ar-proc.c b/net/rxrpc/ar-proc.c index 017322e2786..38047f713f2 100644 --- a/net/rxrpc/ar-proc.c +++ b/net/rxrpc/ar-proc.c @@ -61,12 +61,12 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) call = list_entry(v, struct rxrpc_call, link); trans = call->conn->trans; - sprintf(lbuff, NIPQUAD_FMT":%u", - NIPQUAD(trans->local->srx.transport.sin.sin_addr), + sprintf(lbuff, "%pI4:%u", + &trans->local->srx.transport.sin.sin_addr, ntohs(trans->local->srx.transport.sin.sin_port)); - sprintf(rbuff, NIPQUAD_FMT":%u", - NIPQUAD(trans->peer->srx.transport.sin.sin_addr), + sprintf(rbuff, "%pI4:%u", + &trans->peer->srx.transport.sin.sin_addr, ntohs(trans->peer->srx.transport.sin.sin_port)); seq_printf(seq, @@ -144,12 +144,12 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) conn = list_entry(v, struct rxrpc_connection, link); trans = conn->trans; - sprintf(lbuff, NIPQUAD_FMT":%u", - NIPQUAD(trans->local->srx.transport.sin.sin_addr), + sprintf(lbuff, "%pI4:%u", + &trans->local->srx.transport.sin.sin_addr, ntohs(trans->local->srx.transport.sin.sin_port)); - sprintf(rbuff, NIPQUAD_FMT":%u", - NIPQUAD(trans->peer->srx.transport.sin.sin_addr), + sprintf(rbuff, "%pI4:%u", + &trans->peer->srx.transport.sin.sin_addr, ntohs(trans->peer->srx.transport.sin.sin_port)); seq_printf(seq, diff --git a/net/rxrpc/ar-security.c b/net/rxrpc/ar-security.c index 60d1d364430..dc62920ee19 100644 --- a/net/rxrpc/ar-security.c +++ b/net/rxrpc/ar-security.c @@ -40,7 +40,7 @@ static void rxrpc_security_put(struct rxrpc_security *sec) /* * look up an rxrpc security module */ -struct rxrpc_security *rxrpc_security_lookup(u8 security_index) +static struct rxrpc_security *rxrpc_security_lookup(u8 security_index) { struct rxrpc_security *sec = NULL; diff --git a/net/rxrpc/ar-transport.c b/net/rxrpc/ar-transport.c index 64069c8769a..0936e1acc30 100644 --- a/net/rxrpc/ar-transport.c +++ b/net/rxrpc/ar-transport.c @@ -78,10 +78,10 @@ struct rxrpc_transport *rxrpc_get_transport(struct rxrpc_local *local, const char *new = "old"; int usage; - _enter("{%u.%u.%u.%u+%hu},{%u.%u.%u.%u+%hu},", - NIPQUAD(local->srx.transport.sin.sin_addr), + _enter("{%pI4+%hu},{%pI4+%hu},", + &local->srx.transport.sin.sin_addr, ntohs(local->srx.transport.sin.sin_port), - NIPQUAD(peer->srx.transport.sin.sin_addr), + &peer->srx.transport.sin.sin_addr, ntohs(peer->srx.transport.sin.sin_port)); /* search the transport list first */ @@ -149,10 +149,10 @@ struct rxrpc_transport *rxrpc_find_transport(struct rxrpc_local *local, { struct rxrpc_transport *trans; - _enter("{%u.%u.%u.%u+%hu},{%u.%u.%u.%u+%hu},", - NIPQUAD(local->srx.transport.sin.sin_addr), + _enter("{%pI4+%hu},{%pI4+%hu},", + &local->srx.transport.sin.sin_addr, ntohs(local->srx.transport.sin.sin_port), - NIPQUAD(peer->srx.transport.sin.sin_addr), + &peer->srx.transport.sin.sin_addr, ntohs(peer->srx.transport.sin.sin_port)); /* search the transport list */ diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index ba3f6e49fdd..ef8f91030a1 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -897,7 +897,7 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, /* get the IPv4 address of the entity that requested the ticket */ memcpy(&addr, p, sizeof(addr)); p += 4; - _debug("KIV ADDR : "NIPQUAD_FMT, NIPQUAD(addr)); + _debug("KIV ADDR : %pI4", &addr); /* get the session key from the ticket */ memcpy(&key, p, sizeof(key)); diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 6767e54155d..4f7ef0db302 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -194,6 +194,17 @@ config NET_SCH_NETEM If unsure, say N. +config NET_SCH_DRR + tristate "Deficit Round Robin scheduler (DRR)" + help + Say Y here if you want to use the Deficit Round Robin (DRR) packet + scheduling algorithm. + + To compile this driver as a module, choose M here: the module + will be called sch_drr. + + If unsure, say N. + config NET_SCH_INGRESS tristate "Ingress Qdisc" depends on NET_CLS_ACT @@ -316,6 +327,17 @@ config NET_CLS_FLOW To compile this code as a module, choose M here: the module will be called cls_flow. +config NET_CLS_CGROUP + bool "Control Group Classifier" + select NET_CLS + depends on CGROUPS + ---help--- + Say Y here if you want to classify packets based on the control + cgroup of their process. + + To compile this code as a module, choose M here: the + module will be called cls_cgroup. + config NET_EMATCH bool "Extended Matches" select NET_CLS diff --git a/net/sched/Makefile b/net/sched/Makefile index e60c9925b26..54d950cd4b8 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -30,6 +30,7 @@ obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o obj-$(CONFIG_NET_SCH_MULTIQ) += sch_multiq.o obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o +obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o obj-$(CONFIG_NET_CLS_U32) += cls_u32.o obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o obj-$(CONFIG_NET_CLS_FW) += cls_fw.o @@ -38,6 +39,7 @@ obj-$(CONFIG_NET_CLS_TCINDEX) += cls_tcindex.o obj-$(CONFIG_NET_CLS_RSVP6) += cls_rsvp6.o obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o +obj-$(CONFIG_NET_CLS_CGROUP) += cls_cgroup.o obj-$(CONFIG_NET_EMATCH) += ematch.o obj-$(CONFIG_NET_EMATCH_CMP) += em_cmp.o obj-$(CONFIG_NET_EMATCH_NBYTE) += em_nbyte.o diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 8f457f1e0ac..9d03cc33b6c 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -214,12 +214,14 @@ struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, int bind, } EXPORT_SYMBOL(tcf_hash_check); -struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a, int size, int bind, u32 *idx_gen, struct tcf_hashinfo *hinfo) +struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est, + struct tc_action *a, int size, int bind, + u32 *idx_gen, struct tcf_hashinfo *hinfo) { struct tcf_common *p = kzalloc(size, GFP_KERNEL); if (unlikely(!p)) - return p; + return ERR_PTR(-ENOMEM); p->tcfc_refcnt = 1; if (bind) p->tcfc_bindcnt = 1; @@ -228,9 +230,15 @@ struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est, struct tc_acti p->tcfc_index = index ? index : tcf_hash_new_index(idx_gen, hinfo); p->tcfc_tm.install = jiffies; p->tcfc_tm.lastuse = jiffies; - if (est) - gen_new_estimator(&p->tcfc_bstats, &p->tcfc_rate_est, - &p->tcfc_lock, est); + if (est) { + int err = gen_new_estimator(&p->tcfc_bstats, &p->tcfc_rate_est, + &p->tcfc_lock, est); + if (err) { + kfree(p); + return ERR_PTR(err); + } + } + a->priv = (void *) p; return p; } diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index ac04289da5d..e7f796aec65 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -88,8 +88,8 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est, if (!pc) { pc = tcf_hash_create(parm->index, est, a, sizeof(*gact), bind, &gact_idx_gen, &gact_hash_info); - if (unlikely(!pc)) - return -ENOMEM; + if (IS_ERR(pc)) + return PTR_ERR(pc); ret = ACT_P_CREATED; } else { if (!ovr) { diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 0453d79ebf5..082c520b0de 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -136,8 +136,8 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est, if (!pc) { pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind, &ipt_idx_gen, &ipt_hash_info); - if (unlikely(!pc)) - return -ENOMEM; + if (IS_ERR(pc)) + return PTR_ERR(pc); ret = ACT_P_CREATED; } else { if (!ovr) { diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 70341c020b6..b9aaab4e035 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -105,8 +105,8 @@ static int tcf_mirred_init(struct nlattr *nla, struct nlattr *est, return -EINVAL; pc = tcf_hash_create(parm->index, est, a, sizeof(*m), bind, &mirred_idx_gen, &mirred_hash_info); - if (unlikely(!pc)) - return -ENOMEM; + if (IS_ERR(pc)) + return PTR_ERR(pc); ret = ACT_P_CREATED; } else { if (!ovr) { diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 7b39ed485bc..d885ba31156 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -68,8 +68,8 @@ static int tcf_nat_init(struct nlattr *nla, struct nlattr *est, if (!pc) { pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, &nat_idx_gen, &nat_hash_info); - if (unlikely(!pc)) - return -ENOMEM; + if (IS_ERR(pc)) + return PTR_ERR(pc); p = to_tcf_nat(pc); ret = ACT_P_CREATED; } else { diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index d5f4e340486..96c0ed115e2 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -68,8 +68,8 @@ static int tcf_pedit_init(struct nlattr *nla, struct nlattr *est, return -EINVAL; pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, &pedit_idx_gen, &pedit_hash_info); - if (unlikely(!pc)) - return -ENOMEM; + if (IS_ERR(pc)) + return PTR_ERR(pc); p = to_pedit(pc); keys = kmalloc(ksize, GFP_KERNEL); if (keys == NULL) { diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 38015b49394..5c72a116b1a 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -182,17 +182,32 @@ override: R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE]); if (R_tab == NULL) goto failure; + + if (!est && (ret == ACT_P_CREATED || + !gen_estimator_active(&police->tcf_bstats, + &police->tcf_rate_est))) { + err = -EINVAL; + goto failure; + } + if (parm->peakrate.rate) { P_tab = qdisc_get_rtab(&parm->peakrate, tb[TCA_POLICE_PEAKRATE]); - if (P_tab == NULL) { - qdisc_put_rtab(R_tab); + if (P_tab == NULL) goto failure; - } } } - /* No failure allowed after this point */ + spin_lock_bh(&police->tcf_lock); + if (est) { + err = gen_replace_estimator(&police->tcf_bstats, + &police->tcf_rate_est, + &police->tcf_lock, est); + if (err) + goto failure_unlock; + } + + /* No failure allowed after this point */ if (R_tab != NULL) { qdisc_put_rtab(police->tcfp_R_tab); police->tcfp_R_tab = R_tab; @@ -217,10 +232,6 @@ override: if (tb[TCA_POLICE_AVRATE]) police->tcfp_ewma_rate = nla_get_u32(tb[TCA_POLICE_AVRATE]); - if (est) - gen_replace_estimator(&police->tcf_bstats, - &police->tcf_rate_est, - &police->tcf_lock, est); spin_unlock_bh(&police->tcf_lock); if (ret != ACT_P_CREATED) @@ -238,7 +249,13 @@ override: a->priv = police; return ret; +failure_unlock: + spin_unlock_bh(&police->tcf_lock); failure: + if (P_tab) + qdisc_put_rtab(P_tab); + if (R_tab) + qdisc_put_rtab(R_tab); if (ret == ACT_P_CREATED) kfree(police); return err; diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index e7851ce92cf..8daa1ebc741 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -124,8 +124,8 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est, if (!pc) { pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, &simp_idx_gen, &simp_hash_info); - if (unlikely(!pc)) - return -ENOMEM; + if (IS_ERR(pc)) + return PTR_ERR(pc); d = to_defact(pc); ret = alloc_defdata(d, defdata); diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index fe9777e77f3..4ab916b8074 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -104,8 +104,8 @@ static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est, if (!pc) { pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, &skbedit_idx_gen, &skbedit_hash_info); - if (unlikely(!pc)) - return -ENOMEM; + if (IS_ERR(pc)) + return PTR_ERR(pc); d = to_skbedit(pc); ret = ACT_P_CREATED; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 16e7ac9774e..173fcc4b050 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -531,7 +531,8 @@ void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst, if (src->action) { struct tc_action *act; tcf_tree_lock(tp); - act = xchg(&dst->action, src->action); + act = dst->action; + dst->action = src->action; tcf_tree_unlock(tp); if (act) tcf_action_destroy(act, TCA_ACT_UNBIND); diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 956915c217d..4e2bda85411 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -102,7 +102,7 @@ static inline void basic_delete_filter(struct tcf_proto *tp, static void basic_destroy(struct tcf_proto *tp) { - struct basic_head *head = (struct basic_head *) xchg(&tp->root, NULL); + struct basic_head *head = tp->root; struct basic_filter *f, *n; list_for_each_entry_safe(f, n, &head->flist, link) { diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c new file mode 100644 index 00000000000..0d68b197598 --- /dev/null +++ b/net/sched/cls_cgroup.c @@ -0,0 +1,288 @@ +/* + * net/sched/cls_cgroup.c Control Group Classifier + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Thomas Graf <tgraf@suug.ch> + */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/skbuff.h> +#include <linux/cgroup.h> +#include <net/rtnetlink.h> +#include <net/pkt_cls.h> + +struct cgroup_cls_state +{ + struct cgroup_subsys_state css; + u32 classid; +}; + +static inline struct cgroup_cls_state *net_cls_state(struct cgroup *cgrp) +{ + return (struct cgroup_cls_state *) + cgroup_subsys_state(cgrp, net_cls_subsys_id); +} + +static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss, + struct cgroup *cgrp) +{ + struct cgroup_cls_state *cs; + + if (!(cs = kzalloc(sizeof(*cs), GFP_KERNEL))) + return ERR_PTR(-ENOMEM); + + if (cgrp->parent) + cs->classid = net_cls_state(cgrp->parent)->classid; + + return &cs->css; +} + +static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) +{ + kfree(ss); +} + +static u64 read_classid(struct cgroup *cgrp, struct cftype *cft) +{ + return net_cls_state(cgrp)->classid; +} + +static int write_classid(struct cgroup *cgrp, struct cftype *cft, u64 value) +{ + if (!cgroup_lock_live_group(cgrp)) + return -ENODEV; + + net_cls_state(cgrp)->classid = (u32) value; + + cgroup_unlock(); + + return 0; +} + +static struct cftype ss_files[] = { + { + .name = "classid", + .read_u64 = read_classid, + .write_u64 = write_classid, + }, +}; + +static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) +{ + return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files)); +} + +struct cgroup_subsys net_cls_subsys = { + .name = "net_cls", + .create = cgrp_create, + .destroy = cgrp_destroy, + .populate = cgrp_populate, + .subsys_id = net_cls_subsys_id, +}; + +struct cls_cgroup_head +{ + u32 handle; + struct tcf_exts exts; + struct tcf_ematch_tree ematches; +}; + +static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp, + struct tcf_result *res) +{ + struct cls_cgroup_head *head = tp->root; + struct cgroup_cls_state *cs; + int ret = 0; + + /* + * Due to the nature of the classifier it is required to ignore all + * packets originating from softirq context as accessing `current' + * would lead to false results. + * + * This test assumes that all callers of dev_queue_xmit() explicitely + * disable bh. Knowing this, it is possible to detect softirq based + * calls by looking at the number of nested bh disable calls because + * softirqs always disables bh. + */ + if (softirq_count() != SOFTIRQ_OFFSET) + return -1; + + rcu_read_lock(); + cs = (struct cgroup_cls_state *) task_subsys_state(current, + net_cls_subsys_id); + if (cs->classid && tcf_em_tree_match(skb, &head->ematches, NULL)) { + res->classid = cs->classid; + res->class = 0; + ret = tcf_exts_exec(skb, &head->exts, res); + } else + ret = -1; + + rcu_read_unlock(); + + return ret; +} + +static unsigned long cls_cgroup_get(struct tcf_proto *tp, u32 handle) +{ + return 0UL; +} + +static void cls_cgroup_put(struct tcf_proto *tp, unsigned long f) +{ +} + +static int cls_cgroup_init(struct tcf_proto *tp) +{ + return 0; +} + +static const struct tcf_ext_map cgroup_ext_map = { + .action = TCA_CGROUP_ACT, + .police = TCA_CGROUP_POLICE, +}; + +static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = { + [TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED }, +}; + +static int cls_cgroup_change(struct tcf_proto *tp, unsigned long base, + u32 handle, struct nlattr **tca, + unsigned long *arg) +{ + struct nlattr *tb[TCA_CGROUP_MAX+1]; + struct cls_cgroup_head *head = tp->root; + struct tcf_ematch_tree t; + struct tcf_exts e; + int err; + + if (head == NULL) { + if (!handle) + return -EINVAL; + + head = kzalloc(sizeof(*head), GFP_KERNEL); + if (head == NULL) + return -ENOBUFS; + + head->handle = handle; + + tcf_tree_lock(tp); + tp->root = head; + tcf_tree_unlock(tp); + } + + if (handle != head->handle) + return -ENOENT; + + err = nla_parse_nested(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS], + cgroup_policy); + if (err < 0) + return err; + + err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &cgroup_ext_map); + if (err < 0) + return err; + + err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &t); + if (err < 0) + return err; + + tcf_exts_change(tp, &head->exts, &e); + tcf_em_tree_change(tp, &head->ematches, &t); + + return 0; +} + +static void cls_cgroup_destroy(struct tcf_proto *tp) +{ + struct cls_cgroup_head *head = tp->root; + + if (head) { + tcf_exts_destroy(tp, &head->exts); + tcf_em_tree_destroy(tp, &head->ematches); + kfree(head); + } +} + +static int cls_cgroup_delete(struct tcf_proto *tp, unsigned long arg) +{ + return -EOPNOTSUPP; +} + +static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg) +{ + struct cls_cgroup_head *head = tp->root; + + if (arg->count < arg->skip) + goto skip; + + if (arg->fn(tp, (unsigned long) head, arg) < 0) { + arg->stop = 1; + return; + } +skip: + arg->count++; +} + +static int cls_cgroup_dump(struct tcf_proto *tp, unsigned long fh, + struct sk_buff *skb, struct tcmsg *t) +{ + struct cls_cgroup_head *head = tp->root; + unsigned char *b = skb_tail_pointer(skb); + struct nlattr *nest; + + t->tcm_handle = head->handle; + + nest = nla_nest_start(skb, TCA_OPTIONS); + if (nest == NULL) + goto nla_put_failure; + + if (tcf_exts_dump(skb, &head->exts, &cgroup_ext_map) < 0 || + tcf_em_tree_dump(skb, &head->ematches, TCA_CGROUP_EMATCHES) < 0) + goto nla_put_failure; + + nla_nest_end(skb, nest); + + if (tcf_exts_dump_stats(skb, &head->exts, &cgroup_ext_map) < 0) + goto nla_put_failure; + + return skb->len; + +nla_put_failure: + nlmsg_trim(skb, b); + return -1; +} + +static struct tcf_proto_ops cls_cgroup_ops __read_mostly = { + .kind = "cgroup", + .init = cls_cgroup_init, + .change = cls_cgroup_change, + .classify = cls_cgroup_classify, + .destroy = cls_cgroup_destroy, + .get = cls_cgroup_get, + .put = cls_cgroup_put, + .delete = cls_cgroup_delete, + .walk = cls_cgroup_walk, + .dump = cls_cgroup_dump, + .owner = THIS_MODULE, +}; + +static int __init init_cgroup_cls(void) +{ + return register_tcf_proto_ops(&cls_cgroup_ops); +} + +static void __exit exit_cgroup_cls(void) +{ + unregister_tcf_proto_ops(&cls_cgroup_ops); +} + +module_init(init_cgroup_cls); +module_exit(exit_cgroup_cls); +MODULE_LICENSE("GPL"); diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 0ebaff637e3..0ef4e3065bc 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -260,14 +260,14 @@ static u32 flow_get_rtclassid(const struct sk_buff *skb) static u32 flow_get_skuid(const struct sk_buff *skb) { if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file) - return skb->sk->sk_socket->file->f_uid; + return skb->sk->sk_socket->file->f_cred->fsuid; return 0; } static u32 flow_get_skgid(const struct sk_buff *skb) { if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file) - return skb->sk->sk_socket->file->f_gid; + return skb->sk->sk_socket->file->f_cred->fsgid; return 0; } diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index b0f90e593af..6d6e87585fb 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -148,7 +148,7 @@ fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f) static void fw_destroy(struct tcf_proto *tp) { - struct fw_head *head = (struct fw_head*)xchg(&tp->root, NULL); + struct fw_head *head = tp->root; struct fw_filter *f; int h; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index e3d8455eebc..bdf1f4172ee 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -260,7 +260,7 @@ route4_delete_filter(struct tcf_proto *tp, struct route4_filter *f) static void route4_destroy(struct tcf_proto *tp) { - struct route4_head *head = xchg(&tp->root, NULL); + struct route4_head *head = tp->root; int h1, h2; if (head == NULL) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 7a7bff5ded2..e806f2314b5 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -13,12 +13,6 @@ #include <net/netlink.h> #include <net/pkt_cls.h> - -/* - * Not quite sure if we need all the xchgs Alexey uses when accessing things. - * Can always add them later ... :) - */ - /* * Passing parameters to the root seems to be done more awkwardly than really * necessary. At least, u32 doesn't seem to use such dirty hacks. To be diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 246f9065ce3..05d178008cb 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -387,7 +387,7 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) static void u32_destroy(struct tcf_proto *tp) { struct tc_u_common *tp_c = tp->data; - struct tc_u_hnode *root_ht = xchg(&tp->root, NULL); + struct tc_u_hnode *root_ht = tp->root; WARN_ON(root_ht == NULL); @@ -479,7 +479,7 @@ static int u32_set_parms(struct tcf_proto *tp, unsigned long base, err = -EINVAL; if (tb[TCA_U32_LINK]) { u32 handle = nla_get_u32(tb[TCA_U32_LINK]); - struct tc_u_hnode *ht_down = NULL; + struct tc_u_hnode *ht_down = NULL, *ht_old; if (TC_U32_KEY(handle)) goto errout; @@ -493,11 +493,12 @@ static int u32_set_parms(struct tcf_proto *tp, unsigned long base, } tcf_tree_lock(tp); - ht_down = xchg(&n->ht_down, ht_down); + ht_old = n->ht_down; + n->ht_down = ht_down; tcf_tree_unlock(tp); - if (ht_down) - ht_down->refcnt--; + if (ht_old) + ht_old->refcnt--; } if (tb[TCA_U32_CLASSID]) { n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]); diff --git a/net/sched/ematch.c b/net/sched/ematch.c index e82519e548d..aab59409728 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -71,7 +71,7 @@ * * static void __exit exit_my_ematch(void) * { - * return tcf_em_unregister(&my_ops); + * tcf_em_unregister(&my_ops); * } * * module_init(init_my_ematch); @@ -154,23 +154,11 @@ EXPORT_SYMBOL(tcf_em_register); * * Returns -ENOENT if no matching ematch was found. */ -int tcf_em_unregister(struct tcf_ematch_ops *ops) +void tcf_em_unregister(struct tcf_ematch_ops *ops) { - int err = 0; - struct tcf_ematch_ops *e; - write_lock(&ematch_mod_lock); - list_for_each_entry(e, &ematch_ops, link) { - if (e == ops) { - list_del(&e->link); - goto out; - } - } - - err = -ENOENT; -out: + list_del(&ops->link); write_unlock(&ematch_mod_lock); - return err; } EXPORT_SYMBOL(tcf_em_unregister); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 6ab4a2f92ca..0fc4a18fd96 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -97,10 +97,9 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n, Auxiliary routines: - ---requeue + ---peek - requeues once dequeued packet. It is used for non-standard or - just buggy devices, which can defer output even if netif_queue_stopped()=0. + like dequeue but without removing a packet from the queue ---reset @@ -147,8 +146,14 @@ int register_qdisc(struct Qdisc_ops *qops) if (qops->enqueue == NULL) qops->enqueue = noop_qdisc_ops.enqueue; - if (qops->requeue == NULL) - qops->requeue = noop_qdisc_ops.requeue; + if (qops->peek == NULL) { + if (qops->dequeue == NULL) { + qops->peek = noop_qdisc_ops.peek; + } else { + rc = -EINVAL; + goto out; + } + } if (qops->dequeue == NULL) qops->dequeue = noop_qdisc_ops.dequeue; @@ -184,7 +189,7 @@ EXPORT_SYMBOL(unregister_qdisc); (root qdisc, all its children, children of children etc.) */ -struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) +static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) { struct Qdisc *q; @@ -199,28 +204,16 @@ struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) return NULL; } -/* - * This lock is needed until some qdiscs stop calling qdisc_tree_decrease_qlen() - * without rtnl_lock(); currently hfsc_dequeue(), netem_dequeue(), tbf_dequeue() - */ -static DEFINE_SPINLOCK(qdisc_list_lock); - static void qdisc_list_add(struct Qdisc *q) { - if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { - spin_lock_bh(&qdisc_list_lock); + if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) list_add_tail(&q->list, &qdisc_root_sleeping(q)->list); - spin_unlock_bh(&qdisc_list_lock); - } } void qdisc_list_del(struct Qdisc *q) { - if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { - spin_lock_bh(&qdisc_list_lock); + if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) list_del(&q->list); - spin_unlock_bh(&qdisc_list_lock); - } } EXPORT_SYMBOL(qdisc_list_del); @@ -229,22 +222,17 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) unsigned int i; struct Qdisc *q; - spin_lock_bh(&qdisc_list_lock); - for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); struct Qdisc *txq_root = txq->qdisc_sleeping; q = qdisc_match_from_root(txq_root, handle); if (q) - goto unlock; + goto out; } q = qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle); - -unlock: - spin_unlock_bh(&qdisc_list_lock); - +out: return q; } @@ -462,7 +450,6 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) timer); wd->qdisc->flags &= ~TCQ_F_THROTTLED; - smp_wmb(); __netif_schedule(qdisc_root(wd->qdisc)); return HRTIMER_NORESTART; @@ -892,9 +879,12 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) sch->stab = stab; if (tca[TCA_RATE]) + /* NB: ignores errors from replace_estimator + because change can't be undone. */ gen_replace_estimator(&sch->bstats, &sch->rate_est, - qdisc_root_sleeping_lock(sch), - tca[TCA_RATE]); + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + return 0; } diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 43d37256c15..2a8b83af7c4 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -62,7 +62,7 @@ struct atm_qdisc_data { struct atm_flow_data link; /* unclassified skbs go here */ struct atm_flow_data *flows; /* NB: "link" is also on this list */ - struct tasklet_struct task; /* requeue tasklet */ + struct tasklet_struct task; /* dequeue tasklet */ }; /* ------------------------- Class/flow operations ------------------------- */ @@ -102,7 +102,8 @@ static int atm_tc_graft(struct Qdisc *sch, unsigned long arg, return -EINVAL; if (!new) new = &noop_qdisc; - *old = xchg(&flow->q, new); + *old = flow->q; + flow->q = new; if (*old) qdisc_reset(*old); return 0; @@ -480,11 +481,14 @@ static void sch_atm_dequeue(unsigned long data) * If traffic is properly shaped, this won't generate nasty * little bursts. Otherwise, it may ... (but that's okay) */ - while ((skb = flow->q->dequeue(flow->q))) { - if (!atm_may_send(flow->vcc, skb->truesize)) { - (void)flow->q->ops->requeue(skb, flow->q); + while ((skb = flow->q->ops->peek(flow->q))) { + if (!atm_may_send(flow->vcc, skb->truesize)) break; - } + + skb = qdisc_dequeue_peeked(flow->q); + if (unlikely(!skb)) + break; + pr_debug("atm_tc_dequeue: sending on class %p\n", flow); /* remove any LL header somebody else has attached */ skb_pull(skb, skb_network_offset(skb)); @@ -516,27 +520,19 @@ static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch) pr_debug("atm_tc_dequeue(sch %p,[qdisc %p])\n", sch, p); tasklet_schedule(&p->task); - skb = p->link.q->dequeue(p->link.q); + skb = qdisc_dequeue_peeked(p->link.q); if (skb) sch->q.qlen--; return skb; } -static int atm_tc_requeue(struct sk_buff *skb, struct Qdisc *sch) +static struct sk_buff *atm_tc_peek(struct Qdisc *sch) { struct atm_qdisc_data *p = qdisc_priv(sch); - int ret; - pr_debug("atm_tc_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); - ret = p->link.q->ops->requeue(skb, p->link.q); - if (!ret) { - sch->q.qlen++; - sch->qstats.requeues++; - } else if (net_xmit_drop_count(ret)) { - sch->qstats.drops++; - p->link.qstats.drops++; - } - return ret; + pr_debug("atm_tc_peek(sch %p,[qdisc %p])\n", sch, p); + + return p->link.q->ops->peek(p->link.q); } static unsigned int atm_tc_drop(struct Qdisc *sch) @@ -694,7 +690,7 @@ static struct Qdisc_ops atm_qdisc_ops __read_mostly = { .priv_size = sizeof(struct atm_qdisc_data), .enqueue = atm_tc_enqueue, .dequeue = atm_tc_dequeue, - .requeue = atm_tc_requeue, + .peek = atm_tc_peek, .drop = atm_tc_drop, .init = atm_tc_init, .reset = atm_tc_reset, diff --git a/net/sched/sch_blackhole.c b/net/sched/sch_blackhole.c index 507fb488bc9..094a874b48b 100644 --- a/net/sched/sch_blackhole.c +++ b/net/sched/sch_blackhole.c @@ -33,6 +33,7 @@ static struct Qdisc_ops blackhole_qdisc_ops __read_mostly = { .priv_size = 0, .enqueue = blackhole_enqueue, .dequeue = blackhole_dequeue, + .peek = blackhole_dequeue, .owner = THIS_MODULE, }; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 03e389e8d94..9e43ed94916 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -405,40 +405,6 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch) return ret; } -static int -cbq_requeue(struct sk_buff *skb, struct Qdisc *sch) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl; - int ret; - - if ((cl = q->tx_class) == NULL) { - kfree_skb(skb); - sch->qstats.drops++; - return NET_XMIT_CN; - } - q->tx_class = NULL; - - cbq_mark_toplevel(q, cl); - -#ifdef CONFIG_NET_CLS_ACT - q->rx_class = cl; - cl->q->__parent = sch; -#endif - if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) { - sch->q.qlen++; - sch->qstats.requeues++; - if (!cl->next_alive) - cbq_activate_class(cl); - return 0; - } - if (net_xmit_drop_count(ret)) { - sch->qstats.drops++; - cl->qstats.drops++; - } - return ret; -} - /* Overlimit actions */ /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */ @@ -1669,7 +1635,8 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, #endif } sch_tree_lock(sch); - *old = xchg(&cl->q, new); + *old = cl->q; + cl->q = new; qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_reset(*old); sch_tree_unlock(sch); @@ -1798,11 +1765,23 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t } if (tb[TCA_CBQ_RATE]) { - rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]); + rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), + tb[TCA_CBQ_RTAB]); if (rtab == NULL) return -EINVAL; } + if (tca[TCA_RATE]) { + err = gen_replace_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + if (err) { + if (rtab) + qdisc_put_rtab(rtab); + return err; + } + } + /* Change class parameters */ sch_tree_lock(sch); @@ -1810,8 +1789,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t cbq_deactivate_class(cl); if (rtab) { - rtab = xchg(&cl->R_tab, rtab); - qdisc_put_rtab(rtab); + qdisc_put_rtab(cl->R_tab); + cl->R_tab = rtab; } if (tb[TCA_CBQ_LSSOPT]) @@ -1838,10 +1817,6 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t sch_tree_unlock(sch); - if (tca[TCA_RATE]) - gen_replace_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_sleeping_lock(sch), - tca[TCA_RATE]); return 0; } @@ -1888,6 +1863,17 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t cl = kzalloc(sizeof(*cl), GFP_KERNEL); if (cl == NULL) goto failure; + + if (tca[TCA_RATE]) { + err = gen_new_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + if (err) { + kfree(cl); + goto failure; + } + } + cl->R_tab = rtab; rtab = NULL; cl->refcnt = 1; @@ -1929,10 +1915,6 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t qdisc_class_hash_grow(sch, &q->clhash); - if (tca[TCA_RATE]) - gen_new_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_sleeping_lock(sch), tca[TCA_RATE]); - *arg = (unsigned long)cl; return 0; @@ -2066,7 +2048,7 @@ static struct Qdisc_ops cbq_qdisc_ops __read_mostly = { .priv_size = sizeof(struct cbq_sched_data), .enqueue = cbq_enqueue, .dequeue = cbq_dequeue, - .requeue = cbq_requeue, + .peek = qdisc_peek_dequeued, .drop = cbq_drop, .init = cbq_init, .reset = cbq_reset, diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c new file mode 100644 index 00000000000..f6b4fa97df7 --- /dev/null +++ b/net/sched/sch_drr.c @@ -0,0 +1,519 @@ +/* + * net/sched/sch_drr.c Deficit Round Robin scheduler + * + * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/netdevice.h> +#include <linux/pkt_sched.h> +#include <net/sch_generic.h> +#include <net/pkt_sched.h> +#include <net/pkt_cls.h> + +struct drr_class { + struct Qdisc_class_common common; + unsigned int refcnt; + unsigned int filter_cnt; + + struct gnet_stats_basic bstats; + struct gnet_stats_queue qstats; + struct gnet_stats_rate_est rate_est; + struct list_head alist; + struct Qdisc *qdisc; + + u32 quantum; + u32 deficit; +}; + +struct drr_sched { + struct list_head active; + struct tcf_proto *filter_list; + struct Qdisc_class_hash clhash; +}; + +static struct drr_class *drr_find_class(struct Qdisc *sch, u32 classid) +{ + struct drr_sched *q = qdisc_priv(sch); + struct Qdisc_class_common *clc; + + clc = qdisc_class_find(&q->clhash, classid); + if (clc == NULL) + return NULL; + return container_of(clc, struct drr_class, common); +} + +static void drr_purge_queue(struct drr_class *cl) +{ + unsigned int len = cl->qdisc->q.qlen; + + qdisc_reset(cl->qdisc); + qdisc_tree_decrease_qlen(cl->qdisc, len); +} + +static const struct nla_policy drr_policy[TCA_DRR_MAX + 1] = { + [TCA_DRR_QUANTUM] = { .type = NLA_U32 }, +}; + +static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, + struct nlattr **tca, unsigned long *arg) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl = (struct drr_class *)*arg; + struct nlattr *tb[TCA_DRR_MAX + 1]; + u32 quantum; + int err; + + err = nla_parse_nested(tb, TCA_DRR_MAX, tca[TCA_OPTIONS], drr_policy); + if (err < 0) + return err; + + if (tb[TCA_DRR_QUANTUM]) { + quantum = nla_get_u32(tb[TCA_DRR_QUANTUM]); + if (quantum == 0) + return -EINVAL; + } else + quantum = psched_mtu(qdisc_dev(sch)); + + if (cl != NULL) { + if (tca[TCA_RATE]) { + err = gen_replace_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + if (err) + return err; + } + + sch_tree_lock(sch); + if (tb[TCA_DRR_QUANTUM]) + cl->quantum = quantum; + sch_tree_unlock(sch); + + return 0; + } + + cl = kzalloc(sizeof(struct drr_class), GFP_KERNEL); + if (cl == NULL) + return -ENOBUFS; + + cl->refcnt = 1; + cl->common.classid = classid; + cl->quantum = quantum; + cl->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, classid); + if (cl->qdisc == NULL) + cl->qdisc = &noop_qdisc; + + if (tca[TCA_RATE]) { + err = gen_replace_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + if (err) { + qdisc_destroy(cl->qdisc); + kfree(cl); + return err; + } + } + + sch_tree_lock(sch); + qdisc_class_hash_insert(&q->clhash, &cl->common); + sch_tree_unlock(sch); + + qdisc_class_hash_grow(sch, &q->clhash); + + *arg = (unsigned long)cl; + return 0; +} + +static void drr_destroy_class(struct Qdisc *sch, struct drr_class *cl) +{ + gen_kill_estimator(&cl->bstats, &cl->rate_est); + qdisc_destroy(cl->qdisc); + kfree(cl); +} + +static int drr_delete_class(struct Qdisc *sch, unsigned long arg) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl = (struct drr_class *)arg; + + if (cl->filter_cnt > 0) + return -EBUSY; + + sch_tree_lock(sch); + + drr_purge_queue(cl); + qdisc_class_hash_remove(&q->clhash, &cl->common); + + if (--cl->refcnt == 0) + drr_destroy_class(sch, cl); + + sch_tree_unlock(sch); + return 0; +} + +static unsigned long drr_get_class(struct Qdisc *sch, u32 classid) +{ + struct drr_class *cl = drr_find_class(sch, classid); + + if (cl != NULL) + cl->refcnt++; + + return (unsigned long)cl; +} + +static void drr_put_class(struct Qdisc *sch, unsigned long arg) +{ + struct drr_class *cl = (struct drr_class *)arg; + + if (--cl->refcnt == 0) + drr_destroy_class(sch, cl); +} + +static struct tcf_proto **drr_tcf_chain(struct Qdisc *sch, unsigned long cl) +{ + struct drr_sched *q = qdisc_priv(sch); + + if (cl) + return NULL; + + return &q->filter_list; +} + +static unsigned long drr_bind_tcf(struct Qdisc *sch, unsigned long parent, + u32 classid) +{ + struct drr_class *cl = drr_find_class(sch, classid); + + if (cl != NULL) + cl->filter_cnt++; + + return (unsigned long)cl; +} + +static void drr_unbind_tcf(struct Qdisc *sch, unsigned long arg) +{ + struct drr_class *cl = (struct drr_class *)arg; + + cl->filter_cnt--; +} + +static int drr_graft_class(struct Qdisc *sch, unsigned long arg, + struct Qdisc *new, struct Qdisc **old) +{ + struct drr_class *cl = (struct drr_class *)arg; + + if (new == NULL) { + new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, cl->common.classid); + if (new == NULL) + new = &noop_qdisc; + } + + sch_tree_lock(sch); + drr_purge_queue(cl); + *old = cl->qdisc; + cl->qdisc = new; + sch_tree_unlock(sch); + return 0; +} + +static struct Qdisc *drr_class_leaf(struct Qdisc *sch, unsigned long arg) +{ + struct drr_class *cl = (struct drr_class *)arg; + + return cl->qdisc; +} + +static void drr_qlen_notify(struct Qdisc *csh, unsigned long arg) +{ + struct drr_class *cl = (struct drr_class *)arg; + + if (cl->qdisc->q.qlen == 0) + list_del(&cl->alist); +} + +static int drr_dump_class(struct Qdisc *sch, unsigned long arg, + struct sk_buff *skb, struct tcmsg *tcm) +{ + struct drr_class *cl = (struct drr_class *)arg; + struct nlattr *nest; + + tcm->tcm_parent = TC_H_ROOT; + tcm->tcm_handle = cl->common.classid; + tcm->tcm_info = cl->qdisc->handle; + + nest = nla_nest_start(skb, TCA_OPTIONS); + if (nest == NULL) + goto nla_put_failure; + NLA_PUT_U32(skb, TCA_DRR_QUANTUM, cl->quantum); + return nla_nest_end(skb, nest); + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + +static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg, + struct gnet_dump *d) +{ + struct drr_class *cl = (struct drr_class *)arg; + struct tc_drr_stats xstats; + + memset(&xstats, 0, sizeof(xstats)); + if (cl->qdisc->q.qlen) + xstats.deficit = cl->deficit; + + if (gnet_stats_copy_basic(d, &cl->bstats) < 0 || + gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || + gnet_stats_copy_queue(d, &cl->qdisc->qstats) < 0) + return -1; + + return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); +} + +static void drr_walk(struct Qdisc *sch, struct qdisc_walker *arg) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl; + struct hlist_node *n; + unsigned int i; + + if (arg->stop) + return; + + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { + if (arg->count < arg->skip) { + arg->count++; + continue; + } + if (arg->fn(sch, (unsigned long)cl, arg) < 0) { + arg->stop = 1; + return; + } + arg->count++; + } + } +} + +static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch, + int *qerr) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl; + struct tcf_result res; + int result; + + if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) { + cl = drr_find_class(sch, skb->priority); + if (cl != NULL) + return cl; + } + + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; + result = tc_classify(skb, q->filter_list, &res); + if (result >= 0) { +#ifdef CONFIG_NET_CLS_ACT + switch (result) { + case TC_ACT_QUEUED: + case TC_ACT_STOLEN: + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + case TC_ACT_SHOT: + return NULL; + } +#endif + cl = (struct drr_class *)res.class; + if (cl == NULL) + cl = drr_find_class(sch, res.classid); + return cl; + } + return NULL; +} + +static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl; + unsigned int len; + int err; + + cl = drr_classify(skb, sch, &err); + if (cl == NULL) { + if (err & __NET_XMIT_BYPASS) + sch->qstats.drops++; + kfree_skb(skb); + return err; + } + + len = qdisc_pkt_len(skb); + err = qdisc_enqueue(skb, cl->qdisc); + if (unlikely(err != NET_XMIT_SUCCESS)) { + if (net_xmit_drop_count(err)) { + cl->qstats.drops++; + sch->qstats.drops++; + } + return err; + } + + if (cl->qdisc->q.qlen == 1) { + list_add_tail(&cl->alist, &q->active); + cl->deficit = cl->quantum; + } + + cl->bstats.packets++; + cl->bstats.bytes += len; + sch->bstats.packets++; + sch->bstats.bytes += len; + + sch->q.qlen++; + return err; +} + +static struct sk_buff *drr_dequeue(struct Qdisc *sch) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl; + struct sk_buff *skb; + unsigned int len; + + if (list_empty(&q->active)) + goto out; + while (1) { + cl = list_first_entry(&q->active, struct drr_class, alist); + skb = cl->qdisc->ops->peek(cl->qdisc); + if (skb == NULL) + goto out; + + len = qdisc_pkt_len(skb); + if (len <= cl->deficit) { + cl->deficit -= len; + skb = qdisc_dequeue_peeked(cl->qdisc); + if (cl->qdisc->q.qlen == 0) + list_del(&cl->alist); + sch->q.qlen--; + return skb; + } + + cl->deficit += cl->quantum; + list_move_tail(&cl->alist, &q->active); + } +out: + return NULL; +} + +static unsigned int drr_drop(struct Qdisc *sch) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl; + unsigned int len; + + list_for_each_entry(cl, &q->active, alist) { + if (cl->qdisc->ops->drop) { + len = cl->qdisc->ops->drop(cl->qdisc); + if (len > 0) { + sch->q.qlen--; + if (cl->qdisc->q.qlen == 0) + list_del(&cl->alist); + return len; + } + } + } + return 0; +} + +static int drr_init_qdisc(struct Qdisc *sch, struct nlattr *opt) +{ + struct drr_sched *q = qdisc_priv(sch); + int err; + + err = qdisc_class_hash_init(&q->clhash); + if (err < 0) + return err; + INIT_LIST_HEAD(&q->active); + return 0; +} + +static void drr_reset_qdisc(struct Qdisc *sch) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl; + struct hlist_node *n; + unsigned int i; + + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { + if (cl->qdisc->q.qlen) + list_del(&cl->alist); + qdisc_reset(cl->qdisc); + } + } + sch->q.qlen = 0; +} + +static void drr_destroy_qdisc(struct Qdisc *sch) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl; + struct hlist_node *n, *next; + unsigned int i; + + tcf_destroy_chain(&q->filter_list); + + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i], + common.hnode) + drr_destroy_class(sch, cl); + } + qdisc_class_hash_destroy(&q->clhash); +} + +static const struct Qdisc_class_ops drr_class_ops = { + .change = drr_change_class, + .delete = drr_delete_class, + .get = drr_get_class, + .put = drr_put_class, + .tcf_chain = drr_tcf_chain, + .bind_tcf = drr_bind_tcf, + .unbind_tcf = drr_unbind_tcf, + .graft = drr_graft_class, + .leaf = drr_class_leaf, + .qlen_notify = drr_qlen_notify, + .dump = drr_dump_class, + .dump_stats = drr_dump_class_stats, + .walk = drr_walk, +}; + +static struct Qdisc_ops drr_qdisc_ops __read_mostly = { + .cl_ops = &drr_class_ops, + .id = "drr", + .priv_size = sizeof(struct drr_sched), + .enqueue = drr_enqueue, + .dequeue = drr_dequeue, + .peek = qdisc_peek_dequeued, + .drop = drr_drop, + .init = drr_init_qdisc, + .reset = drr_reset_qdisc, + .destroy = drr_destroy_qdisc, + .owner = THIS_MODULE, +}; + +static int __init drr_init(void) +{ + return register_qdisc(&drr_qdisc_ops); +} + +static void __exit drr_exit(void) +{ + unregister_qdisc(&drr_qdisc_ops); +} + +module_init(drr_init); +module_exit(drr_exit); +MODULE_LICENSE("GPL"); diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index ba43aab3a85..d303daa45d4 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -68,7 +68,8 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg, } sch_tree_lock(sch); - *old = xchg(&p->q, new); + *old = p->q; + p->q = new; qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_reset(*old); sch_tree_unlock(sch); @@ -313,24 +314,13 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) return skb; } -static int dsmark_requeue(struct sk_buff *skb, struct Qdisc *sch) +static struct sk_buff *dsmark_peek(struct Qdisc *sch) { struct dsmark_qdisc_data *p = qdisc_priv(sch); - int err; - - pr_debug("dsmark_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); - - err = p->q->ops->requeue(skb, p->q); - if (err != NET_XMIT_SUCCESS) { - if (net_xmit_drop_count(err)) - sch->qstats.drops++; - return err; - } - sch->q.qlen++; - sch->qstats.requeues++; + pr_debug("dsmark_peek(sch %p,[qdisc %p])\n", sch, p); - return NET_XMIT_SUCCESS; + return p->q->ops->peek(p->q); } static unsigned int dsmark_drop(struct Qdisc *sch) @@ -496,7 +486,7 @@ static struct Qdisc_ops dsmark_qdisc_ops __read_mostly = { .priv_size = sizeof(struct dsmark_qdisc_data), .enqueue = dsmark_enqueue, .dequeue = dsmark_dequeue, - .requeue = dsmark_requeue, + .peek = dsmark_peek, .drop = dsmark_drop, .init = dsmark_init, .reset = dsmark_reset, diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 23d258bfe8a..92cfc9d7e3b 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -83,7 +83,7 @@ struct Qdisc_ops pfifo_qdisc_ops __read_mostly = { .priv_size = sizeof(struct fifo_sched_data), .enqueue = pfifo_enqueue, .dequeue = qdisc_dequeue_head, - .requeue = qdisc_requeue, + .peek = qdisc_peek_head, .drop = qdisc_queue_drop, .init = fifo_init, .reset = qdisc_reset_queue, @@ -98,7 +98,7 @@ struct Qdisc_ops bfifo_qdisc_ops __read_mostly = { .priv_size = sizeof(struct fifo_sched_data), .enqueue = bfifo_enqueue, .dequeue = qdisc_dequeue_head, - .requeue = qdisc_requeue, + .peek = qdisc_peek_head, .drop = qdisc_queue_drop, .init = fifo_init, .reset = qdisc_reset_queue, diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index cdcd16fcfed..5f5efe4e607 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -224,7 +224,7 @@ static void dev_watchdog(unsigned long arg) char drivername[64]; WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n", dev->name, netdev_drivername(dev, drivername, 64)); - dev->tx_timeout(dev); + dev->netdev_ops->ndo_tx_timeout(dev); } if (!mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + @@ -239,7 +239,7 @@ static void dev_watchdog(unsigned long arg) void __netdev_watchdog_up(struct net_device *dev) { - if (dev->tx_timeout) { + if (dev->netdev_ops->ndo_tx_timeout) { if (dev->watchdog_timeo <= 0) dev->watchdog_timeo = 5*HZ; if (!mod_timer(&dev->watchdog_timer, @@ -311,21 +311,12 @@ static struct sk_buff *noop_dequeue(struct Qdisc * qdisc) return NULL; } -static int noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc) -{ - if (net_ratelimit()) - printk(KERN_DEBUG "%s deferred output. It is buggy.\n", - skb->dev->name); - kfree_skb(skb); - return NET_XMIT_CN; -} - struct Qdisc_ops noop_qdisc_ops __read_mostly = { .id = "noop", .priv_size = 0, .enqueue = noop_enqueue, .dequeue = noop_dequeue, - .requeue = noop_requeue, + .peek = noop_dequeue, .owner = THIS_MODULE, }; @@ -340,7 +331,6 @@ struct Qdisc noop_qdisc = { .flags = TCQ_F_BUILTIN, .ops = &noop_qdisc_ops, .list = LIST_HEAD_INIT(noop_qdisc.list), - .requeue.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), .dev_queue = &noop_netdev_queue, }; @@ -351,7 +341,7 @@ static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = { .priv_size = 0, .enqueue = noop_enqueue, .dequeue = noop_dequeue, - .requeue = noop_requeue, + .peek = noop_dequeue, .owner = THIS_MODULE, }; @@ -367,7 +357,6 @@ static struct Qdisc noqueue_qdisc = { .flags = TCQ_F_BUILTIN, .ops = &noqueue_qdisc_ops, .list = LIST_HEAD_INIT(noqueue_qdisc.list), - .requeue.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock), .q.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock), .dev_queue = &noqueue_netdev_queue, }; @@ -416,10 +405,17 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc) return NULL; } -static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc) +static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc) { - qdisc->q.qlen++; - return __qdisc_requeue(skb, qdisc, prio2list(skb, qdisc)); + int prio; + struct sk_buff_head *list = qdisc_priv(qdisc); + + for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) { + if (!skb_queue_empty(list + prio)) + return skb_peek(list + prio); + } + + return NULL; } static void pfifo_fast_reset(struct Qdisc* qdisc) @@ -462,7 +458,7 @@ static struct Qdisc_ops pfifo_fast_ops __read_mostly = { .priv_size = PFIFO_FAST_BANDS * sizeof(struct sk_buff_head), .enqueue = pfifo_fast_enqueue, .dequeue = pfifo_fast_dequeue, - .requeue = pfifo_fast_requeue, + .peek = pfifo_fast_peek, .init = pfifo_fast_init, .reset = pfifo_fast_reset, .dump = pfifo_fast_dump, @@ -488,7 +484,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, sch->padded = (char *) sch - (char *) p; INIT_LIST_HEAD(&sch->list); - skb_queue_head_init(&sch->requeue); skb_queue_head_init(&sch->q); sch->ops = ops; sch->enqueue = ops->enqueue; @@ -531,6 +526,9 @@ void qdisc_reset(struct Qdisc *qdisc) if (ops->reset) ops->reset(qdisc); + + kfree_skb(qdisc->gso_skb); + qdisc->gso_skb = NULL; } EXPORT_SYMBOL(qdisc_reset); @@ -557,8 +555,6 @@ void qdisc_destroy(struct Qdisc *qdisc) dev_put(qdisc_dev(qdisc)); kfree_skb(qdisc->gso_skb); - __skb_queue_purge(&qdisc->requeue); - kfree((char *) qdisc - qdisc->padded); } EXPORT_SYMBOL(qdisc_destroy); diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index c1ad6b8de10..40408d595c0 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -240,26 +240,6 @@ congestion_drop: return NET_XMIT_CN; } -static int gred_requeue(struct sk_buff *skb, struct Qdisc* sch) -{ - struct gred_sched *t = qdisc_priv(sch); - struct gred_sched_data *q; - u16 dp = tc_index_to_dp(skb); - - if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { - if (net_ratelimit()) - printk(KERN_WARNING "GRED: Unable to relocate VQ 0x%x " - "for requeue, screwing up backlog.\n", - tc_index_to_dp(skb)); - } else { - if (red_is_idling(&q->parms)) - red_end_of_idle_period(&q->parms); - q->backlog += qdisc_pkt_len(skb); - } - - return qdisc_requeue(skb, sch); -} - static struct sk_buff *gred_dequeue(struct Qdisc* sch) { struct sk_buff *skb; @@ -602,7 +582,7 @@ static struct Qdisc_ops gred_qdisc_ops __read_mostly = { .priv_size = sizeof(struct gred_sched), .enqueue = gred_enqueue, .dequeue = gred_dequeue, - .requeue = gred_requeue, + .peek = qdisc_peek_head, .drop = gred_drop, .init = gred_init, .reset = gred_reset, diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index c1e77da8cd0..45c31b1a4e1 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -184,7 +184,6 @@ struct hfsc_sched struct rb_root eligible; /* eligible tree */ struct list_head droplist; /* active leaf class list (for dropping) */ - struct sk_buff_head requeue; /* requeued packet */ struct qdisc_watchdog watchdog; /* watchdog timer */ }; @@ -880,28 +879,20 @@ set_passive(struct hfsc_class *cl) */ } -/* - * hack to get length of first packet in queue. - */ static unsigned int qdisc_peek_len(struct Qdisc *sch) { struct sk_buff *skb; unsigned int len; - skb = sch->dequeue(sch); + skb = sch->ops->peek(sch); if (skb == NULL) { if (net_ratelimit()) printk("qdisc_peek_len: non work-conserving qdisc ?\n"); return 0; } len = qdisc_pkt_len(skb); - if (unlikely(sch->ops->requeue(skb, sch) != NET_XMIT_SUCCESS)) { - if (net_ratelimit()) - printk("qdisc_peek_len: failed to requeue\n"); - qdisc_tree_decrease_qlen(sch, 1); - return 0; - } + return len; } @@ -1027,6 +1018,14 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, } cur_time = psched_get_time(); + if (tca[TCA_RATE]) { + err = gen_replace_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + if (err) + return err; + } + sch_tree_lock(sch); if (rsc != NULL) hfsc_change_rsc(cl, rsc, cur_time); @@ -1043,10 +1042,6 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, } sch_tree_unlock(sch); - if (tca[TCA_RATE]) - gen_replace_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_sleeping_lock(sch), - tca[TCA_RATE]); return 0; } @@ -1072,6 +1067,16 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (cl == NULL) return -ENOBUFS; + if (tca[TCA_RATE]) { + err = gen_new_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + if (err) { + kfree(cl); + return err; + } + } + if (rsc != NULL) hfsc_change_rsc(cl, rsc, 0); if (fsc != NULL) @@ -1102,9 +1107,6 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, qdisc_class_hash_grow(sch, &q->clhash); - if (tca[TCA_RATE]) - gen_new_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_sleeping_lock(sch), tca[TCA_RATE]); *arg = (unsigned long)cl; return 0; } @@ -1211,7 +1213,8 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, sch_tree_lock(sch); hfsc_purge_queue(sch, cl); - *old = xchg(&cl->qdisc, new); + *old = cl->qdisc; + cl->qdisc = new; sch_tree_unlock(sch); return 0; } @@ -1440,7 +1443,6 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt) return err; q->eligible = RB_ROOT; INIT_LIST_HEAD(&q->droplist); - skb_queue_head_init(&q->requeue); q->root.cl_common.classid = sch->handle; q->root.refcnt = 1; @@ -1525,7 +1527,6 @@ hfsc_reset_qdisc(struct Qdisc *sch) hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode) hfsc_reset_class(cl); } - __skb_queue_purge(&q->requeue); q->eligible = RB_ROOT; INIT_LIST_HEAD(&q->droplist); qdisc_watchdog_cancel(&q->watchdog); @@ -1550,7 +1551,6 @@ hfsc_destroy_qdisc(struct Qdisc *sch) hfsc_destroy_class(sch, cl); } qdisc_class_hash_destroy(&q->clhash); - __skb_queue_purge(&q->requeue); qdisc_watchdog_cancel(&q->watchdog); } @@ -1574,7 +1574,7 @@ static int hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct hfsc_class *cl; - int err; + int uninitialized_var(err); cl = hfsc_classify(skb, sch, &err); if (cl == NULL) { @@ -1617,8 +1617,6 @@ hfsc_dequeue(struct Qdisc *sch) if (sch->q.qlen == 0) return NULL; - if ((skb = __skb_dequeue(&q->requeue))) - goto out; cur_time = psched_get_time(); @@ -1642,7 +1640,7 @@ hfsc_dequeue(struct Qdisc *sch) } } - skb = cl->qdisc->dequeue(cl->qdisc); + skb = qdisc_dequeue_peeked(cl->qdisc); if (skb == NULL) { if (net_ratelimit()) printk("HFSC: Non-work-conserving qdisc ?\n"); @@ -1667,24 +1665,12 @@ hfsc_dequeue(struct Qdisc *sch) set_passive(cl); } - out: sch->flags &= ~TCQ_F_THROTTLED; sch->q.qlen--; return skb; } -static int -hfsc_requeue(struct sk_buff *skb, struct Qdisc *sch) -{ - struct hfsc_sched *q = qdisc_priv(sch); - - __skb_queue_head(&q->requeue, skb); - sch->q.qlen++; - sch->qstats.requeues++; - return NET_XMIT_SUCCESS; -} - static unsigned int hfsc_drop(struct Qdisc *sch) { @@ -1735,7 +1721,7 @@ static struct Qdisc_ops hfsc_qdisc_ops __read_mostly = { .dump = hfsc_dump_qdisc, .enqueue = hfsc_enqueue, .dequeue = hfsc_dequeue, - .requeue = hfsc_requeue, + .peek = qdisc_peek_dequeued, .drop = hfsc_drop, .cl_ops = &hfsc_class_ops, .priv_size = sizeof(struct hfsc_sched), diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index d14f02056ae..5070643ce53 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -84,12 +84,12 @@ struct htb_class { unsigned int children; struct htb_class *parent; /* parent class */ + int prio; /* these two are used only by leaves... */ + int quantum; /* but stored for parent-to-leaf return */ + union { struct htb_class_leaf { struct Qdisc *q; - int prio; - int aprio; - int quantum; int deficit[TC_HTB_MAXDEPTH]; struct list_head drop_list; } leaf; @@ -123,19 +123,8 @@ struct htb_class { psched_tdiff_t mbuffer; /* max wait time */ long tokens, ctokens; /* current number of tokens */ psched_time_t t_c; /* checkpoint time */ - - int prio; /* For parent to leaf return possible here */ - int quantum; /* we do backup. Finally full replacement */ - /* of un.leaf originals should be done. */ }; -static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate, - int size) -{ - long result = qdisc_l2t(rate, size); - return result; -} - struct htb_sched { struct Qdisc_class_hash clhash; struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */ @@ -152,9 +141,6 @@ struct htb_sched { /* time of nearest event per level (row) */ psched_time_t near_ev_cache[TC_HTB_MAXDEPTH]; - /* whether we hit non-work conserving class during this dequeue; we use */ - int nwc_hit; /* this to disable mindelay complaint in dequeue */ - int defcls; /* class where unclassified flows go to */ /* filters for qdisc itself */ @@ -527,10 +513,10 @@ static inline void htb_activate(struct htb_sched *q, struct htb_class *cl) WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen); if (!cl->prio_activity) { - cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio); + cl->prio_activity = 1 << cl->prio; htb_activate_prios(q, cl); list_add_tail(&cl->un.leaf.drop_list, - q->drops + cl->un.leaf.aprio); + q->drops + cl->prio); } } @@ -551,7 +537,7 @@ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl) static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) { - int ret; + int uninitialized_var(ret); struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = htb_classify(skb, sch, &ret); @@ -591,45 +577,30 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) return NET_XMIT_SUCCESS; } -/* TODO: requeuing packet charges it to policers again !! */ -static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch) +static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, long diff) { - int ret; - struct htb_sched *q = qdisc_priv(sch); - struct htb_class *cl = htb_classify(skb, sch, &ret); - struct sk_buff *tskb; + long toks = diff + cl->tokens; - if (cl == HTB_DIRECT) { - /* enqueue to helper queue */ - if (q->direct_queue.qlen < q->direct_qlen) { - __skb_queue_head(&q->direct_queue, skb); - } else { - __skb_queue_head(&q->direct_queue, skb); - tskb = __skb_dequeue_tail(&q->direct_queue); - kfree_skb(tskb); - sch->qstats.drops++; - return NET_XMIT_CN; - } -#ifdef CONFIG_NET_CLS_ACT - } else if (!cl) { - if (ret & __NET_XMIT_BYPASS) - sch->qstats.drops++; - kfree_skb(skb); - return ret; -#endif - } else if ((ret = cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q)) != - NET_XMIT_SUCCESS) { - if (net_xmit_drop_count(ret)) { - sch->qstats.drops++; - cl->qstats.drops++; - } - return ret; - } else - htb_activate(q, cl); + if (toks > cl->buffer) + toks = cl->buffer; + toks -= (long) qdisc_l2t(cl->rate, bytes); + if (toks <= -cl->mbuffer) + toks = 1 - cl->mbuffer; - sch->q.qlen++; - sch->qstats.requeues++; - return NET_XMIT_SUCCESS; + cl->tokens = toks; +} + +static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, long diff) +{ + long toks = diff + cl->ctokens; + + if (toks > cl->cbuffer) + toks = cl->cbuffer; + toks -= (long) qdisc_l2t(cl->ceil, bytes); + if (toks <= -cl->mbuffer) + toks = 1 - cl->mbuffer; + + cl->ctokens = toks; } /** @@ -647,26 +618,20 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, int level, struct sk_buff *skb) { int bytes = qdisc_pkt_len(skb); - long toks, diff; enum htb_cmode old_mode; - -#define HTB_ACCNT(T,B,R) toks = diff + cl->T; \ - if (toks > cl->B) toks = cl->B; \ - toks -= L2T(cl, cl->R, bytes); \ - if (toks <= -cl->mbuffer) toks = 1-cl->mbuffer; \ - cl->T = toks + long diff; while (cl) { diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer); if (cl->level >= level) { if (cl->level == level) cl->xstats.lends++; - HTB_ACCNT(tokens, buffer, rate); + htb_accnt_tokens(cl, bytes, diff); } else { cl->xstats.borrows++; cl->tokens += diff; /* we moved t_c; update tokens */ } - HTB_ACCNT(ctokens, cbuffer, ceil); + htb_accnt_ctokens(cl, bytes, diff); cl->t_c = q->now; old_mode = cl->cmode; @@ -733,14 +698,14 @@ static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n, while (n) { struct htb_class *cl = rb_entry(n, struct htb_class, node[prio]); - if (id == cl->common.classid) - return n; if (id > cl->common.classid) { n = n->rb_right; - } else { + } else if (id < cl->common.classid) { r = n; n = n->rb_left; + } else { + return n; } } return r; @@ -761,7 +726,7 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio, u32 *pid; } stk[TC_HTB_MAXDEPTH], *sp = stk; - WARN_ON(!tree->rb_node); + BUG_ON(!tree->rb_node); sp->root = tree->rb_node; sp->pptr = pptr; sp->pid = pid; @@ -781,9 +746,10 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio, *sp->pptr = (*sp->pptr)->rb_left; if (sp > stk) { sp--; - WARN_ON(!*sp->pptr); - if (!*sp->pptr) + if (!*sp->pptr) { + WARN_ON(1); return NULL; + } htb_next_rb_node(sp->pptr); } } else { @@ -814,8 +780,7 @@ static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio, do { next: - WARN_ON(!cl); - if (!cl) + if (unlikely(!cl)) return NULL; /* class can be empty - it is unlikely but can be true if leaf @@ -849,7 +814,7 @@ next: cl->common.classid); cl->warned = 1; } - q->nwc_hit++; + htb_next_rb_node((level ? cl->parent->un.inner.ptr : q-> ptr[0]) + prio); cl = htb_lookup_leaf(q->row[level] + prio, prio, @@ -861,7 +826,7 @@ next: if (likely(skb != NULL)) { cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb); if (cl->un.leaf.deficit[level] < 0) { - cl->un.leaf.deficit[level] += cl->un.leaf.quantum; + cl->un.leaf.deficit[level] += cl->quantum; htb_next_rb_node((level ? cl->parent->un.inner.ptr : q-> ptr[0]) + prio); } @@ -894,7 +859,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) q->now = psched_get_time(); next_event = q->now + 5 * PSCHED_TICKS_PER_SEC; - q->nwc_hit = 0; + for (level = 0; level < TC_HTB_MAXDEPTH; level++) { /* common case optimization - skip event handler quickly */ int m; @@ -1095,8 +1060,8 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, opt.buffer = cl->buffer; opt.ceil = cl->ceil->rate; opt.cbuffer = cl->cbuffer; - opt.quantum = cl->un.leaf.quantum; - opt.prio = cl->un.leaf.prio; + opt.quantum = cl->quantum; + opt.prio = cl->prio; opt.level = cl->level; NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt); @@ -1141,7 +1106,9 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, == NULL) return -ENOBUFS; sch_tree_lock(sch); - if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) { + *old = cl->un.leaf.q; + cl->un.leaf.q = new; + if (*old != NULL) { qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_reset(*old); } @@ -1198,8 +1165,6 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl, memset(&parent->un.inner, 0, sizeof(parent->un.inner)); INIT_LIST_HEAD(&parent->un.leaf.drop_list); parent->un.leaf.q = new_q ? new_q : &noop_qdisc; - parent->un.leaf.quantum = parent->quantum; - parent->un.leaf.prio = parent->prio; parent->tokens = parent->buffer; parent->ctokens = parent->cbuffer; parent->t_c = psched_get_time(); @@ -1371,9 +1336,14 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL) goto failure; - gen_new_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_sleeping_lock(sch), - tca[TCA_RATE] ? : &est.nla); + err = gen_new_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE] ? : &est.nla); + if (err) { + kfree(cl); + goto failure; + } + cl->refcnt = 1; cl->children = 0; INIT_LIST_HEAD(&cl->un.leaf.drop_list); @@ -1425,37 +1395,36 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (parent) parent->children++; } else { - if (tca[TCA_RATE]) - gen_replace_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_sleeping_lock(sch), - tca[TCA_RATE]); + if (tca[TCA_RATE]) { + err = gen_replace_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + if (err) + return err; + } sch_tree_lock(sch); } /* it used to be a nasty bug here, we have to check that node is really leaf before changing cl->un.leaf ! */ if (!cl->level) { - cl->un.leaf.quantum = rtab->rate.rate / q->rate2quantum; - if (!hopt->quantum && cl->un.leaf.quantum < 1000) { + cl->quantum = rtab->rate.rate / q->rate2quantum; + if (!hopt->quantum && cl->quantum < 1000) { printk(KERN_WARNING "HTB: quantum of class %X is small. Consider r2q change.\n", cl->common.classid); - cl->un.leaf.quantum = 1000; + cl->quantum = 1000; } - if (!hopt->quantum && cl->un.leaf.quantum > 200000) { + if (!hopt->quantum && cl->quantum > 200000) { printk(KERN_WARNING "HTB: quantum of class %X is big. Consider r2q change.\n", cl->common.classid); - cl->un.leaf.quantum = 200000; + cl->quantum = 200000; } if (hopt->quantum) - cl->un.leaf.quantum = hopt->quantum; - if ((cl->un.leaf.prio = hopt->prio) >= TC_HTB_NUMPRIO) - cl->un.leaf.prio = TC_HTB_NUMPRIO - 1; - - /* backup for htb_parent_to_leaf */ - cl->quantum = cl->un.leaf.quantum; - cl->prio = cl->un.leaf.prio; + cl->quantum = hopt->quantum; + if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO) + cl->prio = TC_HTB_NUMPRIO - 1; } cl->buffer = hopt->buffer; @@ -1565,7 +1534,7 @@ static struct Qdisc_ops htb_qdisc_ops __read_mostly = { .priv_size = sizeof(struct htb_sched), .enqueue = htb_enqueue, .dequeue = htb_dequeue, - .requeue = htb_requeue, + .peek = qdisc_peek_dequeued, .drop = htb_drop, .init = htb_init, .reset = htb_reset, diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 915f3149dde..7e151861794 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -92,40 +92,6 @@ multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch) return ret; } - -static int -multiq_requeue(struct sk_buff *skb, struct Qdisc *sch) -{ - struct Qdisc *qdisc; - struct multiq_sched_data *q = qdisc_priv(sch); - int ret; - - qdisc = multiq_classify(skb, sch, &ret); -#ifdef CONFIG_NET_CLS_ACT - if (qdisc == NULL) { - if (ret & __NET_XMIT_BYPASS) - sch->qstats.drops++; - kfree_skb(skb); - return ret; - } -#endif - - ret = qdisc->ops->requeue(skb, qdisc); - if (ret == NET_XMIT_SUCCESS) { - sch->q.qlen++; - sch->qstats.requeues++; - if (q->curband) - q->curband--; - else - q->curband = q->bands - 1; - return NET_XMIT_SUCCESS; - } - if (net_xmit_drop_count(ret)) - sch->qstats.drops++; - return ret; -} - - static struct sk_buff *multiq_dequeue(struct Qdisc *sch) { struct multiq_sched_data *q = qdisc_priv(sch); @@ -140,7 +106,7 @@ static struct sk_buff *multiq_dequeue(struct Qdisc *sch) q->curband = 0; /* Check that target subqueue is available before - * pulling an skb to avoid excessive requeues + * pulling an skb to avoid head-of-line blocking. */ if (!__netif_subqueue_stopped(qdisc_dev(sch), q->curband)) { qdisc = q->queues[q->curband]; @@ -155,6 +121,34 @@ static struct sk_buff *multiq_dequeue(struct Qdisc *sch) } +static struct sk_buff *multiq_peek(struct Qdisc *sch) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + unsigned int curband = q->curband; + struct Qdisc *qdisc; + struct sk_buff *skb; + int band; + + for (band = 0; band < q->bands; band++) { + /* cycle through bands to ensure fairness */ + curband++; + if (curband >= q->bands) + curband = 0; + + /* Check that target subqueue is available before + * pulling an skb to avoid head-of-line blocking. + */ + if (!__netif_subqueue_stopped(qdisc_dev(sch), curband)) { + qdisc = q->queues[curband]; + skb = qdisc->ops->peek(qdisc); + if (skb) + return skb; + } + } + return NULL; + +} + static unsigned int multiq_drop(struct Qdisc *sch) { struct multiq_sched_data *q = qdisc_priv(sch); @@ -220,7 +214,8 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt) q->bands = qopt->bands; for (i = q->bands; i < q->max_bands; i++) { if (q->queues[i] != &noop_qdisc) { - struct Qdisc *child = xchg(&q->queues[i], &noop_qdisc); + struct Qdisc *child = q->queues[i]; + q->queues[i] = &noop_qdisc; qdisc_tree_decrease_qlen(child, child->q.qlen); qdisc_destroy(child); } @@ -230,7 +225,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt) for (i = 0; i < q->bands; i++) { if (q->queues[i] == &noop_qdisc) { - struct Qdisc *child; + struct Qdisc *child, *old; child = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, &pfifo_qdisc_ops, @@ -238,12 +233,13 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt) i + 1)); if (child) { sch_tree_lock(sch); - child = xchg(&q->queues[i], child); + old = q->queues[i]; + q->queues[i] = child; - if (child != &noop_qdisc) { - qdisc_tree_decrease_qlen(child, - child->q.qlen); - qdisc_destroy(child); + if (old != &noop_qdisc) { + qdisc_tree_decrease_qlen(old, + old->q.qlen); + qdisc_destroy(old); } sch_tree_unlock(sch); } @@ -451,7 +447,7 @@ static struct Qdisc_ops multiq_qdisc_ops __read_mostly = { .priv_size = sizeof(struct multiq_sched_data), .enqueue = multiq_enqueue, .dequeue = multiq_dequeue, - .requeue = multiq_requeue, + .peek = multiq_peek, .drop = multiq_drop, .init = multiq_init, .reset = multiq_reset, diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index a11959908d9..d876b873484 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -46,9 +46,6 @@ layering other disciplines. It does not need to do bandwidth control either since that can be handled by using token bucket or other rate control. - - The simulator is limited by the Linux timer resolution - and will create packet bursts on the HZ boundary (1ms). */ struct netem_sched_data { @@ -233,7 +230,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) */ cb->time_to_send = psched_get_time(); q->counter = 0; - ret = q->qdisc->ops->requeue(skb, q->qdisc); + + __skb_queue_head(&q->qdisc->q, skb); + q->qdisc->qstats.backlog += qdisc_pkt_len(skb); + q->qdisc->qstats.requeues++; + ret = NET_XMIT_SUCCESS; } if (likely(ret == NET_XMIT_SUCCESS)) { @@ -248,20 +249,6 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) return ret; } -/* Requeue packets but don't change time stamp */ -static int netem_requeue(struct sk_buff *skb, struct Qdisc *sch) -{ - struct netem_sched_data *q = qdisc_priv(sch); - int ret; - - if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) { - sch->q.qlen++; - sch->qstats.requeues++; - } - - return ret; -} - static unsigned int netem_drop(struct Qdisc* sch) { struct netem_sched_data *q = qdisc_priv(sch); @@ -279,29 +266,25 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) struct netem_sched_data *q = qdisc_priv(sch); struct sk_buff *skb; - smp_mb(); if (sch->flags & TCQ_F_THROTTLED) return NULL; - skb = q->qdisc->dequeue(q->qdisc); + skb = q->qdisc->ops->peek(q->qdisc); if (skb) { const struct netem_skb_cb *cb = netem_skb_cb(skb); psched_time_t now = psched_get_time(); /* if more time remaining? */ if (cb->time_to_send <= now) { + skb = qdisc_dequeue_peeked(q->qdisc); + if (unlikely(!skb)) + return NULL; + pr_debug("netem_dequeue: return skb=%p\n", skb); sch->q.qlen--; return skb; } - if (unlikely(q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS)) { - qdisc_tree_decrease_qlen(q->qdisc, 1); - sch->qstats.drops++; - printk(KERN_ERR "netem: %s could not requeue\n", - q->qdisc->ops->id); - } - qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send); } @@ -344,14 +327,13 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) root_lock = qdisc_root_sleeping_lock(sch); spin_lock_bh(root_lock); - d = xchg(&q->delay_dist, d); + kfree(q->delay_dist); + q->delay_dist = d; spin_unlock_bh(root_lock); - - kfree(d); return 0; } -static int get_correlation(struct Qdisc *sch, const struct nlattr *attr) +static void get_correlation(struct Qdisc *sch, const struct nlattr *attr) { struct netem_sched_data *q = qdisc_priv(sch); const struct tc_netem_corr *c = nla_data(attr); @@ -359,27 +341,24 @@ static int get_correlation(struct Qdisc *sch, const struct nlattr *attr) init_crandom(&q->delay_cor, c->delay_corr); init_crandom(&q->loss_cor, c->loss_corr); init_crandom(&q->dup_cor, c->dup_corr); - return 0; } -static int get_reorder(struct Qdisc *sch, const struct nlattr *attr) +static void get_reorder(struct Qdisc *sch, const struct nlattr *attr) { struct netem_sched_data *q = qdisc_priv(sch); const struct tc_netem_reorder *r = nla_data(attr); q->reorder = r->probability; init_crandom(&q->reorder_cor, r->correlation); - return 0; } -static int get_corrupt(struct Qdisc *sch, const struct nlattr *attr) +static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr) { struct netem_sched_data *q = qdisc_priv(sch); const struct tc_netem_corrupt *r = nla_data(attr); q->corrupt = r->probability; init_crandom(&q->corrupt_cor, r->correlation); - return 0; } static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = { @@ -438,11 +417,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) if (q->gap) q->reorder = ~0; - if (tb[TCA_NETEM_CORR]) { - ret = get_correlation(sch, tb[TCA_NETEM_CORR]); - if (ret) - return ret; - } + if (tb[TCA_NETEM_CORR]) + get_correlation(sch, tb[TCA_NETEM_CORR]); if (tb[TCA_NETEM_DELAY_DIST]) { ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]); @@ -450,17 +426,11 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) return ret; } - if (tb[TCA_NETEM_REORDER]) { - ret = get_reorder(sch, tb[TCA_NETEM_REORDER]); - if (ret) - return ret; - } + if (tb[TCA_NETEM_REORDER]) + get_reorder(sch, tb[TCA_NETEM_REORDER]); - if (tb[TCA_NETEM_CORRUPT]) { - ret = get_corrupt(sch, tb[TCA_NETEM_CORRUPT]); - if (ret) - return ret; - } + if (tb[TCA_NETEM_CORRUPT]) + get_corrupt(sch, tb[TCA_NETEM_CORRUPT]); return 0; } @@ -541,7 +511,7 @@ static struct Qdisc_ops tfifo_qdisc_ops __read_mostly = { .priv_size = sizeof(struct fifo_sched_data), .enqueue = tfifo_enqueue, .dequeue = qdisc_dequeue_head, - .requeue = qdisc_requeue, + .peek = qdisc_peek_head, .drop = qdisc_queue_drop, .init = tfifo_init, .reset = qdisc_reset_queue, @@ -624,99 +594,12 @@ nla_put_failure: return -1; } -static int netem_dump_class(struct Qdisc *sch, unsigned long cl, - struct sk_buff *skb, struct tcmsg *tcm) -{ - struct netem_sched_data *q = qdisc_priv(sch); - - if (cl != 1) /* only one class */ - return -ENOENT; - - tcm->tcm_handle |= TC_H_MIN(1); - tcm->tcm_info = q->qdisc->handle; - - return 0; -} - -static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, - struct Qdisc **old) -{ - struct netem_sched_data *q = qdisc_priv(sch); - - if (new == NULL) - new = &noop_qdisc; - - sch_tree_lock(sch); - *old = xchg(&q->qdisc, new); - qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); - qdisc_reset(*old); - sch_tree_unlock(sch); - - return 0; -} - -static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg) -{ - struct netem_sched_data *q = qdisc_priv(sch); - return q->qdisc; -} - -static unsigned long netem_get(struct Qdisc *sch, u32 classid) -{ - return 1; -} - -static void netem_put(struct Qdisc *sch, unsigned long arg) -{ -} - -static int netem_change_class(struct Qdisc *sch, u32 classid, u32 parentid, - struct nlattr **tca, unsigned long *arg) -{ - return -ENOSYS; -} - -static int netem_delete(struct Qdisc *sch, unsigned long arg) -{ - return -ENOSYS; -} - -static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker) -{ - if (!walker->stop) { - if (walker->count >= walker->skip) - if (walker->fn(sch, 1, walker) < 0) { - walker->stop = 1; - return; - } - walker->count++; - } -} - -static struct tcf_proto **netem_find_tcf(struct Qdisc *sch, unsigned long cl) -{ - return NULL; -} - -static const struct Qdisc_class_ops netem_class_ops = { - .graft = netem_graft, - .leaf = netem_leaf, - .get = netem_get, - .put = netem_put, - .change = netem_change_class, - .delete = netem_delete, - .walk = netem_walk, - .tcf_chain = netem_find_tcf, - .dump = netem_dump_class, -}; - static struct Qdisc_ops netem_qdisc_ops __read_mostly = { .id = "netem", - .cl_ops = &netem_class_ops, .priv_size = sizeof(struct netem_sched_data), .enqueue = netem_enqueue, .dequeue = netem_dequeue, - .requeue = netem_requeue, + .peek = qdisc_peek_dequeued, .drop = netem_drop, .init = netem_init, .reset = netem_reset, diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 504a78cdb71..94cecef7014 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -93,34 +93,20 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch) return ret; } - -static int -prio_requeue(struct sk_buff *skb, struct Qdisc* sch) +static struct sk_buff *prio_peek(struct Qdisc *sch) { - struct Qdisc *qdisc; - int ret; - - qdisc = prio_classify(skb, sch, &ret); -#ifdef CONFIG_NET_CLS_ACT - if (qdisc == NULL) { - if (ret & __NET_XMIT_BYPASS) - sch->qstats.drops++; - kfree_skb(skb); - return ret; - } -#endif + struct prio_sched_data *q = qdisc_priv(sch); + int prio; - if ((ret = qdisc->ops->requeue(skb, qdisc)) == NET_XMIT_SUCCESS) { - sch->q.qlen++; - sch->qstats.requeues++; - return NET_XMIT_SUCCESS; + for (prio = 0; prio < q->bands; prio++) { + struct Qdisc *qdisc = q->queues[prio]; + struct sk_buff *skb = qdisc->ops->peek(qdisc); + if (skb) + return skb; } - if (net_xmit_drop_count(ret)) - sch->qstats.drops++; - return ret; + return NULL; } - static struct sk_buff *prio_dequeue(struct Qdisc* sch) { struct prio_sched_data *q = qdisc_priv(sch); @@ -201,7 +187,8 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); for (i=q->bands; i<TCQ_PRIO_BANDS; i++) { - struct Qdisc *child = xchg(&q->queues[i], &noop_qdisc); + struct Qdisc *child = q->queues[i]; + q->queues[i] = &noop_qdisc; if (child != &noop_qdisc) { qdisc_tree_decrease_qlen(child, child->q.qlen); qdisc_destroy(child); @@ -211,18 +198,19 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) for (i=0; i<q->bands; i++) { if (q->queues[i] == &noop_qdisc) { - struct Qdisc *child; + struct Qdisc *child, *old; child = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, &pfifo_qdisc_ops, TC_H_MAKE(sch->handle, i + 1)); if (child) { sch_tree_lock(sch); - child = xchg(&q->queues[i], child); + old = q->queues[i]; + q->queues[i] = child; - if (child != &noop_qdisc) { - qdisc_tree_decrease_qlen(child, - child->q.qlen); - qdisc_destroy(child); + if (old != &noop_qdisc) { + qdisc_tree_decrease_qlen(old, + old->q.qlen); + qdisc_destroy(old); } sch_tree_unlock(sch); } @@ -421,7 +409,7 @@ static struct Qdisc_ops prio_qdisc_ops __read_mostly = { .priv_size = sizeof(struct prio_sched_data), .enqueue = prio_enqueue, .dequeue = prio_dequeue, - .requeue = prio_requeue, + .peek = prio_peek, .drop = prio_drop, .init = prio_init, .reset = prio_reset, diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 5da05839e22..2bdf241f631 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -108,23 +108,6 @@ congestion_drop: return NET_XMIT_CN; } -static int red_requeue(struct sk_buff *skb, struct Qdisc* sch) -{ - struct red_sched_data *q = qdisc_priv(sch); - struct Qdisc *child = q->qdisc; - int ret; - - if (red_is_idling(&q->parms)) - red_end_of_idle_period(&q->parms); - - ret = child->ops->requeue(skb, child); - if (likely(ret == NET_XMIT_SUCCESS)) { - sch->qstats.requeues++; - sch->q.qlen++; - } - return ret; -} - static struct sk_buff * red_dequeue(struct Qdisc* sch) { struct sk_buff *skb; @@ -140,6 +123,14 @@ static struct sk_buff * red_dequeue(struct Qdisc* sch) return skb; } +static struct sk_buff * red_peek(struct Qdisc* sch) +{ + struct red_sched_data *q = qdisc_priv(sch); + struct Qdisc *child = q->qdisc; + + return child->ops->peek(child); +} + static unsigned int red_drop(struct Qdisc* sch) { struct red_sched_data *q = qdisc_priv(sch); @@ -211,7 +202,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt) q->limit = ctl->limit; if (child) { qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); - qdisc_destroy(xchg(&q->qdisc, child)); + qdisc_destroy(q->qdisc); + q->qdisc = child; } red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog, @@ -292,7 +284,8 @@ static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, new = &noop_qdisc; sch_tree_lock(sch); - *old = xchg(&q->qdisc, new); + *old = q->qdisc; + q->qdisc = new; qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_reset(*old); sch_tree_unlock(sch); @@ -361,7 +354,7 @@ static struct Qdisc_ops red_qdisc_ops __read_mostly = { .cl_ops = &red_class_ops, .enqueue = red_enqueue, .dequeue = red_dequeue, - .requeue = red_requeue, + .peek = red_peek, .drop = red_drop, .init = red_init, .reset = red_reset, diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index fe1508ef0d3..f3965df0055 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -281,7 +281,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) struct sfq_sched_data *q = qdisc_priv(sch); unsigned int hash; sfq_index x; - int ret; + int uninitialized_var(ret); hash = sfq_classify(skb, sch, &ret); if (hash == 0) { @@ -329,71 +329,20 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) return NET_XMIT_CN; } -static int -sfq_requeue(struct sk_buff *skb, struct Qdisc *sch) +static struct sk_buff * +sfq_peek(struct Qdisc *sch) { struct sfq_sched_data *q = qdisc_priv(sch); - unsigned int hash; - sfq_index x; - int ret; - - hash = sfq_classify(skb, sch, &ret); - if (hash == 0) { - if (ret & __NET_XMIT_BYPASS) - sch->qstats.drops++; - kfree_skb(skb); - return ret; - } - hash--; + sfq_index a; - x = q->ht[hash]; - if (x == SFQ_DEPTH) { - q->ht[hash] = x = q->dep[SFQ_DEPTH].next; - q->hash[x] = hash; - } - - sch->qstats.backlog += qdisc_pkt_len(skb); - __skb_queue_head(&q->qs[x], skb); - /* If selected queue has length q->limit+1, this means that - * all another queues are empty and we do simple tail drop. - * This packet is still requeued at head of queue, tail packet - * is dropped. - */ - if (q->qs[x].qlen > q->limit) { - skb = q->qs[x].prev; - __skb_unlink(skb, &q->qs[x]); - sch->qstats.drops++; - sch->qstats.backlog -= qdisc_pkt_len(skb); - kfree_skb(skb); - return NET_XMIT_CN; - } - - sfq_inc(q, x); - if (q->qs[x].qlen == 1) { /* The flow is new */ - if (q->tail == SFQ_DEPTH) { /* It is the first flow */ - q->tail = x; - q->next[x] = x; - q->allot[x] = q->quantum; - } else { - q->next[x] = q->next[q->tail]; - q->next[q->tail] = x; - q->tail = x; - } - } - - if (++sch->q.qlen <= q->limit) { - sch->qstats.requeues++; - return 0; - } + /* No active slots */ + if (q->tail == SFQ_DEPTH) + return NULL; - sch->qstats.drops++; - sfq_drop(sch); - return NET_XMIT_CN; + a = q->next[q->tail]; + return skb_peek(&q->qs[a]); } - - - static struct sk_buff * sfq_dequeue(struct Qdisc *sch) { @@ -624,7 +573,7 @@ static struct Qdisc_ops sfq_qdisc_ops __read_mostly = { .priv_size = sizeof(struct sfq_sched_data), .enqueue = sfq_enqueue, .dequeue = sfq_dequeue, - .requeue = sfq_requeue, + .peek = sfq_peek, .drop = sfq_drop, .init = sfq_init, .reset = sfq_reset, diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 94c61598b86..a2f93c09f3c 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -139,19 +139,6 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch) return 0; } -static int tbf_requeue(struct sk_buff *skb, struct Qdisc* sch) -{ - struct tbf_sched_data *q = qdisc_priv(sch); - int ret; - - if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) { - sch->q.qlen++; - sch->qstats.requeues++; - } - - return ret; -} - static unsigned int tbf_drop(struct Qdisc* sch) { struct tbf_sched_data *q = qdisc_priv(sch); @@ -169,7 +156,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch) struct tbf_sched_data *q = qdisc_priv(sch); struct sk_buff *skb; - skb = q->qdisc->dequeue(q->qdisc); + skb = q->qdisc->ops->peek(q->qdisc); if (skb) { psched_time_t now; @@ -192,6 +179,10 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch) toks -= L2T(q, len); if ((toks|ptoks) >= 0) { + skb = qdisc_dequeue_peeked(q->qdisc); + if (unlikely(!skb)) + return NULL; + q->t_c = now; q->tokens = toks; q->ptokens = ptoks; @@ -214,12 +205,6 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch) (cf. CSZ, HPFQ, HFSC) */ - if (q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS) { - /* When requeue fails skb is dropped */ - qdisc_tree_decrease_qlen(q->qdisc, 1); - sch->qstats.drops++; - } - sch->qstats.overlimits++; } return NULL; @@ -251,6 +236,7 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt) struct tc_tbf_qopt *qopt; struct qdisc_rate_table *rtab = NULL; struct qdisc_rate_table *ptab = NULL; + struct qdisc_rate_table *tmp; struct Qdisc *child = NULL; int max_size,n; @@ -299,7 +285,8 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt) sch_tree_lock(sch); if (child) { qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); - qdisc_destroy(xchg(&q->qdisc, child)); + qdisc_destroy(q->qdisc); + q->qdisc = child; } q->limit = qopt->limit; q->mtu = qopt->mtu; @@ -307,8 +294,14 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt) q->buffer = qopt->buffer; q->tokens = q->buffer; q->ptokens = q->mtu; - rtab = xchg(&q->R_tab, rtab); - ptab = xchg(&q->P_tab, ptab); + + tmp = q->R_tab; + q->R_tab = rtab; + rtab = tmp; + + tmp = q->P_tab; + q->P_tab = ptab; + ptab = tmp; sch_tree_unlock(sch); err = 0; done: @@ -398,7 +391,8 @@ static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, new = &noop_qdisc; sch_tree_lock(sch); - *old = xchg(&q->qdisc, new); + *old = q->qdisc; + q->qdisc = new; qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_reset(*old); sch_tree_unlock(sch); @@ -469,7 +463,7 @@ static struct Qdisc_ops tbf_qdisc_ops __read_mostly = { .priv_size = sizeof(struct tbf_sched_data), .enqueue = tbf_enqueue, .dequeue = tbf_dequeue, - .requeue = tbf_requeue, + .peek = qdisc_peek_dequeued, .drop = tbf_drop, .init = tbf_init, .reset = tbf_reset, diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index d35ef059abb..cfc8e7caba6 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -93,16 +93,6 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc* sch) return NET_XMIT_DROP; } -static int -teql_requeue(struct sk_buff *skb, struct Qdisc* sch) -{ - struct teql_sched_data *q = qdisc_priv(sch); - - __skb_queue_head(&q->q, skb); - sch->qstats.requeues++; - return 0; -} - static struct sk_buff * teql_dequeue(struct Qdisc* sch) { @@ -123,6 +113,13 @@ teql_dequeue(struct Qdisc* sch) return skb; } +static struct sk_buff * +teql_peek(struct Qdisc* sch) +{ + /* teql is meant to be used as root qdisc */ + return NULL; +} + static __inline__ void teql_neigh_release(struct neighbour *n) { @@ -433,7 +430,7 @@ static __init void teql_master_setup(struct net_device *dev) ops->enqueue = teql_enqueue; ops->dequeue = teql_dequeue; - ops->requeue = teql_requeue; + ops->peek = teql_peek; ops->init = teql_qdisc_init; ops->reset = teql_reset; ops->destroy = teql_destroy; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 4124bbb9994..ceaa4aa066e 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -223,10 +223,9 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) ipv6_addr_copy(&fl.fl6_dst, rt0->addr); } - SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, " - "src:" NIP6_FMT " dst:" NIP6_FMT "\n", + SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n", __func__, skb, skb->len, - NIP6(fl.fl6_src), NIP6(fl.fl6_dst)); + &fl.fl6_src, &fl.fl6_dst); SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS); @@ -252,23 +251,19 @@ static struct dst_entry *sctp_v6_get_dst(struct sctp_association *asoc, fl.oif = daddr->v6.sin6_scope_id; - SCTP_DEBUG_PRINTK("%s: DST=" NIP6_FMT " ", - __func__, NIP6(fl.fl6_dst)); + SCTP_DEBUG_PRINTK("%s: DST=%pI6 ", __func__, &fl.fl6_dst); if (saddr) { ipv6_addr_copy(&fl.fl6_src, &saddr->v6.sin6_addr); - SCTP_DEBUG_PRINTK( - "SRC=" NIP6_FMT " - ", - NIP6(fl.fl6_src)); + SCTP_DEBUG_PRINTK("SRC=%pI6 - ", &fl.fl6_src); } dst = ip6_route_output(&init_net, NULL, &fl); if (!dst->error) { struct rt6_info *rt; rt = (struct rt6_info *)dst; - SCTP_DEBUG_PRINTK( - "rt6_dst:" NIP6_FMT " rt6_src:" NIP6_FMT "\n", - NIP6(rt->rt6i_dst.addr), NIP6(rt->rt6i_src.addr)); + SCTP_DEBUG_PRINTK("rt6_dst:%pI6 rt6_src:%pI6\n", + &rt->rt6i_dst.addr, &rt->rt6i_src.addr); return dst; } SCTP_DEBUG_PRINTK("NO ROUTE\n"); @@ -314,9 +309,8 @@ static void sctp_v6_get_saddr(struct sctp_sock *sk, __u8 matchlen = 0; __u8 bmatchlen; - SCTP_DEBUG_PRINTK("%s: asoc:%p dst:%p " - "daddr:" NIP6_FMT " ", - __func__, asoc, dst, NIP6(daddr->v6.sin6_addr)); + SCTP_DEBUG_PRINTK("%s: asoc:%p dst:%p daddr:%pI6 ", + __func__, asoc, dst, &daddr->v6.sin6_addr); if (!asoc) { ipv6_dev_get_saddr(sock_net(sctp_opt2sk(sk)), @@ -324,8 +318,8 @@ static void sctp_v6_get_saddr(struct sctp_sock *sk, &daddr->v6.sin6_addr, inet6_sk(&sk->inet.sk)->srcprefs, &saddr->v6.sin6_addr); - SCTP_DEBUG_PRINTK("saddr from ipv6_get_saddr: " NIP6_FMT "\n", - NIP6(saddr->v6.sin6_addr)); + SCTP_DEBUG_PRINTK("saddr from ipv6_get_saddr: %pI6\n", + &saddr->v6.sin6_addr); return; } @@ -353,12 +347,11 @@ static void sctp_v6_get_saddr(struct sctp_sock *sk, if (baddr) { memcpy(saddr, baddr, sizeof(union sctp_addr)); - SCTP_DEBUG_PRINTK("saddr: " NIP6_FMT "\n", - NIP6(saddr->v6.sin6_addr)); + SCTP_DEBUG_PRINTK("saddr: %pI6\n", &saddr->v6.sin6_addr); } else { printk(KERN_ERR "%s: asoc:%p Could not find a valid source " - "address for the dest:" NIP6_FMT "\n", - __func__, asoc, NIP6(daddr->v6.sin6_addr)); + "address for the dest:%pI6\n", + __func__, asoc, &daddr->v6.sin6_addr); } rcu_read_unlock(); @@ -727,7 +720,7 @@ static int sctp_v6_is_ce(const struct sk_buff *skb) /* Dump the v6 addr to the seq file. */ static void sctp_v6_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr) { - seq_printf(seq, NIP6_FMT " ", NIP6(addr->v6.sin6_addr)); + seq_printf(seq, "%pI6 ", &addr->v6.sin6_addr); } static void sctp_v6_ecn_capable(struct sock *sk) diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 0b65354aaf6..b78e3be6901 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -102,6 +102,8 @@ struct sock *sctp_get_ctl_sock(void) /* Set up the proc fs entry for the SCTP protocol. */ static __init int sctp_proc_init(void) { + if (percpu_counter_init(&sctp_sockets_allocated, 0)) + goto out_nomem; #ifdef CONFIG_PROC_FS if (!proc_net_sctp) { struct proc_dir_entry *ent; @@ -110,7 +112,7 @@ static __init int sctp_proc_init(void) ent->owner = THIS_MODULE; proc_net_sctp = ent; } else - goto out_nomem; + goto out_free_percpu; } if (sctp_snmp_proc_init()) @@ -135,11 +137,14 @@ out_snmp_proc_init: proc_net_sctp = NULL; remove_proc_entry("sctp", init_net.proc_net); } -out_nomem: - return -ENOMEM; +out_free_percpu: + percpu_counter_destroy(&sctp_sockets_allocated); #else return 0; #endif /* CONFIG_PROC_FS */ + +out_nomem: + return -ENOMEM; } /* Clean up the proc fs entry for the SCTP protocol. @@ -482,9 +487,8 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, if (saddr) fl.fl4_src = saddr->v4.sin_addr.s_addr; - SCTP_DEBUG_PRINTK("%s: DST:%u.%u.%u.%u, SRC:%u.%u.%u.%u - ", - __func__, NIPQUAD(fl.fl4_dst), - NIPQUAD(fl.fl4_src)); + SCTP_DEBUG_PRINTK("%s: DST:%pI4, SRC:%pI4 - ", + __func__, &fl.fl4_dst, &fl.fl4_src); if (!ip_route_output_key(&init_net, &rt, &fl)) { dst = &rt->u.dst; @@ -540,8 +544,8 @@ out_unlock: rcu_read_unlock(); out: if (dst) - SCTP_DEBUG_PRINTK("rt_dst:%u.%u.%u.%u, rt_src:%u.%u.%u.%u\n", - NIPQUAD(rt->rt_dst), NIPQUAD(rt->rt_src)); + SCTP_DEBUG_PRINTK("rt_dst:%pI4, rt_src:%pI4\n", + &rt->rt_dst, &rt->rt_src); else SCTP_DEBUG_PRINTK("NO ROUTE\n"); @@ -646,7 +650,7 @@ static void sctp_v4_addr_v4map(struct sctp_sock *sp, union sctp_addr *addr) /* Dump the v4 addr to the seq file. */ static void sctp_v4_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr) { - seq_printf(seq, "%d.%d.%d.%d ", NIPQUAD(addr->v4.sin_addr)); + seq_printf(seq, "%pI4 ", &addr->v4.sin_addr); } static void sctp_v4_ecn_capable(struct sock *sk) @@ -866,11 +870,10 @@ static inline int sctp_v4_xmit(struct sk_buff *skb, { struct inet_sock *inet = inet_sk(skb->sk); - SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, " - "src:%u.%u.%u.%u, dst:%u.%u.%u.%u\n", + SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, src:%pI4, dst:%pI4\n", __func__, skb, skb->len, - NIPQUAD(skb->rtable->rt_src), - NIPQUAD(skb->rtable->rt_dst)); + &skb->rtable->rt_src, + &skb->rtable->rt_dst); inet->pmtudisc = transport->param_flags & SPP_PMTUD_ENABLE ? IP_PMTUDISC_DO : IP_PMTUDISC_DONT; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index a6a0ea71ae9..1c4e5d6c29c 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1123,19 +1123,17 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep, if (from_addr.sa.sa_family == AF_INET6) { if (net_ratelimit()) printk(KERN_WARNING - "%s association %p could not find address " - NIP6_FMT "\n", + "%s association %p could not find address %pI6\n", __func__, asoc, - NIP6(from_addr.v6.sin6_addr)); + &from_addr.v6.sin6_addr); } else { if (net_ratelimit()) printk(KERN_WARNING - "%s association %p could not find address " - NIPQUAD_FMT "\n", + "%s association %p could not find address %pI4\n", __func__, asoc, - NIPQUAD(from_addr.v4.sin_addr.s_addr)); + &from_addr.v4.sin_addr.s_addr); } return SCTP_DISPOSITION_DISCARD; } @@ -3691,6 +3689,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn(const struct sctp_endpoint *ep, { struct sctp_chunk *chunk = arg; struct sctp_fwdtsn_hdr *fwdtsn_hdr; + struct sctp_fwdtsn_skip *skip; __u16 len; __u32 tsn; @@ -3720,6 +3719,12 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn(const struct sctp_endpoint *ep, if (sctp_tsnmap_check(&asoc->peer.tsn_map, tsn) < 0) goto discard_noforce; + /* Silently discard the chunk if stream-id is not valid */ + sctp_walk_fwdtsn(skip, chunk) { + if (ntohs(skip->stream) >= asoc->c.sinit_max_instreams) + goto discard_noforce; + } + sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_FWDTSN, SCTP_U32(tsn)); if (len > sizeof(struct sctp_fwdtsn_hdr)) sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_FWDTSN, @@ -3751,6 +3756,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn_fast( { struct sctp_chunk *chunk = arg; struct sctp_fwdtsn_hdr *fwdtsn_hdr; + struct sctp_fwdtsn_skip *skip; __u16 len; __u32 tsn; @@ -3780,6 +3786,12 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn_fast( if (sctp_tsnmap_check(&asoc->peer.tsn_map, tsn) < 0) goto gen_shutdown; + /* Silently discard the chunk if stream-id is not valid */ + sctp_walk_fwdtsn(skip, chunk) { + if (ntohs(skip->stream) >= asoc->c.sinit_max_instreams) + goto gen_shutdown; + } + sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_FWDTSN, SCTP_U32(tsn)); if (len > sizeof(struct sctp_fwdtsn_hdr)) sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_FWDTSN, diff --git a/net/sctp/socket.c b/net/sctp/socket.c index a1b904529d5..b14a8f33e42 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -114,7 +114,7 @@ extern int sysctl_sctp_wmem[3]; static int sctp_memory_pressure; static atomic_t sctp_memory_allocated; -static atomic_t sctp_sockets_allocated; +struct percpu_counter sctp_sockets_allocated; static void sctp_enter_memory_pressure(struct sock *sk) { @@ -2404,9 +2404,9 @@ static int sctp_setsockopt_delayed_ack(struct sock *sk, if (params.sack_delay == 0 && params.sack_freq == 0) return 0; } else if (optlen == sizeof(struct sctp_assoc_value)) { - printk(KERN_WARNING "SCTP: Use of struct sctp_sack_info " + printk(KERN_WARNING "SCTP: Use of struct sctp_assoc_value " "in delayed_ack socket option deprecated\n"); - printk(KERN_WARNING "SCTP: struct sctp_sack_info instead\n"); + printk(KERN_WARNING "SCTP: Use struct sctp_sack_info instead\n"); if (copy_from_user(¶ms, optval, optlen)) return -EFAULT; @@ -2778,32 +2778,77 @@ static int sctp_setsockopt_mappedv4(struct sock *sk, char __user *optval, int op } /* - * 7.1.17 Set the maximum fragrmentation size (SCTP_MAXSEG) - * - * This socket option specifies the maximum size to put in any outgoing - * SCTP chunk. If a message is larger than this size it will be + * 8.1.16. Get or Set the Maximum Fragmentation Size (SCTP_MAXSEG) + * This option will get or set the maximum size to put in any outgoing + * SCTP DATA chunk. If a message is larger than this size it will be * fragmented by SCTP into the specified size. Note that the underlying * SCTP implementation may fragment into smaller sized chunks when the * PMTU of the underlying association is smaller than the value set by - * the user. + * the user. The default value for this option is '0' which indicates + * the user is NOT limiting fragmentation and only the PMTU will effect + * SCTP's choice of DATA chunk size. Note also that values set larger + * than the maximum size of an IP datagram will effectively let SCTP + * control fragmentation (i.e. the same as setting this option to 0). + * + * The following structure is used to access and modify this parameter: + * + * struct sctp_assoc_value { + * sctp_assoc_t assoc_id; + * uint32_t assoc_value; + * }; + * + * assoc_id: This parameter is ignored for one-to-one style sockets. + * For one-to-many style sockets this parameter indicates which + * association the user is performing an action upon. Note that if + * this field's value is zero then the endpoints default value is + * changed (effecting future associations only). + * assoc_value: This parameter specifies the maximum size in bytes. */ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, int optlen) { + struct sctp_assoc_value params; struct sctp_association *asoc; struct sctp_sock *sp = sctp_sk(sk); int val; - if (optlen < sizeof(int)) + if (optlen == sizeof(int)) { + printk(KERN_WARNING + "SCTP: Use of int in maxseg socket option deprecated\n"); + printk(KERN_WARNING + "SCTP: Use struct sctp_assoc_value instead\n"); + if (copy_from_user(&val, optval, optlen)) + return -EFAULT; + params.assoc_id = 0; + } else if (optlen == sizeof(struct sctp_assoc_value)) { + if (copy_from_user(¶ms, optval, optlen)) + return -EFAULT; + val = params.assoc_value; + } else return -EINVAL; - if (get_user(val, (int __user *)optval)) - return -EFAULT; + if ((val != 0) && ((val < 8) || (val > SCTP_MAX_CHUNK_LEN))) return -EINVAL; - sp->user_frag = val; - /* Update the frag_point of the existing associations. */ - list_for_each_entry(asoc, &(sp->ep->asocs), asocs) { - asoc->frag_point = sctp_frag_point(sp, asoc->pathmtu); + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id && sctp_style(sk, UDP)) + return -EINVAL; + + if (asoc) { + if (val == 0) { + val = asoc->pathmtu; + val -= sp->pf->af->net_header_len; + val -= sizeof(struct sctphdr) + + sizeof(struct sctp_data_chunk); + } + + asoc->frag_point = val; + } else { + sp->user_frag = val; + + /* Update the frag_point of the existing associations. */ + list_for_each_entry(asoc, &(sp->ep->asocs), asocs) { + asoc->frag_point = sctp_frag_point(sp, asoc->pathmtu); + } } return 0; @@ -2965,14 +3010,21 @@ static int sctp_setsockopt_fragment_interleave(struct sock *sk, } /* - * 7.1.25. Set or Get the sctp partial delivery point + * 8.1.21. Set or Get the SCTP Partial Delivery Point * (SCTP_PARTIAL_DELIVERY_POINT) + * * This option will set or get the SCTP partial delivery point. This * point is the size of a message where the partial delivery API will be * invoked to help free up rwnd space for the peer. Setting this to a - * lower value will cause partial delivery's to happen more often. The + * lower value will cause partial deliveries to happen more often. The * calls argument is an integer that sets or gets the partial delivery - * point. + * point. Note also that the call will fail if the user attempts to set + * this value larger than the socket receive buffer size. + * + * Note that any single message having a length smaller than or equal to + * the SCTP partial delivery point will be delivered in one single read + * call as long as the user provided buffer is large enough to hold the + * message. */ static int sctp_setsockopt_partial_delivery_point(struct sock *sk, char __user *optval, @@ -2985,6 +3037,12 @@ static int sctp_setsockopt_partial_delivery_point(struct sock *sk, if (get_user(val, (int __user *)optval)) return -EFAULT; + /* Note: We double the receive buffer from what the user sets + * it to be, also initial rwnd is based on rcvbuf/2. + */ + if (val > (sk->sk_rcvbuf >> 1)) + return -EINVAL; + sctp_sk(sk)->pd_point = val; return 0; /* is this the right error code? */ @@ -3613,7 +3671,12 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) sp->hmac = NULL; SCTP_DBG_OBJCNT_INC(sock); - atomic_inc(&sctp_sockets_allocated); + percpu_counter_inc(&sctp_sockets_allocated); + + local_bh_disable(); + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + local_bh_enable(); + return 0; } @@ -3627,7 +3690,10 @@ SCTP_STATIC void sctp_destroy_sock(struct sock *sk) /* Release our hold on the endpoint. */ ep = sctp_sk(sk)->ep; sctp_endpoint_free(ep); - atomic_dec(&sctp_sockets_allocated); + percpu_counter_dec(&sctp_sockets_allocated); + local_bh_disable(); + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + local_bh_enable(); } /* API 4.1.7 shutdown() - TCP Style Syntax @@ -4168,9 +4234,9 @@ static int sctp_getsockopt_delayed_ack(struct sock *sk, int len, if (copy_from_user(¶ms, optval, len)) return -EFAULT; } else if (len == sizeof(struct sctp_assoc_value)) { - printk(KERN_WARNING "SCTP: Use of struct sctp_sack_info " + printk(KERN_WARNING "SCTP: Use of struct sctp_assoc_value " "in delayed_ack socket option deprecated\n"); - printk(KERN_WARNING "SCTP: struct sctp_sack_info instead\n"); + printk(KERN_WARNING "SCTP: Use struct sctp_sack_info instead\n"); if (copy_from_user(¶ms, optval, len)) return -EFAULT; } else @@ -5092,30 +5158,69 @@ static int sctp_getsockopt_context(struct sock *sk, int len, } /* - * 7.1.17 Set the maximum fragrmentation size (SCTP_MAXSEG) - * - * This socket option specifies the maximum size to put in any outgoing - * SCTP chunk. If a message is larger than this size it will be + * 8.1.16. Get or Set the Maximum Fragmentation Size (SCTP_MAXSEG) + * This option will get or set the maximum size to put in any outgoing + * SCTP DATA chunk. If a message is larger than this size it will be * fragmented by SCTP into the specified size. Note that the underlying * SCTP implementation may fragment into smaller sized chunks when the * PMTU of the underlying association is smaller than the value set by - * the user. + * the user. The default value for this option is '0' which indicates + * the user is NOT limiting fragmentation and only the PMTU will effect + * SCTP's choice of DATA chunk size. Note also that values set larger + * than the maximum size of an IP datagram will effectively let SCTP + * control fragmentation (i.e. the same as setting this option to 0). + * + * The following structure is used to access and modify this parameter: + * + * struct sctp_assoc_value { + * sctp_assoc_t assoc_id; + * uint32_t assoc_value; + * }; + * + * assoc_id: This parameter is ignored for one-to-one style sockets. + * For one-to-many style sockets this parameter indicates which + * association the user is performing an action upon. Note that if + * this field's value is zero then the endpoints default value is + * changed (effecting future associations only). + * assoc_value: This parameter specifies the maximum size in bytes. */ static int sctp_getsockopt_maxseg(struct sock *sk, int len, char __user *optval, int __user *optlen) { - int val; + struct sctp_assoc_value params; + struct sctp_association *asoc; - if (len < sizeof(int)) + if (len == sizeof(int)) { + printk(KERN_WARNING + "SCTP: Use of int in maxseg socket option deprecated\n"); + printk(KERN_WARNING + "SCTP: Use struct sctp_assoc_value instead\n"); + params.assoc_id = 0; + } else if (len >= sizeof(struct sctp_assoc_value)) { + len = sizeof(struct sctp_assoc_value); + if (copy_from_user(¶ms, optval, sizeof(params))) + return -EFAULT; + } else return -EINVAL; - len = sizeof(int); + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id && sctp_style(sk, UDP)) + return -EINVAL; + + if (asoc) + params.assoc_value = asoc->frag_point; + else + params.assoc_value = sctp_sk(sk)->user_frag; - val = sctp_sk(sk)->user_frag; if (put_user(len, optlen)) return -EFAULT; - if (copy_to_user(optval, &val, len)) - return -EFAULT; + if (len == sizeof(int)) { + if (copy_to_user(optval, ¶ms.assoc_value, len)) + return -EFAULT; + } else { + if (copy_to_user(optval, ¶ms, len)) + return -EFAULT; + } return 0; } @@ -5368,6 +5473,38 @@ num: return 0; } +/* + * 8.2.5. Get the Current Number of Associations (SCTP_GET_ASSOC_NUMBER) + * This option gets the current number of associations that are attached + * to a one-to-many style socket. The option value is an uint32_t. + */ +static int sctp_getsockopt_assoc_number(struct sock *sk, int len, + char __user *optval, int __user *optlen) +{ + struct sctp_sock *sp = sctp_sk(sk); + struct sctp_association *asoc; + u32 val = 0; + + if (sctp_style(sk, TCP)) + return -EOPNOTSUPP; + + if (len < sizeof(u32)) + return -EINVAL; + + len = sizeof(u32); + + list_for_each_entry(asoc, &(sp->ep->asocs), asocs) { + val++; + } + + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &val, len)) + return -EFAULT; + + return 0; +} + SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -5510,6 +5647,9 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, retval = sctp_getsockopt_local_auth_chunks(sk, len, optval, optlen); break; + case SCTP_GET_ASSOC_NUMBER: + retval = sctp_getsockopt_assoc_number(sk, len, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 52910697e10..f58e994e685 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -63,8 +63,8 @@ static ctl_table sctp_table[] = { .data = &sctp_rto_initial, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &timer_max }, @@ -74,8 +74,8 @@ static ctl_table sctp_table[] = { .data = &sctp_rto_min, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &timer_max }, @@ -85,8 +85,8 @@ static ctl_table sctp_table[] = { .data = &sctp_rto_max, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &timer_max }, @@ -96,8 +96,8 @@ static ctl_table sctp_table[] = { .data = &sctp_valid_cookie_life, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &timer_max }, @@ -107,8 +107,8 @@ static ctl_table sctp_table[] = { .data = &sctp_max_burst, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &zero, .extra2 = &int_max }, @@ -118,8 +118,8 @@ static ctl_table sctp_table[] = { .data = &sctp_max_retrans_association, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &int_max }, @@ -129,8 +129,8 @@ static ctl_table sctp_table[] = { .data = &sctp_sndbuf_policy, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec + .proc_handler = proc_dointvec, + .strategy = sysctl_intvec }, { .ctl_name = NET_SCTP_RCVBUF_POLICY, @@ -138,8 +138,8 @@ static ctl_table sctp_table[] = { .data = &sctp_rcvbuf_policy, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec + .proc_handler = proc_dointvec, + .strategy = sysctl_intvec }, { .ctl_name = NET_SCTP_PATH_MAX_RETRANS, @@ -147,8 +147,8 @@ static ctl_table sctp_table[] = { .data = &sctp_max_retrans_path, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &int_max }, @@ -158,8 +158,8 @@ static ctl_table sctp_table[] = { .data = &sctp_max_retrans_init, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &int_max }, @@ -169,8 +169,8 @@ static ctl_table sctp_table[] = { .data = &sctp_hb_interval, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &timer_max }, @@ -180,8 +180,8 @@ static ctl_table sctp_table[] = { .data = &sctp_cookie_preserve_enable, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec + .proc_handler = proc_dointvec, + .strategy = sysctl_intvec }, { .ctl_name = NET_SCTP_RTO_ALPHA, @@ -189,8 +189,8 @@ static ctl_table sctp_table[] = { .data = &sctp_rto_alpha, .maxlen = sizeof(int), .mode = 0444, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec + .proc_handler = proc_dointvec, + .strategy = sysctl_intvec }, { .ctl_name = NET_SCTP_RTO_BETA, @@ -198,8 +198,8 @@ static ctl_table sctp_table[] = { .data = &sctp_rto_beta, .maxlen = sizeof(int), .mode = 0444, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec + .proc_handler = proc_dointvec, + .strategy = sysctl_intvec }, { .ctl_name = NET_SCTP_ADDIP_ENABLE, @@ -207,8 +207,8 @@ static ctl_table sctp_table[] = { .data = &sctp_addip_enable, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec + .proc_handler = proc_dointvec, + .strategy = sysctl_intvec }, { .ctl_name = NET_SCTP_PRSCTP_ENABLE, @@ -216,8 +216,8 @@ static ctl_table sctp_table[] = { .data = &sctp_prsctp_enable, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec + .proc_handler = proc_dointvec, + .strategy = sysctl_intvec }, { .ctl_name = NET_SCTP_SACK_TIMEOUT, @@ -225,8 +225,8 @@ static ctl_table sctp_table[] = { .data = &sctp_sack_timeout, .maxlen = sizeof(long), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &sack_timer_min, .extra2 = &sack_timer_max, }, @@ -236,7 +236,7 @@ static ctl_table sctp_table[] = { .data = &sysctl_sctp_mem, .maxlen = sizeof(sysctl_sctp_mem), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = CTL_UNNUMBERED, @@ -244,7 +244,7 @@ static ctl_table sctp_table[] = { .data = &sysctl_sctp_rmem, .maxlen = sizeof(sysctl_sctp_rmem), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = CTL_UNNUMBERED, @@ -252,7 +252,7 @@ static ctl_table sctp_table[] = { .data = &sysctl_sctp_wmem, .maxlen = sizeof(sysctl_sctp_wmem), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { .ctl_name = CTL_UNNUMBERED, @@ -260,8 +260,8 @@ static ctl_table sctp_table[] = { .data = &sctp_auth_enable, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec + .proc_handler = proc_dointvec, + .strategy = sysctl_intvec }, { .ctl_name = CTL_UNNUMBERED, @@ -269,8 +269,8 @@ static ctl_table sctp_table[] = { .data = &sctp_addip_noauth, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec + .proc_handler = proc_dointvec, + .strategy = sysctl_intvec }, { .ctl_name = 0 } }; diff --git a/net/socket.c b/net/socket.c index 92764d83689..2c730fc718a 100644 --- a/net/socket.c +++ b/net/socket.c @@ -69,7 +69,6 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/mutex.h> -#include <linux/thread_info.h> #include <linux/wanrouter.h> #include <linux/if_bridge.h> #include <linux/if_frad.h> @@ -491,8 +490,8 @@ static struct socket *sock_alloc(void) sock = SOCKET_I(inode); inode->i_mode = S_IFSOCK | S_IRWXUGO; - inode->i_uid = current->fsuid; - inode->i_gid = current->fsgid; + inode->i_uid = current_fsuid(); + inode->i_gid = current_fsgid(); get_cpu_var(sockets_in_use)++; put_cpu_var(sockets_in_use); @@ -2307,6 +2306,7 @@ int kernel_accept(struct socket *sock, struct socket **newsock, int flags) } (*newsock)->ops = sock->ops; + __module_get((*newsock)->ops->owner); done: return err; diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index cb216b2df66..0443f834945 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -350,16 +350,18 @@ EXPORT_SYMBOL_GPL(rpcauth_lookup_credcache); struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *auth, int flags) { - struct auth_cred acred = { - .uid = current->fsuid, - .gid = current->fsgid, - .group_info = current->group_info, - }; + struct auth_cred acred; struct rpc_cred *ret; + const struct cred *cred = current_cred(); dprintk("RPC: looking up %s cred\n", auth->au_ops->au_name); - get_group_info(acred.group_info); + + memset(&acred, 0, sizeof(acred)); + acred.uid = cred->fsuid; + acred.gid = cred->fsgid; + acred.group_info = get_group_info(((struct cred *)cred)->group_info); + ret = auth->au_ops->lookup_cred(auth, &acred, flags); put_group_info(acred.group_info); return ret; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 4895c341e46..3ca518386d1 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -271,15 +271,15 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) case AF_INET: { struct sockaddr_in *sin = (struct sockaddr_in *)args->address; - snprintf(servername, sizeof(servername), NIPQUAD_FMT, - NIPQUAD(sin->sin_addr.s_addr)); + snprintf(servername, sizeof(servername), "%pI4", + &sin->sin_addr.s_addr); break; } case AF_INET6: { struct sockaddr_in6 *sin = (struct sockaddr_in6 *)args->address; - snprintf(servername, sizeof(servername), NIP6_FMT, - NIP6(sin->sin6_addr)); + snprintf(servername, sizeof(servername), "%pI6", + &sin->sin6_addr); break; } default: diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 41013dd66ac..03ae007641e 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -270,10 +270,9 @@ static int rpcb_register_netid4(struct sockaddr_in *address_to_register, char buf[32]; /* Construct AF_INET universal address */ - snprintf(buf, sizeof(buf), - NIPQUAD_FMT".%u.%u", - NIPQUAD(address_to_register->sin_addr.s_addr), - port >> 8, port & 0xff); + snprintf(buf, sizeof(buf), "%pI4.%u.%u", + &address_to_register->sin_addr.s_addr, + port >> 8, port & 0xff); map->r_addr = buf; dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " @@ -305,9 +304,9 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register, snprintf(buf, sizeof(buf), "::.%u.%u", port >> 8, port & 0xff); else - snprintf(buf, sizeof(buf), NIP6_FMT".%u.%u", - NIP6(address_to_register->sin6_addr), - port >> 8, port & 0xff); + snprintf(buf, sizeof(buf), "%pI6.%u.%u", + &address_to_register->sin6_addr, + port >> 8, port & 0xff); map->r_addr = buf; dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " @@ -422,8 +421,8 @@ int rpcb_getport_sync(struct sockaddr_in *sin, u32 prog, u32 vers, int prot) struct rpc_clnt *rpcb_clnt; int status; - dprintk("RPC: %s(" NIPQUAD_FMT ", %u, %u, %d)\n", - __func__, NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot); + dprintk("RPC: %s(%pI4, %u, %u, %d)\n", + __func__, &sin->sin_addr.s_addr, prog, vers, prot); rpcb_clnt = rpcb_create(NULL, (struct sockaddr *)sin, sizeof(*sin), prot, RPCBVERS_2); diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index f24800f2c09..82240e6127b 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -162,13 +162,9 @@ static void ip_map_request(struct cache_detail *cd, struct ip_map *im = container_of(h, struct ip_map, h); if (ipv6_addr_v4mapped(&(im->m_addr))) { - snprintf(text_addr, 20, NIPQUAD_FMT, - ntohl(im->m_addr.s6_addr32[3]) >> 24 & 0xff, - ntohl(im->m_addr.s6_addr32[3]) >> 16 & 0xff, - ntohl(im->m_addr.s6_addr32[3]) >> 8 & 0xff, - ntohl(im->m_addr.s6_addr32[3]) >> 0 & 0xff); + snprintf(text_addr, 20, "%pI4", &im->m_addr.s6_addr32[3]); } else { - snprintf(text_addr, 40, NIP6_FMT, NIP6(im->m_addr)); + snprintf(text_addr, 40, "%pI6", &im->m_addr); } qword_add(bpp, blen, im->m_class); qword_add(bpp, blen, text_addr); @@ -208,13 +204,13 @@ static int ip_map_parse(struct cache_detail *cd, len = qword_get(&mesg, buf, mlen); if (len <= 0) return -EINVAL; - if (sscanf(buf, NIPQUAD_FMT "%c", &b1, &b2, &b3, &b4, &c) == 4) { + if (sscanf(buf, "%u.%u.%u.%u%c", &b1, &b2, &b3, &b4, &c) == 4) { addr.s6_addr32[0] = 0; addr.s6_addr32[1] = 0; addr.s6_addr32[2] = htonl(0xffff); addr.s6_addr32[3] = htonl((((((b1<<8)|b2)<<8)|b3)<<8)|b4); - } else if (sscanf(buf, NIP6_FMT "%c", + } else if (sscanf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x%c", &b1, &b2, &b3, &b4, &b5, &b6, &b7, &b8, &c) == 8) { addr.s6_addr16[0] = htons(b1); addr.s6_addr16[1] = htons(b2); @@ -278,16 +274,10 @@ static int ip_map_show(struct seq_file *m, dom = im->m_client->h.name; if (ipv6_addr_v4mapped(&addr)) { - seq_printf(m, "%s " NIPQUAD_FMT " %s\n", - im->m_class, - ntohl(addr.s6_addr32[3]) >> 24 & 0xff, - ntohl(addr.s6_addr32[3]) >> 16 & 0xff, - ntohl(addr.s6_addr32[3]) >> 8 & 0xff, - ntohl(addr.s6_addr32[3]) >> 0 & 0xff, - dom); + seq_printf(m, "%s %pI4 %s\n", + im->m_class, &addr.s6_addr32[3], dom); } else { - seq_printf(m, "%s " NIP6_FMT " %s\n", - im->m_class, NIP6(addr), dom); + seq_printf(m, "%s %pI6 %s\n", im->m_class, &addr, dom); } return 0; } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index a1951dcc577..ef3238d665e 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -250,10 +250,10 @@ static int one_sock_name(char *buf, struct svc_sock *svsk) switch(svsk->sk_sk->sk_family) { case AF_INET: - len = sprintf(buf, "ipv4 %s %u.%u.%u.%u %d\n", - svsk->sk_sk->sk_protocol==IPPROTO_UDP? + len = sprintf(buf, "ipv4 %s %pI4 %d\n", + svsk->sk_sk->sk_protocol == IPPROTO_UDP ? "udp" : "tcp", - NIPQUAD(inet_sk(svsk->sk_sk)->rcv_saddr), + &inet_sk(svsk->sk_sk)->rcv_saddr, inet_sk(svsk->sk_sk)->num); break; default: diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index a4756576d68..629a28764da 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -646,8 +646,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) ret = rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt); if (ret > 0) { /* read-list posted, defer until data received from client. */ - svc_xprt_received(xprt); - return 0; + goto defer; } if (ret < 0) { /* Post of read-list failed, free context. */ @@ -679,6 +678,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) * close bit and call svc_xprt_delete */ set_bit(XPT_CLOSE, &xprt->xpt_flags); +defer: svc_xprt_received(xprt); return 0; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 9a7a8e7ae03..a3334e3b73c 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -69,7 +69,7 @@ * array is only concerned with the reply we are assured that we have * on extra page for the RPCRMDA header. */ -int fast_reg_xdr(struct svcxprt_rdma *xprt, +static int fast_reg_xdr(struct svcxprt_rdma *xprt, struct xdr_buf *xdr, struct svc_rdma_req_map *vec) { diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 6fb493cbd29..3d810e7df3f 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -61,7 +61,7 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt); static void rq_cq_reap(struct svcxprt_rdma *xprt); static void sq_cq_reap(struct svcxprt_rdma *xprt); -DECLARE_TASKLET(dto_tasklet, dto_tasklet_func, 0UL); +static DECLARE_TASKLET(dto_tasklet, dto_tasklet_func, 0UL); static DEFINE_SPINLOCK(dto_lock); static LIST_HEAD(dto_xprt_q); @@ -827,7 +827,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) struct rdma_conn_param conn_param; struct ib_qp_init_attr qp_attr; struct ib_device_attr devattr; - int dma_mr_acc; + int uninitialized_var(dma_mr_acc); int need_dma_mr; int ret; int i; @@ -1048,21 +1048,21 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) dprintk("svcrdma: new connection %p accepted with the following " "attributes:\n" - " local_ip : %d.%d.%d.%d\n" + " local_ip : %pI4\n" " local_port : %d\n" - " remote_ip : %d.%d.%d.%d\n" + " remote_ip : %pI4\n" " remote_port : %d\n" " max_sge : %d\n" " sq_depth : %d\n" " max_requests : %d\n" " ord : %d\n", newxprt, - NIPQUAD(((struct sockaddr_in *)&newxprt->sc_cm_id-> - route.addr.src_addr)->sin_addr.s_addr), + &((struct sockaddr_in *)&newxprt->sc_cm_id-> + route.addr.src_addr)->sin_addr.s_addr, ntohs(((struct sockaddr_in *)&newxprt->sc_cm_id-> route.addr.src_addr)->sin_port), - NIPQUAD(((struct sockaddr_in *)&newxprt->sc_cm_id-> - route.addr.dst_addr)->sin_addr.s_addr), + &((struct sockaddr_in *)&newxprt->sc_cm_id-> + route.addr.dst_addr)->sin_addr.s_addr, ntohs(((struct sockaddr_in *)&newxprt->sc_cm_id-> route.addr.dst_addr)->sin_port), newxprt->sc_max_sge, diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 9839c3d9414..1dd6123070e 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -174,7 +174,7 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt) buf = kzalloc(20, GFP_KERNEL); if (buf) - snprintf(buf, 20, NIPQUAD_FMT, NIPQUAD(addr->sin_addr.s_addr)); + snprintf(buf, 20, "%pI4", &addr->sin_addr.s_addr); xprt->address_strings[RPC_DISPLAY_ADDR] = buf; buf = kzalloc(8, GFP_KERNEL); @@ -186,8 +186,8 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt) buf = kzalloc(48, GFP_KERNEL); if (buf) - snprintf(buf, 48, "addr="NIPQUAD_FMT" port=%u proto=%s", - NIPQUAD(addr->sin_addr.s_addr), + snprintf(buf, 48, "addr=%pI4 port=%u proto=%s", + &addr->sin_addr.s_addr, ntohs(addr->sin_port), "rdma"); xprt->address_strings[RPC_DISPLAY_ALL] = buf; @@ -204,8 +204,8 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt) buf = kzalloc(30, GFP_KERNEL); if (buf) - snprintf(buf, 30, NIPQUAD_FMT".%u.%u", - NIPQUAD(addr->sin_addr.s_addr), + snprintf(buf, 30, "%pI4.%u.%u", + &addr->sin_addr.s_addr, ntohs(addr->sin_port) >> 8, ntohs(addr->sin_port) & 0xff); xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; @@ -369,8 +369,8 @@ xprt_setup_rdma(struct xprt_create *args) if (ntohs(sin->sin_port) != 0) xprt_set_bound(xprt); - dprintk("RPC: %s: %u.%u.%u.%u:%u\n", __func__, - NIPQUAD(sin->sin_addr.s_addr), ntohs(sin->sin_port)); + dprintk("RPC: %s: %pI4:%u\n", + __func__, &sin->sin_addr.s_addr, ntohs(sin->sin_port)); /* Set max requests */ cdata.max_requests = xprt->max_reqs; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index a5fef5e6c32..3b21e0cc5e6 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -276,7 +276,9 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) struct rpcrdma_xprt *xprt = id->context; struct rpcrdma_ia *ia = &xprt->rx_ia; struct rpcrdma_ep *ep = &xprt->rx_ep; +#ifdef RPC_DEBUG struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr; +#endif struct ib_qp_attr attr; struct ib_qp_init_attr iattr; int connstate = 0; @@ -323,12 +325,11 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) case RDMA_CM_EVENT_DEVICE_REMOVAL: connstate = -ENODEV; connected: - dprintk("RPC: %s: %s: %u.%u.%u.%u:%u" - " (ep 0x%p event 0x%x)\n", + dprintk("RPC: %s: %s: %pI4:%u (ep 0x%p event 0x%x)\n", __func__, (event->event <= 11) ? conn[event->event] : "unknown connection error", - NIPQUAD(addr->sin_addr.s_addr), + &addr->sin_addr.s_addr, ntohs(addr->sin_port), ep, event->event); atomic_set(&rpcx_to_rdmax(ep->rep_xprt)->rx_buf.rb_credits, 1); @@ -348,18 +349,17 @@ connected: if (connstate == 1) { int ird = attr.max_dest_rd_atomic; int tird = ep->rep_remote_cma.responder_resources; - printk(KERN_INFO "rpcrdma: connection to %u.%u.%u.%u:%u " + printk(KERN_INFO "rpcrdma: connection to %pI4:%u " "on %s, memreg %d slots %d ird %d%s\n", - NIPQUAD(addr->sin_addr.s_addr), + &addr->sin_addr.s_addr, ntohs(addr->sin_port), ia->ri_id->device->name, ia->ri_memreg_strategy, xprt->rx_buf.rb_max_requests, ird, ird < 4 && ird < tird / 2 ? " (low!)" : ""); } else if (connstate < 0) { - printk(KERN_INFO "rpcrdma: connection to %u.%u.%u.%u:%u " - "closed (%d)\n", - NIPQUAD(addr->sin_addr.s_addr), + printk(KERN_INFO "rpcrdma: connection to %pI4:%u closed (%d)\n", + &addr->sin_addr.s_addr, ntohs(addr->sin_port), connstate); } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 0a50361e3d8..5cbb404c4cd 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -284,8 +284,7 @@ static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt, buf = kzalloc(20, GFP_KERNEL); if (buf) { - snprintf(buf, 20, NIPQUAD_FMT, - NIPQUAD(addr->sin_addr.s_addr)); + snprintf(buf, 20, "%pI4", &addr->sin_addr.s_addr); } xprt->address_strings[RPC_DISPLAY_ADDR] = buf; @@ -300,8 +299,8 @@ static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt, buf = kzalloc(48, GFP_KERNEL); if (buf) { - snprintf(buf, 48, "addr="NIPQUAD_FMT" port=%u proto=%s", - NIPQUAD(addr->sin_addr.s_addr), + snprintf(buf, 48, "addr=%pI4 port=%u proto=%s", + &addr->sin_addr.s_addr, ntohs(addr->sin_port), protocol); } @@ -323,8 +322,8 @@ static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt, buf = kzalloc(30, GFP_KERNEL); if (buf) { - snprintf(buf, 30, NIPQUAD_FMT".%u.%u", - NIPQUAD(addr->sin_addr.s_addr), + snprintf(buf, 30, "%pI4.%u.%u", + &addr->sin_addr.s_addr, ntohs(addr->sin_port) >> 8, ntohs(addr->sin_port) & 0xff); } @@ -342,8 +341,7 @@ static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt, buf = kzalloc(40, GFP_KERNEL); if (buf) { - snprintf(buf, 40, NIP6_FMT, - NIP6(addr->sin6_addr)); + snprintf(buf, 40, "%pI6",&addr->sin6_addr); } xprt->address_strings[RPC_DISPLAY_ADDR] = buf; @@ -358,18 +356,17 @@ static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt, buf = kzalloc(64, GFP_KERNEL); if (buf) { - snprintf(buf, 64, "addr="NIP6_FMT" port=%u proto=%s", - NIP6(addr->sin6_addr), + snprintf(buf, 64, "addr=%pI6 port=%u proto=%s", + &addr->sin6_addr, ntohs(addr->sin6_port), protocol); } xprt->address_strings[RPC_DISPLAY_ALL] = buf; buf = kzalloc(36, GFP_KERNEL); - if (buf) { - snprintf(buf, 36, NIP6_SEQFMT, - NIP6(addr->sin6_addr)); - } + if (buf) + snprintf(buf, 36, "%pi6", &addr->sin6_addr); + xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf; buf = kzalloc(8, GFP_KERNEL); @@ -381,10 +378,10 @@ static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt, buf = kzalloc(50, GFP_KERNEL); if (buf) { - snprintf(buf, 50, NIP6_FMT".%u.%u", - NIP6(addr->sin6_addr), - ntohs(addr->sin6_port) >> 8, - ntohs(addr->sin6_port) & 0xff); + snprintf(buf, 50, "%pI6.%u.%u", + &addr->sin6_addr, + ntohs(addr->sin6_port) >> 8, + ntohs(addr->sin6_port) & 0xff); } xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; @@ -1415,8 +1412,8 @@ static int xs_bind4(struct sock_xprt *transport, struct socket *sock) if (port > last) nloop++; } while (err == -EADDRINUSE && nloop != 2); - dprintk("RPC: %s "NIPQUAD_FMT":%u: %s (%d)\n", - __func__, NIPQUAD(myaddr.sin_addr), + dprintk("RPC: %s %pI4:%u: %s (%d)\n", + __func__, &myaddr.sin_addr, port, err ? "failed" : "ok", err); return err; } @@ -1448,8 +1445,8 @@ static int xs_bind6(struct sock_xprt *transport, struct socket *sock) if (port > last) nloop++; } while (err == -EADDRINUSE && nloop != 2); - dprintk("RPC: xs_bind6 "NIP6_FMT":%u: %s (%d)\n", - NIP6(myaddr.sin6_addr), port, err ? "failed" : "ok", err); + dprintk("RPC: xs_bind6 %pI6:%u: %s (%d)\n", + &myaddr.sin6_addr, port, err ? "failed" : "ok", err); return err; } diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index fe43ef7dd7e..f72ba774c24 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -243,12 +243,11 @@ static int recv_notification(struct notifier_block *nb, unsigned long evt, static char *eth_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size) { unchar *addr = (unchar *)&a->dev_addr; - DECLARE_MAC_BUF(mac); if (str_size < 18) *str_buf = '\0'; else - sprintf(str_buf, "%s", print_mac(mac, addr)); + sprintf(str_buf, "%pM", addr); return str_buf; } diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index cd72e22b132..acab41a48d6 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -555,7 +555,7 @@ static struct name_seq *nametbl_find_seq(u32 type) struct name_seq *ns; dbg("find_seq %u,(%u,0x%x) table = %p, hash[type] = %u\n", - type, ntohl(type), type, table.types, hash(type)); + type, htonl(type), type, table.types, hash(type)); seq_head = &table.types[hash(type)]; hlist_for_each_entry(ns, seq_node, seq_head, ns_list) { diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 66d5ac4773a..c6250d0055d 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -164,7 +164,7 @@ static inline int unix_our_peer(struct sock *sk, struct sock *osk) static inline int unix_may_send(struct sock *sk, struct sock *osk) { - return (unix_peer(osk) == NULL || unix_our_peer(sk, osk)); + return unix_peer(osk) == NULL || unix_our_peer(sk, osk); } static inline int unix_recvq_full(struct sock const *sk) @@ -197,7 +197,7 @@ static inline void unix_release_addr(struct unix_address *addr) * - if started by zero, it is abstract name. */ -static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp) +static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp) { if (len <= sizeof(short) || len > sizeof(*sunaddr)) return -EINVAL; @@ -211,12 +211,12 @@ static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp) * we are guaranteed that it is a valid memory location in our * kernel address buffer. */ - ((char *)sunaddr)[len]=0; + ((char *)sunaddr)[len] = 0; len = strlen(sunaddr->sun_path)+1+sizeof(short); return len; } - *hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0)); + *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0)); return len; } @@ -295,8 +295,7 @@ static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i) if (!net_eq(sock_net(s), net)) continue; - if(dentry && dentry->d_inode == i) - { + if (dentry && dentry->d_inode == i) { sock_hold(s); goto found; } @@ -354,7 +353,7 @@ static void unix_sock_destructor(struct sock *sk) WARN_ON(!sk_unhashed(sk)); WARN_ON(sk->sk_socket); if (!sock_flag(sk, SOCK_DEAD)) { - printk("Attempt to release alive unix socket: %p\n", sk); + printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk); return; } @@ -362,12 +361,16 @@ static void unix_sock_destructor(struct sock *sk) unix_release_addr(u->addr); atomic_dec(&unix_nr_socks); + local_bh_disable(); + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + local_bh_enable(); #ifdef UNIX_REFCNT_DEBUG - printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks)); + printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, + atomic_read(&unix_nr_socks)); #endif } -static int unix_release_sock (struct sock *sk, int embrion) +static int unix_release_sock(struct sock *sk, int embrion) { struct unix_sock *u = unix_sk(sk); struct dentry *dentry; @@ -392,9 +395,9 @@ static int unix_release_sock (struct sock *sk, int embrion) wake_up_interruptible_all(&u->peer_wait); - skpair=unix_peer(sk); + skpair = unix_peer(sk); - if (skpair!=NULL) { + if (skpair != NULL) { if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) { unix_state_lock(skpair); /* No more writes */ @@ -414,7 +417,7 @@ static int unix_release_sock (struct sock *sk, int embrion) /* Try to flush out this socket. Throw out buffers at least */ while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { - if (state==TCP_LISTEN) + if (state == TCP_LISTEN) unix_release_sock(skb->sk, 1); /* passed fds are erased in the kfree_skb hook */ kfree_skb(skb); @@ -453,11 +456,11 @@ static int unix_listen(struct socket *sock, int backlog) struct unix_sock *u = unix_sk(sk); err = -EOPNOTSUPP; - if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET) - goto out; /* Only stream/seqpacket sockets accept */ + if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) + goto out; /* Only stream/seqpacket sockets accept */ err = -EINVAL; if (!u->addr) - goto out; /* No listens on an unbound socket */ + goto out; /* No listens on an unbound socket */ unix_state_lock(sk); if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN) goto out_unlock; @@ -467,8 +470,7 @@ static int unix_listen(struct socket *sock, int backlog) sk->sk_state = TCP_LISTEN; /* set credentials so connect can copy them */ sk->sk_peercred.pid = task_tgid_vnr(current); - sk->sk_peercred.uid = current->euid; - sk->sk_peercred.gid = current->egid; + current_euid_egid(&sk->sk_peercred.uid, &sk->sk_peercred.gid); err = 0; out_unlock: @@ -566,9 +568,9 @@ static const struct proto_ops unix_seqpacket_ops = { }; static struct proto unix_proto = { - .name = "UNIX", - .owner = THIS_MODULE, - .obj_size = sizeof(struct unix_sock), + .name = "UNIX", + .owner = THIS_MODULE, + .obj_size = sizeof(struct unix_sock), }; /* @@ -579,7 +581,7 @@ static struct proto unix_proto = { */ static struct lock_class_key af_unix_sk_receive_queue_lock_key; -static struct sock * unix_create1(struct net *net, struct socket *sock) +static struct sock *unix_create1(struct net *net, struct socket *sock) { struct sock *sk = NULL; struct unix_sock *u; @@ -592,7 +594,7 @@ static struct sock * unix_create1(struct net *net, struct socket *sock) if (!sk) goto out; - sock_init_data(sock,sk); + sock_init_data(sock, sk); lockdep_set_class(&sk->sk_receive_queue.lock, &af_unix_sk_receive_queue_lock_key); @@ -611,6 +613,11 @@ static struct sock * unix_create1(struct net *net, struct socket *sock) out: if (sk == NULL) atomic_dec(&unix_nr_socks); + else { + local_bh_disable(); + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + local_bh_enable(); + } return sk; } @@ -630,7 +637,7 @@ static int unix_create(struct net *net, struct socket *sock, int protocol) * nothing uses it. */ case SOCK_RAW: - sock->type=SOCK_DGRAM; + sock->type = SOCK_DGRAM; case SOCK_DGRAM: sock->ops = &unix_dgram_ops; break; @@ -653,7 +660,7 @@ static int unix_release(struct socket *sock) sock->sk = NULL; - return unix_release_sock (sk, 0); + return unix_release_sock(sk, 0); } static int unix_autobind(struct socket *sock) @@ -662,7 +669,7 @@ static int unix_autobind(struct socket *sock) struct net *net = sock_net(sk); struct unix_sock *u = unix_sk(sk); static u32 ordernum = 1; - struct unix_address * addr; + struct unix_address *addr; int err; mutex_lock(&u->readlock); @@ -681,7 +688,7 @@ static int unix_autobind(struct socket *sock) retry: addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short); - addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0)); + addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0)); spin_lock(&unix_table_lock); ordernum = (ordernum+1)&0xFFFFF; @@ -736,14 +743,14 @@ static struct sock *unix_find_other(struct net *net, path_put(&path); - err=-EPROTOTYPE; + err = -EPROTOTYPE; if (u->sk_type != type) { sock_put(u); goto fail; } } else { err = -ECONNREFUSED; - u=unix_find_socket_byname(net, sunname, len, type, hash); + u = unix_find_socket_byname(net, sunname, len, type, hash); if (u) { struct dentry *dentry; dentry = unix_sk(u)->dentry; @@ -757,7 +764,7 @@ static struct sock *unix_find_other(struct net *net, put_fail: path_put(&path); fail: - *error=err; + *error = err; return NULL; } @@ -767,8 +774,8 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct sock *sk = sock->sk; struct net *net = sock_net(sk); struct unix_sock *u = unix_sk(sk); - struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr; - struct dentry * dentry = NULL; + struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; + struct dentry *dentry = NULL; struct nameidata nd; int err; unsigned hash; @@ -779,7 +786,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (sunaddr->sun_family != AF_UNIX) goto out; - if (addr_len==sizeof(short)) { + if (addr_len == sizeof(short)) { err = unix_autobind(sock); goto out; } @@ -875,8 +882,8 @@ out_mknod_unlock: mutex_unlock(&nd.path.dentry->d_inode->i_mutex); path_put(&nd.path); out_mknod_parent: - if (err==-EEXIST) - err=-EADDRINUSE; + if (err == -EEXIST) + err = -EADDRINUSE; unix_release_addr(addr); goto out_up; } @@ -911,7 +918,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, { struct sock *sk = sock->sk; struct net *net = sock_net(sk); - struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr; + struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr; struct sock *other; unsigned hash; int err; @@ -927,7 +934,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, goto out; restart: - other=unix_find_other(net, sunaddr, alen, sock->type, hash, &err); + other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err); if (!other) goto out; @@ -961,14 +968,14 @@ restart: */ if (unix_peer(sk)) { struct sock *old_peer = unix_peer(sk); - unix_peer(sk)=other; + unix_peer(sk) = other; unix_state_double_unlock(sk, other); if (other != old_peer) unix_dgram_disconnected(sk, old_peer); sock_put(old_peer); } else { - unix_peer(sk)=other; + unix_peer(sk) = other; unix_state_double_unlock(sk, other); } return 0; @@ -1004,7 +1011,7 @@ static long unix_wait_for_peer(struct sock *other, long timeo) static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) { - struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr; + struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; struct sock *sk = sock->sk; struct net *net = sock_net(sk); struct unix_sock *u = unix_sk(sk), *newu, *otheru; @@ -1126,8 +1133,7 @@ restart: newsk->sk_state = TCP_ESTABLISHED; newsk->sk_type = sk->sk_type; newsk->sk_peercred.pid = task_tgid_vnr(current); - newsk->sk_peercred.uid = current->euid; - newsk->sk_peercred.gid = current->egid; + current_euid_egid(&newsk->sk_peercred.uid, &newsk->sk_peercred.gid); newu = unix_sk(newsk); newsk->sk_sleep = &newu->peer_wait; otheru = unix_sk(other); @@ -1179,16 +1185,17 @@ out: static int unix_socketpair(struct socket *socka, struct socket *sockb) { - struct sock *ska=socka->sk, *skb = sockb->sk; + struct sock *ska = socka->sk, *skb = sockb->sk; /* Join our sockets back to back */ sock_hold(ska); sock_hold(skb); - unix_peer(ska)=skb; - unix_peer(skb)=ska; + unix_peer(ska) = skb; + unix_peer(skb) = ska; ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current); - ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid; - ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid; + current_euid_egid(&skb->sk_peercred.uid, &skb->sk_peercred.gid); + ska->sk_peercred.uid = skb->sk_peercred.uid; + ska->sk_peercred.gid = skb->sk_peercred.gid; if (ska->sk_type != SOCK_DGRAM) { ska->sk_state = TCP_ESTABLISHED; @@ -1207,7 +1214,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags) int err; err = -EOPNOTSUPP; - if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET) + if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) goto out; err = -EINVAL; @@ -1246,7 +1253,7 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_ { struct sock *sk = sock->sk; struct unix_sock *u; - struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr; + struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; int err = 0; if (peer) { @@ -1286,7 +1293,7 @@ static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb) skb->destructor = sock_wfree; UNIXCB(skb).fp = NULL; - for (i=scm->fp->count-1; i>=0; i--) + for (i = scm->fp->count-1; i >= 0; i--) unix_notinflight(scm->fp->fp[i]); } @@ -1315,7 +1322,7 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) if (!UNIXCB(skb).fp) return -ENOMEM; - for (i=scm->fp->count-1; i>=0; i--) + for (i = scm->fp->count-1; i >= 0; i--) unix_inflight(scm->fp->fp[i]); skb->destructor = unix_destruct_fds; return 0; @@ -1332,7 +1339,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, struct sock *sk = sock->sk; struct net *net = sock_net(sk); struct unix_sock *u = unix_sk(sk); - struct sockaddr_un *sunaddr=msg->msg_name; + struct sockaddr_un *sunaddr = msg->msg_name; struct sock *other = NULL; int namelen = 0; /* fake GCC */ int err; @@ -1374,7 +1381,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, goto out; skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err); - if (skb==NULL) + if (skb == NULL) goto out; memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); @@ -1386,7 +1393,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, unix_get_secdata(siocb->scm, skb); skb_reset_transport_header(skb); - err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len); + err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); if (err) goto out_free; @@ -1400,7 +1407,7 @@ restart: other = unix_find_other(net, sunaddr, namelen, sk->sk_type, hash, &err); - if (other==NULL) + if (other == NULL) goto out_free; } @@ -1420,7 +1427,7 @@ restart: err = 0; unix_state_lock(sk); if (unix_peer(sk) == other) { - unix_peer(sk)=NULL; + unix_peer(sk) = NULL; unix_state_unlock(sk); unix_dgram_disconnected(sk, other); @@ -1486,10 +1493,10 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, struct sock_iocb *siocb = kiocb_to_siocb(kiocb); struct sock *sk = sock->sk; struct sock *other = NULL; - struct sockaddr_un *sunaddr=msg->msg_name; - int err,size; + struct sockaddr_un *sunaddr = msg->msg_name; + int err, size; struct sk_buff *skb; - int sent=0; + int sent = 0; struct scm_cookie tmp_scm; if (NULL == siocb->scm) @@ -1517,8 +1524,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, if (sk->sk_shutdown & SEND_SHUTDOWN) goto pipe_err; - while(sent < len) - { + while (sent < len) { /* * Optimisation for the fact that under 0.01% of X * messages typically need breaking up. @@ -1537,9 +1543,10 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, * Grab a buffer */ - skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err); + skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT, + &err); - if (skb==NULL) + if (skb == NULL) goto out_err; /* @@ -1560,7 +1567,8 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, } } - if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) { + err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); + if (err) { kfree_skb(skb); goto out_err; } @@ -1574,7 +1582,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, skb_queue_tail(&other->sk_receive_queue, skb); unix_state_unlock(other); other->sk_data_ready(other, size); - sent+=size; + sent += size; } scm_destroy(siocb->scm); @@ -1586,8 +1594,8 @@ pipe_err_free: unix_state_unlock(other); kfree_skb(skb); pipe_err: - if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL)) - send_sig(SIGPIPE,current,0); + if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL)) + send_sig(SIGPIPE, current, 0); err = -EPIPE; out_err: scm_destroy(siocb->scm); @@ -1677,13 +1685,10 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, siocb->scm->creds = *UNIXCREDS(skb); unix_set_secdata(siocb->scm, skb); - if (!(flags & MSG_PEEK)) - { + if (!(flags & MSG_PEEK)) { if (UNIXCB(skb).fp) unix_detach_fds(siocb->scm, skb); - } - else - { + } else { /* It is questionable: on PEEK we could: - do not return fds - good, but too simple 8) - return fds, and do not return them on read (old strategy, @@ -1704,7 +1709,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, scm_recv(sock, msg, siocb->scm, flags); out_free: - skb_free_datagram(sk,skb); + skb_free_datagram(sk, skb); out_unlock: mutex_unlock(&u->readlock); out: @@ -1715,7 +1720,7 @@ out: * Sleep until data has arrive. But check for races.. */ -static long unix_stream_data_wait(struct sock * sk, long timeo) +static long unix_stream_data_wait(struct sock *sk, long timeo) { DEFINE_WAIT(wait); @@ -1753,7 +1758,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, struct scm_cookie tmp_scm; struct sock *sk = sock->sk; struct unix_sock *u = unix_sk(sk); - struct sockaddr_un *sunaddr=msg->msg_name; + struct sockaddr_un *sunaddr = msg->msg_name; int copied = 0; int check_creds = 0; int target; @@ -1784,15 +1789,13 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, mutex_lock(&u->readlock); - do - { + do { int chunk; struct sk_buff *skb; unix_state_lock(sk); skb = skb_dequeue(&sk->sk_receive_queue); - if (skb==NULL) - { + if (skb == NULL) { if (copied >= target) goto unlock; @@ -1800,7 +1803,8 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, * POSIX 1003.1g mandates this order. */ - if ((err = sock_error(sk)) != 0) + err = sock_error(sk); + if (err) goto unlock; if (sk->sk_shutdown & RCV_SHUTDOWN) goto unlock; @@ -1827,7 +1831,8 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, if (check_creds) { /* Never glue messages from different writers */ - if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, sizeof(siocb->scm->creds)) != 0) { + if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, + sizeof(siocb->scm->creds)) != 0) { skb_queue_head(&sk->sk_receive_queue, skb); break; } @@ -1838,8 +1843,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, } /* Copy address just once */ - if (sunaddr) - { + if (sunaddr) { unix_copy_addr(msg, skb->sk); sunaddr = NULL; } @@ -1855,16 +1859,14 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, size -= chunk; /* Mark read part of skb as used */ - if (!(flags & MSG_PEEK)) - { + if (!(flags & MSG_PEEK)) { skb_pull(skb, chunk); if (UNIXCB(skb).fp) unix_detach_fds(siocb->scm, skb); /* put the skb back if we didn't use it up.. */ - if (skb->len) - { + if (skb->len) { skb_queue_head(&sk->sk_receive_queue, skb); break; } @@ -1873,9 +1875,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, if (siocb->scm->fp) break; - } - else - { + } else { /* It is questionable, see note in unix_dgram_recvmsg. */ if (UNIXCB(skb).fp) @@ -1903,7 +1903,7 @@ static int unix_shutdown(struct socket *sock, int mode) if (mode) { unix_state_lock(sk); sk->sk_shutdown |= mode; - other=unix_peer(sk); + other = unix_peer(sk); if (other) sock_hold(other); unix_state_unlock(sk); @@ -1938,16 +1938,15 @@ static int unix_shutdown(struct socket *sock, int mode) static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; - long amount=0; + long amount = 0; int err; - switch(cmd) - { - case SIOCOUTQ: - amount = atomic_read(&sk->sk_wmem_alloc); - err = put_user(amount, (int __user *)arg); - break; - case SIOCINQ: + switch (cmd) { + case SIOCOUTQ: + amount = atomic_read(&sk->sk_wmem_alloc); + err = put_user(amount, (int __user *)arg); + break; + case SIOCINQ: { struct sk_buff *skb; @@ -1964,21 +1963,21 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) } else { skb = skb_peek(&sk->sk_receive_queue); if (skb) - amount=skb->len; + amount = skb->len; } spin_unlock(&sk->sk_receive_queue.lock); err = put_user(amount, (int __user *)arg); break; } - default: - err = -ENOIOCTLCMD; - break; + default: + err = -ENOIOCTLCMD; + break; } return err; } -static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait) +static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; unsigned int mask; @@ -2000,7 +1999,8 @@ static unsigned int unix_poll(struct file * file, struct socket *sock, poll_tabl mask |= POLLIN | POLLRDNORM; /* Connection-based need to check for termination and startup */ - if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && sk->sk_state == TCP_CLOSE) + if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && + sk->sk_state == TCP_CLOSE) mask |= POLLHUP; /* @@ -2096,6 +2096,7 @@ struct unix_iter_state { struct seq_net_private p; int i; }; + static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos) { struct unix_iter_state *iter = seq->private; @@ -2112,7 +2113,6 @@ static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos) return NULL; } - static void *unix_seq_start(struct seq_file *seq, loff_t *pos) __acquires(unix_table_lock) { @@ -2192,7 +2192,6 @@ static const struct seq_operations unix_seq_ops = { .show = unix_seq_show, }; - static int unix_seq_open(struct inode *inode, struct file *file) { return seq_open_net(inode, file, &unix_seq_ops, diff --git a/net/unix/garbage.c b/net/unix/garbage.c index abb3ab34cb1..19c17e4a0c8 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -106,8 +106,8 @@ static struct sock *unix_get_socket(struct file *filp) * Socket ? */ if (S_ISSOCK(inode->i_mode)) { - struct socket * sock = SOCKET_I(inode); - struct sock * s = sock->sk; + struct socket *sock = SOCKET_I(inode); + struct sock *s = sock->sk; /* * PF_UNIX ? @@ -126,7 +126,7 @@ static struct sock *unix_get_socket(struct file *filp) void unix_inflight(struct file *fp) { struct sock *s = unix_get_socket(fp); - if(s) { + if (s) { struct unix_sock *u = unix_sk(s); spin_lock(&unix_gc_lock); if (atomic_long_inc_return(&u->inflight) == 1) { @@ -143,7 +143,7 @@ void unix_inflight(struct file *fp) void unix_notinflight(struct file *fp) { struct sock *s = unix_get_socket(fp); - if(s) { + if (s) { struct unix_sock *u = unix_sk(s); spin_lock(&unix_gc_lock); BUG_ON(list_empty(&u->link)); @@ -156,7 +156,7 @@ void unix_notinflight(struct file *fp) static inline struct sk_buff *sock_queue_head(struct sock *sk) { - return (struct sk_buff *) &sk->sk_receive_queue; + return (struct sk_buff *)&sk->sk_receive_queue; } #define receive_queue_for_each_skb(sk, next, skb) \ @@ -370,7 +370,7 @@ void unix_gc(void) */ skb_queue_head_init(&hitlist); list_for_each_entry(u, &gc_candidates, link) - scan_children(&u->sk, inc_inflight, &hitlist); + scan_children(&u->sk, inc_inflight, &hitlist); spin_unlock(&unix_gc_lock); diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c index 77513d7e35f..83c093077eb 100644 --- a/net/unix/sysctl_net_unix.c +++ b/net/unix/sysctl_net_unix.c @@ -21,7 +21,7 @@ static ctl_table unix_table[] = { .data = &init_net.unx.sysctl_max_dgram_qlen, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = proc_dointvec }, { .ctl_name = 0 } }; @@ -61,4 +61,3 @@ void unix_sysctl_unregister(struct net *net) unregister_sysctl_table(net->unx.ctl); kfree(table); } - diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c index 7f07152bc10..39701dec1db 100644 --- a/net/wanrouter/wanmain.c +++ b/net/wanrouter/wanmain.c @@ -60,6 +60,8 @@ #define KMEM_SAFETYZONE 8 +#define DEV_TO_SLAVE(dev) (*((struct net_device **)netdev_priv(dev))) + /* * Function Prototypes */ @@ -511,7 +513,7 @@ static int wanrouter_device_shutdown(struct wan_device *wandev) if (err) return err; /* The above function deallocates the current dev - * structure. Therefore, we cannot use dev->priv + * structure. Therefore, we cannot use netdev_priv(dev) * as the next element: wandev->dev points to the * next element */ dev = wandev->dev; @@ -589,10 +591,6 @@ static int wanrouter_device_new_if(struct wan_device *wandev, err = -EPROTONOSUPPORT; goto out; } else { - dev = kzalloc(sizeof(struct net_device), GFP_KERNEL); - err = -ENOBUFS; - if (dev == NULL) - goto out; err = wandev->new_if(wandev, dev, cnf); } @@ -622,10 +620,9 @@ static int wanrouter_device_new_if(struct wan_device *wandev, wandev->dev = dev; } else { for (slave=wandev->dev; - *((struct net_device **)slave->priv); - slave = *((struct net_device **)slave->priv)); - - *((struct net_device **)slave->priv) = dev; + DEV_TO_SLAVE(slave); + slave = DEV_TO_SLAVE(slave)) + DEV_TO_SLAVE(slave) = dev; } ++wandev->ndev; @@ -636,15 +633,9 @@ static int wanrouter_device_new_if(struct wan_device *wandev, } if (wandev->del_if) wandev->del_if(wandev, dev); + free_netdev(dev); } - /* This code has moved from del_if() function */ - kfree(dev->priv); - dev->priv = NULL; - - /* Sync PPP is disabled */ - if (cnf->config_id != WANCONFIG_MPPP) - kfree(dev); out: kfree(cnf); return err; @@ -734,7 +725,7 @@ static int wanrouter_delete_interface(struct wan_device *wandev, char *name) dev = wandev->dev; prev = NULL; while (dev && strcmp(name, dev->name)) { - struct net_device **slave = dev->priv; + struct net_device **slave = netdev_priv(dev); prev = dev; dev = *slave; } @@ -751,12 +742,12 @@ static int wanrouter_delete_interface(struct wan_device *wandev, char *name) lock_adapter_irq(&wandev->lock, &smp_flags); if (prev) { - struct net_device **prev_slave = prev->priv; - struct net_device **slave = dev->priv; + struct net_device **prev_slave = netdev_priv(prev); + struct net_device **slave = netdev_priv(dev); *prev_slave = *slave; } else { - struct net_device **slave = dev->priv; + struct net_device **slave = netdev_priv(dev); wandev->dev = *slave; } --wandev->ndev; @@ -764,11 +755,6 @@ static int wanrouter_delete_interface(struct wan_device *wandev, char *name) printk(KERN_INFO "%s: unregistering '%s'\n", wandev->name, dev->name); - /* Due to new interface linking method using dev->priv, - * this code has moved from del_if() function.*/ - kfree(dev->priv); - dev->priv=NULL; - unregister_netdev(dev); free_netdev(dev); diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 646c7121dbc..e28e2b8fa43 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -1,6 +1,15 @@ config CFG80211 tristate "Improved wireless configuration API" +config CFG80211_REG_DEBUG + bool "cfg80211 regulatory debugging" + depends on CFG80211 + default n + ---help--- + You can enable this if you want to debug regulatory changes. + + If unsure, say N. + config NL80211 bool "nl80211 new netlink interface support" depends on CFG80211 @@ -40,6 +49,8 @@ config WIRELESS_OLD_REGULATORY ieee80211_regdom module parameter. This is being phased out and you should stop using them ASAP. + Note: You will need CRDA if you want 802.11d support + Say Y unless you have installed a new userspace application. Also say Y if have one currently depending on the ieee80211_regdom module parameter and cannot port it to use the new userspace @@ -72,3 +83,22 @@ config WIRELESS_EXT_SYSFS Say Y if you have programs using it, like old versions of hal. + +config LIB80211 + tristate "Common routines for IEEE802.11 drivers" + default n + help + This options enables a library of common routines used + by IEEE802.11 wireless LAN drivers. + + Drivers should select this themselves if needed. Say Y if + you want this built into your kernel. + +config LIB80211_CRYPT_WEP + tristate + +config LIB80211_CRYPT_CCMP + tristate + +config LIB80211_CRYPT_TKIP + tristate diff --git a/net/wireless/Makefile b/net/wireless/Makefile index b9f943c45f3..938a334c8db 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -1,5 +1,12 @@ obj-$(CONFIG_WIRELESS_EXT) += wext.o obj-$(CONFIG_CFG80211) += cfg80211.o +obj-$(CONFIG_LIB80211) += lib80211.o +obj-$(CONFIG_LIB80211_CRYPT_WEP) += lib80211_crypt_wep.o +obj-$(CONFIG_LIB80211_CRYPT_CCMP) += lib80211_crypt_ccmp.o +obj-$(CONFIG_LIB80211_CRYPT_TKIP) += lib80211_crypt_tkip.o cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o +cfg80211-$(CONFIG_WIRELESS_EXT) += wext-compat.o cfg80211-$(CONFIG_NL80211) += nl80211.o + +ccflags-y += -D__CHECK_ENDIAN__ diff --git a/net/wireless/core.c b/net/wireless/core.c index 5031db7b275..b96fc0c3f1c 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -19,7 +19,6 @@ #include "nl80211.h" #include "core.h" #include "sysfs.h" -#include "reg.h" /* name for sysfs, %d is appended */ #define PHY_NAME "phy" @@ -236,8 +235,7 @@ struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv) mutex_unlock(&cfg80211_drv_mutex); /* give it a proper name */ - snprintf(drv->wiphy.dev.bus_id, BUS_ID_SIZE, - PHY_NAME "%d", drv->idx); + dev_set_name(&drv->wiphy.dev, PHY_NAME "%d", drv->idx); mutex_init(&drv->mtx); mutex_init(&drv->devlist_mtx); @@ -301,12 +299,10 @@ int wiphy_register(struct wiphy *wiphy) /* check and set up bitrates */ ieee80211_set_bitrate_flags(wiphy); + mutex_lock(&cfg80211_drv_mutex); + /* set up regulatory info */ - mutex_lock(&cfg80211_reg_mutex); wiphy_update_regulatory(wiphy, REGDOM_SET_BY_CORE); - mutex_unlock(&cfg80211_reg_mutex); - - mutex_lock(&cfg80211_drv_mutex); res = device_add(&drv->wiphy.dev); if (res) @@ -351,6 +347,10 @@ void wiphy_unregister(struct wiphy *wiphy) /* unlock again before freeing */ mutex_unlock(&drv->mtx); + /* If this device got a regulatory hint tell core its + * free to listen now to a new shiny device regulatory hint */ + reg_device_remove(wiphy); + list_del(&drv->list); device_del(&drv->wiphy.dev); debugfs_remove(drv->wiphy.debugfsdir); diff --git a/net/wireless/core.h b/net/wireless/core.h index 771cc5cc765..f7fb9f41302 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -11,6 +11,7 @@ #include <net/genetlink.h> #include <net/wireless.h> #include <net/cfg80211.h> +#include "reg.h" struct cfg80211_registered_device { struct cfg80211_ops *ops; @@ -21,6 +22,18 @@ struct cfg80211_registered_device { * any call is in progress */ struct mutex mtx; + /* ISO / IEC 3166 alpha2 for which this device is receiving + * country IEs on, this can help disregard country IEs from APs + * on the same alpha2 quickly. The alpha2 may differ from + * cfg80211_regdomain's alpha2 when an intersection has occurred. + * If the AP is reconfigured this can also be used to tell us if + * the country on the country IE changed. */ + char country_ie_alpha2[2]; + + /* If a Country IE has been received this tells us the environment + * which its telling us its in. This defaults to ENVIRON_ANY */ + enum environment_cap env; + /* wiphy index, internal only */ int idx; diff --git a/net/wireless/lib80211.c b/net/wireless/lib80211.c new file mode 100644 index 00000000000..97d411f7450 --- /dev/null +++ b/net/wireless/lib80211.c @@ -0,0 +1,284 @@ +/* + * lib80211 -- common bits for IEEE802.11 drivers + * + * Copyright(c) 2008 John W. Linville <linville@tuxdriver.com> + * + * Portions copied from old ieee80211 component, w/ original copyright + * notices below: + * + * Host AP crypto routines + * + * Copyright (c) 2002-2003, Jouni Malinen <j@w1.fi> + * Portions Copyright (C) 2004, Intel Corporation <jketreno@linux.intel.com> + * + */ + +#include <linux/module.h> +#include <linux/ctype.h> +#include <linux/ieee80211.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/string.h> + +#include <net/lib80211.h> + +#define DRV_NAME "lib80211" + +#define DRV_DESCRIPTION "common routines for IEEE802.11 drivers" + +MODULE_DESCRIPTION(DRV_DESCRIPTION); +MODULE_AUTHOR("John W. Linville <linville@tuxdriver.com>"); +MODULE_LICENSE("GPL"); + +struct lib80211_crypto_alg { + struct list_head list; + struct lib80211_crypto_ops *ops; +}; + +static LIST_HEAD(lib80211_crypto_algs); +static DEFINE_SPINLOCK(lib80211_crypto_lock); + +const char *print_ssid(char *buf, const char *ssid, u8 ssid_len) +{ + const char *s = ssid; + char *d = buf; + + ssid_len = min_t(u8, ssid_len, IEEE80211_MAX_SSID_LEN); + while (ssid_len--) { + if (isprint(*s)) { + *d++ = *s++; + continue; + } + + *d++ = '\\'; + if (*s == '\0') + *d++ = '0'; + else if (*s == '\n') + *d++ = 'n'; + else if (*s == '\r') + *d++ = 'r'; + else if (*s == '\t') + *d++ = 't'; + else if (*s == '\\') + *d++ = '\\'; + else + d += snprintf(d, 3, "%03o", *s); + s++; + } + *d = '\0'; + return buf; +} +EXPORT_SYMBOL(print_ssid); + +int lib80211_crypt_info_init(struct lib80211_crypt_info *info, char *name, + spinlock_t *lock) +{ + memset(info, 0, sizeof(*info)); + + info->name = name; + info->lock = lock; + + INIT_LIST_HEAD(&info->crypt_deinit_list); + setup_timer(&info->crypt_deinit_timer, lib80211_crypt_deinit_handler, + (unsigned long)info); + + return 0; +} +EXPORT_SYMBOL(lib80211_crypt_info_init); + +void lib80211_crypt_info_free(struct lib80211_crypt_info *info) +{ + int i; + + lib80211_crypt_quiescing(info); + del_timer_sync(&info->crypt_deinit_timer); + lib80211_crypt_deinit_entries(info, 1); + + for (i = 0; i < NUM_WEP_KEYS; i++) { + struct lib80211_crypt_data *crypt = info->crypt[i]; + if (crypt) { + if (crypt->ops) { + crypt->ops->deinit(crypt->priv); + module_put(crypt->ops->owner); + } + kfree(crypt); + info->crypt[i] = NULL; + } + } +} +EXPORT_SYMBOL(lib80211_crypt_info_free); + +void lib80211_crypt_deinit_entries(struct lib80211_crypt_info *info, int force) +{ + struct lib80211_crypt_data *entry, *next; + unsigned long flags; + + spin_lock_irqsave(info->lock, flags); + list_for_each_entry_safe(entry, next, &info->crypt_deinit_list, list) { + if (atomic_read(&entry->refcnt) != 0 && !force) + continue; + + list_del(&entry->list); + + if (entry->ops) { + entry->ops->deinit(entry->priv); + module_put(entry->ops->owner); + } + kfree(entry); + } + spin_unlock_irqrestore(info->lock, flags); +} +EXPORT_SYMBOL(lib80211_crypt_deinit_entries); + +/* After this, crypt_deinit_list won't accept new members */ +void lib80211_crypt_quiescing(struct lib80211_crypt_info *info) +{ + unsigned long flags; + + spin_lock_irqsave(info->lock, flags); + info->crypt_quiesced = 1; + spin_unlock_irqrestore(info->lock, flags); +} +EXPORT_SYMBOL(lib80211_crypt_quiescing); + +void lib80211_crypt_deinit_handler(unsigned long data) +{ + struct lib80211_crypt_info *info = (struct lib80211_crypt_info *)data; + unsigned long flags; + + lib80211_crypt_deinit_entries(info, 0); + + spin_lock_irqsave(info->lock, flags); + if (!list_empty(&info->crypt_deinit_list) && !info->crypt_quiesced) { + printk(KERN_DEBUG "%s: entries remaining in delayed crypt " + "deletion list\n", info->name); + info->crypt_deinit_timer.expires = jiffies + HZ; + add_timer(&info->crypt_deinit_timer); + } + spin_unlock_irqrestore(info->lock, flags); +} +EXPORT_SYMBOL(lib80211_crypt_deinit_handler); + +void lib80211_crypt_delayed_deinit(struct lib80211_crypt_info *info, + struct lib80211_crypt_data **crypt) +{ + struct lib80211_crypt_data *tmp; + unsigned long flags; + + if (*crypt == NULL) + return; + + tmp = *crypt; + *crypt = NULL; + + /* must not run ops->deinit() while there may be pending encrypt or + * decrypt operations. Use a list of delayed deinits to avoid needing + * locking. */ + + spin_lock_irqsave(info->lock, flags); + if (!info->crypt_quiesced) { + list_add(&tmp->list, &info->crypt_deinit_list); + if (!timer_pending(&info->crypt_deinit_timer)) { + info->crypt_deinit_timer.expires = jiffies + HZ; + add_timer(&info->crypt_deinit_timer); + } + } + spin_unlock_irqrestore(info->lock, flags); +} +EXPORT_SYMBOL(lib80211_crypt_delayed_deinit); + +int lib80211_register_crypto_ops(struct lib80211_crypto_ops *ops) +{ + unsigned long flags; + struct lib80211_crypto_alg *alg; + + alg = kzalloc(sizeof(*alg), GFP_KERNEL); + if (alg == NULL) + return -ENOMEM; + + alg->ops = ops; + + spin_lock_irqsave(&lib80211_crypto_lock, flags); + list_add(&alg->list, &lib80211_crypto_algs); + spin_unlock_irqrestore(&lib80211_crypto_lock, flags); + + printk(KERN_DEBUG "lib80211_crypt: registered algorithm '%s'\n", + ops->name); + + return 0; +} +EXPORT_SYMBOL(lib80211_register_crypto_ops); + +int lib80211_unregister_crypto_ops(struct lib80211_crypto_ops *ops) +{ + struct lib80211_crypto_alg *alg; + unsigned long flags; + + spin_lock_irqsave(&lib80211_crypto_lock, flags); + list_for_each_entry(alg, &lib80211_crypto_algs, list) { + if (alg->ops == ops) + goto found; + } + spin_unlock_irqrestore(&lib80211_crypto_lock, flags); + return -EINVAL; + + found: + printk(KERN_DEBUG "lib80211_crypt: unregistered algorithm " + "'%s'\n", ops->name); + list_del(&alg->list); + spin_unlock_irqrestore(&lib80211_crypto_lock, flags); + kfree(alg); + return 0; +} +EXPORT_SYMBOL(lib80211_unregister_crypto_ops); + +struct lib80211_crypto_ops *lib80211_get_crypto_ops(const char *name) +{ + struct lib80211_crypto_alg *alg; + unsigned long flags; + + spin_lock_irqsave(&lib80211_crypto_lock, flags); + list_for_each_entry(alg, &lib80211_crypto_algs, list) { + if (strcmp(alg->ops->name, name) == 0) + goto found; + } + spin_unlock_irqrestore(&lib80211_crypto_lock, flags); + return NULL; + + found: + spin_unlock_irqrestore(&lib80211_crypto_lock, flags); + return alg->ops; +} +EXPORT_SYMBOL(lib80211_get_crypto_ops); + +static void *lib80211_crypt_null_init(int keyidx) +{ + return (void *)1; +} + +static void lib80211_crypt_null_deinit(void *priv) +{ +} + +static struct lib80211_crypto_ops lib80211_crypt_null = { + .name = "NULL", + .init = lib80211_crypt_null_init, + .deinit = lib80211_crypt_null_deinit, + .owner = THIS_MODULE, +}; + +static int __init lib80211_init(void) +{ + printk(KERN_INFO DRV_NAME ": " DRV_DESCRIPTION "\n"); + return lib80211_register_crypto_ops(&lib80211_crypt_null); +} + +static void __exit lib80211_exit(void) +{ + lib80211_unregister_crypto_ops(&lib80211_crypt_null); + BUG_ON(!list_empty(&lib80211_crypto_algs)); +} + +module_init(lib80211_init); +module_exit(lib80211_exit); diff --git a/net/ieee80211/ieee80211_crypt_ccmp.c b/net/wireless/lib80211_crypt_ccmp.c index 208bf35b554..db428194c16 100644 --- a/net/ieee80211/ieee80211_crypt_ccmp.c +++ b/net/wireless/lib80211_crypt_ccmp.c @@ -1,7 +1,8 @@ /* - * Host AP crypt: host-based CCMP encryption implementation for Host AP driver + * lib80211 crypt: host-based CCMP encryption implementation for lib80211 * * Copyright (c) 2003-2004, Jouni Malinen <j@w1.fi> + * Copyright (c) 2008, John W. Linville <linville@tuxdriver.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -22,10 +23,12 @@ #include <asm/string.h> #include <linux/wireless.h> -#include <net/ieee80211.h> +#include <linux/ieee80211.h> #include <linux/crypto.h> +#include <net/lib80211.h> + MODULE_AUTHOR("Jouni Malinen"); MODULE_DESCRIPTION("Host AP crypt: CCMP"); MODULE_LICENSE("GPL"); @@ -36,7 +39,7 @@ MODULE_LICENSE("GPL"); #define CCMP_TK_LEN 16 #define CCMP_PN_LEN 6 -struct ieee80211_ccmp_data { +struct lib80211_ccmp_data { u8 key[CCMP_TK_LEN]; int key_set; @@ -57,15 +60,15 @@ struct ieee80211_ccmp_data { u8 rx_b0[AES_BLOCK_LEN], rx_b[AES_BLOCK_LEN], rx_a[AES_BLOCK_LEN]; }; -static inline void ieee80211_ccmp_aes_encrypt(struct crypto_cipher *tfm, +static inline void lib80211_ccmp_aes_encrypt(struct crypto_cipher *tfm, const u8 pt[16], u8 ct[16]) { crypto_cipher_encrypt_one(tfm, ct, pt); } -static void *ieee80211_ccmp_init(int key_idx) +static void *lib80211_ccmp_init(int key_idx) { - struct ieee80211_ccmp_data *priv; + struct lib80211_ccmp_data *priv; priv = kzalloc(sizeof(*priv), GFP_ATOMIC); if (priv == NULL) @@ -74,7 +77,7 @@ static void *ieee80211_ccmp_init(int key_idx) priv->tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(priv->tfm)) { - printk(KERN_DEBUG "ieee80211_crypt_ccmp: could not allocate " + printk(KERN_DEBUG "lib80211_crypt_ccmp: could not allocate " "crypto API aes\n"); priv->tfm = NULL; goto fail; @@ -92,9 +95,9 @@ static void *ieee80211_ccmp_init(int key_idx) return NULL; } -static void ieee80211_ccmp_deinit(void *priv) +static void lib80211_ccmp_deinit(void *priv) { - struct ieee80211_ccmp_data *_priv = priv; + struct lib80211_ccmp_data *_priv = priv; if (_priv && _priv->tfm) crypto_free_cipher(_priv->tfm); kfree(priv); @@ -108,20 +111,17 @@ static inline void xor_block(u8 * b, u8 * a, size_t len) } static void ccmp_init_blocks(struct crypto_cipher *tfm, - struct ieee80211_hdr_4addr *hdr, + struct ieee80211_hdr *hdr, u8 * pn, size_t dlen, u8 * b0, u8 * auth, u8 * s0) { u8 *pos, qc = 0; size_t aad_len; - u16 fc; int a4_included, qc_included; u8 aad[2 * AES_BLOCK_LEN]; - fc = le16_to_cpu(hdr->frame_ctl); - a4_included = ((fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) == - (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)); - qc_included = ((WLAN_FC_GET_TYPE(fc) == IEEE80211_FTYPE_DATA) && - (WLAN_FC_GET_STYPE(fc) & IEEE80211_STYPE_QOS_DATA)); + a4_included = ieee80211_has_a4(hdr->frame_control); + qc_included = ieee80211_is_data_qos(hdr->frame_control); + aad_len = 22; if (a4_included) aad_len += 6; @@ -158,7 +158,7 @@ static void ccmp_init_blocks(struct crypto_cipher *tfm, aad[2] = pos[0] & 0x8f; aad[3] = pos[1] & 0xc7; memcpy(aad + 4, hdr->addr1, 3 * ETH_ALEN); - pos = (u8 *) & hdr->seq_ctl; + pos = (u8 *) & hdr->seq_ctrl; aad[22] = pos[0] & 0x0f; aad[23] = 0; /* all bits masked */ memset(aad + 24, 0, 8); @@ -170,20 +170,20 @@ static void ccmp_init_blocks(struct crypto_cipher *tfm, } /* Start with the first block and AAD */ - ieee80211_ccmp_aes_encrypt(tfm, b0, auth); + lib80211_ccmp_aes_encrypt(tfm, b0, auth); xor_block(auth, aad, AES_BLOCK_LEN); - ieee80211_ccmp_aes_encrypt(tfm, auth, auth); + lib80211_ccmp_aes_encrypt(tfm, auth, auth); xor_block(auth, &aad[AES_BLOCK_LEN], AES_BLOCK_LEN); - ieee80211_ccmp_aes_encrypt(tfm, auth, auth); + lib80211_ccmp_aes_encrypt(tfm, auth, auth); b0[0] &= 0x07; b0[14] = b0[15] = 0; - ieee80211_ccmp_aes_encrypt(tfm, b0, s0); + lib80211_ccmp_aes_encrypt(tfm, b0, s0); } -static int ieee80211_ccmp_hdr(struct sk_buff *skb, int hdr_len, +static int lib80211_ccmp_hdr(struct sk_buff *skb, int hdr_len, u8 *aeskey, int keylen, void *priv) { - struct ieee80211_ccmp_data *key = priv; + struct lib80211_ccmp_data *key = priv; int i; u8 *pos; @@ -217,12 +217,12 @@ static int ieee80211_ccmp_hdr(struct sk_buff *skb, int hdr_len, return CCMP_HDR_LEN; } -static int ieee80211_ccmp_encrypt(struct sk_buff *skb, int hdr_len, void *priv) +static int lib80211_ccmp_encrypt(struct sk_buff *skb, int hdr_len, void *priv) { - struct ieee80211_ccmp_data *key = priv; + struct lib80211_ccmp_data *key = priv; int data_len, i, blocks, last, len; u8 *pos, *mic; - struct ieee80211_hdr_4addr *hdr; + struct ieee80211_hdr *hdr; u8 *b0 = key->tx_b0; u8 *b = key->tx_b; u8 *e = key->tx_e; @@ -232,13 +232,13 @@ static int ieee80211_ccmp_encrypt(struct sk_buff *skb, int hdr_len, void *priv) return -1; data_len = skb->len - hdr_len; - len = ieee80211_ccmp_hdr(skb, hdr_len, NULL, 0, priv); + len = lib80211_ccmp_hdr(skb, hdr_len, NULL, 0, priv); if (len < 0) return -1; pos = skb->data + hdr_len + CCMP_HDR_LEN; mic = skb_put(skb, CCMP_MIC_LEN); - hdr = (struct ieee80211_hdr_4addr *)skb->data; + hdr = (struct ieee80211_hdr *)skb->data; ccmp_init_blocks(key->tfm, hdr, key->tx_pn, data_len, b0, b, s0); blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN); @@ -248,11 +248,11 @@ static int ieee80211_ccmp_encrypt(struct sk_buff *skb, int hdr_len, void *priv) len = (i == blocks && last) ? last : AES_BLOCK_LEN; /* Authentication */ xor_block(b, pos, len); - ieee80211_ccmp_aes_encrypt(key->tfm, b, b); + lib80211_ccmp_aes_encrypt(key->tfm, b, b); /* Encryption, with counter */ b0[14] = (i >> 8) & 0xff; b0[15] = i & 0xff; - ieee80211_ccmp_aes_encrypt(key->tfm, b0, e); + lib80211_ccmp_aes_encrypt(key->tfm, b0, e); xor_block(pos, e, len); pos += len; } @@ -284,11 +284,11 @@ static inline int ccmp_replay_check(u8 *pn_n, u8 *pn_o) return 0; } -static int ieee80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) +static int lib80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) { - struct ieee80211_ccmp_data *key = priv; + struct lib80211_ccmp_data *key = priv; u8 keyidx, *pos; - struct ieee80211_hdr_4addr *hdr; + struct ieee80211_hdr *hdr; u8 *b0 = key->rx_b0; u8 *b = key->rx_b; u8 *a = key->rx_a; @@ -296,20 +296,19 @@ static int ieee80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) int i, blocks, last, len; size_t data_len = skb->len - hdr_len - CCMP_HDR_LEN - CCMP_MIC_LEN; u8 *mic = skb->data + skb->len - CCMP_MIC_LEN; - DECLARE_MAC_BUF(mac); if (skb->len < hdr_len + CCMP_HDR_LEN + CCMP_MIC_LEN) { key->dot11RSNAStatsCCMPFormatErrors++; return -1; } - hdr = (struct ieee80211_hdr_4addr *)skb->data; + hdr = (struct ieee80211_hdr *)skb->data; pos = skb->data + hdr_len; keyidx = pos[3]; if (!(keyidx & (1 << 5))) { if (net_ratelimit()) { printk(KERN_DEBUG "CCMP: received packet without ExtIV" - " flag from %s\n", print_mac(mac, hdr->addr2)); + " flag from %pM\n", hdr->addr2); } key->dot11RSNAStatsCCMPFormatErrors++; return -2; @@ -322,9 +321,9 @@ static int ieee80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) } if (!key->key_set) { if (net_ratelimit()) { - printk(KERN_DEBUG "CCMP: received packet from %s" + printk(KERN_DEBUG "CCMP: received packet from %pM" " with keyid=%d that does not have a configured" - " key\n", print_mac(mac, hdr->addr2), keyidx); + " key\n", hdr->addr2, keyidx); } return -3; } @@ -338,11 +337,11 @@ static int ieee80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) pos += 8; if (ccmp_replay_check(pn, key->rx_pn)) { - if (ieee80211_ratelimit_debug(IEEE80211_DL_DROP)) { - IEEE80211_DEBUG_DROP("CCMP: replay detected: STA=%s " + if (net_ratelimit()) { + printk(KERN_DEBUG "CCMP: replay detected: STA=%pM " "previous PN %02x%02x%02x%02x%02x%02x " "received PN %02x%02x%02x%02x%02x%02x\n", - print_mac(mac, hdr->addr2), + hdr->addr2, key->rx_pn[0], key->rx_pn[1], key->rx_pn[2], key->rx_pn[3], key->rx_pn[4], key->rx_pn[5], pn[0], pn[1], pn[2], pn[3], pn[4], pn[5]); @@ -362,18 +361,18 @@ static int ieee80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) /* Decrypt, with counter */ b0[14] = (i >> 8) & 0xff; b0[15] = i & 0xff; - ieee80211_ccmp_aes_encrypt(key->tfm, b0, b); + lib80211_ccmp_aes_encrypt(key->tfm, b0, b); xor_block(pos, b, len); /* Authentication */ xor_block(a, pos, len); - ieee80211_ccmp_aes_encrypt(key->tfm, a, a); + lib80211_ccmp_aes_encrypt(key->tfm, a, a); pos += len; } if (memcmp(mic, a, CCMP_MIC_LEN) != 0) { if (net_ratelimit()) { printk(KERN_DEBUG "CCMP: decrypt failed: STA=" - "%s\n", print_mac(mac, hdr->addr2)); + "%pM\n", hdr->addr2); } key->dot11RSNAStatsCCMPDecryptErrors++; return -5; @@ -389,9 +388,9 @@ static int ieee80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) return keyidx; } -static int ieee80211_ccmp_set_key(void *key, int len, u8 * seq, void *priv) +static int lib80211_ccmp_set_key(void *key, int len, u8 * seq, void *priv) { - struct ieee80211_ccmp_data *data = priv; + struct lib80211_ccmp_data *data = priv; int keyidx; struct crypto_cipher *tfm = data->tfm; @@ -419,9 +418,9 @@ static int ieee80211_ccmp_set_key(void *key, int len, u8 * seq, void *priv) return 0; } -static int ieee80211_ccmp_get_key(void *key, int len, u8 * seq, void *priv) +static int lib80211_ccmp_get_key(void *key, int len, u8 * seq, void *priv) { - struct ieee80211_ccmp_data *data = priv; + struct lib80211_ccmp_data *data = priv; if (len < CCMP_TK_LEN) return -1; @@ -442,9 +441,9 @@ static int ieee80211_ccmp_get_key(void *key, int len, u8 * seq, void *priv) return CCMP_TK_LEN; } -static char *ieee80211_ccmp_print_stats(char *p, void *priv) +static char *lib80211_ccmp_print_stats(char *p, void *priv) { - struct ieee80211_ccmp_data *ccmp = priv; + struct lib80211_ccmp_data *ccmp = priv; p += sprintf(p, "key[%d] alg=CCMP key_set=%d " "tx_pn=%02x%02x%02x%02x%02x%02x " @@ -462,32 +461,32 @@ static char *ieee80211_ccmp_print_stats(char *p, void *priv) return p; } -static struct ieee80211_crypto_ops ieee80211_crypt_ccmp = { +static struct lib80211_crypto_ops lib80211_crypt_ccmp = { .name = "CCMP", - .init = ieee80211_ccmp_init, - .deinit = ieee80211_ccmp_deinit, - .build_iv = ieee80211_ccmp_hdr, - .encrypt_mpdu = ieee80211_ccmp_encrypt, - .decrypt_mpdu = ieee80211_ccmp_decrypt, + .init = lib80211_ccmp_init, + .deinit = lib80211_ccmp_deinit, + .build_iv = lib80211_ccmp_hdr, + .encrypt_mpdu = lib80211_ccmp_encrypt, + .decrypt_mpdu = lib80211_ccmp_decrypt, .encrypt_msdu = NULL, .decrypt_msdu = NULL, - .set_key = ieee80211_ccmp_set_key, - .get_key = ieee80211_ccmp_get_key, - .print_stats = ieee80211_ccmp_print_stats, + .set_key = lib80211_ccmp_set_key, + .get_key = lib80211_ccmp_get_key, + .print_stats = lib80211_ccmp_print_stats, .extra_mpdu_prefix_len = CCMP_HDR_LEN, .extra_mpdu_postfix_len = CCMP_MIC_LEN, .owner = THIS_MODULE, }; -static int __init ieee80211_crypto_ccmp_init(void) +static int __init lib80211_crypto_ccmp_init(void) { - return ieee80211_register_crypto_ops(&ieee80211_crypt_ccmp); + return lib80211_register_crypto_ops(&lib80211_crypt_ccmp); } -static void __exit ieee80211_crypto_ccmp_exit(void) +static void __exit lib80211_crypto_ccmp_exit(void) { - ieee80211_unregister_crypto_ops(&ieee80211_crypt_ccmp); + lib80211_unregister_crypto_ops(&lib80211_crypt_ccmp); } -module_init(ieee80211_crypto_ccmp_init); -module_exit(ieee80211_crypto_ccmp_exit); +module_init(lib80211_crypto_ccmp_init); +module_exit(lib80211_crypto_ccmp_exit); diff --git a/net/ieee80211/ieee80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c index bba0152e2d7..7e8e22bfed9 100644 --- a/net/ieee80211/ieee80211_crypt_tkip.c +++ b/net/wireless/lib80211_crypt_tkip.c @@ -1,7 +1,8 @@ /* - * Host AP crypt: host-based TKIP encryption implementation for Host AP driver + * lib80211 crypt: host-based TKIP encryption implementation for lib80211 * * Copyright (c) 2003-2004, Jouni Malinen <j@w1.fi> + * Copyright (c) 2008, John W. Linville <linville@tuxdriver.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -22,16 +23,20 @@ #include <linux/if_arp.h> #include <asm/string.h> -#include <net/ieee80211.h> +#include <linux/wireless.h> +#include <linux/ieee80211.h> +#include <net/iw_handler.h> #include <linux/crypto.h> #include <linux/crc32.h> +#include <net/lib80211.h> + MODULE_AUTHOR("Jouni Malinen"); -MODULE_DESCRIPTION("Host AP crypt: TKIP"); +MODULE_DESCRIPTION("lib80211 crypt: TKIP"); MODULE_LICENSE("GPL"); -struct ieee80211_tkip_data { +struct lib80211_tkip_data { #define TKIP_KEY_LEN 32 u8 key[TKIP_KEY_LEN]; int key_set; @@ -65,23 +70,23 @@ struct ieee80211_tkip_data { unsigned long flags; }; -static unsigned long ieee80211_tkip_set_flags(unsigned long flags, void *priv) +static unsigned long lib80211_tkip_set_flags(unsigned long flags, void *priv) { - struct ieee80211_tkip_data *_priv = priv; + struct lib80211_tkip_data *_priv = priv; unsigned long old_flags = _priv->flags; _priv->flags = flags; return old_flags; } -static unsigned long ieee80211_tkip_get_flags(void *priv) +static unsigned long lib80211_tkip_get_flags(void *priv) { - struct ieee80211_tkip_data *_priv = priv; + struct lib80211_tkip_data *_priv = priv; return _priv->flags; } -static void *ieee80211_tkip_init(int key_idx) +static void *lib80211_tkip_init(int key_idx) { - struct ieee80211_tkip_data *priv; + struct lib80211_tkip_data *priv; priv = kzalloc(sizeof(*priv), GFP_ATOMIC); if (priv == NULL) @@ -92,7 +97,7 @@ static void *ieee80211_tkip_init(int key_idx) priv->tx_tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(priv->tx_tfm_arc4)) { - printk(KERN_DEBUG "ieee80211_crypt_tkip: could not allocate " + printk(KERN_DEBUG "lib80211_crypt_tkip: could not allocate " "crypto API arc4\n"); priv->tx_tfm_arc4 = NULL; goto fail; @@ -101,7 +106,7 @@ static void *ieee80211_tkip_init(int key_idx) priv->tx_tfm_michael = crypto_alloc_hash("michael_mic", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(priv->tx_tfm_michael)) { - printk(KERN_DEBUG "ieee80211_crypt_tkip: could not allocate " + printk(KERN_DEBUG "lib80211_crypt_tkip: could not allocate " "crypto API michael_mic\n"); priv->tx_tfm_michael = NULL; goto fail; @@ -110,7 +115,7 @@ static void *ieee80211_tkip_init(int key_idx) priv->rx_tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(priv->rx_tfm_arc4)) { - printk(KERN_DEBUG "ieee80211_crypt_tkip: could not allocate " + printk(KERN_DEBUG "lib80211_crypt_tkip: could not allocate " "crypto API arc4\n"); priv->rx_tfm_arc4 = NULL; goto fail; @@ -119,7 +124,7 @@ static void *ieee80211_tkip_init(int key_idx) priv->rx_tfm_michael = crypto_alloc_hash("michael_mic", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(priv->rx_tfm_michael)) { - printk(KERN_DEBUG "ieee80211_crypt_tkip: could not allocate " + printk(KERN_DEBUG "lib80211_crypt_tkip: could not allocate " "crypto API michael_mic\n"); priv->rx_tfm_michael = NULL; goto fail; @@ -143,9 +148,9 @@ static void *ieee80211_tkip_init(int key_idx) return NULL; } -static void ieee80211_tkip_deinit(void *priv) +static void lib80211_tkip_deinit(void *priv) { - struct ieee80211_tkip_data *_priv = priv; + struct lib80211_tkip_data *_priv = priv; if (_priv) { if (_priv->tx_tfm_michael) crypto_free_hash(_priv->tx_tfm_michael); @@ -305,15 +310,15 @@ static void tkip_mixing_phase2(u8 * WEPSeed, const u8 * TK, const u16 * TTAK, #endif } -static int ieee80211_tkip_hdr(struct sk_buff *skb, int hdr_len, +static int lib80211_tkip_hdr(struct sk_buff *skb, int hdr_len, u8 * rc4key, int keylen, void *priv) { - struct ieee80211_tkip_data *tkey = priv; + struct lib80211_tkip_data *tkey = priv; int len; u8 *pos; - struct ieee80211_hdr_4addr *hdr; + struct ieee80211_hdr *hdr; - hdr = (struct ieee80211_hdr_4addr *)skb->data; + hdr = (struct ieee80211_hdr *)skb->data; if (skb_headroom(skb) < 8 || skb->len < hdr_len) return -1; @@ -351,23 +356,21 @@ static int ieee80211_tkip_hdr(struct sk_buff *skb, int hdr_len, return 8; } -static int ieee80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv) +static int lib80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv) { - struct ieee80211_tkip_data *tkey = priv; + struct lib80211_tkip_data *tkey = priv; struct blkcipher_desc desc = { .tfm = tkey->tx_tfm_arc4 }; int len; u8 rc4key[16], *pos, *icv; u32 crc; struct scatterlist sg; - DECLARE_MAC_BUF(mac); if (tkey->flags & IEEE80211_CRYPTO_TKIP_COUNTERMEASURES) { if (net_ratelimit()) { - struct ieee80211_hdr_4addr *hdr = - (struct ieee80211_hdr_4addr *)skb->data; + struct ieee80211_hdr *hdr = + (struct ieee80211_hdr *)skb->data; printk(KERN_DEBUG ": TKIP countermeasures: dropped " - "TX packet to %s\n", - print_mac(mac, hdr->addr1)); + "TX packet to %pM\n", hdr->addr1); } return -1; } @@ -378,7 +381,7 @@ static int ieee80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv) len = skb->len - hdr_len; pos = skb->data + hdr_len; - if ((ieee80211_tkip_hdr(skb, hdr_len, rc4key, 16, priv)) < 0) + if ((lib80211_tkip_hdr(skb, hdr_len, rc4key, 16, priv)) < 0) return -1; icv = skb_put(skb, 4); @@ -407,28 +410,26 @@ static inline int tkip_replay_check(u32 iv32_n, u16 iv16_n, return 0; } -static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) +static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) { - struct ieee80211_tkip_data *tkey = priv; + struct lib80211_tkip_data *tkey = priv; struct blkcipher_desc desc = { .tfm = tkey->rx_tfm_arc4 }; u8 rc4key[16]; u8 keyidx, *pos; u32 iv32; u16 iv16; - struct ieee80211_hdr_4addr *hdr; + struct ieee80211_hdr *hdr; u8 icv[4]; u32 crc; struct scatterlist sg; int plen; - DECLARE_MAC_BUF(mac); - hdr = (struct ieee80211_hdr_4addr *)skb->data; + hdr = (struct ieee80211_hdr *)skb->data; if (tkey->flags & IEEE80211_CRYPTO_TKIP_COUNTERMEASURES) { if (net_ratelimit()) { printk(KERN_DEBUG ": TKIP countermeasures: dropped " - "received packet from %s\n", - print_mac(mac, hdr->addr2)); + "received packet from %pM\n", hdr->addr2); } return -1; } @@ -441,7 +442,7 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) if (!(keyidx & (1 << 5))) { if (net_ratelimit()) { printk(KERN_DEBUG "TKIP: received packet without ExtIV" - " flag from %s\n", print_mac(mac, hdr->addr2)); + " flag from %pM\n", hdr->addr2); } return -2; } @@ -453,9 +454,9 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) } if (!tkey->key_set) { if (net_ratelimit()) { - printk(KERN_DEBUG "TKIP: received packet from %s" + printk(KERN_DEBUG "TKIP: received packet from %pM" " with keyid=%d that does not have a configured" - " key\n", print_mac(mac, hdr->addr2), keyidx); + " key\n", hdr->addr2, keyidx); } return -3; } @@ -464,10 +465,10 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) pos += 8; if (tkip_replay_check(iv32, iv16, tkey->rx_iv32, tkey->rx_iv16)) { - if (ieee80211_ratelimit_debug(IEEE80211_DL_DROP)) { - IEEE80211_DEBUG_DROP("TKIP: replay detected: STA=%s" + if (net_ratelimit()) { + printk(KERN_DEBUG "TKIP: replay detected: STA=%pM" " previous TSC %08x%04x received TSC " - "%08x%04x\n", print_mac(mac, hdr->addr2), + "%08x%04x\n", hdr->addr2, tkey->rx_iv32, tkey->rx_iv16, iv32, iv16); } tkey->dot11RSNAStatsTKIPReplays++; @@ -487,8 +488,8 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) if (crypto_blkcipher_decrypt(&desc, &sg, &sg, plen + 4)) { if (net_ratelimit()) { printk(KERN_DEBUG ": TKIP: failed to decrypt " - "received packet from %s\n", - print_mac(mac, hdr->addr2)); + "received packet from %pM\n", + hdr->addr2); } return -7; } @@ -504,9 +505,9 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) * it needs to be recalculated for the next packet. */ tkey->rx_phase1_done = 0; } - if (ieee80211_ratelimit_debug(IEEE80211_DL_DROP)) { - IEEE80211_DEBUG_DROP("TKIP: ICV error detected: STA=" - "%s\n", print_mac(mac, hdr->addr2)); + if (net_ratelimit()) { + printk(KERN_DEBUG "TKIP: ICV error detected: STA=" + "%pM\n", hdr->addr2); } tkey->dot11RSNAStatsTKIPICVErrors++; return -5; @@ -549,13 +550,11 @@ static int michael_mic(struct crypto_hash *tfm_michael, u8 * key, u8 * hdr, static void michael_mic_hdr(struct sk_buff *skb, u8 * hdr) { - struct ieee80211_hdr_4addr *hdr11; - u16 stype; + struct ieee80211_hdr *hdr11; - hdr11 = (struct ieee80211_hdr_4addr *)skb->data; - stype = WLAN_FC_GET_STYPE(le16_to_cpu(hdr11->frame_ctl)); + hdr11 = (struct ieee80211_hdr *)skb->data; - switch (le16_to_cpu(hdr11->frame_ctl) & + switch (le16_to_cpu(hdr11->frame_control) & (IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS)) { case IEEE80211_FCTL_TODS: memcpy(hdr, hdr11->addr3, ETH_ALEN); /* DA */ @@ -575,20 +574,19 @@ static void michael_mic_hdr(struct sk_buff *skb, u8 * hdr) break; } - if (stype & IEEE80211_STYPE_QOS_DATA) { - const struct ieee80211_hdr_3addrqos *qoshdr = - (struct ieee80211_hdr_3addrqos *)skb->data; - hdr[12] = le16_to_cpu(qoshdr->qos_ctl) & IEEE80211_QCTL_TID; + if (ieee80211_is_data_qos(hdr11->frame_control)) { + hdr[12] = le16_to_cpu(*ieee80211_get_qos_ctl(hdr11)) + & IEEE80211_QOS_CTL_TID_MASK; } else hdr[12] = 0; /* priority */ hdr[13] = hdr[14] = hdr[15] = 0; /* reserved */ } -static int ieee80211_michael_mic_add(struct sk_buff *skb, int hdr_len, +static int lib80211_michael_mic_add(struct sk_buff *skb, int hdr_len, void *priv) { - struct ieee80211_tkip_data *tkey = priv; + struct lib80211_tkip_data *tkey = priv; u8 *pos; if (skb_tailroom(skb) < 8 || skb->len < hdr_len) { @@ -607,8 +605,8 @@ static int ieee80211_michael_mic_add(struct sk_buff *skb, int hdr_len, return 0; } -static void ieee80211_michael_mic_failure(struct net_device *dev, - struct ieee80211_hdr_4addr *hdr, +static void lib80211_michael_mic_failure(struct net_device *dev, + struct ieee80211_hdr *hdr, int keyidx) { union iwreq_data wrqu; @@ -628,12 +626,11 @@ static void ieee80211_michael_mic_failure(struct net_device *dev, wireless_send_event(dev, IWEVMICHAELMICFAILURE, &wrqu, (char *)&ev); } -static int ieee80211_michael_mic_verify(struct sk_buff *skb, int keyidx, +static int lib80211_michael_mic_verify(struct sk_buff *skb, int keyidx, int hdr_len, void *priv) { - struct ieee80211_tkip_data *tkey = priv; + struct lib80211_tkip_data *tkey = priv; u8 mic[8]; - DECLARE_MAC_BUF(mac); if (!tkey->key_set) return -1; @@ -643,14 +640,14 @@ static int ieee80211_michael_mic_verify(struct sk_buff *skb, int keyidx, skb->data + hdr_len, skb->len - 8 - hdr_len, mic)) return -1; if (memcmp(mic, skb->data + skb->len - 8, 8) != 0) { - struct ieee80211_hdr_4addr *hdr; - hdr = (struct ieee80211_hdr_4addr *)skb->data; + struct ieee80211_hdr *hdr; + hdr = (struct ieee80211_hdr *)skb->data; printk(KERN_DEBUG "%s: Michael MIC verification failed for " - "MSDU from %s keyidx=%d\n", - skb->dev ? skb->dev->name : "N/A", print_mac(mac, hdr->addr2), + "MSDU from %pM keyidx=%d\n", + skb->dev ? skb->dev->name : "N/A", hdr->addr2, keyidx); if (skb->dev) - ieee80211_michael_mic_failure(skb->dev, hdr, keyidx); + lib80211_michael_mic_failure(skb->dev, hdr, keyidx); tkey->dot11RSNAStatsTKIPLocalMICFailures++; return -1; } @@ -665,9 +662,9 @@ static int ieee80211_michael_mic_verify(struct sk_buff *skb, int keyidx, return 0; } -static int ieee80211_tkip_set_key(void *key, int len, u8 * seq, void *priv) +static int lib80211_tkip_set_key(void *key, int len, u8 * seq, void *priv) { - struct ieee80211_tkip_data *tkey = priv; + struct lib80211_tkip_data *tkey = priv; int keyidx; struct crypto_hash *tfm = tkey->tx_tfm_michael; struct crypto_blkcipher *tfm2 = tkey->tx_tfm_arc4; @@ -698,9 +695,9 @@ static int ieee80211_tkip_set_key(void *key, int len, u8 * seq, void *priv) return 0; } -static int ieee80211_tkip_get_key(void *key, int len, u8 * seq, void *priv) +static int lib80211_tkip_get_key(void *key, int len, u8 * seq, void *priv) { - struct ieee80211_tkip_data *tkey = priv; + struct lib80211_tkip_data *tkey = priv; if (len < TKIP_KEY_LEN) return -1; @@ -727,9 +724,9 @@ static int ieee80211_tkip_get_key(void *key, int len, u8 * seq, void *priv) return TKIP_KEY_LEN; } -static char *ieee80211_tkip_print_stats(char *p, void *priv) +static char *lib80211_tkip_print_stats(char *p, void *priv) { - struct ieee80211_tkip_data *tkip = priv; + struct lib80211_tkip_data *tkip = priv; p += sprintf(p, "key[%d] alg=TKIP key_set=%d " "tx_pn=%02x%02x%02x%02x%02x%02x " "rx_pn=%02x%02x%02x%02x%02x%02x " @@ -753,35 +750,35 @@ static char *ieee80211_tkip_print_stats(char *p, void *priv) return p; } -static struct ieee80211_crypto_ops ieee80211_crypt_tkip = { +static struct lib80211_crypto_ops lib80211_crypt_tkip = { .name = "TKIP", - .init = ieee80211_tkip_init, - .deinit = ieee80211_tkip_deinit, - .build_iv = ieee80211_tkip_hdr, - .encrypt_mpdu = ieee80211_tkip_encrypt, - .decrypt_mpdu = ieee80211_tkip_decrypt, - .encrypt_msdu = ieee80211_michael_mic_add, - .decrypt_msdu = ieee80211_michael_mic_verify, - .set_key = ieee80211_tkip_set_key, - .get_key = ieee80211_tkip_get_key, - .print_stats = ieee80211_tkip_print_stats, + .init = lib80211_tkip_init, + .deinit = lib80211_tkip_deinit, + .build_iv = lib80211_tkip_hdr, + .encrypt_mpdu = lib80211_tkip_encrypt, + .decrypt_mpdu = lib80211_tkip_decrypt, + .encrypt_msdu = lib80211_michael_mic_add, + .decrypt_msdu = lib80211_michael_mic_verify, + .set_key = lib80211_tkip_set_key, + .get_key = lib80211_tkip_get_key, + .print_stats = lib80211_tkip_print_stats, .extra_mpdu_prefix_len = 4 + 4, /* IV + ExtIV */ .extra_mpdu_postfix_len = 4, /* ICV */ .extra_msdu_postfix_len = 8, /* MIC */ - .get_flags = ieee80211_tkip_get_flags, - .set_flags = ieee80211_tkip_set_flags, + .get_flags = lib80211_tkip_get_flags, + .set_flags = lib80211_tkip_set_flags, .owner = THIS_MODULE, }; -static int __init ieee80211_crypto_tkip_init(void) +static int __init lib80211_crypto_tkip_init(void) { - return ieee80211_register_crypto_ops(&ieee80211_crypt_tkip); + return lib80211_register_crypto_ops(&lib80211_crypt_tkip); } -static void __exit ieee80211_crypto_tkip_exit(void) +static void __exit lib80211_crypto_tkip_exit(void) { - ieee80211_unregister_crypto_ops(&ieee80211_crypt_tkip); + lib80211_unregister_crypto_ops(&lib80211_crypt_tkip); } -module_init(ieee80211_crypto_tkip_init); -module_exit(ieee80211_crypto_tkip_exit); +module_init(lib80211_crypto_tkip_init); +module_exit(lib80211_crypto_tkip_exit); diff --git a/net/ieee80211/ieee80211_crypt_wep.c b/net/wireless/lib80211_crypt_wep.c index 3fa30c40779..6d41e05ca33 100644 --- a/net/ieee80211/ieee80211_crypt_wep.c +++ b/net/wireless/lib80211_crypt_wep.c @@ -1,7 +1,8 @@ /* - * Host AP crypt: host-based WEP encryption implementation for Host AP driver + * lib80211 crypt: host-based WEP encryption implementation for lib80211 * * Copyright (c) 2002-2004, Jouni Malinen <j@w1.fi> + * Copyright (c) 2008, John W. Linville <linville@tuxdriver.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -19,16 +20,16 @@ #include <linux/mm.h> #include <asm/string.h> -#include <net/ieee80211.h> +#include <net/lib80211.h> #include <linux/crypto.h> #include <linux/crc32.h> MODULE_AUTHOR("Jouni Malinen"); -MODULE_DESCRIPTION("Host AP crypt: WEP"); +MODULE_DESCRIPTION("lib80211 crypt: WEP"); MODULE_LICENSE("GPL"); -struct prism2_wep_data { +struct lib80211_wep_data { u32 iv; #define WEP_KEY_LEN 13 u8 key[WEP_KEY_LEN + 1]; @@ -38,9 +39,9 @@ struct prism2_wep_data { struct crypto_blkcipher *rx_tfm; }; -static void *prism2_wep_init(int keyidx) +static void *lib80211_wep_init(int keyidx) { - struct prism2_wep_data *priv; + struct lib80211_wep_data *priv; priv = kzalloc(sizeof(*priv), GFP_ATOMIC); if (priv == NULL) @@ -49,7 +50,7 @@ static void *prism2_wep_init(int keyidx) priv->tx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(priv->tx_tfm)) { - printk(KERN_DEBUG "ieee80211_crypt_wep: could not allocate " + printk(KERN_DEBUG "lib80211_crypt_wep: could not allocate " "crypto API arc4\n"); priv->tx_tfm = NULL; goto fail; @@ -57,7 +58,7 @@ static void *prism2_wep_init(int keyidx) priv->rx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(priv->rx_tfm)) { - printk(KERN_DEBUG "ieee80211_crypt_wep: could not allocate " + printk(KERN_DEBUG "lib80211_crypt_wep: could not allocate " "crypto API arc4\n"); priv->rx_tfm = NULL; goto fail; @@ -78,9 +79,9 @@ static void *prism2_wep_init(int keyidx) return NULL; } -static void prism2_wep_deinit(void *priv) +static void lib80211_wep_deinit(void *priv) { - struct prism2_wep_data *_priv = priv; + struct lib80211_wep_data *_priv = priv; if (_priv) { if (_priv->tx_tfm) crypto_free_blkcipher(_priv->tx_tfm); @@ -91,10 +92,10 @@ static void prism2_wep_deinit(void *priv) } /* Add WEP IV/key info to a frame that has at least 4 bytes of headroom */ -static int prism2_wep_build_iv(struct sk_buff *skb, int hdr_len, +static int lib80211_wep_build_iv(struct sk_buff *skb, int hdr_len, u8 *key, int keylen, void *priv) { - struct prism2_wep_data *wep = priv; + struct lib80211_wep_data *wep = priv; u32 klen, len; u8 *pos; @@ -134,21 +135,21 @@ static int prism2_wep_build_iv(struct sk_buff *skb, int hdr_len, * * WEP frame payload: IV + TX key idx, RC4(data), ICV = RC4(CRC32(data)) */ -static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv) +static int lib80211_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv) { - struct prism2_wep_data *wep = priv; + struct lib80211_wep_data *wep = priv; struct blkcipher_desc desc = { .tfm = wep->tx_tfm }; u32 crc, klen, len; u8 *pos, *icv; struct scatterlist sg; u8 key[WEP_KEY_LEN + 3]; - /* other checks are in prism2_wep_build_iv */ + /* other checks are in lib80211_wep_build_iv */ if (skb_tailroom(skb) < 4) return -1; /* add the IV to the frame */ - if (prism2_wep_build_iv(skb, hdr_len, NULL, 0, priv)) + if (lib80211_wep_build_iv(skb, hdr_len, NULL, 0, priv)) return -1; /* Copy the IV into the first 3 bytes of the key */ @@ -181,9 +182,9 @@ static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv) * Returns 0 if frame was decrypted successfully and ICV was correct and -1 on * failure. If frame is OK, IV and ICV will be removed. */ -static int prism2_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv) +static int lib80211_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv) { - struct prism2_wep_data *wep = priv; + struct lib80211_wep_data *wep = priv; struct blkcipher_desc desc = { .tfm = wep->rx_tfm }; u32 crc, klen, plen; u8 key[WEP_KEY_LEN + 3]; @@ -232,9 +233,9 @@ static int prism2_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv) return 0; } -static int prism2_wep_set_key(void *key, int len, u8 * seq, void *priv) +static int lib80211_wep_set_key(void *key, int len, u8 * seq, void *priv) { - struct prism2_wep_data *wep = priv; + struct lib80211_wep_data *wep = priv; if (len < 0 || len > WEP_KEY_LEN) return -1; @@ -245,9 +246,9 @@ static int prism2_wep_set_key(void *key, int len, u8 * seq, void *priv) return 0; } -static int prism2_wep_get_key(void *key, int len, u8 * seq, void *priv) +static int lib80211_wep_get_key(void *key, int len, u8 * seq, void *priv) { - struct prism2_wep_data *wep = priv; + struct lib80211_wep_data *wep = priv; if (len < wep->key_len) return -1; @@ -257,39 +258,39 @@ static int prism2_wep_get_key(void *key, int len, u8 * seq, void *priv) return wep->key_len; } -static char *prism2_wep_print_stats(char *p, void *priv) +static char *lib80211_wep_print_stats(char *p, void *priv) { - struct prism2_wep_data *wep = priv; + struct lib80211_wep_data *wep = priv; p += sprintf(p, "key[%d] alg=WEP len=%d\n", wep->key_idx, wep->key_len); return p; } -static struct ieee80211_crypto_ops ieee80211_crypt_wep = { +static struct lib80211_crypto_ops lib80211_crypt_wep = { .name = "WEP", - .init = prism2_wep_init, - .deinit = prism2_wep_deinit, - .build_iv = prism2_wep_build_iv, - .encrypt_mpdu = prism2_wep_encrypt, - .decrypt_mpdu = prism2_wep_decrypt, + .init = lib80211_wep_init, + .deinit = lib80211_wep_deinit, + .build_iv = lib80211_wep_build_iv, + .encrypt_mpdu = lib80211_wep_encrypt, + .decrypt_mpdu = lib80211_wep_decrypt, .encrypt_msdu = NULL, .decrypt_msdu = NULL, - .set_key = prism2_wep_set_key, - .get_key = prism2_wep_get_key, - .print_stats = prism2_wep_print_stats, + .set_key = lib80211_wep_set_key, + .get_key = lib80211_wep_get_key, + .print_stats = lib80211_wep_print_stats, .extra_mpdu_prefix_len = 4, /* IV */ .extra_mpdu_postfix_len = 4, /* ICV */ .owner = THIS_MODULE, }; -static int __init ieee80211_crypto_wep_init(void) +static int __init lib80211_crypto_wep_init(void) { - return ieee80211_register_crypto_ops(&ieee80211_crypt_wep); + return lib80211_register_crypto_ops(&lib80211_crypt_wep); } -static void __exit ieee80211_crypto_wep_exit(void) +static void __exit lib80211_crypto_wep_exit(void) { - ieee80211_unregister_crypto_ops(&ieee80211_crypt_wep); + lib80211_unregister_crypto_ops(&lib80211_crypt_wep); } -module_init(ieee80211_crypto_wep_init); -module_exit(ieee80211_crypto_wep_exit); +module_init(lib80211_crypto_wep_init); +module_exit(lib80211_crypto_wep_exit); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 572793c8c7a..1e728fff474 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -58,6 +58,9 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_WIPHY] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_NAME] = { .type = NLA_NUL_STRING, .len = BUS_ID_SIZE-1 }, + [NL80211_ATTR_WIPHY_TXQ_PARAMS] = { .type = NLA_NESTED }, + [NL80211_ATTR_WIPHY_FREQ] = { .type = NLA_U32 }, + [NL80211_ATTR_WIPHY_CHANNEL_TYPE] = { .type = NLA_U32 }, [NL80211_ATTR_IFTYPE] = { .type = NLA_U32 }, [NL80211_ATTR_IFINDEX] = { .type = NLA_U32 }, @@ -84,7 +87,7 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { .len = NL80211_MAX_SUPP_RATES }, [NL80211_ATTR_STA_PLINK_ACTION] = { .type = NLA_U8 }, [NL80211_ATTR_STA_VLAN] = { .type = NLA_U32 }, - [NL80211_ATTR_MNTR_FLAGS] = { .type = NLA_NESTED }, + [NL80211_ATTR_MNTR_FLAGS] = { /* NLA_NESTED can't be empty */ }, [NL80211_ATTR_MESH_ID] = { .type = NLA_BINARY, .len = IEEE80211_MAX_MESH_ID_LEN }, [NL80211_ATTR_MPATH_NEXT_HOP] = { .type = NLA_U32 }, @@ -95,6 +98,10 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_BSS_CTS_PROT] = { .type = NLA_U8 }, [NL80211_ATTR_BSS_SHORT_PREAMBLE] = { .type = NLA_U8 }, [NL80211_ATTR_BSS_SHORT_SLOT_TIME] = { .type = NLA_U8 }, + [NL80211_ATTR_BSS_BASIC_RATES] = { .type = NLA_BINARY, + .len = NL80211_MAX_SUPP_RATES }, + + [NL80211_ATTR_MESH_PARAMS] = { .type = NLA_NESTED }, [NL80211_ATTR_HT_CAPABILITY] = { .type = NLA_BINARY, .len = NL80211_HT_CAPABILITY_LEN }, @@ -157,6 +164,19 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, if (!nl_band) goto nla_put_failure; + /* add HT info */ + if (dev->wiphy.bands[band]->ht_cap.ht_supported) { + NLA_PUT(msg, NL80211_BAND_ATTR_HT_MCS_SET, + sizeof(dev->wiphy.bands[band]->ht_cap.mcs), + &dev->wiphy.bands[band]->ht_cap.mcs); + NLA_PUT_U16(msg, NL80211_BAND_ATTR_HT_CAPA, + dev->wiphy.bands[band]->ht_cap.cap); + NLA_PUT_U8(msg, NL80211_BAND_ATTR_HT_AMPDU_FACTOR, + dev->wiphy.bands[band]->ht_cap.ampdu_factor); + NLA_PUT_U8(msg, NL80211_BAND_ATTR_HT_AMPDU_DENSITY, + dev->wiphy.bands[band]->ht_cap.ampdu_density); + } + /* add frequencies */ nl_freqs = nla_nest_start(msg, NL80211_BAND_ATTR_FREQS); if (!nl_freqs) @@ -180,6 +200,9 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, if (chan->flags & IEEE80211_CHAN_RADAR) NLA_PUT_FLAG(msg, NL80211_FREQUENCY_ATTR_RADAR); + NLA_PUT_U32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER, + DBM_TO_MBM(chan->max_power)); + nla_nest_end(msg, nl_freq); } @@ -269,20 +292,142 @@ static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info) return -ENOBUFS; } +static const struct nla_policy txq_params_policy[NL80211_TXQ_ATTR_MAX + 1] = { + [NL80211_TXQ_ATTR_QUEUE] = { .type = NLA_U8 }, + [NL80211_TXQ_ATTR_TXOP] = { .type = NLA_U16 }, + [NL80211_TXQ_ATTR_CWMIN] = { .type = NLA_U16 }, + [NL80211_TXQ_ATTR_CWMAX] = { .type = NLA_U16 }, + [NL80211_TXQ_ATTR_AIFS] = { .type = NLA_U8 }, +}; + +static int parse_txq_params(struct nlattr *tb[], + struct ieee80211_txq_params *txq_params) +{ + if (!tb[NL80211_TXQ_ATTR_QUEUE] || !tb[NL80211_TXQ_ATTR_TXOP] || + !tb[NL80211_TXQ_ATTR_CWMIN] || !tb[NL80211_TXQ_ATTR_CWMAX] || + !tb[NL80211_TXQ_ATTR_AIFS]) + return -EINVAL; + + txq_params->queue = nla_get_u8(tb[NL80211_TXQ_ATTR_QUEUE]); + txq_params->txop = nla_get_u16(tb[NL80211_TXQ_ATTR_TXOP]); + txq_params->cwmin = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMIN]); + txq_params->cwmax = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMAX]); + txq_params->aifs = nla_get_u8(tb[NL80211_TXQ_ATTR_AIFS]); + + return 0; +} + static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev; - int result; - - if (!info->attrs[NL80211_ATTR_WIPHY_NAME]) - return -EINVAL; + int result = 0, rem_txq_params = 0; + struct nlattr *nl_txq_params; rdev = cfg80211_get_dev_from_info(info); if (IS_ERR(rdev)) return PTR_ERR(rdev); - result = cfg80211_dev_rename(rdev, nla_data(info->attrs[NL80211_ATTR_WIPHY_NAME])); + if (info->attrs[NL80211_ATTR_WIPHY_NAME]) { + result = cfg80211_dev_rename( + rdev, nla_data(info->attrs[NL80211_ATTR_WIPHY_NAME])); + if (result) + goto bad_res; + } + + if (info->attrs[NL80211_ATTR_WIPHY_TXQ_PARAMS]) { + struct ieee80211_txq_params txq_params; + struct nlattr *tb[NL80211_TXQ_ATTR_MAX + 1]; + + if (!rdev->ops->set_txq_params) { + result = -EOPNOTSUPP; + goto bad_res; + } + + nla_for_each_nested(nl_txq_params, + info->attrs[NL80211_ATTR_WIPHY_TXQ_PARAMS], + rem_txq_params) { + nla_parse(tb, NL80211_TXQ_ATTR_MAX, + nla_data(nl_txq_params), + nla_len(nl_txq_params), + txq_params_policy); + result = parse_txq_params(tb, &txq_params); + if (result) + goto bad_res; + + result = rdev->ops->set_txq_params(&rdev->wiphy, + &txq_params); + if (result) + goto bad_res; + } + } + + if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { + enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; + struct ieee80211_channel *chan; + struct ieee80211_sta_ht_cap *ht_cap; + u32 freq, sec_freq; + + if (!rdev->ops->set_channel) { + result = -EOPNOTSUPP; + goto bad_res; + } + + result = -EINVAL; + + if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) { + channel_type = nla_get_u32(info->attrs[ + NL80211_ATTR_WIPHY_CHANNEL_TYPE]); + if (channel_type != NL80211_CHAN_NO_HT && + channel_type != NL80211_CHAN_HT20 && + channel_type != NL80211_CHAN_HT40PLUS && + channel_type != NL80211_CHAN_HT40MINUS) + goto bad_res; + } + + freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]); + chan = ieee80211_get_channel(&rdev->wiphy, freq); + + /* Primary channel not allowed */ + if (!chan || chan->flags & IEEE80211_CHAN_DISABLED) + goto bad_res; + + if (channel_type == NL80211_CHAN_HT40MINUS) + sec_freq = freq - 20; + else if (channel_type == NL80211_CHAN_HT40PLUS) + sec_freq = freq + 20; + else + sec_freq = 0; + + ht_cap = &rdev->wiphy.bands[chan->band]->ht_cap; + /* no HT capabilities */ + if (channel_type != NL80211_CHAN_NO_HT && + !ht_cap->ht_supported) + goto bad_res; + + if (sec_freq) { + struct ieee80211_channel *schan; + + /* no 40 MHz capabilities */ + if (!(ht_cap->cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) || + (ht_cap->cap & IEEE80211_HT_CAP_40MHZ_INTOLERANT)) + goto bad_res; + + schan = ieee80211_get_channel(&rdev->wiphy, sec_freq); + + /* Secondary channel not allowed */ + if (!schan || schan->flags & IEEE80211_CHAN_DISABLED) + goto bad_res; + } + + result = rdev->ops->set_channel(&rdev->wiphy, chan, + channel_type); + if (result) + goto bad_res; + } + + + bad_res: cfg80211_put_dev(rdev); return result; } @@ -945,12 +1090,46 @@ static int parse_station_flags(struct nlattr *nla, u32 *staflags) return 0; } +static u16 nl80211_calculate_bitrate(struct rate_info *rate) +{ + int modulation, streams, bitrate; + + if (!(rate->flags & RATE_INFO_FLAGS_MCS)) + return rate->legacy; + + /* the formula below does only work for MCS values smaller than 32 */ + if (rate->mcs >= 32) + return 0; + + modulation = rate->mcs & 7; + streams = (rate->mcs >> 3) + 1; + + bitrate = (rate->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH) ? + 13500000 : 6500000; + + if (modulation < 4) + bitrate *= (modulation + 1); + else if (modulation == 4) + bitrate *= (modulation + 2); + else + bitrate *= (modulation + 3); + + bitrate *= streams; + + if (rate->flags & RATE_INFO_FLAGS_SHORT_GI) + bitrate = (bitrate / 9) * 10; + + /* do NOT round down here */ + return (bitrate + 50000) / 100000; +} + static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, int flags, struct net_device *dev, u8 *mac_addr, struct station_info *sinfo) { void *hdr; - struct nlattr *sinfoattr; + struct nlattr *sinfoattr, *txrate; + u16 bitrate; hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_STATION); if (!hdr) @@ -980,7 +1159,29 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, if (sinfo->filled & STATION_INFO_PLINK_STATE) NLA_PUT_U8(msg, NL80211_STA_INFO_PLINK_STATE, sinfo->plink_state); + if (sinfo->filled & STATION_INFO_SIGNAL) + NLA_PUT_U8(msg, NL80211_STA_INFO_SIGNAL, + sinfo->signal); + if (sinfo->filled & STATION_INFO_TX_BITRATE) { + txrate = nla_nest_start(msg, NL80211_STA_INFO_TX_BITRATE); + if (!txrate) + goto nla_put_failure; + + /* nl80211_calculate_bitrate will return 0 for mcs >= 32 */ + bitrate = nl80211_calculate_bitrate(&sinfo->txrate); + if (bitrate > 0) + NLA_PUT_U16(msg, NL80211_RATE_INFO_BITRATE, bitrate); + + if (sinfo->txrate.flags & RATE_INFO_FLAGS_MCS) + NLA_PUT_U8(msg, NL80211_RATE_INFO_MCS, + sinfo->txrate.mcs); + if (sinfo->txrate.flags & RATE_INFO_FLAGS_40_MHZ_WIDTH) + NLA_PUT_FLAG(msg, NL80211_RATE_INFO_40_MHZ_WIDTH); + if (sinfo->txrate.flags & RATE_INFO_FLAGS_SHORT_GI) + NLA_PUT_FLAG(msg, NL80211_RATE_INFO_SHORT_GI); + nla_nest_end(msg, txrate); + } nla_nest_end(msg, sinfoattr); return genlmsg_end(msg, hdr); @@ -1598,6 +1799,12 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_BSS_SHORT_SLOT_TIME]) params.use_short_slot_time = nla_get_u8(info->attrs[NL80211_ATTR_BSS_SHORT_SLOT_TIME]); + if (info->attrs[NL80211_ATTR_BSS_BASIC_RATES]) { + params.basic_rates = + nla_data(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]); + params.basic_rates_len = + nla_len(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]); + } err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) @@ -1680,11 +1887,188 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) return -EINVAL; #endif mutex_lock(&cfg80211_drv_mutex); - r = __regulatory_hint(NULL, REGDOM_SET_BY_USER, data, NULL); + r = __regulatory_hint(NULL, REGDOM_SET_BY_USER, data, 0, ENVIRON_ANY); mutex_unlock(&cfg80211_drv_mutex); return r; } +static int nl80211_get_mesh_params(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *drv; + struct mesh_config cur_params; + int err; + struct net_device *dev; + void *hdr; + struct nlattr *pinfoattr; + struct sk_buff *msg; + + /* Look up our device */ + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); + if (err) + return err; + + /* Get the mesh params */ + rtnl_lock(); + err = drv->ops->get_mesh_params(&drv->wiphy, dev, &cur_params); + rtnl_unlock(); + if (err) + goto out; + + /* Draw up a netlink message to send back */ + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) { + err = -ENOBUFS; + goto out; + } + hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + NL80211_CMD_GET_MESH_PARAMS); + if (!hdr) + goto nla_put_failure; + pinfoattr = nla_nest_start(msg, NL80211_ATTR_MESH_PARAMS); + if (!pinfoattr) + goto nla_put_failure; + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex); + NLA_PUT_U16(msg, NL80211_MESHCONF_RETRY_TIMEOUT, + cur_params.dot11MeshRetryTimeout); + NLA_PUT_U16(msg, NL80211_MESHCONF_CONFIRM_TIMEOUT, + cur_params.dot11MeshConfirmTimeout); + NLA_PUT_U16(msg, NL80211_MESHCONF_HOLDING_TIMEOUT, + cur_params.dot11MeshHoldingTimeout); + NLA_PUT_U16(msg, NL80211_MESHCONF_MAX_PEER_LINKS, + cur_params.dot11MeshMaxPeerLinks); + NLA_PUT_U8(msg, NL80211_MESHCONF_MAX_RETRIES, + cur_params.dot11MeshMaxRetries); + NLA_PUT_U8(msg, NL80211_MESHCONF_TTL, + cur_params.dot11MeshTTL); + NLA_PUT_U8(msg, NL80211_MESHCONF_AUTO_OPEN_PLINKS, + cur_params.auto_open_plinks); + NLA_PUT_U8(msg, NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES, + cur_params.dot11MeshHWMPmaxPREQretries); + NLA_PUT_U32(msg, NL80211_MESHCONF_PATH_REFRESH_TIME, + cur_params.path_refresh_time); + NLA_PUT_U16(msg, NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT, + cur_params.min_discovery_timeout); + NLA_PUT_U32(msg, NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT, + cur_params.dot11MeshHWMPactivePathTimeout); + NLA_PUT_U16(msg, NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL, + cur_params.dot11MeshHWMPpreqMinInterval); + NLA_PUT_U16(msg, NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME, + cur_params.dot11MeshHWMPnetDiameterTraversalTime); + nla_nest_end(msg, pinfoattr); + genlmsg_end(msg, hdr); + err = genlmsg_unicast(msg, info->snd_pid); + goto out; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + err = -EMSGSIZE; +out: + /* Cleanup */ + cfg80211_put_dev(drv); + dev_put(dev); + return err; +} + +#define FILL_IN_MESH_PARAM_IF_SET(table, cfg, param, mask, attr_num, nla_fn) \ +do {\ + if (table[attr_num]) {\ + cfg.param = nla_fn(table[attr_num]); \ + mask |= (1 << (attr_num - 1)); \ + } \ +} while (0);\ + +static struct nla_policy +nl80211_meshconf_params_policy[NL80211_MESHCONF_ATTR_MAX+1] __read_mostly = { + [NL80211_MESHCONF_RETRY_TIMEOUT] = { .type = NLA_U16 }, + [NL80211_MESHCONF_CONFIRM_TIMEOUT] = { .type = NLA_U16 }, + [NL80211_MESHCONF_HOLDING_TIMEOUT] = { .type = NLA_U16 }, + [NL80211_MESHCONF_MAX_PEER_LINKS] = { .type = NLA_U16 }, + [NL80211_MESHCONF_MAX_RETRIES] = { .type = NLA_U8 }, + [NL80211_MESHCONF_TTL] = { .type = NLA_U8 }, + [NL80211_MESHCONF_AUTO_OPEN_PLINKS] = { .type = NLA_U8 }, + + [NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES] = { .type = NLA_U8 }, + [NL80211_MESHCONF_PATH_REFRESH_TIME] = { .type = NLA_U32 }, + [NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT] = { .type = NLA_U16 }, + [NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT] = { .type = NLA_U32 }, + [NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL] = { .type = NLA_U16 }, + [NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME] = { .type = NLA_U16 }, +}; + +static int nl80211_set_mesh_params(struct sk_buff *skb, struct genl_info *info) +{ + int err; + u32 mask; + struct cfg80211_registered_device *drv; + struct net_device *dev; + struct mesh_config cfg; + struct nlattr *tb[NL80211_MESHCONF_ATTR_MAX + 1]; + struct nlattr *parent_attr; + + parent_attr = info->attrs[NL80211_ATTR_MESH_PARAMS]; + if (!parent_attr) + return -EINVAL; + if (nla_parse_nested(tb, NL80211_MESHCONF_ATTR_MAX, + parent_attr, nl80211_meshconf_params_policy)) + return -EINVAL; + + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); + if (err) + return err; + + /* This makes sure that there aren't more than 32 mesh config + * parameters (otherwise our bitfield scheme would not work.) */ + BUILD_BUG_ON(NL80211_MESHCONF_ATTR_MAX > 32); + + /* Fill in the params struct */ + mask = 0; + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshRetryTimeout, + mask, NL80211_MESHCONF_RETRY_TIMEOUT, nla_get_u16); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshConfirmTimeout, + mask, NL80211_MESHCONF_CONFIRM_TIMEOUT, nla_get_u16); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHoldingTimeout, + mask, NL80211_MESHCONF_HOLDING_TIMEOUT, nla_get_u16); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxPeerLinks, + mask, NL80211_MESHCONF_MAX_PEER_LINKS, nla_get_u16); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxRetries, + mask, NL80211_MESHCONF_MAX_RETRIES, nla_get_u8); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshTTL, + mask, NL80211_MESHCONF_TTL, nla_get_u8); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, auto_open_plinks, + mask, NL80211_MESHCONF_AUTO_OPEN_PLINKS, nla_get_u8); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPmaxPREQretries, + mask, NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES, + nla_get_u8); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, path_refresh_time, + mask, NL80211_MESHCONF_PATH_REFRESH_TIME, nla_get_u32); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, min_discovery_timeout, + mask, NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT, + nla_get_u16); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathTimeout, + mask, NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT, + nla_get_u32); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPpreqMinInterval, + mask, NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL, + nla_get_u16); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, + dot11MeshHWMPnetDiameterTraversalTime, + mask, NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME, + nla_get_u16); + + /* Apply changes */ + rtnl_lock(); + err = drv->ops->set_mesh_params(&drv->wiphy, dev, &cfg, mask); + rtnl_unlock(); + + /* cleanup */ + cfg80211_put_dev(drv); + dev_put(dev); + return err; +} + +#undef FILL_IN_MESH_PARAM_IF_SET + static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) { struct nlattr *tb[NL80211_REG_RULE_ATTR_MAX + 1]; @@ -1743,12 +2127,9 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) mutex_lock(&cfg80211_drv_mutex); r = set_regdom(rd); mutex_unlock(&cfg80211_drv_mutex); - if (r) - goto bad_reg; - return r; -bad_reg: + bad_reg: kfree(rd); return -EINVAL; } @@ -1902,6 +2283,18 @@ static struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, + { + .cmd = NL80211_CMD_GET_MESH_PARAMS, + .doit = nl80211_get_mesh_params, + .policy = nl80211_policy, + /* can be retrieved by unprivileged users */ + }, + { + .cmd = NL80211_CMD_SET_MESH_PARAMS, + .doit = nl80211_set_mesh_params, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, }; /* multicast groups */ diff --git a/net/wireless/reg.c b/net/wireless/reg.c index eb3b1a9f9b1..4f877535e66 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -42,17 +42,40 @@ #include "core.h" #include "reg.h" -/* wiphy is set if this request's initiator is REGDOM_SET_BY_DRIVER */ +/** + * struct regulatory_request - receipt of last regulatory request + * + * @wiphy: this is set if this request's initiator is + * %REGDOM_SET_BY_COUNTRY_IE or %REGDOM_SET_BY_DRIVER. This + * can be used by the wireless core to deal with conflicts + * and potentially inform users of which devices specifically + * cased the conflicts. + * @initiator: indicates who sent this request, could be any of + * of those set in reg_set_by, %REGDOM_SET_BY_* + * @alpha2: the ISO / IEC 3166 alpha2 country code of the requested + * regulatory domain. We have a few special codes: + * 00 - World regulatory domain + * 99 - built by driver but a specific alpha2 cannot be determined + * 98 - result of an intersection between two regulatory domains + * @intersect: indicates whether the wireless core should intersect + * the requested regulatory domain with the presently set regulatory + * domain. + * @country_ie_checksum: checksum of the last processed and accepted + * country IE + * @country_ie_env: lets us know if the AP is telling us we are outdoor, + * indoor, or if it doesn't matter + */ struct regulatory_request { - struct list_head list; struct wiphy *wiphy; - int granted; enum reg_set_by initiator; char alpha2[2]; + bool intersect; + u32 country_ie_checksum; + enum environment_cap country_ie_env; }; -static LIST_HEAD(regulatory_requests); -DEFINE_MUTEX(cfg80211_reg_mutex); +/* Receipt of information from last regulatory request */ +static struct regulatory_request *last_request; /* To trigger userspace events */ static struct platform_device *reg_pdev; @@ -63,13 +86,16 @@ static u32 supported_bandwidths[] = { MHZ_TO_KHZ(20), }; -static struct list_head regulatory_requests; - /* Central wireless core regulatory domains, we only need two, * the current one and a world regulatory domain in case we have no * information to give us an alpha2 */ static const struct ieee80211_regdomain *cfg80211_regdomain; +/* We use this as a place for the rd structure built from the + * last parsed country IE to rest until CRDA gets back to us with + * what it thinks should apply for the same country */ +static const struct ieee80211_regdomain *country_ie_regdomain; + /* We keep a static world regulatory domain in case of the absence of CRDA */ static const struct ieee80211_regdomain world_regdom = { .n_reg_rules = 1, @@ -204,7 +230,7 @@ static void reset_regdomains(void) * core upon initialization */ static void update_world_regdomain(const struct ieee80211_regdomain *rd) { - BUG_ON(list_empty(®ulatory_requests)); + BUG_ON(!last_request); reset_regdomains(); @@ -249,6 +275,18 @@ static bool is_unknown_alpha2(const char *alpha2) return false; } +static bool is_intersected_alpha2(const char *alpha2) +{ + if (!alpha2) + return false; + /* Special case where regulatory domain is the + * result of an intersection between two regulatory domain + * structures */ + if (alpha2[0] == '9' && alpha2[1] == '8') + return true; + return false; +} + static bool is_an_alpha2(const char *alpha2) { if (!alpha2) @@ -277,6 +315,25 @@ static bool regdom_changed(const char *alpha2) return true; } +/** + * country_ie_integrity_changes - tells us if the country IE has changed + * @checksum: checksum of country IE of fields we are interested in + * + * If the country IE has not changed you can ignore it safely. This is + * useful to determine if two devices are seeing two different country IEs + * even on the same alpha2. Note that this will return false if no IE has + * been set on the wireless core yet. + */ +static bool country_ie_integrity_changes(u32 checksum) +{ + /* If no IE has been set then the checksum doesn't change */ + if (unlikely(!last_request->country_ie_checksum)) + return false; + if (unlikely(last_request->country_ie_checksum != checksum)) + return true; + return false; +} + /* This lets us keep regulatory code which is updated on a regulatory * basis in userspace. */ static int call_crda(const char *alpha2) @@ -300,121 +357,13 @@ static int call_crda(const char *alpha2) return kobject_uevent_env(®_pdev->dev.kobj, KOBJ_CHANGE, envp); } -/* This has the logic which determines when a new request - * should be ignored. */ -static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, - char *alpha2, struct ieee80211_regdomain *rd) -{ - struct regulatory_request *last_request = NULL; - - /* All initial requests are respected */ - if (list_empty(®ulatory_requests)) - return 0; - - last_request = list_first_entry(®ulatory_requests, - struct regulatory_request, list); - - switch (set_by) { - case REGDOM_SET_BY_INIT: - return -EINVAL; - case REGDOM_SET_BY_CORE: - /* Always respect new wireless core hints, should only - * come in for updating the world regulatory domain at init - * anyway */ - return 0; - case REGDOM_SET_BY_COUNTRY_IE: - if (last_request->initiator == set_by) { - if (last_request->wiphy != wiphy) { - /* Two cards with two APs claiming different - * different Country IE alpha2s! - * You're special!! */ - if (!alpha2_equal(last_request->alpha2, - cfg80211_regdomain->alpha2)) { - /* XXX: Deal with conflict, consider - * building a new one out of the - * intersection */ - WARN_ON(1); - return -EOPNOTSUPP; - } - return -EALREADY; - } - /* Two consecutive Country IE hints on the same wiphy */ - if (!alpha2_equal(cfg80211_regdomain->alpha2, alpha2)) - return 0; - return -EALREADY; - } - if (WARN(!is_alpha2_set(alpha2) || !is_an_alpha2(alpha2), - "Invalid Country IE regulatory hint passed " - "to the wireless core\n")) - return -EINVAL; - /* We ignore Country IE hints for now, as we haven't yet - * added the dot11MultiDomainCapabilityEnabled flag - * for wiphys */ - return 1; - case REGDOM_SET_BY_DRIVER: - BUG_ON(!wiphy); - if (last_request->initiator == set_by) { - /* Two separate drivers hinting different things, - * this is possible if you have two devices present - * on a system with different EEPROM regulatory - * readings. XXX: Do intersection, we support only - * the first regulatory hint for now */ - if (last_request->wiphy != wiphy) - return -EALREADY; - if (rd) - return -EALREADY; - /* Driver should not be trying to hint different - * regulatory domains! */ - BUG_ON(!alpha2_equal(alpha2, - cfg80211_regdomain->alpha2)); - return -EALREADY; - } - if (last_request->initiator == REGDOM_SET_BY_CORE) - return 0; - /* XXX: Handle intersection, and add the - * dot11MultiDomainCapabilityEnabled flag to wiphy. For now - * we assume the driver has this set to false, following the - * 802.11d dot11MultiDomainCapabilityEnabled documentation */ - if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) - return 0; - return 0; - case REGDOM_SET_BY_USER: - if (last_request->initiator == set_by || - last_request->initiator == REGDOM_SET_BY_CORE) - return 0; - /* Drivers can use their wiphy's reg_notifier() - * to override any information */ - if (last_request->initiator == REGDOM_SET_BY_DRIVER) - return 0; - /* XXX: Handle intersection */ - if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) - return -EOPNOTSUPP; - return 0; - default: - return -EINVAL; - } -} - -static bool __reg_is_valid_request(const char *alpha2, - struct regulatory_request **request) -{ - struct regulatory_request *req; - if (list_empty(®ulatory_requests)) - return false; - list_for_each_entry(req, ®ulatory_requests, list) { - if (alpha2_equal(req->alpha2, alpha2)) { - *request = req; - return true; - } - } - return false; -} - /* Used by nl80211 before kmalloc'ing our regulatory domain */ bool reg_is_valid_request(const char *alpha2) { - struct regulatory_request *request = NULL; - return __reg_is_valid_request(alpha2, &request); + if (!last_request) + return false; + + return alpha2_equal(last_request->alpha2, alpha2); } /* Sanity check on a regulatory rule */ @@ -423,7 +372,7 @@ static bool is_valid_reg_rule(const struct ieee80211_reg_rule *rule) const struct ieee80211_freq_range *freq_range = &rule->freq_range; u32 freq_diff; - if (freq_range->start_freq_khz == 0 || freq_range->end_freq_khz == 0) + if (freq_range->start_freq_khz <= 0 || freq_range->end_freq_khz <= 0) return false; if (freq_range->start_freq_khz > freq_range->end_freq_khz) @@ -431,7 +380,7 @@ static bool is_valid_reg_rule(const struct ieee80211_reg_rule *rule) freq_diff = freq_range->end_freq_khz - freq_range->start_freq_khz; - if (freq_range->max_bandwidth_khz > freq_diff) + if (freq_diff <= 0 || freq_range->max_bandwidth_khz > freq_diff) return false; return true; @@ -445,6 +394,9 @@ static bool is_valid_rd(const struct ieee80211_regdomain *rd) if (!rd->n_reg_rules) return false; + if (WARN_ON(rd->n_reg_rules > NL80211_MAX_SUPP_REG_RULES)) + return false; + for (i = 0; i < rd->n_reg_rules; i++) { reg_rule = &rd->reg_rules[i]; if (!is_valid_reg_rule(reg_rule)) @@ -469,6 +421,311 @@ static u32 freq_max_bandwidth(const struct ieee80211_freq_range *freq_range, return 0; } +/* Converts a country IE to a regulatory domain. A regulatory domain + * structure has a lot of information which the IE doesn't yet have, + * so for the other values we use upper max values as we will intersect + * with our userspace regulatory agent to get lower bounds. */ +static struct ieee80211_regdomain *country_ie_2_rd( + u8 *country_ie, + u8 country_ie_len, + u32 *checksum) +{ + struct ieee80211_regdomain *rd = NULL; + unsigned int i = 0; + char alpha2[2]; + u32 flags = 0; + u32 num_rules = 0, size_of_regd = 0; + u8 *triplets_start = NULL; + u8 len_at_triplet = 0; + /* the last channel we have registered in a subband (triplet) */ + int last_sub_max_channel = 0; + + *checksum = 0xDEADBEEF; + + /* Country IE requirements */ + BUG_ON(country_ie_len < IEEE80211_COUNTRY_IE_MIN_LEN || + country_ie_len & 0x01); + + alpha2[0] = country_ie[0]; + alpha2[1] = country_ie[1]; + + /* + * Third octet can be: + * 'I' - Indoor + * 'O' - Outdoor + * + * anything else we assume is no restrictions + */ + if (country_ie[2] == 'I') + flags = NL80211_RRF_NO_OUTDOOR; + else if (country_ie[2] == 'O') + flags = NL80211_RRF_NO_INDOOR; + + country_ie += 3; + country_ie_len -= 3; + + triplets_start = country_ie; + len_at_triplet = country_ie_len; + + *checksum ^= ((flags ^ alpha2[0] ^ alpha2[1]) << 8); + + /* We need to build a reg rule for each triplet, but first we must + * calculate the number of reg rules we will need. We will need one + * for each channel subband */ + while (country_ie_len >= 3) { + struct ieee80211_country_ie_triplet *triplet = + (struct ieee80211_country_ie_triplet *) country_ie; + int cur_sub_max_channel = 0, cur_channel = 0; + + if (triplet->ext.reg_extension_id >= + IEEE80211_COUNTRY_EXTENSION_ID) { + country_ie += 3; + country_ie_len -= 3; + continue; + } + + cur_channel = triplet->chans.first_channel; + cur_sub_max_channel = ieee80211_channel_to_frequency( + cur_channel + triplet->chans.num_channels); + + /* Basic sanity check */ + if (cur_sub_max_channel < cur_channel) + return NULL; + + /* Do not allow overlapping channels. Also channels + * passed in each subband must be monotonically + * increasing */ + if (last_sub_max_channel) { + if (cur_channel <= last_sub_max_channel) + return NULL; + if (cur_sub_max_channel <= last_sub_max_channel) + return NULL; + } + + /* When dot11RegulatoryClassesRequired is supported + * we can throw ext triplets as part of this soup, + * for now we don't care when those change as we + * don't support them */ + *checksum ^= ((cur_channel ^ cur_sub_max_channel) << 8) | + ((cur_sub_max_channel ^ cur_sub_max_channel) << 16) | + ((triplet->chans.max_power ^ cur_sub_max_channel) << 24); + + last_sub_max_channel = cur_sub_max_channel; + + country_ie += 3; + country_ie_len -= 3; + num_rules++; + + /* Note: this is not a IEEE requirement but + * simply a memory requirement */ + if (num_rules > NL80211_MAX_SUPP_REG_RULES) + return NULL; + } + + country_ie = triplets_start; + country_ie_len = len_at_triplet; + + size_of_regd = sizeof(struct ieee80211_regdomain) + + (num_rules * sizeof(struct ieee80211_reg_rule)); + + rd = kzalloc(size_of_regd, GFP_KERNEL); + if (!rd) + return NULL; + + rd->n_reg_rules = num_rules; + rd->alpha2[0] = alpha2[0]; + rd->alpha2[1] = alpha2[1]; + + /* This time around we fill in the rd */ + while (country_ie_len >= 3) { + struct ieee80211_country_ie_triplet *triplet = + (struct ieee80211_country_ie_triplet *) country_ie; + struct ieee80211_reg_rule *reg_rule = NULL; + struct ieee80211_freq_range *freq_range = NULL; + struct ieee80211_power_rule *power_rule = NULL; + + /* Must parse if dot11RegulatoryClassesRequired is true, + * we don't support this yet */ + if (triplet->ext.reg_extension_id >= + IEEE80211_COUNTRY_EXTENSION_ID) { + country_ie += 3; + country_ie_len -= 3; + continue; + } + + reg_rule = &rd->reg_rules[i]; + freq_range = ®_rule->freq_range; + power_rule = ®_rule->power_rule; + + reg_rule->flags = flags; + + /* The +10 is since the regulatory domain expects + * the actual band edge, not the center of freq for + * its start and end freqs, assuming 20 MHz bandwidth on + * the channels passed */ + freq_range->start_freq_khz = + MHZ_TO_KHZ(ieee80211_channel_to_frequency( + triplet->chans.first_channel) - 10); + freq_range->end_freq_khz = + MHZ_TO_KHZ(ieee80211_channel_to_frequency( + triplet->chans.first_channel + + triplet->chans.num_channels) + 10); + + /* Large arbitrary values, we intersect later */ + /* Increment this if we ever support >= 40 MHz channels + * in IEEE 802.11 */ + freq_range->max_bandwidth_khz = MHZ_TO_KHZ(40); + power_rule->max_antenna_gain = DBI_TO_MBI(100); + power_rule->max_eirp = DBM_TO_MBM(100); + + country_ie += 3; + country_ie_len -= 3; + i++; + + BUG_ON(i > NL80211_MAX_SUPP_REG_RULES); + } + + return rd; +} + + +/* Helper for regdom_intersect(), this does the real + * mathematical intersection fun */ +static int reg_rules_intersect( + const struct ieee80211_reg_rule *rule1, + const struct ieee80211_reg_rule *rule2, + struct ieee80211_reg_rule *intersected_rule) +{ + const struct ieee80211_freq_range *freq_range1, *freq_range2; + struct ieee80211_freq_range *freq_range; + const struct ieee80211_power_rule *power_rule1, *power_rule2; + struct ieee80211_power_rule *power_rule; + u32 freq_diff; + + freq_range1 = &rule1->freq_range; + freq_range2 = &rule2->freq_range; + freq_range = &intersected_rule->freq_range; + + power_rule1 = &rule1->power_rule; + power_rule2 = &rule2->power_rule; + power_rule = &intersected_rule->power_rule; + + freq_range->start_freq_khz = max(freq_range1->start_freq_khz, + freq_range2->start_freq_khz); + freq_range->end_freq_khz = min(freq_range1->end_freq_khz, + freq_range2->end_freq_khz); + freq_range->max_bandwidth_khz = min(freq_range1->max_bandwidth_khz, + freq_range2->max_bandwidth_khz); + + freq_diff = freq_range->end_freq_khz - freq_range->start_freq_khz; + if (freq_range->max_bandwidth_khz > freq_diff) + freq_range->max_bandwidth_khz = freq_diff; + + power_rule->max_eirp = min(power_rule1->max_eirp, + power_rule2->max_eirp); + power_rule->max_antenna_gain = min(power_rule1->max_antenna_gain, + power_rule2->max_antenna_gain); + + intersected_rule->flags = (rule1->flags | rule2->flags); + + if (!is_valid_reg_rule(intersected_rule)) + return -EINVAL; + + return 0; +} + +/** + * regdom_intersect - do the intersection between two regulatory domains + * @rd1: first regulatory domain + * @rd2: second regulatory domain + * + * Use this function to get the intersection between two regulatory domains. + * Once completed we will mark the alpha2 for the rd as intersected, "98", + * as no one single alpha2 can represent this regulatory domain. + * + * Returns a pointer to the regulatory domain structure which will hold the + * resulting intersection of rules between rd1 and rd2. We will + * kzalloc() this structure for you. + */ +static struct ieee80211_regdomain *regdom_intersect( + const struct ieee80211_regdomain *rd1, + const struct ieee80211_regdomain *rd2) +{ + int r, size_of_regd; + unsigned int x, y; + unsigned int num_rules = 0, rule_idx = 0; + const struct ieee80211_reg_rule *rule1, *rule2; + struct ieee80211_reg_rule *intersected_rule; + struct ieee80211_regdomain *rd; + /* This is just a dummy holder to help us count */ + struct ieee80211_reg_rule irule; + + /* Uses the stack temporarily for counter arithmetic */ + intersected_rule = &irule; + + memset(intersected_rule, 0, sizeof(struct ieee80211_reg_rule)); + + if (!rd1 || !rd2) + return NULL; + + /* First we get a count of the rules we'll need, then we actually + * build them. This is to so we can malloc() and free() a + * regdomain once. The reason we use reg_rules_intersect() here + * is it will return -EINVAL if the rule computed makes no sense. + * All rules that do check out OK are valid. */ + + for (x = 0; x < rd1->n_reg_rules; x++) { + rule1 = &rd1->reg_rules[x]; + for (y = 0; y < rd2->n_reg_rules; y++) { + rule2 = &rd2->reg_rules[y]; + if (!reg_rules_intersect(rule1, rule2, + intersected_rule)) + num_rules++; + memset(intersected_rule, 0, + sizeof(struct ieee80211_reg_rule)); + } + } + + if (!num_rules) + return NULL; + + size_of_regd = sizeof(struct ieee80211_regdomain) + + ((num_rules + 1) * sizeof(struct ieee80211_reg_rule)); + + rd = kzalloc(size_of_regd, GFP_KERNEL); + if (!rd) + return NULL; + + for (x = 0; x < rd1->n_reg_rules; x++) { + rule1 = &rd1->reg_rules[x]; + for (y = 0; y < rd2->n_reg_rules; y++) { + rule2 = &rd2->reg_rules[y]; + /* This time around instead of using the stack lets + * write to the target rule directly saving ourselves + * a memcpy() */ + intersected_rule = &rd->reg_rules[rule_idx]; + r = reg_rules_intersect(rule1, rule2, + intersected_rule); + /* No need to memset here the intersected rule here as + * we're not using the stack anymore */ + if (r) + continue; + rule_idx++; + } + } + + if (rule_idx != num_rules) { + kfree(rd); + return NULL; + } + + rd->n_reg_rules = num_rules; + rd->alpha2[0] = '9'; + rd->alpha2[1] = '8'; + + return rd; +} + /* XXX: add support for the rest of enum nl80211_reg_rule_flags, we may * want to just have the channel structure use these */ static u32 map_regdom_flags(u32 rd_flags) @@ -559,12 +816,23 @@ static void handle_band(struct ieee80211_supported_band *sband) handle_channel(&sband->channels[i]); } +static bool ignore_reg_update(struct wiphy *wiphy, enum reg_set_by setby) +{ + if (!last_request) + return true; + if (setby == REGDOM_SET_BY_CORE && + wiphy->fw_handles_regulatory) + return true; + return false; +} + static void update_all_wiphy_regulatory(enum reg_set_by setby) { struct cfg80211_registered_device *drv; list_for_each_entry(drv, &cfg80211_drv_list, list) - wiphy_update_regulatory(&drv->wiphy, setby); + if (!ignore_reg_update(&drv->wiphy, setby)) + wiphy_update_regulatory(&drv->wiphy, setby); } void wiphy_update_regulatory(struct wiphy *wiphy, enum reg_set_by setby) @@ -578,78 +846,237 @@ void wiphy_update_regulatory(struct wiphy *wiphy, enum reg_set_by setby) } } -/* Caller must hold &cfg80211_drv_mutex */ -int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, - const char *alpha2, struct ieee80211_regdomain *rd) -{ - struct regulatory_request *request; - char *rd_alpha2; - int r = 0; - - r = ignore_request(wiphy, set_by, (char *) alpha2, rd); - if (r) - return r; +/* Return value which can be used by ignore_request() to indicate + * it has been determined we should intersect two regulatory domains */ +#define REG_INTERSECT 1 - if (rd) - rd_alpha2 = rd->alpha2; - else - rd_alpha2 = (char *) alpha2; +/* This has the logic which determines when a new request + * should be ignored. */ +static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, + const char *alpha2) +{ + /* All initial requests are respected */ + if (!last_request) + return 0; switch (set_by) { + case REGDOM_SET_BY_INIT: + return -EINVAL; case REGDOM_SET_BY_CORE: + /* + * Always respect new wireless core hints, should only happen + * when updating the world regulatory domain at init. + */ + return 0; case REGDOM_SET_BY_COUNTRY_IE: + if (unlikely(!is_an_alpha2(alpha2))) + return -EINVAL; + if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) { + if (last_request->wiphy != wiphy) { + /* + * Two cards with two APs claiming different + * different Country IE alpha2s. We could + * intersect them, but that seems unlikely + * to be correct. Reject second one for now. + */ + if (!alpha2_equal(alpha2, + cfg80211_regdomain->alpha2)) + return -EOPNOTSUPP; + return -EALREADY; + } + /* Two consecutive Country IE hints on the same wiphy. + * This should be picked up early by the driver/stack */ + if (WARN_ON(!alpha2_equal(cfg80211_regdomain->alpha2, + alpha2))) + return 0; + return -EALREADY; + } + return REG_INTERSECT; case REGDOM_SET_BY_DRIVER: + if (last_request->initiator == REGDOM_SET_BY_DRIVER) + return -EALREADY; + return 0; case REGDOM_SET_BY_USER: - request = kzalloc(sizeof(struct regulatory_request), - GFP_KERNEL); - if (!request) - return -ENOMEM; - - request->alpha2[0] = rd_alpha2[0]; - request->alpha2[1] = rd_alpha2[1]; - request->initiator = set_by; - request->wiphy = wiphy; - - list_add_tail(&request->list, ®ulatory_requests); - if (rd) - break; - r = call_crda(alpha2); -#ifndef CONFIG_WIRELESS_OLD_REGULATORY - if (r) - printk(KERN_ERR "cfg80211: Failed calling CRDA\n"); -#endif - break; - default: - r = -ENOTSUPP; - break; + if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) + return REG_INTERSECT; + /* If the user knows better the user should set the regdom + * to their country before the IE is picked up */ + if (last_request->initiator == REGDOM_SET_BY_USER && + last_request->intersect) + return -EOPNOTSUPP; + return 0; } - return r; + return -EINVAL; } -/* If rd is not NULL and if this call fails the caller must free it */ -int regulatory_hint(struct wiphy *wiphy, const char *alpha2, - struct ieee80211_regdomain *rd) +/* Caller must hold &cfg80211_drv_mutex */ +int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, + const char *alpha2, + u32 country_ie_checksum, + enum environment_cap env) { - int r; - BUG_ON(!rd && !alpha2); + struct regulatory_request *request; + bool intersect = false; + int r = 0; - mutex_lock(&cfg80211_drv_mutex); + r = ignore_request(wiphy, set_by, alpha2); - r = __regulatory_hint(wiphy, REGDOM_SET_BY_DRIVER, alpha2, rd); - if (r || !rd) - goto unlock_and_exit; + if (r == REG_INTERSECT) + intersect = true; + else if (r) + return r; - /* If the driver passed a regulatory domain we skipped asking - * userspace for one so we can now go ahead and set it */ - r = set_regdom(rd); + request = kzalloc(sizeof(struct regulatory_request), + GFP_KERNEL); + if (!request) + return -ENOMEM; + + request->alpha2[0] = alpha2[0]; + request->alpha2[1] = alpha2[1]; + request->initiator = set_by; + request->wiphy = wiphy; + request->intersect = intersect; + request->country_ie_checksum = country_ie_checksum; + request->country_ie_env = env; + + kfree(last_request); + last_request = request; + /* + * Note: When CONFIG_WIRELESS_OLD_REGULATORY is enabled + * AND if CRDA is NOT present nothing will happen, if someone + * wants to bother with 11d with OLD_REG you can add a timer. + * If after x amount of time nothing happens you can call: + * + * return set_regdom(country_ie_regdomain); + * + * to intersect with the static rd + */ + return call_crda(alpha2); +} + +void regulatory_hint(struct wiphy *wiphy, const char *alpha2) +{ + BUG_ON(!alpha2); -unlock_and_exit: + mutex_lock(&cfg80211_drv_mutex); + __regulatory_hint(wiphy, REGDOM_SET_BY_DRIVER, alpha2, 0, ENVIRON_ANY); mutex_unlock(&cfg80211_drv_mutex); - return r; } EXPORT_SYMBOL(regulatory_hint); +static bool reg_same_country_ie_hint(struct wiphy *wiphy, + u32 country_ie_checksum) +{ + if (!last_request->wiphy) + return false; + if (likely(last_request->wiphy != wiphy)) + return !country_ie_integrity_changes(country_ie_checksum); + /* We should not have let these through at this point, they + * should have been picked up earlier by the first alpha2 check + * on the device */ + if (WARN_ON(!country_ie_integrity_changes(country_ie_checksum))) + return true; + return false; +} + +void regulatory_hint_11d(struct wiphy *wiphy, + u8 *country_ie, + u8 country_ie_len) +{ + struct ieee80211_regdomain *rd = NULL; + char alpha2[2]; + u32 checksum = 0; + enum environment_cap env = ENVIRON_ANY; + + if (!last_request) + return; + + mutex_lock(&cfg80211_drv_mutex); + + /* IE len must be evenly divisible by 2 */ + if (country_ie_len & 0x01) + goto out; + + if (country_ie_len < IEEE80211_COUNTRY_IE_MIN_LEN) + goto out; + + /* Pending country IE processing, this can happen after we + * call CRDA and wait for a response if a beacon was received before + * we were able to process the last regulatory_hint_11d() call */ + if (country_ie_regdomain) + goto out; + + alpha2[0] = country_ie[0]; + alpha2[1] = country_ie[1]; + + if (country_ie[2] == 'I') + env = ENVIRON_INDOOR; + else if (country_ie[2] == 'O') + env = ENVIRON_OUTDOOR; + + /* We will run this for *every* beacon processed for the BSSID, so + * we optimize an early check to exit out early if we don't have to + * do anything */ + if (likely(last_request->wiphy)) { + struct cfg80211_registered_device *drv_last_ie; + + drv_last_ie = wiphy_to_dev(last_request->wiphy); + + /* Lets keep this simple -- we trust the first AP + * after we intersect with CRDA */ + if (likely(last_request->wiphy == wiphy)) { + /* Ignore IEs coming in on this wiphy with + * the same alpha2 and environment cap */ + if (likely(alpha2_equal(drv_last_ie->country_ie_alpha2, + alpha2) && + env == drv_last_ie->env)) { + goto out; + } + /* the wiphy moved on to another BSSID or the AP + * was reconfigured. XXX: We need to deal with the + * case where the user suspends and goes to goes + * to another country, and then gets IEs from an + * AP with different settings */ + goto out; + } else { + /* Ignore IEs coming in on two separate wiphys with + * the same alpha2 and environment cap */ + if (likely(alpha2_equal(drv_last_ie->country_ie_alpha2, + alpha2) && + env == drv_last_ie->env)) { + goto out; + } + /* We could potentially intersect though */ + goto out; + } + } + + rd = country_ie_2_rd(country_ie, country_ie_len, &checksum); + if (!rd) + goto out; + + /* This will not happen right now but we leave it here for the + * the future when we want to add suspend/resume support and having + * the user move to another country after doing so, or having the user + * move to another AP. Right now we just trust the first AP. This is why + * this is marked as likley(). If we hit this before we add this support + * we want to be informed of it as it would indicate a mistake in the + * current design */ + if (likely(WARN_ON(reg_same_country_ie_hint(wiphy, checksum)))) + goto out; + + /* We keep this around for when CRDA comes back with a response so + * we can intersect with that */ + country_ie_regdomain = rd; + + __regulatory_hint(wiphy, REGDOM_SET_BY_COUNTRY_IE, + country_ie_regdomain->alpha2, checksum, env); + +out: + mutex_unlock(&cfg80211_drv_mutex); +} +EXPORT_SYMBOL(regulatory_hint_11d); static void print_rd_rules(const struct ieee80211_regdomain *rd) { @@ -689,7 +1116,25 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd) static void print_regdomain(const struct ieee80211_regdomain *rd) { - if (is_world_regdom(rd->alpha2)) + if (is_intersected_alpha2(rd->alpha2)) { + struct wiphy *wiphy = NULL; + struct cfg80211_registered_device *drv; + + if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) { + if (last_request->wiphy) { + wiphy = last_request->wiphy; + drv = wiphy_to_dev(wiphy); + printk(KERN_INFO "cfg80211: Current regulatory " + "domain updated by AP to: %c%c\n", + drv->country_ie_alpha2[0], + drv->country_ie_alpha2[1]); + } else + printk(KERN_INFO "cfg80211: Current regulatory " + "domain intersected: \n"); + } else + printk(KERN_INFO "cfg80211: Current regulatory " + "intersected: \n"); + } else if (is_world_regdom(rd->alpha2)) printk(KERN_INFO "cfg80211: World regulatory " "domain updated:\n"); else { @@ -705,21 +1150,50 @@ static void print_regdomain(const struct ieee80211_regdomain *rd) print_rd_rules(rd); } -void print_regdomain_info(const struct ieee80211_regdomain *rd) +static void print_regdomain_info(const struct ieee80211_regdomain *rd) { printk(KERN_INFO "cfg80211: Regulatory domain: %c%c\n", rd->alpha2[0], rd->alpha2[1]); print_rd_rules(rd); } -static int __set_regdom(const struct ieee80211_regdomain *rd) +#ifdef CONFIG_CFG80211_REG_DEBUG +static void reg_country_ie_process_debug( + const struct ieee80211_regdomain *rd, + const struct ieee80211_regdomain *country_ie_regdomain, + const struct ieee80211_regdomain *intersected_rd) { - struct regulatory_request *request = NULL; + printk(KERN_DEBUG "cfg80211: Received country IE:\n"); + print_regdomain_info(country_ie_regdomain); + printk(KERN_DEBUG "cfg80211: CRDA thinks this should applied:\n"); + print_regdomain_info(rd); + if (intersected_rd) { + printk(KERN_DEBUG "cfg80211: We intersect both of these " + "and get:\n"); + print_regdomain_info(rd); + return; + } + printk(KERN_DEBUG "cfg80211: Intersection between both failed\n"); +} +#else +static inline void reg_country_ie_process_debug( + const struct ieee80211_regdomain *rd, + const struct ieee80211_regdomain *country_ie_regdomain, + const struct ieee80211_regdomain *intersected_rd) +{ +} +#endif +/* Takes ownership of rd only if it doesn't fail */ +static int __set_regdom(const struct ieee80211_regdomain *rd) +{ + const struct ieee80211_regdomain *intersected_rd = NULL; + struct cfg80211_registered_device *drv = NULL; + struct wiphy *wiphy = NULL; /* Some basic sanity checks first */ if (is_world_regdom(rd->alpha2)) { - if (WARN_ON(!__reg_is_valid_request(rd->alpha2, &request))) + if (WARN_ON(!reg_is_valid_request(rd->alpha2))) return -EINVAL; update_world_regdomain(rd); return 0; @@ -729,45 +1203,102 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) !is_unknown_alpha2(rd->alpha2)) return -EINVAL; - if (list_empty(®ulatory_requests)) + if (!last_request) return -EINVAL; - /* allow overriding the static definitions if CRDA is present */ - if (!is_old_static_regdom(cfg80211_regdomain) && - !regdom_changed(rd->alpha2)) - return -EINVAL; + /* Lets only bother proceeding on the same alpha2 if the current + * rd is non static (it means CRDA was present and was used last) + * and the pending request came in from a country IE */ + if (last_request->initiator != REGDOM_SET_BY_COUNTRY_IE) { + /* If someone else asked us to change the rd lets only bother + * checking if the alpha2 changes if CRDA was already called */ + if (!is_old_static_regdom(cfg80211_regdomain) && + !regdom_changed(rd->alpha2)) + return -EINVAL; + } + + wiphy = last_request->wiphy; /* Now lets set the regulatory domain, update all driver channels * and finally inform them of what we have done, in case they want * to review or adjust their own settings based on their own * internal EEPROM data */ - if (WARN_ON(!__reg_is_valid_request(rd->alpha2, &request))) + if (WARN_ON(!reg_is_valid_request(rd->alpha2))) return -EINVAL; - reset_regdomains(); + if (!is_valid_rd(rd)) { + printk(KERN_ERR "cfg80211: Invalid " + "regulatory domain detected:\n"); + print_regdomain_info(rd); + return -EINVAL; + } - /* Country IE parsing coming soon */ - switch (request->initiator) { - case REGDOM_SET_BY_CORE: - case REGDOM_SET_BY_DRIVER: - case REGDOM_SET_BY_USER: - if (!is_valid_rd(rd)) { - printk(KERN_ERR "cfg80211: Invalid " - "regulatory domain detected:\n"); - print_regdomain_info(rd); + if (!last_request->intersect) { + reset_regdomains(); + cfg80211_regdomain = rd; + return 0; + } + + /* Intersection requires a bit more work */ + + if (last_request->initiator != REGDOM_SET_BY_COUNTRY_IE) { + + intersected_rd = regdom_intersect(rd, cfg80211_regdomain); + if (!intersected_rd) return -EINVAL; - } - break; - case REGDOM_SET_BY_COUNTRY_IE: /* Not yet */ - WARN_ON(1); - default: - return -EOPNOTSUPP; + + /* We can trash what CRDA provided now */ + kfree(rd); + rd = NULL; + + reset_regdomains(); + cfg80211_regdomain = intersected_rd; + + return 0; } - /* Tada! */ - cfg80211_regdomain = rd; - request->granted = 1; + /* + * Country IE requests are handled a bit differently, we intersect + * the country IE rd with what CRDA believes that country should have + */ + + BUG_ON(!country_ie_regdomain); + + if (rd != country_ie_regdomain) { + /* Intersect what CRDA returned and our what we + * had built from the Country IE received */ + + intersected_rd = regdom_intersect(rd, country_ie_regdomain); + + reg_country_ie_process_debug(rd, country_ie_regdomain, + intersected_rd); + + kfree(country_ie_regdomain); + country_ie_regdomain = NULL; + } else { + /* This would happen when CRDA was not present and + * OLD_REGULATORY was enabled. We intersect our Country + * IE rd and what was set on cfg80211 originally */ + intersected_rd = regdom_intersect(rd, cfg80211_regdomain); + } + + if (!intersected_rd) + return -EINVAL; + + drv = wiphy_to_dev(wiphy); + + drv->country_ie_alpha2[0] = rd->alpha2[0]; + drv->country_ie_alpha2[1] = rd->alpha2[1]; + drv->env = last_request->country_ie_env; + + BUG_ON(intersected_rd == rd); + + kfree(rd); + rd = NULL; + + reset_regdomains(); + cfg80211_regdomain = intersected_rd; return 0; } @@ -775,52 +1306,41 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) /* Use this call to set the current regulatory domain. Conflicts with * multiple drivers can be ironed out later. Caller must've already - * kmalloc'd the rd structure. If this calls fails you should kfree() - * the passed rd. Caller must hold cfg80211_drv_mutex */ + * kmalloc'd the rd structure. Caller must hold cfg80211_drv_mutex */ int set_regdom(const struct ieee80211_regdomain *rd) { - struct regulatory_request *this_request = NULL, *prev_request = NULL; int r; - if (!list_empty(®ulatory_requests)) - prev_request = list_first_entry(®ulatory_requests, - struct regulatory_request, list); - /* Note that this doesn't update the wiphys, this is done below */ r = __set_regdom(rd); - if (r) + if (r) { + kfree(rd); return r; - - BUG_ON((!__reg_is_valid_request(rd->alpha2, &this_request))); - - /* The initial standard core update of the world regulatory domain, no - * need to keep that request info around if it didn't fail. */ - if (is_world_regdom(rd->alpha2) && - this_request->initiator == REGDOM_SET_BY_CORE && - this_request->granted) { - list_del(&this_request->list); - kfree(this_request); - this_request = NULL; - } - - /* Remove old requests, we only leave behind the last one */ - if (prev_request) { - list_del(&prev_request->list); - kfree(prev_request); - prev_request = NULL; } /* This would make this whole thing pointless */ - BUG_ON(rd != cfg80211_regdomain); + if (!last_request->intersect) + BUG_ON(rd != cfg80211_regdomain); /* update all wiphys now with the new established regulatory domain */ - update_all_wiphy_regulatory(this_request->initiator); + update_all_wiphy_regulatory(last_request->initiator); - print_regdomain(rd); + print_regdomain(cfg80211_regdomain); return r; } +/* Caller must hold cfg80211_drv_mutex */ +void reg_device_remove(struct wiphy *wiphy) +{ + if (!last_request || !last_request->wiphy) + return; + if (last_request->wiphy != wiphy) + return; + last_request->wiphy = NULL; + last_request->country_ie_env = ENVIRON_ANY; +} + int regulatory_init(void) { int err; @@ -838,13 +1358,13 @@ int regulatory_init(void) * you have CRDA you get it updated, otherwise you get * stuck with the static values. We ignore "EU" code as * that is not a valid ISO / IEC 3166 alpha2 */ - if (ieee80211_regdom[0] != 'E' && ieee80211_regdom[1] != 'U') + if (ieee80211_regdom[0] != 'E' || ieee80211_regdom[1] != 'U') err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE, - ieee80211_regdom, NULL); + ieee80211_regdom, 0, ENVIRON_ANY); #else cfg80211_regdomain = cfg80211_world_regdom; - err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE, "00", NULL); + err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE, "00", 0, ENVIRON_ANY); if (err) printk(KERN_ERR "cfg80211: calling CRDA failed - " "unable to update world regulatory domain, " @@ -856,16 +1376,15 @@ int regulatory_init(void) void regulatory_exit(void) { - struct regulatory_request *req, *req_tmp; - mutex_lock(&cfg80211_drv_mutex); reset_regdomains(); - list_for_each_entry_safe(req, req_tmp, ®ulatory_requests, list) { - list_del(&req->list); - kfree(req); - } + kfree(country_ie_regdomain); + country_ie_regdomain = NULL; + + kfree(last_request); + platform_device_unregister(reg_pdev); mutex_unlock(&cfg80211_drv_mutex); diff --git a/net/wireless/reg.h b/net/wireless/reg.h index a33362872f3..a76ea3ff7cd 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -1,13 +1,44 @@ #ifndef __NET_WIRELESS_REG_H #define __NET_WIRELESS_REG_H -extern struct mutex cfg80211_reg_mutex; bool is_world_regdom(const char *alpha2); bool reg_is_valid_request(const char *alpha2); +void reg_device_remove(struct wiphy *wiphy); + int regulatory_init(void); void regulatory_exit(void); int set_regdom(const struct ieee80211_regdomain *rd); +enum environment_cap { + ENVIRON_ANY, + ENVIRON_INDOOR, + ENVIRON_OUTDOOR, +}; + + +/** + * __regulatory_hint - hint to the wireless core a regulatory domain + * @wiphy: if the hint comes from country information from an AP, this + * is required to be set to the wiphy that received the information + * @alpha2: the ISO/IEC 3166 alpha2 being claimed the regulatory domain + * should be in. + * @country_ie_checksum: checksum of processed country IE, set this to 0 + * if the hint did not come from a country IE + * @country_ie_env: the environment the IE told us we are in, %ENVIRON_* + * + * The Wireless subsystem can use this function to hint to the wireless core + * what it believes should be the current regulatory domain by giving it an + * ISO/IEC 3166 alpha2 country code it knows its regulatory domain should be + * in. + * + * Returns zero if all went fine, %-EALREADY if a regulatory domain had + * already been set or other standard error codes. + * + */ +extern int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, + const char *alpha2, u32 country_ie_checksum, + enum environment_cap country_ie_env); + #endif /* __NET_WIRELESS_REG_H */ diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 29f820e1825..79a38287764 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -23,25 +23,20 @@ static inline struct cfg80211_registered_device *dev_to_rdev( return container_of(dev, struct cfg80211_registered_device, wiphy.dev); } -static ssize_t _show_index(struct device *dev, struct device_attribute *attr, - char *buf) -{ - return sprintf(buf, "%d\n", dev_to_rdev(dev)->idx); +#define SHOW_FMT(name, fmt, member) \ +static ssize_t name ## _show(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + return sprintf(buf, fmt "\n", dev_to_rdev(dev)->member); \ } -static ssize_t _show_permaddr(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - unsigned char *addr = dev_to_rdev(dev)->wiphy.perm_addr; - - return sprintf(buf, "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x\n", - addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); -} +SHOW_FMT(index, "%d", idx); +SHOW_FMT(macaddress, "%pM", wiphy.perm_addr); static struct device_attribute ieee80211_dev_attrs[] = { - __ATTR(index, S_IRUGO, _show_index, NULL), - __ATTR(macaddress, S_IRUGO, _show_permaddr, NULL), + __ATTR_RO(index), + __ATTR_RO(macaddress), {} }; diff --git a/net/wireless/util.c b/net/wireless/util.c index f54424693a3..e76cc28b034 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -7,6 +7,25 @@ #include <asm/bitops.h> #include "core.h" +struct ieee80211_rate * +ieee80211_get_response_rate(struct ieee80211_supported_band *sband, + u64 basic_rates, int bitrate) +{ + struct ieee80211_rate *result = &sband->bitrates[0]; + int i; + + for (i = 0; i < sband->n_bitrates; i++) { + if (!(basic_rates & BIT(i))) + continue; + if (sband->bitrates[i].bitrate > bitrate) + continue; + result = &sband->bitrates[i]; + } + + return result; +} +EXPORT_SYMBOL(ieee80211_get_response_rate); + int ieee80211_channel_to_frequency(int chan) { if (chan < 14) diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c new file mode 100644 index 00000000000..58e489fd4ae --- /dev/null +++ b/net/wireless/wext-compat.c @@ -0,0 +1,139 @@ +/* + * cfg80211 - wext compat code + * + * This is temporary code until all wireless functionality is migrated + * into cfg80211, when that happens all the exports here go away and + * we directly assign the wireless handlers of wireless interfaces. + * + * Copyright 2008 Johannes Berg <johannes@sipsolutions.net> + */ + +#include <linux/wireless.h> +#include <linux/nl80211.h> +#include <net/iw_handler.h> +#include <net/wireless.h> +#include <net/cfg80211.h> +#include "core.h" + +int cfg80211_wext_giwname(struct net_device *dev, + struct iw_request_info *info, + char *name, char *extra) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct ieee80211_supported_band *sband; + bool is_ht = false, is_a = false, is_b = false, is_g = false; + + if (!wdev) + return -EOPNOTSUPP; + + sband = wdev->wiphy->bands[IEEE80211_BAND_5GHZ]; + if (sband) { + is_a = true; + is_ht |= sband->ht_cap.ht_supported; + } + + sband = wdev->wiphy->bands[IEEE80211_BAND_2GHZ]; + if (sband) { + int i; + /* Check for mandatory rates */ + for (i = 0; i < sband->n_bitrates; i++) { + if (sband->bitrates[i].bitrate == 10) + is_b = true; + if (sband->bitrates[i].bitrate == 60) + is_g = true; + } + is_ht |= sband->ht_cap.ht_supported; + } + + strcpy(name, "IEEE 802.11"); + if (is_a) + strcat(name, "a"); + if (is_b) + strcat(name, "b"); + if (is_g) + strcat(name, "g"); + if (is_ht) + strcat(name, "n"); + + return 0; +} +EXPORT_SYMBOL(cfg80211_wext_giwname); + +int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info, + u32 *mode, char *extra) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev; + struct vif_params vifparams; + enum nl80211_iftype type; + + if (!wdev) + return -EOPNOTSUPP; + + rdev = wiphy_to_dev(wdev->wiphy); + + if (!rdev->ops->change_virtual_intf) + return -EOPNOTSUPP; + + /* don't support changing VLANs, you just re-create them */ + if (wdev->iftype == NL80211_IFTYPE_AP_VLAN) + return -EOPNOTSUPP; + + switch (*mode) { + case IW_MODE_INFRA: + type = NL80211_IFTYPE_STATION; + break; + case IW_MODE_ADHOC: + type = NL80211_IFTYPE_ADHOC; + break; + case IW_MODE_REPEAT: + type = NL80211_IFTYPE_WDS; + break; + case IW_MODE_MONITOR: + type = NL80211_IFTYPE_MONITOR; + break; + default: + return -EINVAL; + } + + memset(&vifparams, 0, sizeof(vifparams)); + + return rdev->ops->change_virtual_intf(wdev->wiphy, dev->ifindex, type, + NULL, &vifparams); +} +EXPORT_SYMBOL(cfg80211_wext_siwmode); + +int cfg80211_wext_giwmode(struct net_device *dev, struct iw_request_info *info, + u32 *mode, char *extra) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + + if (!wdev) + return -EOPNOTSUPP; + + switch (wdev->iftype) { + case NL80211_IFTYPE_AP: + *mode = IW_MODE_MASTER; + break; + case NL80211_IFTYPE_STATION: + *mode = IW_MODE_INFRA; + break; + case NL80211_IFTYPE_ADHOC: + *mode = IW_MODE_ADHOC; + break; + case NL80211_IFTYPE_MONITOR: + *mode = IW_MODE_MONITOR; + break; + case NL80211_IFTYPE_WDS: + *mode = IW_MODE_REPEAT; + break; + case NL80211_IFTYPE_AP_VLAN: + *mode = IW_MODE_SECOND; /* FIXME */ + break; + default: + *mode = IW_MODE_AUTO; + break; + } + return 0; +} +EXPORT_SYMBOL(cfg80211_wext_giwmode); diff --git a/net/wireless/wext.c b/net/wireless/wext.c index d98ffb75119..e49a2d1ef1e 100644 --- a/net/wireless/wext.c +++ b/net/wireless/wext.c @@ -64,7 +64,7 @@ * o Remove spy_offset from struct iw_handler_def * o Start deprecating dev->get_wireless_stats, output a warning * o If IW_QUAL_DBM is set, show dBm values in /proc/net/wireless - * o Don't loose INVALID/DBM flags when clearing UPDATED flags (iwstats) + * o Don't lose INVALID/DBM flags when clearing UPDATED flags (iwstats) * * v8 - 17.02.06 - Jean II * o RtNetlink requests support (SET/GET) diff --git a/net/x25/sysctl_net_x25.c b/net/x25/sysctl_net_x25.c index 6ebda25c24e..a5d3416522d 100644 --- a/net/x25/sysctl_net_x25.c +++ b/net/x25/sysctl_net_x25.c @@ -24,8 +24,8 @@ static struct ctl_table x25_table[] = { .data = &sysctl_x25_restart_request_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer, }, @@ -35,8 +35,8 @@ static struct ctl_table x25_table[] = { .data = &sysctl_x25_call_request_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer, }, @@ -46,8 +46,8 @@ static struct ctl_table x25_table[] = { .data = &sysctl_x25_reset_request_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer, }, @@ -57,8 +57,8 @@ static struct ctl_table x25_table[] = { .data = &sysctl_x25_clear_request_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer, }, @@ -68,8 +68,8 @@ static struct ctl_table x25_table[] = { .data = &sysctl_x25_ack_holdback_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, + .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer, }, @@ -79,7 +79,7 @@ static struct ctl_table x25_table[] = { .data = &sysctl_x25_forward, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { 0, }, }; diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index 0f439a72cca..c631047e1b2 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -3,8 +3,8 @@ # obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \ - xfrm_input.o xfrm_output.o xfrm_algo.o + xfrm_input.o xfrm_output.o xfrm_algo.o \ + xfrm_sysctl.o obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o obj-$(CONFIG_XFRM_USER) += xfrm_user.o obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o - diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 75279402ccf..b4a13178fb4 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -104,6 +104,7 @@ EXPORT_SYMBOL(xfrm_prepare_input); int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) { + struct net *net = dev_net(skb->dev); int err; __be32 seq; struct xfrm_state *x; @@ -127,7 +128,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) sp = secpath_dup(skb->sp); if (!sp) { - XFRM_INC_STATS(LINUX_MIB_XFRMINERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR); goto drop; } if (skb->sp) @@ -141,19 +142,19 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) seq = 0; if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) { - XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); goto drop; } do { if (skb->sp->len == XFRM_MAX_DEPTH) { - XFRM_INC_STATS(LINUX_MIB_XFRMINBUFFERERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); goto drop; } - x = xfrm_state_lookup(daddr, spi, nexthdr, family); + x = xfrm_state_lookup(net, daddr, spi, nexthdr, family); if (x == NULL) { - XFRM_INC_STATS(LINUX_MIB_XFRMINNOSTATES); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES); xfrm_audit_state_notfound(skb, family, spi, seq); goto drop; } @@ -162,22 +163,22 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) spin_lock(&x->lock); if (unlikely(x->km.state != XFRM_STATE_VALID)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEINVALID); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEINVALID); goto drop_unlock; } if ((x->encap ? x->encap->encap_type : 0) != encap_type) { - XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEMISMATCH); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH); goto drop_unlock; } if (x->props.replay_window && xfrm_replay_check(x, skb, seq)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINSTATESEQERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR); goto drop_unlock; } if (xfrm_state_check_expire(x)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEEXPIRED); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEEXPIRED); goto drop_unlock; } @@ -198,7 +199,7 @@ resume: x->type->proto); x->stats.integrity_failed++; } - XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEPROTOERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR); goto drop_unlock; } @@ -224,7 +225,7 @@ resume: } if (inner_mode->input(x, skb)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEMODEERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR); goto drop; } @@ -242,7 +243,7 @@ resume: err = xfrm_parse_spi(skb, nexthdr, &spi, &seq); if (err < 0) { - XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); goto drop; } } while (!err); diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index dc50f1e71f7..c235597ba8d 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -41,6 +41,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err) { struct dst_entry *dst = skb->dst; struct xfrm_state *x = dst->xfrm; + struct net *net = xs_net(x); if (err <= 0) goto resume; @@ -48,33 +49,33 @@ static int xfrm_output_one(struct sk_buff *skb, int err) do { err = xfrm_state_check_space(x, skb); if (err) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); goto error_nolock; } err = x->outer_mode->output(x, skb); if (err) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTSTATEMODEERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEMODEERROR); goto error_nolock; } spin_lock_bh(&x->lock); err = xfrm_state_check_expire(x); if (err) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTSTATEEXPIRED); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEEXPIRED); goto error; } if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { XFRM_SKB_CB(skb)->seq.output = ++x->replay.oseq; if (unlikely(x->replay.oseq == 0)) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTSTATESEQERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATESEQERROR); x->replay.oseq--; xfrm_audit_state_replay_overflow(x, skb); err = -EOVERFLOW; goto error; } - if (xfrm_aevent_is_on()) + if (xfrm_aevent_is_on(net)) xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } @@ -89,12 +90,12 @@ static int xfrm_output_one(struct sk_buff *skb, int err) resume: if (err) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTSTATEPROTOERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEPROTOERROR); goto error_nolock; } if (!(skb->dst = dst_pop(dst))) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); err = -EHOSTUNREACH; goto error_nolock; } @@ -178,6 +179,7 @@ static int xfrm_output_gso(struct sk_buff *skb) int xfrm_output(struct sk_buff *skb) { + struct net *net = dev_net(skb->dst->dev); int err; if (skb_is_gso(skb)) @@ -186,7 +188,7 @@ int xfrm_output(struct sk_buff *skb) if (skb->ip_summed == CHECKSUM_PARTIAL) { err = skb_checksum_help(skb); if (err) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); kfree_skb(skb); return err; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index fb216c9adf8..9c068ab3a83 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -34,28 +34,16 @@ #include "xfrm_hash.h" -int sysctl_xfrm_larval_drop __read_mostly = 1; - -#ifdef CONFIG_XFRM_STATISTICS -DEFINE_SNMP_STAT(struct linux_xfrm_mib, xfrm_statistics) __read_mostly; -EXPORT_SYMBOL(xfrm_statistics); -#endif - DEFINE_MUTEX(xfrm_cfg_mutex); EXPORT_SYMBOL(xfrm_cfg_mutex); static DEFINE_RWLOCK(xfrm_policy_lock); -static struct list_head xfrm_policy_all; -unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2]; -EXPORT_SYMBOL(xfrm_policy_count); - static DEFINE_RWLOCK(xfrm_policy_afinfo_lock); static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; static struct kmem_cache *xfrm_dst_cache __read_mostly; -static struct work_struct xfrm_policy_gc_work; static HLIST_HEAD(xfrm_policy_gc_list); static DEFINE_SPINLOCK(xfrm_policy_gc_lock); @@ -63,6 +51,9 @@ static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family); static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo); static void xfrm_init_pmtu(struct dst_entry *dst); +static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, + int dir); + static inline int __xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl) { @@ -97,7 +88,7 @@ int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl, return 0; } -static inline struct dst_entry *__xfrm_dst_lookup(int tos, +static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, xfrm_address_t *saddr, xfrm_address_t *daddr, int family) @@ -109,7 +100,7 @@ static inline struct dst_entry *__xfrm_dst_lookup(int tos, if (unlikely(afinfo == NULL)) return ERR_PTR(-EAFNOSUPPORT); - dst = afinfo->dst_lookup(tos, saddr, daddr); + dst = afinfo->dst_lookup(net, tos, saddr, daddr); xfrm_policy_put_afinfo(afinfo); @@ -121,6 +112,7 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos, xfrm_address_t *prev_daddr, int family) { + struct net *net = xs_net(x); xfrm_address_t *saddr = &x->props.saddr; xfrm_address_t *daddr = &x->id.daddr; struct dst_entry *dst; @@ -134,7 +126,7 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos, daddr = x->coaddr; } - dst = __xfrm_dst_lookup(tos, saddr, daddr, family); + dst = __xfrm_dst_lookup(net, tos, saddr, daddr, family); if (!IS_ERR(dst)) { if (prev_saddr != saddr) @@ -229,13 +221,14 @@ expired: * SPD calls. */ -struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp) +struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp) { struct xfrm_policy *policy; policy = kzalloc(sizeof(struct xfrm_policy), gfp); if (policy) { + write_pnet(&policy->xp_net, net); INIT_LIST_HEAD(&policy->walk.all); INIT_HLIST_NODE(&policy->bydst); INIT_HLIST_NODE(&policy->byidx); @@ -296,6 +289,7 @@ static void xfrm_policy_gc_task(struct work_struct *work) hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst) xfrm_policy_gc_kill(policy); } +static DECLARE_WORK(xfrm_policy_gc_work, xfrm_policy_gc_task); /* Rule must be locked. Release descentant resources, announce * entry dead. The rule must be unlinked from lists to the moment. @@ -322,38 +316,29 @@ static void xfrm_policy_kill(struct xfrm_policy *policy) schedule_work(&xfrm_policy_gc_work); } -struct xfrm_policy_hash { - struct hlist_head *table; - unsigned int hmask; -}; - -static struct hlist_head xfrm_policy_inexact[XFRM_POLICY_MAX*2]; -static struct xfrm_policy_hash xfrm_policy_bydst[XFRM_POLICY_MAX*2] __read_mostly; -static struct hlist_head *xfrm_policy_byidx __read_mostly; -static unsigned int xfrm_idx_hmask __read_mostly; static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024; -static inline unsigned int idx_hash(u32 index) +static inline unsigned int idx_hash(struct net *net, u32 index) { - return __idx_hash(index, xfrm_idx_hmask); + return __idx_hash(index, net->xfrm.policy_idx_hmask); } -static struct hlist_head *policy_hash_bysel(struct xfrm_selector *sel, unsigned short family, int dir) +static struct hlist_head *policy_hash_bysel(struct net *net, struct xfrm_selector *sel, unsigned short family, int dir) { - unsigned int hmask = xfrm_policy_bydst[dir].hmask; + unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; unsigned int hash = __sel_hash(sel, family, hmask); return (hash == hmask + 1 ? - &xfrm_policy_inexact[dir] : - xfrm_policy_bydst[dir].table + hash); + &net->xfrm.policy_inexact[dir] : + net->xfrm.policy_bydst[dir].table + hash); } -static struct hlist_head *policy_hash_direct(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir) +static struct hlist_head *policy_hash_direct(struct net *net, xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir) { - unsigned int hmask = xfrm_policy_bydst[dir].hmask; + unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; unsigned int hash = __addr_hash(daddr, saddr, family, hmask); - return xfrm_policy_bydst[dir].table + hash; + return net->xfrm.policy_bydst[dir].table + hash; } static void xfrm_dst_hash_transfer(struct hlist_head *list, @@ -408,12 +393,12 @@ static unsigned long xfrm_new_hash_mask(unsigned int old_hmask) return ((old_hmask + 1) << 1) - 1; } -static void xfrm_bydst_resize(int dir) +static void xfrm_bydst_resize(struct net *net, int dir) { - unsigned int hmask = xfrm_policy_bydst[dir].hmask; + unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; unsigned int nhashmask = xfrm_new_hash_mask(hmask); unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); - struct hlist_head *odst = xfrm_policy_bydst[dir].table; + struct hlist_head *odst = net->xfrm.policy_bydst[dir].table; struct hlist_head *ndst = xfrm_hash_alloc(nsize); int i; @@ -425,20 +410,20 @@ static void xfrm_bydst_resize(int dir) for (i = hmask; i >= 0; i--) xfrm_dst_hash_transfer(odst + i, ndst, nhashmask); - xfrm_policy_bydst[dir].table = ndst; - xfrm_policy_bydst[dir].hmask = nhashmask; + net->xfrm.policy_bydst[dir].table = ndst; + net->xfrm.policy_bydst[dir].hmask = nhashmask; write_unlock_bh(&xfrm_policy_lock); xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head)); } -static void xfrm_byidx_resize(int total) +static void xfrm_byidx_resize(struct net *net, int total) { - unsigned int hmask = xfrm_idx_hmask; + unsigned int hmask = net->xfrm.policy_idx_hmask; unsigned int nhashmask = xfrm_new_hash_mask(hmask); unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); - struct hlist_head *oidx = xfrm_policy_byidx; + struct hlist_head *oidx = net->xfrm.policy_byidx; struct hlist_head *nidx = xfrm_hash_alloc(nsize); int i; @@ -450,18 +435,18 @@ static void xfrm_byidx_resize(int total) for (i = hmask; i >= 0; i--) xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask); - xfrm_policy_byidx = nidx; - xfrm_idx_hmask = nhashmask; + net->xfrm.policy_byidx = nidx; + net->xfrm.policy_idx_hmask = nhashmask; write_unlock_bh(&xfrm_policy_lock); xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head)); } -static inline int xfrm_bydst_should_resize(int dir, int *total) +static inline int xfrm_bydst_should_resize(struct net *net, int dir, int *total) { - unsigned int cnt = xfrm_policy_count[dir]; - unsigned int hmask = xfrm_policy_bydst[dir].hmask; + unsigned int cnt = net->xfrm.policy_count[dir]; + unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; if (total) *total += cnt; @@ -473,9 +458,9 @@ static inline int xfrm_bydst_should_resize(int dir, int *total) return 0; } -static inline int xfrm_byidx_should_resize(int total) +static inline int xfrm_byidx_should_resize(struct net *net, int total) { - unsigned int hmask = xfrm_idx_hmask; + unsigned int hmask = net->xfrm.policy_idx_hmask; if ((hmask + 1) < xfrm_policy_hashmax && total > hmask) @@ -487,41 +472,40 @@ static inline int xfrm_byidx_should_resize(int total) void xfrm_spd_getinfo(struct xfrmk_spdinfo *si) { read_lock_bh(&xfrm_policy_lock); - si->incnt = xfrm_policy_count[XFRM_POLICY_IN]; - si->outcnt = xfrm_policy_count[XFRM_POLICY_OUT]; - si->fwdcnt = xfrm_policy_count[XFRM_POLICY_FWD]; - si->inscnt = xfrm_policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX]; - si->outscnt = xfrm_policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX]; - si->fwdscnt = xfrm_policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX]; - si->spdhcnt = xfrm_idx_hmask; + si->incnt = init_net.xfrm.policy_count[XFRM_POLICY_IN]; + si->outcnt = init_net.xfrm.policy_count[XFRM_POLICY_OUT]; + si->fwdcnt = init_net.xfrm.policy_count[XFRM_POLICY_FWD]; + si->inscnt = init_net.xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX]; + si->outscnt = init_net.xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX]; + si->fwdscnt = init_net.xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX]; + si->spdhcnt = init_net.xfrm.policy_idx_hmask; si->spdhmcnt = xfrm_policy_hashmax; read_unlock_bh(&xfrm_policy_lock); } EXPORT_SYMBOL(xfrm_spd_getinfo); static DEFINE_MUTEX(hash_resize_mutex); -static void xfrm_hash_resize(struct work_struct *__unused) +static void xfrm_hash_resize(struct work_struct *work) { + struct net *net = container_of(work, struct net, xfrm.policy_hash_work); int dir, total; mutex_lock(&hash_resize_mutex); total = 0; for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { - if (xfrm_bydst_should_resize(dir, &total)) - xfrm_bydst_resize(dir); + if (xfrm_bydst_should_resize(net, dir, &total)) + xfrm_bydst_resize(net, dir); } - if (xfrm_byidx_should_resize(total)) - xfrm_byidx_resize(total); + if (xfrm_byidx_should_resize(net, total)) + xfrm_byidx_resize(net, total); mutex_unlock(&hash_resize_mutex); } -static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize); - /* Generate new index... KAME seems to generate them ordered by cost * of an absolute inpredictability of ordering of rules. This will not pass. */ -static u32 xfrm_gen_index(u8 type, int dir) +static u32 xfrm_gen_index(struct net *net, int dir) { static u32 idx_generator; @@ -536,7 +520,7 @@ static u32 xfrm_gen_index(u8 type, int dir) idx_generator += 8; if (idx == 0) idx = 8; - list = xfrm_policy_byidx + idx_hash(idx); + list = net->xfrm.policy_byidx + idx_hash(net, idx); found = 0; hlist_for_each_entry(p, entry, list, byidx) { if (p->index == idx) { @@ -566,6 +550,7 @@ static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) { + struct net *net = xp_net(policy); struct xfrm_policy *pol; struct xfrm_policy *delpol; struct hlist_head *chain; @@ -573,7 +558,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) struct dst_entry *gc_list; write_lock_bh(&xfrm_policy_lock); - chain = policy_hash_bysel(&policy->selector, policy->family, dir); + chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); delpol = NULL; newpos = NULL; hlist_for_each_entry(pol, entry, chain, bydst) { @@ -600,27 +585,23 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) else hlist_add_head(&policy->bydst, chain); xfrm_pol_hold(policy); - xfrm_policy_count[dir]++; + net->xfrm.policy_count[dir]++; atomic_inc(&flow_cache_genid); - if (delpol) { - hlist_del(&delpol->bydst); - hlist_del(&delpol->byidx); - list_del(&delpol->walk.all); - xfrm_policy_count[dir]--; - } - policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir); - hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index)); + if (delpol) + __xfrm_policy_unlink(delpol, dir); + policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir); + hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index)); policy->curlft.add_time = get_seconds(); policy->curlft.use_time = 0; if (!mod_timer(&policy->timer, jiffies + HZ)) xfrm_pol_hold(policy); - list_add(&policy->walk.all, &xfrm_policy_all); + list_add(&policy->walk.all, &net->xfrm.policy_all); write_unlock_bh(&xfrm_policy_lock); if (delpol) xfrm_policy_kill(delpol); - else if (xfrm_bydst_should_resize(dir, NULL)) - schedule_work(&xfrm_hash_work); + else if (xfrm_bydst_should_resize(net, dir, NULL)) + schedule_work(&net->xfrm.policy_hash_work); read_lock_bh(&xfrm_policy_lock); gc_list = NULL; @@ -654,7 +635,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) } EXPORT_SYMBOL(xfrm_policy_insert); -struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, +struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u8 type, int dir, struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx, int delete, int *err) @@ -665,7 +646,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, *err = 0; write_lock_bh(&xfrm_policy_lock); - chain = policy_hash_bysel(sel, sel->family, dir); + chain = policy_hash_bysel(net, sel, sel->family, dir); ret = NULL; hlist_for_each_entry(pol, entry, chain, bydst) { if (pol->type == type && @@ -679,10 +660,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, write_unlock_bh(&xfrm_policy_lock); return pol; } - hlist_del(&pol->bydst); - hlist_del(&pol->byidx); - list_del(&pol->walk.all); - xfrm_policy_count[dir]--; + __xfrm_policy_unlink(pol, dir); } ret = pol; break; @@ -698,8 +676,8 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, } EXPORT_SYMBOL(xfrm_policy_bysel_ctx); -struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete, - int *err) +struct xfrm_policy *xfrm_policy_byid(struct net *net, u8 type, int dir, u32 id, + int delete, int *err) { struct xfrm_policy *pol, *ret; struct hlist_head *chain; @@ -711,7 +689,7 @@ struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete, *err = 0; write_lock_bh(&xfrm_policy_lock); - chain = xfrm_policy_byidx + idx_hash(id); + chain = net->xfrm.policy_byidx + idx_hash(net, id); ret = NULL; hlist_for_each_entry(pol, entry, chain, byidx) { if (pol->type == type && pol->index == id) { @@ -723,10 +701,7 @@ struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete, write_unlock_bh(&xfrm_policy_lock); return pol; } - hlist_del(&pol->bydst); - hlist_del(&pol->byidx); - list_del(&pol->walk.all); - xfrm_policy_count[dir]--; + __xfrm_policy_unlink(pol, dir); } ret = pol; break; @@ -744,7 +719,7 @@ EXPORT_SYMBOL(xfrm_policy_byid); #ifdef CONFIG_SECURITY_NETWORK_XFRM static inline int -xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) +xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info) { int dir, err = 0; @@ -754,7 +729,7 @@ xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) int i; hlist_for_each_entry(pol, entry, - &xfrm_policy_inexact[dir], bydst) { + &net->xfrm.policy_inexact[dir], bydst) { if (pol->type != type) continue; err = security_xfrm_policy_delete(pol->security); @@ -766,9 +741,9 @@ xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) return err; } } - for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { + for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) { hlist_for_each_entry(pol, entry, - xfrm_policy_bydst[dir].table + i, + net->xfrm.policy_bydst[dir].table + i, bydst) { if (pol->type != type) continue; @@ -788,36 +763,33 @@ xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) } #else static inline int -xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) +xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info) { return 0; } #endif -int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info) +int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) { int dir, err = 0; write_lock_bh(&xfrm_policy_lock); - err = xfrm_policy_flush_secctx_check(type, audit_info); + err = xfrm_policy_flush_secctx_check(net, type, audit_info); if (err) goto out; for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { struct xfrm_policy *pol; struct hlist_node *entry; - int i, killed; + int i; - killed = 0; again1: hlist_for_each_entry(pol, entry, - &xfrm_policy_inexact[dir], bydst) { + &net->xfrm.policy_inexact[dir], bydst) { if (pol->type != type) continue; - hlist_del(&pol->bydst); - hlist_del(&pol->byidx); - list_del(&pol->walk.all); + __xfrm_policy_unlink(pol, dir); write_unlock_bh(&xfrm_policy_lock); xfrm_audit_policy_delete(pol, 1, audit_info->loginuid, @@ -825,22 +797,19 @@ int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info) audit_info->secid); xfrm_policy_kill(pol); - killed++; write_lock_bh(&xfrm_policy_lock); goto again1; } - for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { + for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) { again2: hlist_for_each_entry(pol, entry, - xfrm_policy_bydst[dir].table + i, + net->xfrm.policy_bydst[dir].table + i, bydst) { if (pol->type != type) continue; - hlist_del(&pol->bydst); - hlist_del(&pol->byidx); - list_del(&pol->walk.all); + __xfrm_policy_unlink(pol, dir); write_unlock_bh(&xfrm_policy_lock); xfrm_audit_policy_delete(pol, 1, @@ -848,14 +817,12 @@ int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info) audit_info->sessionid, audit_info->secid); xfrm_policy_kill(pol); - killed++; write_lock_bh(&xfrm_policy_lock); goto again2; } } - xfrm_policy_count[dir] -= killed; } atomic_inc(&flow_cache_genid); out: @@ -864,7 +831,7 @@ out: } EXPORT_SYMBOL(xfrm_policy_flush); -int xfrm_policy_walk(struct xfrm_policy_walk *walk, +int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk, int (*func)(struct xfrm_policy *, int, int, void*), void *data) { @@ -881,10 +848,10 @@ int xfrm_policy_walk(struct xfrm_policy_walk *walk, write_lock_bh(&xfrm_policy_lock); if (list_empty(&walk->walk.all)) - x = list_first_entry(&xfrm_policy_all, struct xfrm_policy_walk_entry, all); + x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all); else x = list_entry(&walk->walk.all, struct xfrm_policy_walk_entry, all); - list_for_each_entry_from(x, &xfrm_policy_all, all) { + list_for_each_entry_from(x, &net->xfrm.policy_all, all) { if (x->dead) continue; pol = container_of(x, struct xfrm_policy, walk); @@ -953,7 +920,8 @@ static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl, return ret; } -static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, +static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, + struct flowi *fl, u16 family, u8 dir) { int err; @@ -969,7 +937,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, return NULL; read_lock_bh(&xfrm_policy_lock); - chain = policy_hash_direct(daddr, saddr, family, dir); + chain = policy_hash_direct(net, daddr, saddr, family, dir); ret = NULL; hlist_for_each_entry(pol, entry, chain, bydst) { err = xfrm_policy_match(pol, fl, type, family, dir); @@ -986,7 +954,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, break; } } - chain = &xfrm_policy_inexact[dir]; + chain = &net->xfrm.policy_inexact[dir]; hlist_for_each_entry(pol, entry, chain, bydst) { err = xfrm_policy_match(pol, fl, type, family, dir); if (err) { @@ -1009,14 +977,14 @@ fail: return ret; } -static int xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, - void **objp, atomic_t **obj_refp) +static int xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, + u8 dir, void **objp, atomic_t **obj_refp) { struct xfrm_policy *pol; int err = 0; #ifdef CONFIG_XFRM_SUB_POLICY - pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB, fl, family, dir); + pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir); if (IS_ERR(pol)) { err = PTR_ERR(pol); pol = NULL; @@ -1024,7 +992,7 @@ static int xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, if (pol || err) goto end; #endif - pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir); + pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir); if (IS_ERR(pol)) { err = PTR_ERR(pol); pol = NULL; @@ -1083,29 +1051,32 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc static void __xfrm_policy_link(struct xfrm_policy *pol, int dir) { - struct hlist_head *chain = policy_hash_bysel(&pol->selector, + struct net *net = xp_net(pol); + struct hlist_head *chain = policy_hash_bysel(net, &pol->selector, pol->family, dir); - list_add(&pol->walk.all, &xfrm_policy_all); + list_add(&pol->walk.all, &net->xfrm.policy_all); hlist_add_head(&pol->bydst, chain); - hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index)); - xfrm_policy_count[dir]++; + hlist_add_head(&pol->byidx, net->xfrm.policy_byidx+idx_hash(net, pol->index)); + net->xfrm.policy_count[dir]++; xfrm_pol_hold(pol); - if (xfrm_bydst_should_resize(dir, NULL)) - schedule_work(&xfrm_hash_work); + if (xfrm_bydst_should_resize(net, dir, NULL)) + schedule_work(&net->xfrm.policy_hash_work); } static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, int dir) { + struct net *net = xp_net(pol); + if (hlist_unhashed(&pol->bydst)) return NULL; hlist_del(&pol->bydst); hlist_del(&pol->byidx); list_del(&pol->walk.all); - xfrm_policy_count[dir]--; + net->xfrm.policy_count[dir]--; return pol; } @@ -1127,6 +1098,7 @@ EXPORT_SYMBOL(xfrm_policy_delete); int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) { + struct net *net = xp_net(pol); struct xfrm_policy *old_pol; #ifdef CONFIG_XFRM_SUB_POLICY @@ -1139,7 +1111,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) sk->sk_policy[dir] = pol; if (pol) { pol->curlft.add_time = get_seconds(); - pol->index = xfrm_gen_index(pol->type, XFRM_POLICY_MAX+dir); + pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir); __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir); } if (old_pol) @@ -1154,7 +1126,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir) { - struct xfrm_policy *newp = xfrm_policy_alloc(GFP_ATOMIC); + struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC); if (newp) { newp->selector = old->selector; @@ -1194,7 +1166,7 @@ int __xfrm_sk_clone_policy(struct sock *sk) } static int -xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote, +xfrm_get_saddr(struct net *net, xfrm_address_t *local, xfrm_address_t *remote, unsigned short family) { int err; @@ -1202,7 +1174,7 @@ xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote, if (unlikely(afinfo == NULL)) return -EINVAL; - err = afinfo->get_saddr(local, remote); + err = afinfo->get_saddr(net, local, remote); xfrm_policy_put_afinfo(afinfo); return err; } @@ -1214,6 +1186,7 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, struct xfrm_state **xfrm, unsigned short family) { + struct net *net = xp_net(policy); int nx; int i, error; xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family); @@ -1232,7 +1205,7 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, local = &tmpl->saddr; family = tmpl->encap_family; if (xfrm_addr_any(local, family)) { - error = xfrm_get_saddr(&tmp, remote, family); + error = xfrm_get_saddr(net, &tmp, remote, family); if (error) goto fail; local = &tmp; @@ -1546,7 +1519,7 @@ static int stale_bundle(struct dst_entry *dst); * At the moment we eat a raw IP route. Mostly to speed up lookups * on interfaces with disabled IPsec. */ -int __xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, +int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl, struct sock *sk, int flags) { struct xfrm_policy *policy; @@ -1576,7 +1549,7 @@ restart: policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); err = PTR_ERR(policy); if (IS_ERR(policy)) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); goto dropdst; } } @@ -1584,14 +1557,14 @@ restart: if (!policy) { /* To accelerate a bit... */ if ((dst_orig->flags & DST_NOXFRM) || - !xfrm_policy_count[XFRM_POLICY_OUT]) + !net->xfrm.policy_count[XFRM_POLICY_OUT]) goto nopol; - policy = flow_cache_lookup(fl, dst_orig->ops->family, + policy = flow_cache_lookup(net, fl, dst_orig->ops->family, dir, xfrm_policy_lookup); err = PTR_ERR(policy); if (IS_ERR(policy)) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); goto dropdst; } } @@ -1614,7 +1587,7 @@ restart: default: case XFRM_POLICY_BLOCK: /* Prohibit the flow */ - XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK); err = -EPERM; goto error; @@ -1634,7 +1607,7 @@ restart: */ dst = xfrm_find_bundle(fl, policy, family); if (IS_ERR(dst)) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR); err = PTR_ERR(dst); goto error; } @@ -1644,17 +1617,18 @@ restart: #ifdef CONFIG_XFRM_SUB_POLICY if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { - pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, + pols[1] = xfrm_policy_lookup_bytype(net, + XFRM_POLICY_TYPE_MAIN, fl, family, XFRM_POLICY_OUT); if (pols[1]) { if (IS_ERR(pols[1])) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); err = PTR_ERR(pols[1]); goto error; } if (pols[1]->action == XFRM_POLICY_BLOCK) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK); err = -EPERM; goto error; } @@ -1681,27 +1655,27 @@ restart: if (unlikely(nx<0)) { err = nx; - if (err == -EAGAIN && sysctl_xfrm_larval_drop) { + if (err == -EAGAIN && net->xfrm.sysctl_larval_drop) { /* EREMOTE tells the caller to generate * a one-shot blackhole route. */ - XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); xfrm_pol_put(policy); return -EREMOTE; } if (err == -EAGAIN && (flags & XFRM_LOOKUP_WAIT)) { DECLARE_WAITQUEUE(wait, current); - add_wait_queue(&km_waitq, &wait); + add_wait_queue(&net->xfrm.km_waitq, &wait); set_current_state(TASK_INTERRUPTIBLE); schedule(); set_current_state(TASK_RUNNING); - remove_wait_queue(&km_waitq, &wait); + remove_wait_queue(&net->xfrm.km_waitq, &wait); nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family); if (nx == -EAGAIN && signal_pending(current)) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); err = -ERESTART; goto error; } @@ -1713,7 +1687,7 @@ restart: err = nx; } if (err < 0) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); goto error; } } @@ -1726,7 +1700,7 @@ restart: dst = xfrm_bundle_create(policy, xfrm, nx, fl, dst_orig); err = PTR_ERR(dst); if (IS_ERR(dst)) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLEGENERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR); goto error; } @@ -1747,9 +1721,9 @@ restart: dst_free(dst); if (pol_dead) - XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLDEAD); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLDEAD); else - XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR); err = -EHOSTUNREACH; goto error; } @@ -1761,7 +1735,7 @@ restart: if (unlikely(err)) { write_unlock_bh(&policy->lock); dst_free(dst); - XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR); goto error; } @@ -1790,10 +1764,10 @@ nopol: } EXPORT_SYMBOL(__xfrm_lookup); -int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, +int xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl, struct sock *sk, int flags) { - int err = __xfrm_lookup(dst_p, fl, sk, flags); + int err = __xfrm_lookup(net, dst_p, fl, sk, flags); if (err == -EREMOTE) { dst_release(*dst_p); @@ -1901,6 +1875,7 @@ static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, unsigned short family) { + struct net *net = dev_net(skb->dev); struct xfrm_policy *pol; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; int npols = 0; @@ -1916,7 +1891,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, fl_dir = policy_to_flow_dir(dir); if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) { - XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); return 0; } @@ -1929,7 +1904,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, for (i=skb->sp->len-1; i>=0; i--) { struct xfrm_state *x = skb->sp->xvec[i]; if (!xfrm_selector_match(&x->sel, &fl, family)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEMISMATCH); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH); return 0; } } @@ -1939,24 +1914,24 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, if (sk && sk->sk_policy[dir]) { pol = xfrm_sk_policy_lookup(sk, dir, &fl); if (IS_ERR(pol)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); return 0; } } if (!pol) - pol = flow_cache_lookup(&fl, family, fl_dir, + pol = flow_cache_lookup(net, &fl, family, fl_dir, xfrm_policy_lookup); if (IS_ERR(pol)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); return 0; } if (!pol) { if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) { xfrm_secpath_reject(xerr_idx, skb, &fl); - XFRM_INC_STATS(LINUX_MIB_XFRMINNOPOLS); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS); return 0; } return 1; @@ -1968,12 +1943,12 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, npols ++; #ifdef CONFIG_XFRM_SUB_POLICY if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { - pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, + pols[1] = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, &fl, family, XFRM_POLICY_IN); if (pols[1]) { if (IS_ERR(pols[1])) { - XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); return 0; } pols[1]->curlft.use_time = get_seconds(); @@ -1997,11 +1972,11 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, for (pi = 0; pi < npols; pi++) { if (pols[pi] != pol && pols[pi]->action != XFRM_POLICY_ALLOW) { - XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK); goto reject; } if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) { - XFRM_INC_STATS(LINUX_MIB_XFRMINBUFFERERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); goto reject_error; } for (i = 0; i < pols[pi]->xfrm_nr; i++) @@ -2025,20 +2000,20 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, if (k < -1) /* "-2 - errored_index" returned */ xerr_idx = -(2+k); - XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH); goto reject; } } if (secpath_has_nontransport(sp, k, &xerr_idx)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH); goto reject; } xfrm_pols_put(pols, npols); return 1; } - XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK); reject: xfrm_secpath_reject(xerr_idx, skb, &fl); @@ -2050,15 +2025,16 @@ EXPORT_SYMBOL(__xfrm_policy_check); int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) { + struct net *net = dev_net(skb->dev); struct flowi fl; if (xfrm_decode_session(skb, &fl, family) < 0) { /* XXX: we should have something like FWDHDRERROR here. */ - XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); return 0; } - return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0; + return xfrm_lookup(net, &skb->dst, &fl, NULL, 0) == 0; } EXPORT_SYMBOL(__xfrm_route_forward); @@ -2142,7 +2118,7 @@ static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_ent write_unlock(&pol->lock); } -static void xfrm_prune_bundles(int (*func)(struct dst_entry *)) +static void xfrm_prune_bundles(struct net *net, int (*func)(struct dst_entry *)) { struct dst_entry *gc_list = NULL; int dir; @@ -2155,11 +2131,11 @@ static void xfrm_prune_bundles(int (*func)(struct dst_entry *)) int i; hlist_for_each_entry(pol, entry, - &xfrm_policy_inexact[dir], bydst) + &net->xfrm.policy_inexact[dir], bydst) prune_one_bundle(pol, func, &gc_list); - table = xfrm_policy_bydst[dir].table; - for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { + table = net->xfrm.policy_bydst[dir].table; + for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) { hlist_for_each_entry(pol, entry, table + i, bydst) prune_one_bundle(pol, func, &gc_list); } @@ -2178,14 +2154,14 @@ static int unused_bundle(struct dst_entry *dst) return !atomic_read(&dst->__refcnt); } -static void __xfrm_garbage_collect(void) +static void __xfrm_garbage_collect(struct net *net) { - xfrm_prune_bundles(unused_bundle); + xfrm_prune_bundles(net, unused_bundle); } -static int xfrm_flush_bundles(void) +static int xfrm_flush_bundles(struct net *net) { - xfrm_prune_bundles(stale_bundle); + xfrm_prune_bundles(net, stale_bundle); return 0; } @@ -2371,38 +2347,54 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void { struct net_device *dev = ptr; - if (!net_eq(dev_net(dev), &init_net)) - return NOTIFY_DONE; - switch (event) { case NETDEV_DOWN: - xfrm_flush_bundles(); + xfrm_flush_bundles(dev_net(dev)); } return NOTIFY_DONE; } static struct notifier_block xfrm_dev_notifier = { - xfrm_dev_event, - NULL, - 0 + .notifier_call = xfrm_dev_event, }; #ifdef CONFIG_XFRM_STATISTICS -static int __init xfrm_statistics_init(void) +static int __net_init xfrm_statistics_init(struct net *net) { - if (snmp_mib_init((void **)xfrm_statistics, + int rv; + + if (snmp_mib_init((void **)net->mib.xfrm_statistics, sizeof(struct linux_xfrm_mib)) < 0) return -ENOMEM; + rv = xfrm_proc_init(net); + if (rv < 0) + snmp_mib_free((void **)net->mib.xfrm_statistics); + return rv; +} + +static void xfrm_statistics_fini(struct net *net) +{ + xfrm_proc_fini(net); + snmp_mib_free((void **)net->mib.xfrm_statistics); +} +#else +static int __net_init xfrm_statistics_init(struct net *net) +{ return 0; } + +static void xfrm_statistics_fini(struct net *net) +{ +} #endif -static void __init xfrm_policy_init(void) +static int __net_init xfrm_policy_init(struct net *net) { unsigned int hmask, sz; int dir; - xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache", + if (net_eq(net, &init_net)) + xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache", sizeof(struct xfrm_dst), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); @@ -2410,39 +2402,124 @@ static void __init xfrm_policy_init(void) hmask = 8 - 1; sz = (hmask+1) * sizeof(struct hlist_head); - xfrm_policy_byidx = xfrm_hash_alloc(sz); - xfrm_idx_hmask = hmask; - if (!xfrm_policy_byidx) - panic("XFRM: failed to allocate byidx hash\n"); + net->xfrm.policy_byidx = xfrm_hash_alloc(sz); + if (!net->xfrm.policy_byidx) + goto out_byidx; + net->xfrm.policy_idx_hmask = hmask; for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { struct xfrm_policy_hash *htab; - INIT_HLIST_HEAD(&xfrm_policy_inexact[dir]); + net->xfrm.policy_count[dir] = 0; + INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]); - htab = &xfrm_policy_bydst[dir]; + htab = &net->xfrm.policy_bydst[dir]; htab->table = xfrm_hash_alloc(sz); - htab->hmask = hmask; if (!htab->table) - panic("XFRM: failed to allocate bydst hash\n"); + goto out_bydst; + htab->hmask = hmask; } - INIT_LIST_HEAD(&xfrm_policy_all); - INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task); - register_netdevice_notifier(&xfrm_dev_notifier); + INIT_LIST_HEAD(&net->xfrm.policy_all); + INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize); + if (net_eq(net, &init_net)) + register_netdevice_notifier(&xfrm_dev_notifier); + return 0; + +out_bydst: + for (dir--; dir >= 0; dir--) { + struct xfrm_policy_hash *htab; + + htab = &net->xfrm.policy_bydst[dir]; + xfrm_hash_free(htab->table, sz); + } + xfrm_hash_free(net->xfrm.policy_byidx, sz); +out_byidx: + return -ENOMEM; } -void __init xfrm_init(void) +static void xfrm_policy_fini(struct net *net) { -#ifdef CONFIG_XFRM_STATISTICS - xfrm_statistics_init(); + struct xfrm_audit audit_info; + unsigned int sz; + int dir; + + flush_work(&net->xfrm.policy_hash_work); +#ifdef CONFIG_XFRM_SUB_POLICY + audit_info.loginuid = -1; + audit_info.sessionid = -1; + audit_info.secid = 0; + xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, &audit_info); #endif - xfrm_state_init(); - xfrm_policy_init(); + audit_info.loginuid = -1; + audit_info.sessionid = -1; + audit_info.secid = 0; + xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info); + flush_work(&xfrm_policy_gc_work); + + WARN_ON(!list_empty(&net->xfrm.policy_all)); + + for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { + struct xfrm_policy_hash *htab; + + WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir])); + + htab = &net->xfrm.policy_bydst[dir]; + sz = (htab->hmask + 1); + WARN_ON(!hlist_empty(htab->table)); + xfrm_hash_free(htab->table, sz); + } + + sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head); + WARN_ON(!hlist_empty(net->xfrm.policy_byidx)); + xfrm_hash_free(net->xfrm.policy_byidx, sz); +} + +static int __net_init xfrm_net_init(struct net *net) +{ + int rv; + + rv = xfrm_statistics_init(net); + if (rv < 0) + goto out_statistics; + rv = xfrm_state_init(net); + if (rv < 0) + goto out_state; + rv = xfrm_policy_init(net); + if (rv < 0) + goto out_policy; + rv = xfrm_sysctl_init(net); + if (rv < 0) + goto out_sysctl; + return 0; + +out_sysctl: + xfrm_policy_fini(net); +out_policy: + xfrm_state_fini(net); +out_state: + xfrm_statistics_fini(net); +out_statistics: + return rv; +} + +static void __net_exit xfrm_net_exit(struct net *net) +{ + xfrm_sysctl_fini(net); + xfrm_policy_fini(net); + xfrm_state_fini(net); + xfrm_statistics_fini(net); +} + +static struct pernet_operations __net_initdata xfrm_net_ops = { + .init = xfrm_net_init, + .exit = xfrm_net_exit, +}; + +void __init xfrm_init(void) +{ + register_pernet_subsys(&xfrm_net_ops); xfrm_input_init(); -#ifdef CONFIG_XFRM_STATISTICS - xfrm_proc_init(); -#endif } #ifdef CONFIG_AUDITSYSCALL @@ -2458,25 +2535,21 @@ static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp, switch(sel->family) { case AF_INET: - audit_log_format(audit_buf, " src=" NIPQUAD_FMT, - NIPQUAD(sel->saddr.a4)); + audit_log_format(audit_buf, " src=%pI4", &sel->saddr.a4); if (sel->prefixlen_s != 32) audit_log_format(audit_buf, " src_prefixlen=%d", sel->prefixlen_s); - audit_log_format(audit_buf, " dst=" NIPQUAD_FMT, - NIPQUAD(sel->daddr.a4)); + audit_log_format(audit_buf, " dst=%pI4", &sel->daddr.a4); if (sel->prefixlen_d != 32) audit_log_format(audit_buf, " dst_prefixlen=%d", sel->prefixlen_d); break; case AF_INET6: - audit_log_format(audit_buf, " src=" NIP6_FMT, - NIP6(*(struct in6_addr *)sel->saddr.a6)); + audit_log_format(audit_buf, " src=%pI6", sel->saddr.a6); if (sel->prefixlen_s != 128) audit_log_format(audit_buf, " src_prefixlen=%d", sel->prefixlen_s); - audit_log_format(audit_buf, " dst=" NIP6_FMT, - NIP6(*(struct in6_addr *)sel->daddr.a6)); + audit_log_format(audit_buf, " dst=%pI6", sel->daddr.a6); if (sel->prefixlen_d != 128) audit_log_format(audit_buf, " dst_prefixlen=%d", sel->prefixlen_d); @@ -2546,7 +2619,7 @@ static struct xfrm_policy * xfrm_migrate_policy_find(struct xfrm_selector *sel, u32 priority = ~0U; read_lock_bh(&xfrm_policy_lock); - chain = policy_hash_direct(&sel->daddr, &sel->saddr, sel->family, dir); + chain = policy_hash_direct(&init_net, &sel->daddr, &sel->saddr, sel->family, dir); hlist_for_each_entry(pol, entry, chain, bydst) { if (xfrm_migrate_selector_match(sel, &pol->selector) && pol->type == type) { @@ -2555,7 +2628,7 @@ static struct xfrm_policy * xfrm_migrate_policy_find(struct xfrm_selector *sel, break; } } - chain = &xfrm_policy_inexact[dir]; + chain = &init_net.xfrm.policy_inexact[dir]; hlist_for_each_entry(pol, entry, chain, bydst) { if (xfrm_migrate_selector_match(sel, &pol->selector) && pol->type == type && diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index 2b0db13f0cd..284eaef1dbf 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -59,17 +59,18 @@ fold_field(void *mib[], int offt) static int xfrm_statistics_seq_show(struct seq_file *seq, void *v) { + struct net *net = seq->private; int i; for (i=0; xfrm_mib_list[i].name; i++) seq_printf(seq, "%-24s\t%lu\n", xfrm_mib_list[i].name, - fold_field((void **)xfrm_statistics, + fold_field((void **)net->mib.xfrm_statistics, xfrm_mib_list[i].entry)); return 0; } static int xfrm_statistics_seq_open(struct inode *inode, struct file *file) { - return single_open(file, xfrm_statistics_seq_show, NULL); + return single_open_net(inode, file, xfrm_statistics_seq_show); } static struct file_operations xfrm_statistics_seq_fops = { @@ -77,21 +78,18 @@ static struct file_operations xfrm_statistics_seq_fops = { .open = xfrm_statistics_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = single_release_net, }; -int __init xfrm_proc_init(void) +int __net_init xfrm_proc_init(struct net *net) { - int rc = 0; - - if (!proc_net_fops_create(&init_net, "xfrm_stat", S_IRUGO, + if (!proc_net_fops_create(net, "xfrm_stat", S_IRUGO, &xfrm_statistics_seq_fops)) - goto stat_fail; - - out: - return rc; + return -ENOMEM; + return 0; +} - stat_fail: - rc = -ENOMEM; - goto out; +void xfrm_proc_fini(struct net *net) +{ + proc_net_remove(net, "xfrm_stat"); } diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 508337f9724..e25ff62ab2a 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -24,17 +24,6 @@ #include "xfrm_hash.h" -struct sock *xfrm_nl; -EXPORT_SYMBOL(xfrm_nl); - -u32 sysctl_xfrm_aevent_etime __read_mostly = XFRM_AE_ETIME; -EXPORT_SYMBOL(sysctl_xfrm_aevent_etime); - -u32 sysctl_xfrm_aevent_rseqth __read_mostly = XFRM_AE_SEQT_SIZE; -EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth); - -u32 sysctl_xfrm_acq_expires __read_mostly = 30; - /* Each xfrm_state may be linked to two tables: 1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl) @@ -44,19 +33,7 @@ u32 sysctl_xfrm_acq_expires __read_mostly = 30; static DEFINE_SPINLOCK(xfrm_state_lock); -/* Hash table to find appropriate SA towards given target (endpoint - * of tunnel or destination of transport mode) allowed by selector. - * - * Main use is finding SA after policy selected tunnel or transport mode. - * Also, it can be used by ah/esp icmp error handler to find offending SA. - */ -static LIST_HEAD(xfrm_state_all); -static struct hlist_head *xfrm_state_bydst __read_mostly; -static struct hlist_head *xfrm_state_bysrc __read_mostly; -static struct hlist_head *xfrm_state_byspi __read_mostly; -static unsigned int xfrm_state_hmask __read_mostly; static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; -static unsigned int xfrm_state_num; static unsigned int xfrm_state_genid; static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family); @@ -69,25 +46,27 @@ static void xfrm_audit_state_replay(struct xfrm_state *x, #define xfrm_audit_state_replay(x, s, sq) do { ; } while (0) #endif /* CONFIG_AUDITSYSCALL */ -static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr, +static inline unsigned int xfrm_dst_hash(struct net *net, + xfrm_address_t *daddr, xfrm_address_t *saddr, u32 reqid, unsigned short family) { - return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask); + return __xfrm_dst_hash(daddr, saddr, reqid, family, net->xfrm.state_hmask); } -static inline unsigned int xfrm_src_hash(xfrm_address_t *daddr, +static inline unsigned int xfrm_src_hash(struct net *net, + xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family) { - return __xfrm_src_hash(daddr, saddr, family, xfrm_state_hmask); + return __xfrm_src_hash(daddr, saddr, family, net->xfrm.state_hmask); } static inline unsigned int -xfrm_spi_hash(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) +xfrm_spi_hash(struct net *net, xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) { - return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask); + return __xfrm_spi_hash(daddr, spi, proto, family, net->xfrm.state_hmask); } static void xfrm_hash_transfer(struct hlist_head *list, @@ -121,16 +100,16 @@ static void xfrm_hash_transfer(struct hlist_head *list, } } -static unsigned long xfrm_hash_new_size(void) +static unsigned long xfrm_hash_new_size(unsigned int state_hmask) { - return ((xfrm_state_hmask + 1) << 1) * - sizeof(struct hlist_head); + return ((state_hmask + 1) << 1) * sizeof(struct hlist_head); } static DEFINE_MUTEX(hash_resize_mutex); -static void xfrm_hash_resize(struct work_struct *__unused) +static void xfrm_hash_resize(struct work_struct *work) { + struct net *net = container_of(work, struct net, xfrm.state_hash_work); struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi; unsigned long nsize, osize; unsigned int nhashmask, ohashmask; @@ -138,7 +117,7 @@ static void xfrm_hash_resize(struct work_struct *__unused) mutex_lock(&hash_resize_mutex); - nsize = xfrm_hash_new_size(); + nsize = xfrm_hash_new_size(net->xfrm.state_hmask); ndst = xfrm_hash_alloc(nsize); if (!ndst) goto out_unlock; @@ -157,19 +136,19 @@ static void xfrm_hash_resize(struct work_struct *__unused) spin_lock_bh(&xfrm_state_lock); nhashmask = (nsize / sizeof(struct hlist_head)) - 1U; - for (i = xfrm_state_hmask; i >= 0; i--) - xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi, + for (i = net->xfrm.state_hmask; i >= 0; i--) + xfrm_hash_transfer(net->xfrm.state_bydst+i, ndst, nsrc, nspi, nhashmask); - odst = xfrm_state_bydst; - osrc = xfrm_state_bysrc; - ospi = xfrm_state_byspi; - ohashmask = xfrm_state_hmask; + odst = net->xfrm.state_bydst; + osrc = net->xfrm.state_bysrc; + ospi = net->xfrm.state_byspi; + ohashmask = net->xfrm.state_hmask; - xfrm_state_bydst = ndst; - xfrm_state_bysrc = nsrc; - xfrm_state_byspi = nspi; - xfrm_state_hmask = nhashmask; + net->xfrm.state_bydst = ndst; + net->xfrm.state_bysrc = nsrc; + net->xfrm.state_byspi = nspi; + net->xfrm.state_hmask = nhashmask; spin_unlock_bh(&xfrm_state_lock); @@ -182,16 +161,9 @@ out_unlock: mutex_unlock(&hash_resize_mutex); } -static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize); - -DECLARE_WAIT_QUEUE_HEAD(km_waitq); -EXPORT_SYMBOL(km_waitq); - static DEFINE_RWLOCK(xfrm_state_afinfo_lock); static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO]; -static struct work_struct xfrm_state_gc_work; -static HLIST_HEAD(xfrm_state_gc_list); static DEFINE_SPINLOCK(xfrm_state_gc_lock); int __xfrm_state_delete(struct xfrm_state *x); @@ -401,20 +373,21 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) kfree(x); } -static void xfrm_state_gc_task(struct work_struct *data) +static void xfrm_state_gc_task(struct work_struct *work) { + struct net *net = container_of(work, struct net, xfrm.state_gc_work); struct xfrm_state *x; struct hlist_node *entry, *tmp; struct hlist_head gc_list; spin_lock_bh(&xfrm_state_gc_lock); - hlist_move_list(&xfrm_state_gc_list, &gc_list); + hlist_move_list(&net->xfrm.state_gc_list, &gc_list); spin_unlock_bh(&xfrm_state_gc_lock); hlist_for_each_entry_safe(x, entry, tmp, &gc_list, gclist) xfrm_state_gc_destroy(x); - wake_up(&km_waitq); + wake_up(&net->xfrm.km_waitq); } static inline unsigned long make_jiffies(long secs) @@ -428,6 +401,7 @@ static inline unsigned long make_jiffies(long secs) static void xfrm_timer_handler(unsigned long data) { struct xfrm_state *x = (struct xfrm_state*)data; + struct net *net = xs_net(x); unsigned long now = get_seconds(); long next = LONG_MAX; int warn = 0; @@ -485,7 +459,7 @@ resched: expired: if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) { x->km.state = XFRM_STATE_EXPIRED; - wake_up(&km_waitq); + wake_up(&net->xfrm.km_waitq); next = 2; goto resched; } @@ -504,13 +478,14 @@ out: static void xfrm_replay_timer_handler(unsigned long data); -struct xfrm_state *xfrm_state_alloc(void) +struct xfrm_state *xfrm_state_alloc(struct net *net) { struct xfrm_state *x; x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC); if (x) { + write_pnet(&x->xs_net, net); atomic_set(&x->refcnt, 1); atomic_set(&x->tunnel_users, 0); INIT_LIST_HEAD(&x->km.all); @@ -537,17 +512,20 @@ EXPORT_SYMBOL(xfrm_state_alloc); void __xfrm_state_destroy(struct xfrm_state *x) { + struct net *net = xs_net(x); + WARN_ON(x->km.state != XFRM_STATE_DEAD); spin_lock_bh(&xfrm_state_gc_lock); - hlist_add_head(&x->gclist, &xfrm_state_gc_list); + hlist_add_head(&x->gclist, &net->xfrm.state_gc_list); spin_unlock_bh(&xfrm_state_gc_lock); - schedule_work(&xfrm_state_gc_work); + schedule_work(&net->xfrm.state_gc_work); } EXPORT_SYMBOL(__xfrm_state_destroy); int __xfrm_state_delete(struct xfrm_state *x) { + struct net *net = xs_net(x); int err = -ESRCH; if (x->km.state != XFRM_STATE_DEAD) { @@ -558,7 +536,7 @@ int __xfrm_state_delete(struct xfrm_state *x) hlist_del(&x->bysrc); if (x->id.spi) hlist_del(&x->byspi); - xfrm_state_num--; + net->xfrm.state_num--; spin_unlock(&xfrm_state_lock); /* All xfrm_state objects are created by xfrm_state_alloc. @@ -587,15 +565,15 @@ EXPORT_SYMBOL(xfrm_state_delete); #ifdef CONFIG_SECURITY_NETWORK_XFRM static inline int -xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info) +xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audit_info) { int i, err = 0; - for (i = 0; i <= xfrm_state_hmask; i++) { + for (i = 0; i <= net->xfrm.state_hmask; i++) { struct hlist_node *entry; struct xfrm_state *x; - hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { + hlist_for_each_entry(x, entry, net->xfrm.state_bydst+i, bydst) { if (xfrm_id_proto_match(x->id.proto, proto) && (err = security_xfrm_state_delete(x)) != 0) { xfrm_audit_state_delete(x, 0, @@ -611,26 +589,26 @@ xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info) } #else static inline int -xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info) +xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audit_info) { return 0; } #endif -int xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info) +int xfrm_state_flush(struct net *net, u8 proto, struct xfrm_audit *audit_info) { int i, err = 0; spin_lock_bh(&xfrm_state_lock); - err = xfrm_state_flush_secctx_check(proto, audit_info); + err = xfrm_state_flush_secctx_check(net, proto, audit_info); if (err) goto out; - for (i = 0; i <= xfrm_state_hmask; i++) { + for (i = 0; i <= net->xfrm.state_hmask; i++) { struct hlist_node *entry; struct xfrm_state *x; restart: - hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { + hlist_for_each_entry(x, entry, net->xfrm.state_bydst+i, bydst) { if (!xfrm_state_kern(x) && xfrm_id_proto_match(x->id.proto, proto)) { xfrm_state_hold(x); @@ -652,7 +630,7 @@ restart: out: spin_unlock_bh(&xfrm_state_lock); - wake_up(&km_waitq); + wake_up(&net->xfrm.km_waitq); return err; } EXPORT_SYMBOL(xfrm_state_flush); @@ -660,8 +638,8 @@ EXPORT_SYMBOL(xfrm_state_flush); void xfrm_sad_getinfo(struct xfrmk_sadinfo *si) { spin_lock_bh(&xfrm_state_lock); - si->sadcnt = xfrm_state_num; - si->sadhcnt = xfrm_state_hmask; + si->sadcnt = init_net.xfrm.state_num; + si->sadhcnt = init_net.xfrm.state_hmask; si->sadhmcnt = xfrm_state_hashmax; spin_unlock_bh(&xfrm_state_lock); } @@ -681,13 +659,13 @@ xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl, return 0; } -static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) +static struct xfrm_state *__xfrm_state_lookup(struct net *net, xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) { - unsigned int h = xfrm_spi_hash(daddr, spi, proto, family); + unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family); struct xfrm_state *x; struct hlist_node *entry; - hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) { + hlist_for_each_entry(x, entry, net->xfrm.state_byspi+h, byspi) { if (x->props.family != family || x->id.spi != spi || x->id.proto != proto) @@ -713,13 +691,13 @@ static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, return NULL; } -static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family) +static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family) { - unsigned int h = xfrm_src_hash(daddr, saddr, family); + unsigned int h = xfrm_src_hash(net, daddr, saddr, family); struct xfrm_state *x; struct hlist_node *entry; - hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) { + hlist_for_each_entry(x, entry, net->xfrm.state_bysrc+h, bysrc) { if (x->props.family != family || x->id.proto != proto) continue; @@ -751,21 +729,23 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm static inline struct xfrm_state * __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family) { + struct net *net = xs_net(x); + if (use_spi) - return __xfrm_state_lookup(&x->id.daddr, x->id.spi, + return __xfrm_state_lookup(net, &x->id.daddr, x->id.spi, x->id.proto, family); else - return __xfrm_state_lookup_byaddr(&x->id.daddr, + return __xfrm_state_lookup_byaddr(net, &x->id.daddr, &x->props.saddr, x->id.proto, family); } -static void xfrm_hash_grow_check(int have_hash_collision) +static void xfrm_hash_grow_check(struct net *net, int have_hash_collision) { if (have_hash_collision && - (xfrm_state_hmask + 1) < xfrm_state_hashmax && - xfrm_state_num > xfrm_state_hmask) - schedule_work(&xfrm_hash_work); + (net->xfrm.state_hmask + 1) < xfrm_state_hashmax && + net->xfrm.state_num > net->xfrm.state_hmask) + schedule_work(&net->xfrm.state_hash_work); } struct xfrm_state * @@ -774,6 +754,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, struct xfrm_policy *pol, int *err, unsigned short family) { + struct net *net = xp_net(pol); unsigned int h; struct hlist_node *entry; struct xfrm_state *x, *x0, *to_put; @@ -784,8 +765,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, to_put = NULL; spin_lock_bh(&xfrm_state_lock); - h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family); - hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { + h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, family); + hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == tmpl->reqid && !(x->props.flags & XFRM_STATE_WILDRECV) && @@ -829,13 +810,13 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, x = best; if (!x && !error && !acquire_in_progress) { if (tmpl->id.spi && - (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi, + (x0 = __xfrm_state_lookup(net, daddr, tmpl->id.spi, tmpl->id.proto, family)) != NULL) { to_put = x0; error = -EEXIST; goto out; } - x = xfrm_state_alloc(); + x = xfrm_state_alloc(net); if (x == NULL) { error = -ENOMEM; goto out; @@ -854,19 +835,19 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, if (km_query(x, tmpl, pol) == 0) { x->km.state = XFRM_STATE_ACQ; - list_add(&x->km.all, &xfrm_state_all); - hlist_add_head(&x->bydst, xfrm_state_bydst+h); - h = xfrm_src_hash(daddr, saddr, family); - hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); + list_add(&x->km.all, &net->xfrm.state_all); + hlist_add_head(&x->bydst, net->xfrm.state_bydst+h); + h = xfrm_src_hash(net, daddr, saddr, family); + hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h); if (x->id.spi) { - h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family); - hlist_add_head(&x->byspi, xfrm_state_byspi+h); + h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, family); + hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); } - x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires; - x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ; + x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; + x->timer.expires = jiffies + net->xfrm.sysctl_acq_expires*HZ; add_timer(&x->timer); - xfrm_state_num++; - xfrm_hash_grow_check(x->bydst.next != NULL); + net->xfrm.state_num++; + xfrm_hash_grow_check(net, x->bydst.next != NULL); } else { x->km.state = XFRM_STATE_DEAD; to_put = x; @@ -886,7 +867,8 @@ out: } struct xfrm_state * -xfrm_stateonly_find(xfrm_address_t *daddr, xfrm_address_t *saddr, +xfrm_stateonly_find(struct net *net, + xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, u8 mode, u8 proto, u32 reqid) { unsigned int h; @@ -894,8 +876,8 @@ xfrm_stateonly_find(xfrm_address_t *daddr, xfrm_address_t *saddr, struct hlist_node *entry; spin_lock(&xfrm_state_lock); - h = xfrm_dst_hash(daddr, saddr, reqid, family); - hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { + h = xfrm_dst_hash(net, daddr, saddr, reqid, family); + hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == reqid && !(x->props.flags & XFRM_STATE_WILDRECV) && @@ -919,48 +901,50 @@ EXPORT_SYMBOL(xfrm_stateonly_find); static void __xfrm_state_insert(struct xfrm_state *x) { + struct net *net = xs_net(x); unsigned int h; x->genid = ++xfrm_state_genid; - list_add(&x->km.all, &xfrm_state_all); + list_add(&x->km.all, &net->xfrm.state_all); - h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr, + h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr, x->props.reqid, x->props.family); - hlist_add_head(&x->bydst, xfrm_state_bydst+h); + hlist_add_head(&x->bydst, net->xfrm.state_bydst+h); - h = xfrm_src_hash(&x->id.daddr, &x->props.saddr, x->props.family); - hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); + h = xfrm_src_hash(net, &x->id.daddr, &x->props.saddr, x->props.family); + hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h); if (x->id.spi) { - h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, + h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family); - hlist_add_head(&x->byspi, xfrm_state_byspi+h); + hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); } mod_timer(&x->timer, jiffies + HZ); if (x->replay_maxage) mod_timer(&x->rtimer, jiffies + x->replay_maxage); - wake_up(&km_waitq); + wake_up(&net->xfrm.km_waitq); - xfrm_state_num++; + net->xfrm.state_num++; - xfrm_hash_grow_check(x->bydst.next != NULL); + xfrm_hash_grow_check(net, x->bydst.next != NULL); } /* xfrm_state_lock is held */ static void __xfrm_state_bump_genids(struct xfrm_state *xnew) { + struct net *net = xs_net(xnew); unsigned short family = xnew->props.family; u32 reqid = xnew->props.reqid; struct xfrm_state *x; struct hlist_node *entry; unsigned int h; - h = xfrm_dst_hash(&xnew->id.daddr, &xnew->props.saddr, reqid, family); - hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { + h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family); + hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == reqid && !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) && @@ -979,13 +963,13 @@ void xfrm_state_insert(struct xfrm_state *x) EXPORT_SYMBOL(xfrm_state_insert); /* xfrm_state_lock is held */ -static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create) +static struct xfrm_state *__find_acq_core(struct net *net, unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create) { - unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family); + unsigned int h = xfrm_dst_hash(net, daddr, saddr, reqid, family); struct hlist_node *entry; struct xfrm_state *x; - hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { + hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { if (x->props.reqid != reqid || x->props.mode != mode || x->props.family != family || @@ -1017,7 +1001,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re if (!create) return NULL; - x = xfrm_state_alloc(); + x = xfrm_state_alloc(net); if (likely(x)) { switch (family) { case AF_INET: @@ -1048,27 +1032,28 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re x->props.family = family; x->props.mode = mode; x->props.reqid = reqid; - x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires; + x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; xfrm_state_hold(x); - x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ; + x->timer.expires = jiffies + net->xfrm.sysctl_acq_expires*HZ; add_timer(&x->timer); - list_add(&x->km.all, &xfrm_state_all); - hlist_add_head(&x->bydst, xfrm_state_bydst+h); - h = xfrm_src_hash(daddr, saddr, family); - hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); + list_add(&x->km.all, &net->xfrm.state_all); + hlist_add_head(&x->bydst, net->xfrm.state_bydst+h); + h = xfrm_src_hash(net, daddr, saddr, family); + hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h); - xfrm_state_num++; + net->xfrm.state_num++; - xfrm_hash_grow_check(x->bydst.next != NULL); + xfrm_hash_grow_check(net, x->bydst.next != NULL); } return x; } -static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq); +static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 seq); int xfrm_state_add(struct xfrm_state *x) { + struct net *net = xs_net(x); struct xfrm_state *x1, *to_put; int family; int err; @@ -1089,7 +1074,7 @@ int xfrm_state_add(struct xfrm_state *x) } if (use_spi && x->km.seq) { - x1 = __xfrm_find_acq_byseq(x->km.seq); + x1 = __xfrm_find_acq_byseq(net, x->km.seq); if (x1 && ((x1->id.proto != x->id.proto) || xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family))) { to_put = x1; @@ -1098,7 +1083,7 @@ int xfrm_state_add(struct xfrm_state *x) } if (use_spi && !x1) - x1 = __find_acq_core(family, x->props.mode, x->props.reqid, + x1 = __find_acq_core(net, family, x->props.mode, x->props.reqid, x->id.proto, &x->id.daddr, &x->props.saddr, 0); @@ -1124,8 +1109,9 @@ EXPORT_SYMBOL(xfrm_state_add); #ifdef CONFIG_XFRM_MIGRATE static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp) { + struct net *net = xs_net(orig); int err = -ENOMEM; - struct xfrm_state *x = xfrm_state_alloc(); + struct xfrm_state *x = xfrm_state_alloc(net); if (!x) goto error; @@ -1206,9 +1192,9 @@ struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m) struct hlist_node *entry; if (m->reqid) { - h = xfrm_dst_hash(&m->old_daddr, &m->old_saddr, + h = xfrm_dst_hash(&init_net, &m->old_daddr, &m->old_saddr, m->reqid, m->old_family); - hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { + hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+h, bydst) { if (x->props.mode != m->mode || x->id.proto != m->proto) continue; @@ -1223,9 +1209,9 @@ struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m) return x; } } else { - h = xfrm_src_hash(&m->old_daddr, &m->old_saddr, + h = xfrm_src_hash(&init_net, &m->old_daddr, &m->old_saddr, m->old_family); - hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) { + hlist_for_each_entry(x, entry, init_net.xfrm.state_bysrc+h, bysrc) { if (x->props.mode != m->mode || x->id.proto != m->proto) continue; @@ -1369,40 +1355,41 @@ int xfrm_state_check_expire(struct xfrm_state *x) EXPORT_SYMBOL(xfrm_state_check_expire); struct xfrm_state * -xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, +xfrm_state_lookup(struct net *net, xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) { struct xfrm_state *x; spin_lock_bh(&xfrm_state_lock); - x = __xfrm_state_lookup(daddr, spi, proto, family); + x = __xfrm_state_lookup(net, daddr, spi, proto, family); spin_unlock_bh(&xfrm_state_lock); return x; } EXPORT_SYMBOL(xfrm_state_lookup); struct xfrm_state * -xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, +xfrm_state_lookup_byaddr(struct net *net, + xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family) { struct xfrm_state *x; spin_lock_bh(&xfrm_state_lock); - x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family); + x = __xfrm_state_lookup_byaddr(net, daddr, saddr, proto, family); spin_unlock_bh(&xfrm_state_lock); return x; } EXPORT_SYMBOL(xfrm_state_lookup_byaddr); struct xfrm_state * -xfrm_find_acq(u8 mode, u32 reqid, u8 proto, +xfrm_find_acq(struct net *net, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create, unsigned short family) { struct xfrm_state *x; spin_lock_bh(&xfrm_state_lock); - x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create); + x = __find_acq_core(net, family, mode, reqid, proto, daddr, saddr, create); spin_unlock_bh(&xfrm_state_lock); return x; @@ -1449,15 +1436,15 @@ EXPORT_SYMBOL(xfrm_state_sort); /* Silly enough, but I'm lazy to build resolution list */ -static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq) +static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 seq) { int i; - for (i = 0; i <= xfrm_state_hmask; i++) { + for (i = 0; i <= net->xfrm.state_hmask; i++) { struct hlist_node *entry; struct xfrm_state *x; - hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { + hlist_for_each_entry(x, entry, net->xfrm.state_bydst+i, bydst) { if (x->km.seq == seq && x->km.state == XFRM_STATE_ACQ) { xfrm_state_hold(x); @@ -1468,12 +1455,12 @@ static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq) return NULL; } -struct xfrm_state *xfrm_find_acq_byseq(u32 seq) +struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 seq) { struct xfrm_state *x; spin_lock_bh(&xfrm_state_lock); - x = __xfrm_find_acq_byseq(seq); + x = __xfrm_find_acq_byseq(net, seq); spin_unlock_bh(&xfrm_state_lock); return x; } @@ -1494,6 +1481,7 @@ EXPORT_SYMBOL(xfrm_get_acqseq); int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) { + struct net *net = xs_net(x); unsigned int h; struct xfrm_state *x0; int err = -ENOENT; @@ -1511,7 +1499,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) err = -ENOENT; if (minspi == maxspi) { - x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family); + x0 = xfrm_state_lookup(net, &x->id.daddr, minspi, x->id.proto, x->props.family); if (x0) { xfrm_state_put(x0); goto unlock; @@ -1521,7 +1509,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) u32 spi = 0; for (h=0; h<high-low+1; h++) { spi = low + net_random()%(high-low+1); - x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family); + x0 = xfrm_state_lookup(net, &x->id.daddr, htonl(spi), x->id.proto, x->props.family); if (x0 == NULL) { x->id.spi = htonl(spi); break; @@ -1531,8 +1519,8 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) } if (x->id.spi) { spin_lock_bh(&xfrm_state_lock); - h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family); - hlist_add_head(&x->byspi, xfrm_state_byspi+h); + h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family); + hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); spin_unlock_bh(&xfrm_state_lock); err = 0; @@ -1545,7 +1533,7 @@ unlock: } EXPORT_SYMBOL(xfrm_alloc_spi); -int xfrm_state_walk(struct xfrm_state_walk *walk, +int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk, int (*func)(struct xfrm_state *, int, void*), void *data) { @@ -1558,10 +1546,10 @@ int xfrm_state_walk(struct xfrm_state_walk *walk, spin_lock_bh(&xfrm_state_lock); if (list_empty(&walk->all)) - x = list_first_entry(&xfrm_state_all, struct xfrm_state_walk, all); + x = list_first_entry(&net->xfrm.state_all, struct xfrm_state_walk, all); else x = list_entry(&walk->all, struct xfrm_state_walk, all); - list_for_each_entry_from(x, &xfrm_state_all, all) { + list_for_each_entry_from(x, &net->xfrm.state_all, all) { if (x->state == XFRM_STATE_DEAD) continue; state = container_of(x, struct xfrm_state, km); @@ -1660,7 +1648,7 @@ static void xfrm_replay_timer_handler(unsigned long data) spin_lock(&x->lock); if (x->km.state == XFRM_STATE_VALID) { - if (xfrm_aevent_is_on()) + if (xfrm_aevent_is_on(xs_net(x))) xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT); else x->xflags |= XFRM_TIME_DEFER; @@ -1716,7 +1704,7 @@ void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq) x->replay.bitmap |= (1U << diff); } - if (xfrm_aevent_is_on()) + if (xfrm_aevent_is_on(xs_net(x))) xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } @@ -1749,6 +1737,7 @@ EXPORT_SYMBOL(km_state_notify); void km_state_expired(struct xfrm_state *x, int hard, u32 pid) { + struct net *net = xs_net(x); struct km_event c; c.data.hard = hard; @@ -1757,7 +1746,7 @@ void km_state_expired(struct xfrm_state *x, int hard, u32 pid) km_state_notify(x, &c); if (hard) - wake_up(&km_waitq); + wake_up(&net->xfrm.km_waitq); } EXPORT_SYMBOL(km_state_expired); @@ -1800,6 +1789,7 @@ EXPORT_SYMBOL(km_new_mapping); void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid) { + struct net *net = xp_net(pol); struct km_event c; c.data.hard = hard; @@ -1808,7 +1798,7 @@ void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid) km_policy_notify(pol, dir, &c); if (hard) - wake_up(&km_waitq); + wake_up(&net->xfrm.km_waitq); } EXPORT_SYMBOL(km_policy_expired); @@ -1835,7 +1825,7 @@ int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type, EXPORT_SYMBOL(km_migrate); #endif -int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr) +int km_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr) { int err = -EINVAL; int ret; @@ -1844,7 +1834,7 @@ int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr) read_lock(&xfrm_km_lock); list_for_each_entry(km, &xfrm_km_list, list) { if (km->report) { - ret = km->report(proto, sel, addr); + ret = km->report(net, proto, sel, addr); if (!ret) err = ret; } @@ -2032,8 +2022,9 @@ int xfrm_init_state(struct xfrm_state *x) x->inner_mode = inner_mode; } else { struct xfrm_mode *inner_mode_iaf; + int iafamily = AF_INET; - inner_mode = xfrm_get_mode(x->props.mode, AF_INET); + inner_mode = xfrm_get_mode(x->props.mode, x->props.family); if (inner_mode == NULL) goto error; @@ -2041,22 +2032,17 @@ int xfrm_init_state(struct xfrm_state *x) xfrm_put_mode(inner_mode); goto error; } + x->inner_mode = inner_mode; - inner_mode_iaf = xfrm_get_mode(x->props.mode, AF_INET6); - if (inner_mode_iaf == NULL) - goto error; - - if (!(inner_mode_iaf->flags & XFRM_MODE_FLAG_TUNNEL)) { - xfrm_put_mode(inner_mode_iaf); - goto error; - } + if (x->props.family == AF_INET) + iafamily = AF_INET6; - if (x->props.family == AF_INET) { - x->inner_mode = inner_mode; - x->inner_mode_iaf = inner_mode_iaf; - } else { - x->inner_mode = inner_mode_iaf; - x->inner_mode_iaf = inner_mode; + inner_mode_iaf = xfrm_get_mode(x->props.mode, iafamily); + if (inner_mode_iaf) { + if (inner_mode_iaf->flags & XFRM_MODE_FLAG_TUNNEL) + x->inner_mode_iaf = inner_mode_iaf; + else + xfrm_put_mode(inner_mode_iaf); } } @@ -2080,20 +2066,61 @@ error: EXPORT_SYMBOL(xfrm_init_state); -void __init xfrm_state_init(void) +int __net_init xfrm_state_init(struct net *net) { unsigned int sz; + INIT_LIST_HEAD(&net->xfrm.state_all); + sz = sizeof(struct hlist_head) * 8; - xfrm_state_bydst = xfrm_hash_alloc(sz); - xfrm_state_bysrc = xfrm_hash_alloc(sz); - xfrm_state_byspi = xfrm_hash_alloc(sz); - if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi) - panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes."); - xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1); + net->xfrm.state_bydst = xfrm_hash_alloc(sz); + if (!net->xfrm.state_bydst) + goto out_bydst; + net->xfrm.state_bysrc = xfrm_hash_alloc(sz); + if (!net->xfrm.state_bysrc) + goto out_bysrc; + net->xfrm.state_byspi = xfrm_hash_alloc(sz); + if (!net->xfrm.state_byspi) + goto out_byspi; + net->xfrm.state_hmask = ((sz / sizeof(struct hlist_head)) - 1); + + net->xfrm.state_num = 0; + INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize); + INIT_HLIST_HEAD(&net->xfrm.state_gc_list); + INIT_WORK(&net->xfrm.state_gc_work, xfrm_state_gc_task); + init_waitqueue_head(&net->xfrm.km_waitq); + return 0; + +out_byspi: + xfrm_hash_free(net->xfrm.state_bysrc, sz); +out_bysrc: + xfrm_hash_free(net->xfrm.state_bydst, sz); +out_bydst: + return -ENOMEM; +} + +void xfrm_state_fini(struct net *net) +{ + struct xfrm_audit audit_info; + unsigned int sz; + + flush_work(&net->xfrm.state_hash_work); + audit_info.loginuid = -1; + audit_info.sessionid = -1; + audit_info.secid = 0; + xfrm_state_flush(net, IPSEC_PROTO_ANY, &audit_info); + flush_work(&net->xfrm.state_gc_work); + + WARN_ON(!list_empty(&net->xfrm.state_all)); - INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task); + sz = (net->xfrm.state_hmask + 1) * sizeof(struct hlist_head); + WARN_ON(!hlist_empty(net->xfrm.state_byspi)); + xfrm_hash_free(net->xfrm.state_byspi, sz); + WARN_ON(!hlist_empty(net->xfrm.state_bysrc)); + xfrm_hash_free(net->xfrm.state_bysrc, sz); + WARN_ON(!hlist_empty(net->xfrm.state_bydst)); + xfrm_hash_free(net->xfrm.state_bydst, sz); } #ifdef CONFIG_AUDITSYSCALL @@ -2109,16 +2136,12 @@ static void xfrm_audit_helper_sainfo(struct xfrm_state *x, switch(x->props.family) { case AF_INET: - audit_log_format(audit_buf, - " src=" NIPQUAD_FMT " dst=" NIPQUAD_FMT, - NIPQUAD(x->props.saddr.a4), - NIPQUAD(x->id.daddr.a4)); + audit_log_format(audit_buf, " src=%pI4 dst=%pI4", + &x->props.saddr.a4, &x->id.daddr.a4); break; case AF_INET6: - audit_log_format(audit_buf, - " src=" NIP6_FMT " dst=" NIP6_FMT, - NIP6(*(struct in6_addr *)x->props.saddr.a6), - NIP6(*(struct in6_addr *)x->id.daddr.a6)); + audit_log_format(audit_buf, " src=%pI6 dst=%pI6", + x->props.saddr.a6, x->id.daddr.a6); break; } @@ -2134,18 +2157,14 @@ static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family, switch (family) { case AF_INET: iph4 = ip_hdr(skb); - audit_log_format(audit_buf, - " src=" NIPQUAD_FMT " dst=" NIPQUAD_FMT, - NIPQUAD(iph4->saddr), - NIPQUAD(iph4->daddr)); + audit_log_format(audit_buf, " src=%pI4 dst=%pI4", + &iph4->saddr, &iph4->daddr); break; case AF_INET6: iph6 = ipv6_hdr(skb); audit_log_format(audit_buf, - " src=" NIP6_FMT " dst=" NIP6_FMT - " flowlbl=0x%x%02x%02x", - NIP6(iph6->saddr), - NIP6(iph6->daddr), + " src=%pI6 dst=%pI6 flowlbl=0x%x%02x%02x", + &iph6->saddr,&iph6->daddr, iph6->flow_lbl[0] & 0x0f, iph6->flow_lbl[1], iph6->flow_lbl[2]); diff --git a/net/xfrm/xfrm_sysctl.c b/net/xfrm/xfrm_sysctl.c new file mode 100644 index 00000000000..2e6ffb66f06 --- /dev/null +++ b/net/xfrm/xfrm_sysctl.c @@ -0,0 +1,85 @@ +#include <linux/sysctl.h> +#include <net/net_namespace.h> +#include <net/xfrm.h> + +static void __xfrm_sysctl_init(struct net *net) +{ + net->xfrm.sysctl_aevent_etime = XFRM_AE_ETIME; + net->xfrm.sysctl_aevent_rseqth = XFRM_AE_SEQT_SIZE; + net->xfrm.sysctl_larval_drop = 1; + net->xfrm.sysctl_acq_expires = 30; +} + +#ifdef CONFIG_SYSCTL +static struct ctl_table xfrm_table[] = { + { + .ctl_name = NET_CORE_AEVENT_ETIME, + .procname = "xfrm_aevent_etime", + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .ctl_name = NET_CORE_AEVENT_RSEQTH, + .procname = "xfrm_aevent_rseqth", + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "xfrm_larval_drop", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "xfrm_acq_expires", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + {} +}; + +int __net_init xfrm_sysctl_init(struct net *net) +{ + struct ctl_table *table; + + __xfrm_sysctl_init(net); + + table = kmemdup(xfrm_table, sizeof(xfrm_table), GFP_KERNEL); + if (!table) + goto out_kmemdup; + table[0].data = &net->xfrm.sysctl_aevent_etime; + table[1].data = &net->xfrm.sysctl_aevent_rseqth; + table[2].data = &net->xfrm.sysctl_larval_drop; + table[3].data = &net->xfrm.sysctl_acq_expires; + + net->xfrm.sysctl_hdr = register_net_sysctl_table(net, net_core_path, table); + if (!net->xfrm.sysctl_hdr) + goto out_register; + return 0; + +out_register: + kfree(table); +out_kmemdup: + return -ENOMEM; +} + +void xfrm_sysctl_fini(struct net *net) +{ + struct ctl_table *table; + + table = net->xfrm.sysctl_hdr->ctl_table_arg; + unregister_net_sysctl_table(net->xfrm.sysctl_hdr); + kfree(table); +} +#else +int __net_init xfrm_sysctl_init(struct net *net) +{ + __xfrm_sysctl_init(net); + return 0; +} +#endif diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index a278a6f3b99..b95a2d64eb5 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -316,11 +316,12 @@ static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs) x->replay_maxdiff = nla_get_u32(rt); } -static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, +static struct xfrm_state *xfrm_state_construct(struct net *net, + struct xfrm_usersa_info *p, struct nlattr **attrs, int *errp) { - struct xfrm_state *x = xfrm_state_alloc(); + struct xfrm_state *x = xfrm_state_alloc(net); int err = -ENOMEM; if (!x) @@ -367,9 +368,9 @@ static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, goto error; x->km.seq = p->seq; - x->replay_maxdiff = sysctl_xfrm_aevent_rseqth; + x->replay_maxdiff = net->xfrm.sysctl_aevent_rseqth; /* sysctl_xfrm_aevent_etime is in 100ms units */ - x->replay_maxage = (sysctl_xfrm_aevent_etime*HZ)/XFRM_AE_ETH_M; + x->replay_maxage = (net->xfrm.sysctl_aevent_etime*HZ)/XFRM_AE_ETH_M; x->preplay.bitmap = 0; x->preplay.seq = x->replay.seq+x->replay_maxdiff; x->preplay.oseq = x->replay.oseq +x->replay_maxdiff; @@ -391,6 +392,7 @@ error_no_put: static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct xfrm_usersa_info *p = nlmsg_data(nlh); struct xfrm_state *x; int err; @@ -403,7 +405,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) return err; - x = xfrm_state_construct(p, attrs, &err); + x = xfrm_state_construct(net, p, attrs, &err); if (!x) return err; @@ -431,7 +433,8 @@ out: return err; } -static struct xfrm_state *xfrm_user_state_lookup(struct xfrm_usersa_id *p, +static struct xfrm_state *xfrm_user_state_lookup(struct net *net, + struct xfrm_usersa_id *p, struct nlattr **attrs, int *errp) { @@ -440,7 +443,7 @@ static struct xfrm_state *xfrm_user_state_lookup(struct xfrm_usersa_id *p, if (xfrm_id_proto_match(p->proto, IPSEC_PROTO_ANY)) { err = -ESRCH; - x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family); + x = xfrm_state_lookup(net, &p->daddr, p->spi, p->proto, p->family); } else { xfrm_address_t *saddr = NULL; @@ -451,8 +454,8 @@ static struct xfrm_state *xfrm_user_state_lookup(struct xfrm_usersa_id *p, } err = -ESRCH; - x = xfrm_state_lookup_byaddr(&p->daddr, saddr, p->proto, - p->family); + x = xfrm_state_lookup_byaddr(net, &p->daddr, saddr, + p->proto, p->family); } out: @@ -464,6 +467,7 @@ static struct xfrm_state *xfrm_user_state_lookup(struct xfrm_usersa_id *p, static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct xfrm_state *x; int err = -ESRCH; struct km_event c; @@ -472,7 +476,7 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, u32 sessionid = NETLINK_CB(skb).sessionid; u32 sid = NETLINK_CB(skb).sid; - x = xfrm_user_state_lookup(p, attrs, &err); + x = xfrm_user_state_lookup(net, p, attrs, &err); if (x == NULL) return err; @@ -615,6 +619,7 @@ static int xfrm_dump_sa_done(struct netlink_callback *cb) static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = sock_net(skb->sk); struct xfrm_state_walk *walk = (struct xfrm_state_walk *) &cb->args[1]; struct xfrm_dump_info info; @@ -631,7 +636,7 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb) xfrm_state_walk_init(walk, 0); } - (void) xfrm_state_walk(walk, dump_one_state, &info); + (void) xfrm_state_walk(net, walk, dump_one_state, &info); return skb->len; } @@ -703,6 +708,7 @@ nla_put_failure: static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct sk_buff *r_skb; u32 *flags = nlmsg_data(nlh); u32 spid = NETLINK_CB(skb).pid; @@ -715,7 +721,7 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, if (build_spdinfo(r_skb, spid, seq, *flags) < 0) BUG(); - return nlmsg_unicast(xfrm_nl, r_skb, spid); + return nlmsg_unicast(net->xfrm.nlsk, r_skb, spid); } static inline size_t xfrm_sadinfo_msgsize(void) @@ -756,6 +762,7 @@ nla_put_failure: static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct sk_buff *r_skb; u32 *flags = nlmsg_data(nlh); u32 spid = NETLINK_CB(skb).pid; @@ -768,18 +775,19 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, if (build_sadinfo(r_skb, spid, seq, *flags) < 0) BUG(); - return nlmsg_unicast(xfrm_nl, r_skb, spid); + return nlmsg_unicast(net->xfrm.nlsk, r_skb, spid); } static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct xfrm_usersa_id *p = nlmsg_data(nlh); struct xfrm_state *x; struct sk_buff *resp_skb; int err = -ESRCH; - x = xfrm_user_state_lookup(p, attrs, &err); + x = xfrm_user_state_lookup(net, p, attrs, &err); if (x == NULL) goto out_noput; @@ -787,7 +795,7 @@ static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { - err = nlmsg_unicast(xfrm_nl, resp_skb, NETLINK_CB(skb).pid); + err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).pid); } xfrm_state_put(x); out_noput: @@ -820,6 +828,7 @@ static int verify_userspi_info(struct xfrm_userspi_info *p) static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct xfrm_state *x; struct xfrm_userspi_info *p; struct sk_buff *resp_skb; @@ -837,7 +846,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, x = NULL; if (p->info.seq) { - x = xfrm_find_acq_byseq(p->info.seq); + x = xfrm_find_acq_byseq(net, p->info.seq); if (x && xfrm_addr_cmp(&x->id.daddr, daddr, family)) { xfrm_state_put(x); x = NULL; @@ -845,7 +854,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, } if (!x) - x = xfrm_find_acq(p->info.mode, p->info.reqid, + x = xfrm_find_acq(net, p->info.mode, p->info.reqid, p->info.id.proto, daddr, &p->info.saddr, 1, family); @@ -863,7 +872,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, goto out; } - err = nlmsg_unicast(xfrm_nl, resp_skb, NETLINK_CB(skb).pid); + err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).pid); out: xfrm_state_put(x); @@ -1078,9 +1087,9 @@ static void copy_to_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_i p->share = XFRM_SHARE_ANY; /* XXX xp->share */ } -static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, struct nlattr **attrs, int *errp) +static struct xfrm_policy *xfrm_policy_construct(struct net *net, struct xfrm_userpolicy_info *p, struct nlattr **attrs, int *errp) { - struct xfrm_policy *xp = xfrm_policy_alloc(GFP_KERNEL); + struct xfrm_policy *xp = xfrm_policy_alloc(net, GFP_KERNEL); int err; if (!xp) { @@ -1110,6 +1119,7 @@ static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct xfrm_userpolicy_info *p = nlmsg_data(nlh); struct xfrm_policy *xp; struct km_event c; @@ -1126,7 +1136,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) return err; - xp = xfrm_policy_construct(p, attrs, &err); + xp = xfrm_policy_construct(net, p, attrs, &err); if (!xp) return err; @@ -1263,6 +1273,7 @@ static int xfrm_dump_policy_done(struct netlink_callback *cb) static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = sock_net(skb->sk); struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *) &cb->args[1]; struct xfrm_dump_info info; @@ -1279,7 +1290,7 @@ static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb) xfrm_policy_walk_init(walk, XFRM_POLICY_TYPE_ANY); } - (void) xfrm_policy_walk(walk, dump_one_policy, &info); + (void) xfrm_policy_walk(net, walk, dump_one_policy, &info); return skb->len; } @@ -1311,6 +1322,7 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb, static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct xfrm_policy *xp; struct xfrm_userpolicy_id *p; u8 type = XFRM_POLICY_TYPE_MAIN; @@ -1330,7 +1342,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, return err; if (p->index) - xp = xfrm_policy_byid(type, p->dir, p->index, delete, &err); + xp = xfrm_policy_byid(net, type, p->dir, p->index, delete, &err); else { struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_sec_ctx *ctx; @@ -1347,7 +1359,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) return err; } - xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, ctx, + xp = xfrm_policy_bysel_ctx(net, type, p->dir, &p->sel, ctx, delete, &err); security_xfrm_policy_free(ctx); } @@ -1361,7 +1373,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { - err = nlmsg_unicast(xfrm_nl, resp_skb, + err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).pid); } } else { @@ -1390,6 +1402,7 @@ out: static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct km_event c; struct xfrm_usersa_flush *p = nlmsg_data(nlh); struct xfrm_audit audit_info; @@ -1398,13 +1411,14 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, audit_info.loginuid = NETLINK_CB(skb).loginuid; audit_info.sessionid = NETLINK_CB(skb).sessionid; audit_info.secid = NETLINK_CB(skb).sid; - err = xfrm_state_flush(p->proto, &audit_info); + err = xfrm_state_flush(net, p->proto, &audit_info); if (err) return err; c.data.proto = p->proto; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; c.pid = nlh->nlmsg_pid; + c.net = net; km_state_notify(NULL, &c); return 0; @@ -1457,6 +1471,7 @@ nla_put_failure: static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct xfrm_state *x; struct sk_buff *r_skb; int err; @@ -1468,7 +1483,7 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, if (r_skb == NULL) return -ENOMEM; - x = xfrm_state_lookup(&id->daddr, id->spi, id->proto, id->family); + x = xfrm_state_lookup(net, &id->daddr, id->spi, id->proto, id->family); if (x == NULL) { kfree_skb(r_skb); return -ESRCH; @@ -1486,7 +1501,7 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, if (build_aevent(r_skb, x, &c) < 0) BUG(); - err = nlmsg_unicast(xfrm_nl, r_skb, NETLINK_CB(skb).pid); + err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).pid); spin_unlock_bh(&x->lock); xfrm_state_put(x); return err; @@ -1495,6 +1510,7 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct xfrm_state *x; struct km_event c; int err = - EINVAL; @@ -1509,7 +1525,7 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, if (!(nlh->nlmsg_flags&NLM_F_REPLACE)) return err; - x = xfrm_state_lookup(&p->sa_id.daddr, p->sa_id.spi, p->sa_id.proto, p->sa_id.family); + x = xfrm_state_lookup(net, &p->sa_id.daddr, p->sa_id.spi, p->sa_id.proto, p->sa_id.family); if (x == NULL) return -ESRCH; @@ -1534,6 +1550,7 @@ out: static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct km_event c; u8 type = XFRM_POLICY_TYPE_MAIN; int err; @@ -1546,13 +1563,14 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, audit_info.loginuid = NETLINK_CB(skb).loginuid; audit_info.sessionid = NETLINK_CB(skb).sessionid; audit_info.secid = NETLINK_CB(skb).sid; - err = xfrm_policy_flush(type, &audit_info); + err = xfrm_policy_flush(net, type, &audit_info); if (err) return err; c.data.type = type; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; c.pid = nlh->nlmsg_pid; + c.net = net; km_policy_notify(NULL, 0, &c); return 0; } @@ -1560,6 +1578,7 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct xfrm_policy *xp; struct xfrm_user_polexpire *up = nlmsg_data(nlh); struct xfrm_userpolicy_info *p = &up->pol; @@ -1571,7 +1590,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, return err; if (p->index) - xp = xfrm_policy_byid(type, p->dir, p->index, 0, &err); + xp = xfrm_policy_byid(net, type, p->dir, p->index, 0, &err); else { struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_sec_ctx *ctx; @@ -1588,7 +1607,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) return err; } - xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, ctx, 0, &err); + xp = xfrm_policy_bysel_ctx(net, type, p->dir, &p->sel, ctx, 0, &err); security_xfrm_policy_free(ctx); } if (xp == NULL) @@ -1623,12 +1642,13 @@ out: static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct xfrm_state *x; int err; struct xfrm_user_expire *ue = nlmsg_data(nlh); struct xfrm_usersa_info *p = &ue->state; - x = xfrm_state_lookup(&p->id.daddr, p->id.spi, p->id.proto, p->family); + x = xfrm_state_lookup(net, &p->id.daddr, p->id.spi, p->id.proto, p->family); err = -ENOENT; if (x == NULL) @@ -1657,31 +1677,27 @@ out: static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct xfrm_policy *xp; struct xfrm_user_tmpl *ut; int i; struct nlattr *rt = attrs[XFRMA_TMPL]; struct xfrm_user_acquire *ua = nlmsg_data(nlh); - struct xfrm_state *x = xfrm_state_alloc(); + struct xfrm_state *x = xfrm_state_alloc(net); int err = -ENOMEM; if (!x) - return err; + goto nomem; err = verify_newpolicy_info(&ua->policy); - if (err) { - printk("BAD policy passed\n"); - kfree(x); - return err; - } + if (err) + goto bad_policy; /* build an XP */ - xp = xfrm_policy_construct(&ua->policy, attrs, &err); - if (!xp) { - kfree(x); - return err; - } + xp = xfrm_policy_construct(net, &ua->policy, attrs, &err); + if (!xp) + goto free_state; memcpy(&x->id, &ua->id, sizeof(ua->id)); memcpy(&x->props.saddr, &ua->saddr, sizeof(ua->saddr)); @@ -1706,6 +1722,13 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, kfree(xp); return 0; + +bad_policy: + printk("BAD policy passed\n"); +free_state: + kfree(x); +nomem: + return err; } #ifdef CONFIG_XFRM_MIGRATE @@ -1869,6 +1892,7 @@ static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_migrate, struct xfrm_kmaddress *k) { + struct net *net = &init_net; struct sk_buff *skb; skb = nlmsg_new(xfrm_migrate_msgsize(num_migrate, !!k), GFP_ATOMIC); @@ -1879,7 +1903,7 @@ static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, if (build_migrate(skb, m, num_migrate, k, sel, dir, type) < 0) BUG(); - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_MIGRATE, GFP_ATOMIC); + return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_MIGRATE, GFP_ATOMIC); } #else static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, @@ -1968,6 +1992,7 @@ static struct xfrm_link { static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { + struct net *net = sock_net(skb->sk); struct nlattr *attrs[XFRMA_MAX+1]; struct xfrm_link *link; int type, err; @@ -1989,7 +2014,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (link->dump == NULL) return -EINVAL; - return netlink_dump_start(xfrm_nl, skb, nlh, link->dump, link->done); + return netlink_dump_start(net->xfrm.nlsk, skb, nlh, link->dump, link->done); } err = nlmsg_parse(nlh, xfrm_msg_min[type], attrs, XFRMA_MAX, @@ -2033,6 +2058,7 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, struct km_eve static int xfrm_exp_state_notify(struct xfrm_state *x, struct km_event *c) { + struct net *net = xs_net(x); struct sk_buff *skb; skb = nlmsg_new(xfrm_expire_msgsize(), GFP_ATOMIC); @@ -2042,11 +2068,12 @@ static int xfrm_exp_state_notify(struct xfrm_state *x, struct km_event *c) if (build_expire(skb, x, c) < 0) BUG(); - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); + return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); } static int xfrm_aevent_state_notify(struct xfrm_state *x, struct km_event *c) { + struct net *net = xs_net(x); struct sk_buff *skb; skb = nlmsg_new(xfrm_aevent_msgsize(), GFP_ATOMIC); @@ -2056,11 +2083,12 @@ static int xfrm_aevent_state_notify(struct xfrm_state *x, struct km_event *c) if (build_aevent(skb, x, c) < 0) BUG(); - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_AEVENTS, GFP_ATOMIC); + return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_AEVENTS, GFP_ATOMIC); } static int xfrm_notify_sa_flush(struct km_event *c) { + struct net *net = c->net; struct xfrm_usersa_flush *p; struct nlmsghdr *nlh; struct sk_buff *skb; @@ -2081,7 +2109,7 @@ static int xfrm_notify_sa_flush(struct km_event *c) nlmsg_end(skb, nlh); - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); + return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); } static inline size_t xfrm_sa_len(struct xfrm_state *x) @@ -2111,6 +2139,7 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x) static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c) { + struct net *net = xs_net(x); struct xfrm_usersa_info *p; struct xfrm_usersa_id *id; struct nlmsghdr *nlh; @@ -2155,7 +2184,7 @@ static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c) nlmsg_end(skb, nlh); - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); + return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); nla_put_failure: /* Somebody screwed up with xfrm_sa_len! */ @@ -2235,6 +2264,7 @@ nlmsg_failure: static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, struct xfrm_policy *xp, int dir) { + struct net *net = xs_net(x); struct sk_buff *skb; skb = nlmsg_new(xfrm_acquire_msgsize(x, xp), GFP_ATOMIC); @@ -2244,7 +2274,7 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, if (build_acquire(skb, x, xt, xp, dir) < 0) BUG(); - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_ACQUIRE, GFP_ATOMIC); + return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_ACQUIRE, GFP_ATOMIC); } /* User gives us xfrm_user_policy_info followed by an array of 0 @@ -2253,6 +2283,7 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt, u8 *data, int len, int *dir) { + struct net *net = sock_net(sk); struct xfrm_userpolicy_info *p = (struct xfrm_userpolicy_info *)data; struct xfrm_user_tmpl *ut = (struct xfrm_user_tmpl *) (p + 1); struct xfrm_policy *xp; @@ -2291,7 +2322,7 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt, if (p->dir > XFRM_POLICY_OUT) return NULL; - xp = xfrm_policy_alloc(GFP_KERNEL); + xp = xfrm_policy_alloc(net, GFP_KERNEL); if (xp == NULL) { *dir = -ENOBUFS; return NULL; @@ -2344,6 +2375,7 @@ nlmsg_failure: static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c) { + struct net *net = xp_net(xp); struct sk_buff *skb; skb = nlmsg_new(xfrm_polexpire_msgsize(xp), GFP_ATOMIC); @@ -2353,11 +2385,12 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_eve if (build_polexpire(skb, xp, dir, c) < 0) BUG(); - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); + return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); } static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c) { + struct net *net = xp_net(xp); struct xfrm_userpolicy_info *p; struct xfrm_userpolicy_id *id; struct nlmsghdr *nlh; @@ -2408,7 +2441,7 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event * nlmsg_end(skb, nlh); - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); + return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); nlmsg_failure: kfree_skb(skb); @@ -2417,6 +2450,7 @@ nlmsg_failure: static int xfrm_notify_policy_flush(struct km_event *c) { + struct net *net = c->net; struct nlmsghdr *nlh; struct sk_buff *skb; @@ -2432,7 +2466,7 @@ static int xfrm_notify_policy_flush(struct km_event *c) nlmsg_end(skb, nlh); - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); + return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); nlmsg_failure: kfree_skb(skb); @@ -2488,8 +2522,8 @@ nla_put_failure: return -EMSGSIZE; } -static int xfrm_send_report(u8 proto, struct xfrm_selector *sel, - xfrm_address_t *addr) +static int xfrm_send_report(struct net *net, u8 proto, + struct xfrm_selector *sel, xfrm_address_t *addr) { struct sk_buff *skb; @@ -2500,7 +2534,59 @@ static int xfrm_send_report(u8 proto, struct xfrm_selector *sel, if (build_report(skb, proto, sel, addr) < 0) BUG(); - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_REPORT, GFP_ATOMIC); + return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_REPORT, GFP_ATOMIC); +} + +static inline size_t xfrm_mapping_msgsize(void) +{ + return NLMSG_ALIGN(sizeof(struct xfrm_user_mapping)); +} + +static int build_mapping(struct sk_buff *skb, struct xfrm_state *x, + xfrm_address_t *new_saddr, __be16 new_sport) +{ + struct xfrm_user_mapping *um; + struct nlmsghdr *nlh; + + nlh = nlmsg_put(skb, 0, 0, XFRM_MSG_MAPPING, sizeof(*um), 0); + if (nlh == NULL) + return -EMSGSIZE; + + um = nlmsg_data(nlh); + + memcpy(&um->id.daddr, &x->id.daddr, sizeof(um->id.daddr)); + um->id.spi = x->id.spi; + um->id.family = x->props.family; + um->id.proto = x->id.proto; + memcpy(&um->new_saddr, new_saddr, sizeof(um->new_saddr)); + memcpy(&um->old_saddr, &x->props.saddr, sizeof(um->old_saddr)); + um->new_sport = new_sport; + um->old_sport = x->encap->encap_sport; + um->reqid = x->props.reqid; + + return nlmsg_end(skb, nlh); +} + +static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, + __be16 sport) +{ + struct net *net = xs_net(x); + struct sk_buff *skb; + + if (x->id.proto != IPPROTO_ESP) + return -EINVAL; + + if (!x->encap) + return -EINVAL; + + skb = nlmsg_new(xfrm_mapping_msgsize(), GFP_ATOMIC); + if (skb == NULL) + return -ENOMEM; + + if (build_mapping(skb, x, ipaddr, sport) < 0) + BUG(); + + return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_MAPPING, GFP_ATOMIC); } static struct xfrm_mgr netlink_mgr = { @@ -2511,35 +2597,56 @@ static struct xfrm_mgr netlink_mgr = { .notify_policy = xfrm_send_policy_notify, .report = xfrm_send_report, .migrate = xfrm_send_migrate, + .new_mapping = xfrm_send_mapping, }; -static int __init xfrm_user_init(void) +static int __net_init xfrm_user_net_init(struct net *net) { struct sock *nlsk; - printk(KERN_INFO "Initializing XFRM netlink socket\n"); - - nlsk = netlink_kernel_create(&init_net, NETLINK_XFRM, XFRMNLGRP_MAX, + nlsk = netlink_kernel_create(net, NETLINK_XFRM, XFRMNLGRP_MAX, xfrm_netlink_rcv, NULL, THIS_MODULE); if (nlsk == NULL) return -ENOMEM; - rcu_assign_pointer(xfrm_nl, nlsk); - - xfrm_register_km(&netlink_mgr); - + rcu_assign_pointer(net->xfrm.nlsk, nlsk); return 0; } -static void __exit xfrm_user_exit(void) +static void __net_exit xfrm_user_net_exit(struct net *net) { - struct sock *nlsk = xfrm_nl; + struct sock *nlsk = net->xfrm.nlsk; - xfrm_unregister_km(&netlink_mgr); - rcu_assign_pointer(xfrm_nl, NULL); + rcu_assign_pointer(net->xfrm.nlsk, NULL); synchronize_rcu(); netlink_kernel_release(nlsk); } +static struct pernet_operations xfrm_user_net_ops = { + .init = xfrm_user_net_init, + .exit = xfrm_user_net_exit, +}; + +static int __init xfrm_user_init(void) +{ + int rv; + + printk(KERN_INFO "Initializing XFRM netlink socket\n"); + + rv = register_pernet_subsys(&xfrm_user_net_ops); + if (rv < 0) + return rv; + rv = xfrm_register_km(&netlink_mgr); + if (rv < 0) + unregister_pernet_subsys(&xfrm_user_net_ops); + return rv; +} + +static void __exit xfrm_user_exit(void) +{ + xfrm_unregister_km(&netlink_mgr); + unregister_pernet_subsys(&xfrm_user_net_ops); +} + module_init(xfrm_user_init); module_exit(xfrm_user_exit); MODULE_LICENSE("GPL"); |