diff options
Diffstat (limited to 'net/bridge')
42 files changed, 4222 insertions, 673 deletions
diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig index e143ca67888..9190ae462cb 100644 --- a/net/bridge/Kconfig +++ b/net/bridge/Kconfig @@ -31,3 +31,17 @@ config BRIDGE will be called bridge. If unsure, say N. + +config BRIDGE_IGMP_SNOOPING + bool "IGMP/MLD snooping" + depends on BRIDGE + depends on INET + default y + ---help--- + If you say Y here, then the Ethernet bridge will be able to selectively + forward multicast traffic based on IGMP/MLD traffic received from + each port. + + Say N to exclude this support and reduce the binary size. + + If unsure, say Y. diff --git a/net/bridge/Makefile b/net/bridge/Makefile index f444c12cde5..d0359ea8ee7 100644 --- a/net/bridge/Makefile +++ b/net/bridge/Makefile @@ -12,4 +12,6 @@ bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o bridge-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o +bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o + obj-$(CONFIG_BRIDGE_NF_EBTABLES) += netfilter/ diff --git a/net/bridge/br.c b/net/bridge/br.c index e1241c76239..76357b54775 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -38,7 +38,7 @@ static int __init br_init(void) err = stp_proto_register(&br_stp_proto); if (err < 0) { - printk(KERN_ERR "bridge: can't register sap for STP\n"); + pr_err("bridge: can't register sap for STP\n"); return err; } diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 1a99c4e04e8..753fc4221f3 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -13,8 +13,11 @@ #include <linux/kernel.h> #include <linux/netdevice.h> +#include <linux/netpoll.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> +#include <linux/list.h> +#include <linux/netfilter_bridge.h> #include <asm/uaccess.h> #include "br_private.h" @@ -25,20 +28,39 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) struct net_bridge *br = netdev_priv(dev); const unsigned char *dest = skb->data; struct net_bridge_fdb_entry *dst; + struct net_bridge_mdb_entry *mdst; + struct br_cpu_netstats 
*brstats = this_cpu_ptr(br->stats); - dev->stats.tx_packets++; - dev->stats.tx_bytes += skb->len; +#ifdef CONFIG_BRIDGE_NETFILTER + if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) { + br_nf_pre_routing_finish_bridge_slow(skb); + return NETDEV_TX_OK; + } +#endif + + brstats->tx_packets++; + brstats->tx_bytes += skb->len; + + BR_INPUT_SKB_CB(skb)->brdev = dev; skb_reset_mac_header(skb); skb_pull(skb, ETH_HLEN); - if (dest[0] & 1) - br_flood_deliver(br, skb); - else if ((dst = __br_fdb_get(br, dest)) != NULL) + if (is_multicast_ether_addr(dest)) { + if (br_multicast_rcv(br, NULL, skb)) + goto out; + + mdst = br_mdb_get(br, skb); + if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) + br_multicast_deliver(mdst, skb); + else + br_flood_deliver(br, skb); + } else if ((dst = __br_fdb_get(br, dest)) != NULL) br_deliver(dst->dst, skb); else br_flood_deliver(br, skb); +out: return NETDEV_TX_OK; } @@ -49,6 +71,7 @@ static int br_dev_open(struct net_device *dev) br_features_recompute(br); netif_start_queue(dev); br_stp_enable_bridge(br); + br_multicast_open(br); return 0; } @@ -59,13 +82,41 @@ static void br_dev_set_multicast_list(struct net_device *dev) static int br_dev_stop(struct net_device *dev) { - br_stp_disable_bridge(netdev_priv(dev)); + struct net_bridge *br = netdev_priv(dev); + + br_stp_disable_bridge(br); + br_multicast_stop(br); netif_stop_queue(dev); return 0; } +static struct net_device_stats *br_get_stats(struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + struct net_device_stats *stats = &dev->stats; + struct br_cpu_netstats sum = { 0 }; + unsigned int cpu; + + for_each_possible_cpu(cpu) { + const struct br_cpu_netstats *bstats + = per_cpu_ptr(br->stats, cpu); + + sum.tx_bytes += bstats->tx_bytes; + sum.tx_packets += bstats->tx_packets; + sum.rx_bytes += bstats->rx_bytes; + sum.rx_packets += bstats->rx_packets; + } + + stats->tx_bytes = sum.tx_bytes; + stats->tx_packets = sum.tx_packets; + stats->rx_bytes = sum.rx_bytes; + 
stats->rx_packets = sum.rx_packets; + + return stats; +} + static int br_change_mtu(struct net_device *dev, int new_mtu) { struct net_bridge *br = netdev_priv(dev); @@ -147,6 +198,70 @@ static int br_set_tx_csum(struct net_device *dev, u32 data) return 0; } +#ifdef CONFIG_NET_POLL_CONTROLLER +static bool br_devices_support_netpoll(struct net_bridge *br) +{ + struct net_bridge_port *p; + bool ret = true; + int count = 0; + unsigned long flags; + + spin_lock_irqsave(&br->lock, flags); + list_for_each_entry(p, &br->port_list, list) { + count++; + if ((p->dev->priv_flags & IFF_DISABLE_NETPOLL) || + !p->dev->netdev_ops->ndo_poll_controller) + ret = false; + } + spin_unlock_irqrestore(&br->lock, flags); + return count != 0 && ret; +} + +void br_netpoll_cleanup(struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + struct net_bridge_port *p, *n; + const struct net_device_ops *ops; + + br->dev->npinfo = NULL; + list_for_each_entry_safe(p, n, &br->port_list, list) { + if (p->dev) { + ops = p->dev->netdev_ops; + if (ops->ndo_netpoll_cleanup) + ops->ndo_netpoll_cleanup(p->dev); + else + p->dev->npinfo = NULL; + } + } +} + +void br_netpoll_disable(struct net_bridge *br, + struct net_device *dev) +{ + if (br_devices_support_netpoll(br)) + br->dev->priv_flags &= ~IFF_DISABLE_NETPOLL; + if (dev->netdev_ops->ndo_netpoll_cleanup) + dev->netdev_ops->ndo_netpoll_cleanup(dev); + else + dev->npinfo = NULL; +} + +void br_netpoll_enable(struct net_bridge *br, + struct net_device *dev) +{ + if (br_devices_support_netpoll(br)) { + br->dev->priv_flags &= ~IFF_DISABLE_NETPOLL; + if (br->dev->npinfo) + dev->npinfo = br->dev->npinfo; + } else if (!(br->dev->priv_flags & IFF_DISABLE_NETPOLL)) { + br->dev->priv_flags |= IFF_DISABLE_NETPOLL; + br_info(br,"new device %s does not support netpoll (disabling)", + dev->name); + } +} + +#endif + static const struct ethtool_ops br_ethtool_ops = { .get_drvinfo = br_getinfo, .get_link = ethtool_op_get_link, @@ -165,19 +280,31 @@ static 
const struct net_device_ops br_netdev_ops = { .ndo_open = br_dev_open, .ndo_stop = br_dev_stop, .ndo_start_xmit = br_dev_xmit, + .ndo_get_stats = br_get_stats, .ndo_set_mac_address = br_set_mac_address, .ndo_set_multicast_list = br_dev_set_multicast_list, .ndo_change_mtu = br_change_mtu, .ndo_do_ioctl = br_dev_ioctl, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_netpoll_cleanup = br_netpoll_cleanup, +#endif }; +static void br_dev_free(struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + + free_percpu(br->stats); + free_netdev(dev); +} + void br_dev_setup(struct net_device *dev) { random_ether_addr(dev->dev_addr); ether_setup(dev); dev->netdev_ops = &br_netdev_ops; - dev->destructor = free_netdev; + dev->destructor = br_dev_free; SET_ETHTOOL_OPS(dev, &br_ethtool_ops); dev->tx_queue_len = 0; dev->priv_flags = IFF_EBRIDGE; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 3b8e038ab32..b01dde35a69 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -20,6 +20,7 @@ #include <linux/etherdevice.h> #include <linux/jhash.h> #include <linux/random.h> +#include <linux/slab.h> #include <asm/atomic.h> #include <asm/unaligned.h> #include "br_private.h" @@ -127,7 +128,7 @@ void br_fdb_cleanup(unsigned long _data) { struct net_bridge *br = (struct net_bridge *)_data; unsigned long delay = hold_time(br); - unsigned long next_timer = jiffies + br->forward_delay; + unsigned long next_timer = jiffies + br->ageing_time; int i; spin_lock_bh(&br->hash_lock); @@ -148,9 +149,7 @@ void br_fdb_cleanup(unsigned long _data) } spin_unlock_bh(&br->hash_lock); - /* Add HZ/4 to ensure we round the jiffies upwards to be after the next - * timer, otherwise we might round down and will have no-op run. 
*/ - mod_timer(&br->gc_timer, round_jiffies(next_timer + HZ/4)); + mod_timer(&br->gc_timer, round_jiffies_up(next_timer)); } /* Completely flush all dynamic entries in forwarding database.*/ @@ -352,8 +351,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, */ if (fdb->is_local) return 0; - - printk(KERN_WARNING "%s adding interface with same address " + br_warn(br, "adding interface %s with same address " "as a received packet\n", source->dev->name); fdb_delete(fdb); @@ -396,9 +394,9 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, /* attempt to update an entry for a local interface */ if (unlikely(fdb->is_local)) { if (net_ratelimit()) - printk(KERN_WARNING "%s: received packet with " - "own address as source address\n", - source->dev->name); + br_warn(br, "received packet on %s with " + "own address as source address\n", + source->dev->name); } else { /* fastpath: update of existing entry */ fdb->dst = source; diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index bc1704ac6cd..595da45f908 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -11,13 +11,21 @@ * 2 of the License, or (at your option) any later version. 
*/ +#include <linux/err.h> +#include <linux/slab.h> #include <linux/kernel.h> #include <linux/netdevice.h> +#include <linux/netpoll.h> #include <linux/skbuff.h> #include <linux/if_vlan.h> #include <linux/netfilter_bridge.h> #include "br_private.h" +static int deliver_clone(const struct net_bridge_port *prev, + struct sk_buff *skb, + void (*__packet_hook)(const struct net_bridge_port *p, + struct sk_buff *skb)); + /* Don't forward packets to originating port or forwarding disabled */ static inline int should_deliver(const struct net_bridge_port *p, const struct sk_buff *skb) @@ -37,12 +45,11 @@ int br_dev_queue_push_xmit(struct sk_buff *skb) if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb)) kfree_skb(skb); else { - /* ip_refrag calls ip_fragment, doesn't copy the MAC header. */ + /* ip_fragment doesn't copy the MAC header */ if (nf_bridge_maybe_copy_header(skb)) kfree_skb(skb); else { skb_push(skb, ETH_HLEN); - dev_queue_xmit(skb); } } @@ -52,7 +59,7 @@ int br_dev_queue_push_xmit(struct sk_buff *skb) int br_forward_finish(struct sk_buff *skb) { - return NF_HOOK(PF_BRIDGE, NF_BR_POST_ROUTING, skb, NULL, skb->dev, + return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING, skb, NULL, skb->dev, br_dev_queue_push_xmit); } @@ -60,8 +67,8 @@ int br_forward_finish(struct sk_buff *skb) static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) { skb->dev = to->dev; - NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, - br_forward_finish); + NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, + br_forward_finish); } static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb) @@ -77,8 +84,8 @@ static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb) skb->dev = to->dev; skb_forward_csum(skb); - NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, indev, skb->dev, - br_forward_finish); + NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD, skb, indev, skb->dev, + br_forward_finish); } /* called with rcu_read_lock */ @@ 
-93,61 +100,165 @@ void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) } /* called with rcu_read_lock */ -void br_forward(const struct net_bridge_port *to, struct sk_buff *skb) +void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, struct sk_buff *skb0) { if (should_deliver(to, skb)) { - __br_forward(to, skb); + if (skb0) + deliver_clone(to, skb, __br_forward); + else + __br_forward(to, skb); return; } - kfree_skb(skb); + if (!skb0) + kfree_skb(skb); } -/* called under bridge lock */ -static void br_flood(struct net_bridge *br, struct sk_buff *skb, +static int deliver_clone(const struct net_bridge_port *prev, + struct sk_buff *skb, + void (*__packet_hook)(const struct net_bridge_port *p, + struct sk_buff *skb)) +{ + struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev; + + skb = skb_clone(skb, GFP_ATOMIC); + if (!skb) { + dev->stats.tx_dropped++; + return -ENOMEM; + } + + __packet_hook(prev, skb); + return 0; +} + +static struct net_bridge_port *maybe_deliver( + struct net_bridge_port *prev, struct net_bridge_port *p, + struct sk_buff *skb, void (*__packet_hook)(const struct net_bridge_port *p, struct sk_buff *skb)) { + int err; + + if (!should_deliver(p, skb)) + return prev; + + if (!prev) + goto out; + + err = deliver_clone(prev, skb, __packet_hook); + if (err) + return ERR_PTR(err); + +out: + return p; +} + +/* called under bridge lock */ +static void br_flood(struct net_bridge *br, struct sk_buff *skb, + struct sk_buff *skb0, + void (*__packet_hook)(const struct net_bridge_port *p, + struct sk_buff *skb)) +{ struct net_bridge_port *p; struct net_bridge_port *prev; prev = NULL; list_for_each_entry_rcu(p, &br->port_list, list) { - if (should_deliver(p, skb)) { - if (prev != NULL) { - struct sk_buff *skb2; - - if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) { - br->dev->stats.tx_dropped++; - kfree_skb(skb); - return; - } - - __packet_hook(prev, skb2); - } - - prev = p; - } + prev = maybe_deliver(prev, p, skb, __packet_hook); + 
if (IS_ERR(prev)) + goto out; } - if (prev != NULL) { + if (!prev) + goto out; + + if (skb0) + deliver_clone(prev, skb, __packet_hook); + else __packet_hook(prev, skb); - return; - } + return; - kfree_skb(skb); +out: + if (!skb0) + kfree_skb(skb); } /* called with rcu_read_lock */ void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb) { - br_flood(br, skb, __br_deliver); + br_flood(br, skb, NULL, __br_deliver); } /* called under bridge lock */ -void br_flood_forward(struct net_bridge *br, struct sk_buff *skb) +void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, + struct sk_buff *skb2) +{ + br_flood(br, skb, skb2, __br_forward); +} + +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING +/* called with rcu_read_lock */ +static void br_multicast_flood(struct net_bridge_mdb_entry *mdst, + struct sk_buff *skb, struct sk_buff *skb0, + void (*__packet_hook)( + const struct net_bridge_port *p, + struct sk_buff *skb)) +{ + struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev; + struct net_bridge *br = netdev_priv(dev); + struct net_bridge_port *prev = NULL; + struct net_bridge_port_group *p; + struct hlist_node *rp; + + rp = rcu_dereference(br->router_list.first); + p = mdst ? rcu_dereference(mdst->ports) : NULL; + while (p || rp) { + struct net_bridge_port *port, *lport, *rport; + + lport = p ? p->port : NULL; + rport = rp ? hlist_entry(rp, struct net_bridge_port, rlist) : + NULL; + + port = (unsigned long)lport > (unsigned long)rport ? 
+ lport : rport; + + prev = maybe_deliver(prev, port, skb, __packet_hook); + if (IS_ERR(prev)) + goto out; + + if ((unsigned long)lport >= (unsigned long)port) + p = rcu_dereference(p->next); + if ((unsigned long)rport >= (unsigned long)port) + rp = rcu_dereference(rp->next); + } + + if (!prev) + goto out; + + if (skb0) + deliver_clone(prev, skb, __packet_hook); + else + __packet_hook(prev, skb); + return; + +out: + if (!skb0) + kfree_skb(skb); +} + +/* called with rcu_read_lock */ +void br_multicast_deliver(struct net_bridge_mdb_entry *mdst, + struct sk_buff *skb) +{ + br_multicast_flood(mdst, skb, NULL, __br_deliver); +} + +/* called with rcu_read_lock */ +void br_multicast_forward(struct net_bridge_mdb_entry *mdst, + struct sk_buff *skb, struct sk_buff *skb2) { - br_flood(br, skb, __br_forward); + br_multicast_flood(mdst, skb, skb2, __br_forward); } +#endif diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index a2cbe61f6e6..18b245e2c00 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -13,12 +13,14 @@ #include <linux/kernel.h> #include <linux/netdevice.h> +#include <linux/netpoll.h> #include <linux/ethtool.h> #include <linux/if_arp.h> #include <linux/module.h> #include <linux/init.h> #include <linux/rtnetlink.h> #include <linux/if_ether.h> +#include <linux/slab.h> #include <net/sock.h> #include "br_private.h" @@ -131,7 +133,7 @@ static void del_nbp(struct net_bridge_port *p) struct net_bridge *br = p->br; struct net_device *dev = p->dev; - sysfs_remove_link(br->ifobj, dev->name); + sysfs_remove_link(br->ifobj, p->dev->name); dev_set_promiscuity(dev, -1); @@ -147,9 +149,12 @@ static void del_nbp(struct net_bridge_port *p) rcu_assign_pointer(dev->br_port, NULL); + br_multicast_del_port(p); + kobject_uevent(&p->kobj, KOBJ_REMOVE); kobject_del(&p->kobj); + br_netpoll_disable(br, dev); call_rcu(&p->rcu, destroy_nbp_rcu); } @@ -162,6 +167,8 @@ static void del_br(struct net_bridge *br, struct list_head *head) del_nbp(p); } + br_netpoll_cleanup(br->dev); + 
del_timer_sync(&br->gc_timer); br_sysfs_delbr(br->dev); @@ -183,6 +190,12 @@ static struct net_device *new_bridge_dev(struct net *net, const char *name) br = netdev_priv(dev); br->dev = dev; + br->stats = alloc_percpu(struct br_cpu_netstats); + if (!br->stats) { + free_netdev(dev); + return NULL; + } + spin_lock_init(&br->lock); INIT_LIST_HEAD(&br->port_list); spin_lock_init(&br->hash_lock); @@ -206,9 +219,8 @@ static struct net_device *new_bridge_dev(struct net *net, const char *name) br_netfilter_rtable_init(br); - INIT_LIST_HEAD(&br->age_list); - br_stp_timer_init(br); + br_multicast_init(br); return dev; } @@ -260,6 +272,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, br_init_port(p); p->state = BR_STATE_DISABLED; br_stp_port_timer_init(p); + br_multicast_add_port(p); return p; } @@ -435,6 +448,8 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) kobject_uevent(&p->kobj, KOBJ_ADD); + br_netpoll_enable(br, dev); + return 0; err2: br_fdb_delete_by_port(br, p, 1); @@ -467,7 +482,7 @@ int br_del_if(struct net_bridge *br, struct net_device *dev) return 0; } -void br_net_exit(struct net *net) +void __net_exit br_net_exit(struct net *net) { struct net_device *dev; LIST_HEAD(list); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 5ee1a3682bf..d36e700f7a2 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -11,6 +11,7 @@ * 2 of the License, or (at your option) any later version. */ +#include <linux/slab.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> @@ -20,18 +21,20 @@ /* Bridge group multicast address 802.1d (pg 51). 
*/ const u8 br_group_address[ETH_ALEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; -static void br_pass_frame_up(struct net_bridge *br, struct sk_buff *skb) +static int br_pass_frame_up(struct sk_buff *skb) { - struct net_device *indev, *brdev = br->dev; + struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev; + struct net_bridge *br = netdev_priv(brdev); + struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats); - brdev->stats.rx_packets++; - brdev->stats.rx_bytes += skb->len; + brstats->rx_packets++; + brstats->rx_bytes += skb->len; indev = skb->dev; skb->dev = brdev; - NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, indev, NULL, - netif_receive_skb); + return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, indev, NULL, + netif_receive_skb); } /* note: already called with rcu_read_lock (preempt_disabled) */ @@ -41,6 +44,7 @@ int br_handle_frame_finish(struct sk_buff *skb) struct net_bridge_port *p = rcu_dereference(skb->dev->br_port); struct net_bridge *br; struct net_bridge_fdb_entry *dst; + struct net_bridge_mdb_entry *mdst; struct sk_buff *skb2; if (!p || p->state == BR_STATE_DISABLED) @@ -50,9 +54,15 @@ int br_handle_frame_finish(struct sk_buff *skb) br = p->br; br_fdb_update(br, p, eth_hdr(skb)->h_source); + if (is_multicast_ether_addr(dest) && + br_multicast_rcv(br, p, skb)) + goto drop; + if (p->state == BR_STATE_LEARNING) goto drop; + BR_INPUT_SKB_CB(skb)->brdev = br->dev; + /* The packet skb2 goes to the local host (NULL to skip). 
*/ skb2 = NULL; @@ -62,27 +72,35 @@ int br_handle_frame_finish(struct sk_buff *skb) dst = NULL; if (is_multicast_ether_addr(dest)) { + mdst = br_mdb_get(br, skb); + if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) { + if ((mdst && !hlist_unhashed(&mdst->mglist)) || + br_multicast_is_router(br)) + skb2 = skb; + br_multicast_forward(mdst, skb, skb2); + skb = NULL; + if (!skb2) + goto out; + } else + skb2 = skb; + br->dev->stats.multicast++; - skb2 = skb; } else if ((dst = __br_fdb_get(br, dest)) && dst->is_local) { skb2 = skb; /* Do not forward the packet since it's local. */ skb = NULL; } - if (skb2 == skb) - skb2 = skb_clone(skb, GFP_ATOMIC); - - if (skb2) - br_pass_frame_up(br, skb2); - if (skb) { if (dst) - br_forward(dst->dst, skb); + br_forward(dst->dst, skb, skb2); else - br_flood_forward(br, skb); + br_flood_forward(br, skb, skb2); } + if (skb2) + return br_pass_frame_up(skb2); + out: return 0; drop: @@ -138,7 +156,7 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb) if (p->br->stp_enabled == BR_NO_STP && dest[5] == 0) goto forward; - if (NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev, + if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev, NULL, br_handle_local_finish)) return NULL; /* frame consumed by filter */ else @@ -159,7 +177,7 @@ forward: if (!compare_ether_addr(p->br->dev->dev_addr, dest)) skb->pkt_type = PACKET_HOST; - NF_HOOK(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, + NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, br_handle_frame_finish); break; default: diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index 2af6e4a9026..cb43312b846 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -15,6 +15,7 @@ #include <linux/kernel.h> #include <linux/if_bridge.h> #include <linux/netdevice.h> +#include <linux/slab.h> #include <linux/times.h> #include <net/net_namespace.h> #include <asm/uaccess.h> @@ -411,6 +412,6 @@ int br_dev_ioctl(struct net_device *dev, struct ifreq 
*rq, int cmd) } - pr_debug("Bridge does not support ioctl 0x%x\n", cmd); + br_debug(br, "Bridge does not support ioctl 0x%x\n", cmd); return -EOPNOTSUPP; } diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c new file mode 100644 index 00000000000..27ae946363f --- /dev/null +++ b/net/bridge/br_multicast.c @@ -0,0 +1,1812 @@ +/* + * Bridge multicast support. + * + * Copyright (c) 2010 Herbert Xu <herbert@gondor.apana.org.au> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include <linux/err.h> +#include <linux/if_ether.h> +#include <linux/igmp.h> +#include <linux/jhash.h> +#include <linux/kernel.h> +#include <linux/log2.h> +#include <linux/netdevice.h> +#include <linux/netfilter_bridge.h> +#include <linux/random.h> +#include <linux/rculist.h> +#include <linux/skbuff.h> +#include <linux/slab.h> +#include <linux/timer.h> +#include <net/ip.h> +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#include <net/ipv6.h> +#include <net/mld.h> +#include <net/addrconf.h> +#include <net/ip6_checksum.h> +#endif + +#include "br_private.h" + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static inline int ipv6_is_local_multicast(const struct in6_addr *addr) +{ + if (ipv6_addr_is_multicast(addr) && + IPV6_ADDR_MC_SCOPE(addr) <= IPV6_ADDR_SCOPE_LINKLOCAL) + return 1; + return 0; +} +#endif + +static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b) +{ + if (a->proto != b->proto) + return 0; + switch (a->proto) { + case htons(ETH_P_IP): + return a->u.ip4 == b->u.ip4; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case htons(ETH_P_IPV6): + return ipv6_addr_equal(&a->u.ip6, &b->u.ip6); +#endif + } + return 0; +} + +static inline int __br_ip4_hash(struct net_bridge_mdb_htable *mdb, __be32 ip) +{ + return 
jhash_1word(mdb->secret, (__force u32)ip) & (mdb->max - 1); +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static inline int __br_ip6_hash(struct net_bridge_mdb_htable *mdb, + const struct in6_addr *ip) +{ + return jhash2((__force u32 *)ip->s6_addr32, 4, mdb->secret) & (mdb->max - 1); +} +#endif + +static inline int br_ip_hash(struct net_bridge_mdb_htable *mdb, + struct br_ip *ip) +{ + switch (ip->proto) { + case htons(ETH_P_IP): + return __br_ip4_hash(mdb, ip->u.ip4); +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case htons(ETH_P_IPV6): + return __br_ip6_hash(mdb, &ip->u.ip6); +#endif + } + return 0; +} + +static struct net_bridge_mdb_entry *__br_mdb_ip_get( + struct net_bridge_mdb_htable *mdb, struct br_ip *dst, int hash) +{ + struct net_bridge_mdb_entry *mp; + struct hlist_node *p; + + hlist_for_each_entry_rcu(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) { + if (br_ip_equal(&mp->addr, dst)) + return mp; + } + + return NULL; +} + +static struct net_bridge_mdb_entry *br_mdb_ip_get( + struct net_bridge_mdb_htable *mdb, struct br_ip *dst) +{ + if (!mdb) + return NULL; + + return __br_mdb_ip_get(mdb, dst, br_ip_hash(mdb, dst)); +} + +static struct net_bridge_mdb_entry *br_mdb_ip4_get( + struct net_bridge_mdb_htable *mdb, __be32 dst) +{ + struct br_ip br_dst; + + br_dst.u.ip4 = dst; + br_dst.proto = htons(ETH_P_IP); + + return br_mdb_ip_get(mdb, &br_dst); +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static struct net_bridge_mdb_entry *br_mdb_ip6_get( + struct net_bridge_mdb_htable *mdb, const struct in6_addr *dst) +{ + struct br_ip br_dst; + + ipv6_addr_copy(&br_dst.u.ip6, dst); + br_dst.proto = htons(ETH_P_IPV6); + + return br_mdb_ip_get(mdb, &br_dst); +} +#endif + +struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, + struct sk_buff *skb) +{ + struct net_bridge_mdb_htable *mdb = br->mdb; + struct br_ip ip; + + if (br->multicast_disabled) + return NULL; + + if (BR_INPUT_SKB_CB(skb)->igmp) + return NULL; + + 
ip.proto = skb->protocol; + + switch (skb->protocol) { + case htons(ETH_P_IP): + ip.u.ip4 = ip_hdr(skb)->daddr; + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case htons(ETH_P_IPV6): + ipv6_addr_copy(&ip.u.ip6, &ipv6_hdr(skb)->daddr); + break; +#endif + default: + return NULL; + } + + return br_mdb_ip_get(mdb, &ip); +} + +static void br_mdb_free(struct rcu_head *head) +{ + struct net_bridge_mdb_htable *mdb = + container_of(head, struct net_bridge_mdb_htable, rcu); + struct net_bridge_mdb_htable *old = mdb->old; + + mdb->old = NULL; + kfree(old->mhash); + kfree(old); +} + +static int br_mdb_copy(struct net_bridge_mdb_htable *new, + struct net_bridge_mdb_htable *old, + int elasticity) +{ + struct net_bridge_mdb_entry *mp; + struct hlist_node *p; + int maxlen; + int len; + int i; + + for (i = 0; i < old->max; i++) + hlist_for_each_entry(mp, p, &old->mhash[i], hlist[old->ver]) + hlist_add_head(&mp->hlist[new->ver], + &new->mhash[br_ip_hash(new, &mp->addr)]); + + if (!elasticity) + return 0; + + maxlen = 0; + for (i = 0; i < new->max; i++) { + len = 0; + hlist_for_each_entry(mp, p, &new->mhash[i], hlist[new->ver]) + len++; + if (len > maxlen) + maxlen = len; + } + + return maxlen > elasticity ? 
-EINVAL : 0; +} + +static void br_multicast_free_pg(struct rcu_head *head) +{ + struct net_bridge_port_group *p = + container_of(head, struct net_bridge_port_group, rcu); + + kfree(p); +} + +static void br_multicast_free_group(struct rcu_head *head) +{ + struct net_bridge_mdb_entry *mp = + container_of(head, struct net_bridge_mdb_entry, rcu); + + kfree(mp); +} + +static void br_multicast_group_expired(unsigned long data) +{ + struct net_bridge_mdb_entry *mp = (void *)data; + struct net_bridge *br = mp->br; + struct net_bridge_mdb_htable *mdb; + + spin_lock(&br->multicast_lock); + if (!netif_running(br->dev) || timer_pending(&mp->timer)) + goto out; + + if (!hlist_unhashed(&mp->mglist)) + hlist_del_init(&mp->mglist); + + if (mp->ports) + goto out; + + mdb = br->mdb; + hlist_del_rcu(&mp->hlist[mdb->ver]); + mdb->size--; + + del_timer(&mp->query_timer); + call_rcu_bh(&mp->rcu, br_multicast_free_group); + +out: + spin_unlock(&br->multicast_lock); +} + +static void br_multicast_del_pg(struct net_bridge *br, + struct net_bridge_port_group *pg) +{ + struct net_bridge_mdb_htable *mdb = br->mdb; + struct net_bridge_mdb_entry *mp; + struct net_bridge_port_group *p; + struct net_bridge_port_group **pp; + + mp = br_mdb_ip_get(mdb, &pg->addr); + if (WARN_ON(!mp)) + return; + + for (pp = &mp->ports; (p = *pp); pp = &p->next) { + if (p != pg) + continue; + + rcu_assign_pointer(*pp, p->next); + hlist_del_init(&p->mglist); + del_timer(&p->timer); + del_timer(&p->query_timer); + call_rcu_bh(&p->rcu, br_multicast_free_pg); + + if (!mp->ports && hlist_unhashed(&mp->mglist) && + netif_running(br->dev)) + mod_timer(&mp->timer, jiffies); + + return; + } + + WARN_ON(1); +} + +static void br_multicast_port_group_expired(unsigned long data) +{ + struct net_bridge_port_group *pg = (void *)data; + struct net_bridge *br = pg->port->br; + + spin_lock(&br->multicast_lock); + if (!netif_running(br->dev) || timer_pending(&pg->timer) || + hlist_unhashed(&pg->mglist)) + goto out; + + 
br_multicast_del_pg(br, pg); + +out: + spin_unlock(&br->multicast_lock); +} + +static int br_mdb_rehash(struct net_bridge_mdb_htable **mdbp, int max, + int elasticity) +{ + struct net_bridge_mdb_htable *old = *mdbp; + struct net_bridge_mdb_htable *mdb; + int err; + + mdb = kmalloc(sizeof(*mdb), GFP_ATOMIC); + if (!mdb) + return -ENOMEM; + + mdb->max = max; + mdb->old = old; + + mdb->mhash = kzalloc(max * sizeof(*mdb->mhash), GFP_ATOMIC); + if (!mdb->mhash) { + kfree(mdb); + return -ENOMEM; + } + + mdb->size = old ? old->size : 0; + mdb->ver = old ? old->ver ^ 1 : 0; + + if (!old || elasticity) + get_random_bytes(&mdb->secret, sizeof(mdb->secret)); + else + mdb->secret = old->secret; + + if (!old) + goto out; + + err = br_mdb_copy(mdb, old, elasticity); + if (err) { + kfree(mdb->mhash); + kfree(mdb); + return err; + } + + call_rcu_bh(&mdb->rcu, br_mdb_free); + +out: + rcu_assign_pointer(*mdbp, mdb); + + return 0; +} + +static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br, + __be32 group) +{ + struct sk_buff *skb; + struct igmphdr *ih; + struct ethhdr *eth; + struct iphdr *iph; + + skb = netdev_alloc_skb_ip_align(br->dev, sizeof(*eth) + sizeof(*iph) + + sizeof(*ih) + 4); + if (!skb) + goto out; + + skb->protocol = htons(ETH_P_IP); + + skb_reset_mac_header(skb); + eth = eth_hdr(skb); + + memcpy(eth->h_source, br->dev->dev_addr, 6); + eth->h_dest[0] = 1; + eth->h_dest[1] = 0; + eth->h_dest[2] = 0x5e; + eth->h_dest[3] = 0; + eth->h_dest[4] = 0; + eth->h_dest[5] = 1; + eth->h_proto = htons(ETH_P_IP); + skb_put(skb, sizeof(*eth)); + + skb_set_network_header(skb, skb->len); + iph = ip_hdr(skb); + + iph->version = 4; + iph->ihl = 6; + iph->tos = 0xc0; + iph->tot_len = htons(sizeof(*iph) + sizeof(*ih) + 4); + iph->id = 0; + iph->frag_off = htons(IP_DF); + iph->ttl = 1; + iph->protocol = IPPROTO_IGMP; + iph->saddr = 0; + iph->daddr = htonl(INADDR_ALLHOSTS_GROUP); + ((u8 *)&iph[1])[0] = IPOPT_RA; + ((u8 *)&iph[1])[1] = 4; + ((u8 *)&iph[1])[2] = 0; + ((u8 
*)&iph[1])[3] = 0; + ip_send_check(iph); + skb_put(skb, 24); + + skb_set_transport_header(skb, skb->len); + ih = igmp_hdr(skb); + ih->type = IGMP_HOST_MEMBERSHIP_QUERY; + ih->code = (group ? br->multicast_last_member_interval : + br->multicast_query_response_interval) / + (HZ / IGMP_TIMER_SCALE); + ih->group = group; + ih->csum = 0; + ih->csum = ip_compute_csum((void *)ih, sizeof(struct igmphdr)); + skb_put(skb, sizeof(*ih)); + + __skb_pull(skb, sizeof(*eth)); + +out: + return skb; +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +/* + * Build an MLD query for @group (the unspecified address :: yields a + * general query). The packet is addressed to ff02::1 and carries a + * hop-by-hop Router Alert option. Returns the skb with the Ethernet + * header already pulled, or NULL if the allocation fails. + */ +static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, + struct in6_addr *group) +{ + struct sk_buff *skb; + struct ipv6hdr *ip6h; + struct mld_msg *mldq; + struct ethhdr *eth; + u8 *hopopt; + unsigned long interval; + + skb = netdev_alloc_skb_ip_align(br->dev, sizeof(*eth) + sizeof(*ip6h) + + 8 + sizeof(*mldq)); + if (!skb) + goto out; + + skb->protocol = htons(ETH_P_IPV6); + + /* Ethernet header (destination is filled in once daddr is known) */ + skb_reset_mac_header(skb); + eth = eth_hdr(skb); + + memcpy(eth->h_source, br->dev->dev_addr, 6); + eth->h_proto = htons(ETH_P_IPV6); + skb_put(skb, sizeof(*eth)); + + /* IPv6 header + HbH option */ + skb_set_network_header(skb, skb->len); + ip6h = ipv6_hdr(skb); + + *(__force __be32 *)ip6h = htonl(0x60000000); + /* payload_len is a big-endian wire field */ + ip6h->payload_len = htons(8 + sizeof(*mldq)); + ip6h->nexthdr = IPPROTO_HOPOPTS; + ip6h->hop_limit = 1; + ipv6_addr_set(&ip6h->saddr, 0, 0, 0, 0); + ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1)); + /* the Ethernet destination must match the IPv6 destination */ + ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest); + + hopopt = (u8 *)(ip6h + 1); + hopopt[0] = IPPROTO_ICMPV6; /* next hdr */ + hopopt[1] = 0; /* length of HbH */ + hopopt[2] = IPV6_TLV_ROUTERALERT; /* Router Alert */ + hopopt[3] = 2; /* Length of RA Option */ + hopopt[4] = 0; /* Type = 0x0000 (MLD) */ + hopopt[5] = 0; + hopopt[6] = IPV6_TLV_PAD0; /* Pad0 */ + hopopt[7] = IPV6_TLV_PAD0; /* Pad0 */ + + skb_put(skb, sizeof(*ip6h) + 8); + + /* ICMPv6 */ + skb_set_transport_header(skb, skb->len); + mldq = 
(struct mld_msg *) icmp6_hdr(skb); + + /* general query: response interval; group-specific: last member interval (same choice as the IGMP builder) */ + interval = ipv6_addr_any(group) ? br->multicast_query_response_interval : + br->multicast_last_member_interval; + + mldq->mld_type = ICMPV6_MGM_QUERY; + mldq->mld_code = 0; + mldq->mld_cksum = 0; + mldq->mld_maxdelay = htons((u16)jiffies_to_msecs(interval)); + mldq->mld_reserved = 0; + ipv6_addr_copy(&mldq->mld_mca, group); + + /* checksum */ + mldq->mld_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, + sizeof(*mldq), IPPROTO_ICMPV6, + csum_partial(mldq, + sizeof(*mldq), 0)); + skb_put(skb, sizeof(*mldq)); + + __skb_pull(skb, sizeof(*eth)); + +out: + return skb; +} +#endif + +static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br, + struct br_ip *addr) +{ + switch (addr->proto) { + case htons(ETH_P_IP): + return br_ip4_multicast_alloc_query(br, addr->u.ip4); +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case htons(ETH_P_IPV6): + return br_ip6_multicast_alloc_query(br, &addr->u.ip6); +#endif + } + return NULL; +} + +static void br_multicast_send_group_query(struct net_bridge_mdb_entry *mp) +{ + struct net_bridge *br = mp->br; + struct sk_buff *skb; + + skb = br_multicast_alloc_query(br, &mp->addr); + if (!skb) + goto timer; + + netif_rx(skb); + +timer: + if (++mp->queries_sent < br->multicast_last_member_count) + mod_timer(&mp->query_timer, + jiffies + br->multicast_last_member_interval); +} + +static void br_multicast_group_query_expired(unsigned long data) +{ + struct net_bridge_mdb_entry *mp = (void *)data; + struct net_bridge *br = mp->br; + + spin_lock(&br->multicast_lock); + if (!netif_running(br->dev) || hlist_unhashed(&mp->mglist) || + mp->queries_sent >= br->multicast_last_member_count) + goto out; + + br_multicast_send_group_query(mp); + +out: + spin_unlock(&br->multicast_lock); +} + +static void br_multicast_send_port_group_query(struct net_bridge_port_group *pg) +{ + struct net_bridge_port *port = pg->port; + struct net_bridge *br = port->br; + struct sk_buff *skb; + + skb = 
br_multicast_alloc_query(br, &pg->addr); + if (!skb) + goto timer; + + br_deliver(port, skb); + +timer: + if (++pg->queries_sent < br->multicast_last_member_count) + mod_timer(&pg->query_timer, + jiffies + br->multicast_last_member_interval); +} + +static void br_multicast_port_group_query_expired(unsigned long data) +{ + struct net_bridge_port_group *pg = (void *)data; + struct net_bridge_port *port = pg->port; + struct net_bridge *br = port->br; + + spin_lock(&br->multicast_lock); + if (!netif_running(br->dev) || hlist_unhashed(&pg->mglist) || + pg->queries_sent >= br->multicast_last_member_count) + goto out; + + br_multicast_send_port_group_query(pg); + +out: + spin_unlock(&br->multicast_lock); +} + +static struct net_bridge_mdb_entry *br_multicast_get_group( + struct net_bridge *br, struct net_bridge_port *port, + struct br_ip *group, int hash) +{ + struct net_bridge_mdb_htable *mdb = br->mdb; + struct net_bridge_mdb_entry *mp; + struct hlist_node *p; + unsigned count = 0; + unsigned max; + int elasticity; + int err; + + hlist_for_each_entry(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) { + count++; + if (unlikely(br_ip_equal(group, &mp->addr))) + return mp; + } + + elasticity = 0; + max = mdb->max; + + if (unlikely(count > br->hash_elasticity && count)) { + if (net_ratelimit()) + br_info(br, "Multicast hash table " + "chain limit reached: %s\n", + port ? port->dev->name : br->dev->name); + + elasticity = br->hash_elasticity; + } + + if (mdb->size >= max) { + max *= 2; + if (unlikely(max >= br->hash_max)) { + br_warn(br, "Multicast hash table maximum " + "reached, disabling snooping: %s, %d\n", + port ? port->dev->name : br->dev->name, max); + err = -E2BIG; +disable: + br->multicast_disabled = 1; + goto err; + } + } + + if (max > mdb->max || elasticity) { + if (mdb->old) { + if (net_ratelimit()) + br_info(br, "Multicast hash table " + "on fire: %s\n", + port ? 
port->dev->name : br->dev->name); + err = -EEXIST; + goto err; + } + + err = br_mdb_rehash(&br->mdb, max, elasticity); + if (err) { + br_warn(br, "Cannot rehash multicast " + "hash table, disabling snooping: %s, %d, %d\n", + port ? port->dev->name : br->dev->name, + mdb->size, err); + goto disable; + } + + err = -EAGAIN; + goto err; + } + + return NULL; + +err: + mp = ERR_PTR(err); + return mp; +} + +/* + * Look up @group in the bridge's multicast hash table and create an entry + * if none exists. Returns the existing or new entry, an ERR_PTR() passed + * on from br_multicast_get_group(), or NULL if an allocation fails. + */ +static struct net_bridge_mdb_entry *br_multicast_new_group( + struct net_bridge *br, struct net_bridge_port *port, + struct br_ip *group) +{ + struct net_bridge_mdb_htable *mdb = br->mdb; + struct net_bridge_mdb_entry *mp; + int hash; + + if (!mdb) { + if (br_mdb_rehash(&br->mdb, BR_HASH_SIZE, 0)) + return NULL; + goto rehash; + } + + hash = br_ip_hash(mdb, group); + mp = br_multicast_get_group(br, port, group, hash); + switch (PTR_ERR(mp)) { + case 0: + break; + + case -EAGAIN: +rehash: + mdb = br->mdb; + hash = br_ip_hash(mdb, group); + break; + + default: + goto out; + } + + mp = kzalloc(sizeof(*mp), GFP_ATOMIC); + if (unlikely(!mp)) + goto out; + + mp->br = br; + mp->addr = *group; + setup_timer(&mp->timer, br_multicast_group_expired, + (unsigned long)mp); + setup_timer(&mp->query_timer, br_multicast_group_query_expired, + (unsigned long)mp); + + hlist_add_head_rcu(&mp->hlist[mdb->ver], &mdb->mhash[hash]); + mdb->size++; + +out: + return mp; +} + +/* + * Record membership of @group on @port, or on the bridge's own list + * (br->mglist) when @port is NULL, and (re)arm the membership timer. + * Returns 0 on success (also when the bridge is down or the port is + * disabled) or a negative errno. + */ +static int br_multicast_add_group(struct net_bridge *br, + struct net_bridge_port *port, + struct br_ip *group) +{ + struct net_bridge_mdb_entry *mp; + struct net_bridge_port_group *p; + struct net_bridge_port_group **pp; + unsigned long now = jiffies; + int err; + + spin_lock(&br->multicast_lock); + if (!netif_running(br->dev) || + (port && port->state == BR_STATE_DISABLED)) + goto out; + + mp = br_multicast_new_group(br, port, group); + /* a NULL return means an allocation failed; do not report success */ + err = mp ? PTR_ERR(mp) : -ENOMEM; + if (unlikely(IS_ERR(mp) || !mp)) + goto err; + + if (!port) { + /* mp may already be on br->mglist; adding it twice corrupts the list */ + if (hlist_unhashed(&mp->mglist)) + hlist_add_head(&mp->mglist, &br->mglist); + mod_timer(&mp->timer, now + 
br->multicast_membership_interval); + goto out; + } + + for (pp = &mp->ports; (p = *pp); pp = &p->next) { + if (p->port == port) + goto found; + if ((unsigned long)p->port < (unsigned long)port) + break; + } + + p = kzalloc(sizeof(*p), GFP_ATOMIC); + err = -ENOMEM; + if (unlikely(!p)) + goto err; + + p->addr = *group; + p->port = port; + p->next = *pp; + hlist_add_head(&p->mglist, &port->mglist); + setup_timer(&p->timer, br_multicast_port_group_expired, + (unsigned long)p); + setup_timer(&p->query_timer, br_multicast_port_group_query_expired, + (unsigned long)p); + + rcu_assign_pointer(*pp, p); + +found: + mod_timer(&p->timer, now + br->multicast_membership_interval); +out: + err = 0; + +err: + spin_unlock(&br->multicast_lock); + return err; +} + +static int br_ip4_multicast_add_group(struct net_bridge *br, + struct net_bridge_port *port, + __be32 group) +{ + struct br_ip br_group; + + if (ipv4_is_local_multicast(group)) + return 0; + + br_group.u.ip4 = group; + br_group.proto = htons(ETH_P_IP); + + return br_multicast_add_group(br, port, &br_group); +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +/* + * IPv6 front end of br_multicast_add_group(): groups for which + * ipv6_is_local_multicast() is true are ignored (returns 0); any other + * group is recorded under protocol ETH_P_IPV6. + */ +static int br_ip6_multicast_add_group(struct net_bridge *br, + struct net_bridge_port *port, + const struct in6_addr *group) +{ + struct br_ip br_group; + + if (ipv6_is_local_multicast(group)) + return 0; + + ipv6_addr_copy(&br_group.u.ip6, group); + /* must be ETH_P_IPV6 so that IPv6 lookups and queries match this entry */ + br_group.proto = htons(ETH_P_IPV6); + + return br_multicast_add_group(br, port, &br_group); +} +#endif + +static void br_multicast_router_expired(unsigned long data) +{ + struct net_bridge_port *port = (void *)data; + struct net_bridge *br = port->br; + + spin_lock(&br->multicast_lock); + if (port->multicast_router != 1 || + timer_pending(&port->multicast_router_timer) || + hlist_unhashed(&port->rlist)) + goto out; + + hlist_del_init_rcu(&port->rlist); + +out: + spin_unlock(&br->multicast_lock); +} + +static void br_multicast_local_router_expired(unsigned long data) +{ +} + +static void 
__br_multicast_send_query(struct net_bridge *br, + struct net_bridge_port *port, + struct br_ip *ip) +{ + struct sk_buff *skb; + + skb = br_multicast_alloc_query(br, ip); + if (!skb) + return; + + if (port) { + __skb_push(skb, sizeof(struct ethhdr)); + skb->dev = port->dev; + NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, + dev_queue_xmit); + } else + netif_rx(skb); +} + +static void br_multicast_send_query(struct net_bridge *br, + struct net_bridge_port *port, u32 sent) +{ + unsigned long time; + struct br_ip br_group; + + if (!netif_running(br->dev) || br->multicast_disabled || + timer_pending(&br->multicast_querier_timer)) + return; + + memset(&br_group.u, 0, sizeof(br_group.u)); + + br_group.proto = htons(ETH_P_IP); + __br_multicast_send_query(br, port, &br_group); + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + br_group.proto = htons(ETH_P_IPV6); + __br_multicast_send_query(br, port, &br_group); +#endif + + time = jiffies; + time += sent < br->multicast_startup_query_count ? + br->multicast_startup_query_interval : + br->multicast_query_interval; + mod_timer(port ? 
&port->multicast_query_timer : + &br->multicast_query_timer, time); +} + +static void br_multicast_port_query_expired(unsigned long data) +{ + struct net_bridge_port *port = (void *)data; + struct net_bridge *br = port->br; + + spin_lock(&br->multicast_lock); + if (port->state == BR_STATE_DISABLED || + port->state == BR_STATE_BLOCKING) + goto out; + + if (port->multicast_startup_queries_sent < + br->multicast_startup_query_count) + port->multicast_startup_queries_sent++; + + br_multicast_send_query(port->br, port, + port->multicast_startup_queries_sent); + +out: + spin_unlock(&br->multicast_lock); +} + +void br_multicast_add_port(struct net_bridge_port *port) +{ + port->multicast_router = 1; + + setup_timer(&port->multicast_router_timer, br_multicast_router_expired, + (unsigned long)port); + setup_timer(&port->multicast_query_timer, + br_multicast_port_query_expired, (unsigned long)port); +} + +void br_multicast_del_port(struct net_bridge_port *port) +{ + del_timer_sync(&port->multicast_router_timer); +} + +static void __br_multicast_enable_port(struct net_bridge_port *port) +{ + port->multicast_startup_queries_sent = 0; + + if (try_to_del_timer_sync(&port->multicast_query_timer) >= 0 || + del_timer(&port->multicast_query_timer)) + mod_timer(&port->multicast_query_timer, jiffies); +} + +void br_multicast_enable_port(struct net_bridge_port *port) +{ + struct net_bridge *br = port->br; + + spin_lock(&br->multicast_lock); + if (br->multicast_disabled || !netif_running(br->dev)) + goto out; + + __br_multicast_enable_port(port); + +out: + spin_unlock(&br->multicast_lock); +} + +void br_multicast_disable_port(struct net_bridge_port *port) +{ + struct net_bridge *br = port->br; + struct net_bridge_port_group *pg; + struct hlist_node *p, *n; + + spin_lock(&br->multicast_lock); + hlist_for_each_entry_safe(pg, p, n, &port->mglist, mglist) + br_multicast_del_pg(br, pg); + + if (!hlist_unhashed(&port->rlist)) + hlist_del_init_rcu(&port->rlist); + 
del_timer(&port->multicast_router_timer); + del_timer(&port->multicast_query_timer); + spin_unlock(&br->multicast_lock); +} + +static int br_ip4_multicast_igmp3_report(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb) +{ + struct igmpv3_report *ih; + struct igmpv3_grec *grec; + int i; + int len; + int num; + int type; + int err = 0; + __be32 group; + + if (!pskb_may_pull(skb, sizeof(*ih))) + return -EINVAL; + + ih = igmpv3_report_hdr(skb); + num = ntohs(ih->ngrec); + len = sizeof(*ih); + + for (i = 0; i < num; i++) { + len += sizeof(*grec); + if (!pskb_may_pull(skb, len)) + return -EINVAL; + + grec = (void *)(skb->data + len - sizeof(*grec)); + group = grec->grec_mca; + type = grec->grec_type; + + len += ntohs(grec->grec_nsrcs) * 4; + if (!pskb_may_pull(skb, len)) + return -EINVAL; + + /* We treat this as an IGMPv2 report for now. */ + switch (type) { + case IGMPV3_MODE_IS_INCLUDE: + case IGMPV3_MODE_IS_EXCLUDE: + case IGMPV3_CHANGE_TO_INCLUDE: + case IGMPV3_CHANGE_TO_EXCLUDE: + case IGMPV3_ALLOW_NEW_SOURCES: + case IGMPV3_BLOCK_OLD_SOURCES: + break; + + default: + continue; + } + + err = br_ip4_multicast_add_group(br, port, group); + if (err) + break; + } + + return err; +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static int br_ip6_multicast_mld2_report(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb) +{ + struct icmp6hdr *icmp6h; + struct mld2_grec *grec; + int i; + int len; + int num; + int err = 0; + + if (!pskb_may_pull(skb, sizeof(*icmp6h))) + return -EINVAL; + + icmp6h = icmp6_hdr(skb); + num = ntohs(icmp6h->icmp6_dataun.un_data16[1]); + len = sizeof(*icmp6h); + + for (i = 0; i < num; i++) { + __be16 *nsrcs, _nsrcs; + + nsrcs = skb_header_pointer(skb, + len + offsetof(struct mld2_grec, + grec_mca), + sizeof(_nsrcs), &_nsrcs); + if (!nsrcs) + return -EINVAL; + + if (!pskb_may_pull(skb, + len + sizeof(*grec) + + sizeof(struct in6_addr) * (*nsrcs))) + return -EINVAL; + + grec = (struct 
mld2_grec *)(skb->data + len); + len += sizeof(*grec) + sizeof(struct in6_addr) * (*nsrcs); + + /* We treat these as MLDv1 reports for now. */ + switch (grec->grec_type) { + case MLD2_MODE_IS_INCLUDE: + case MLD2_MODE_IS_EXCLUDE: + case MLD2_CHANGE_TO_INCLUDE: + case MLD2_CHANGE_TO_EXCLUDE: + case MLD2_ALLOW_NEW_SOURCES: + case MLD2_BLOCK_OLD_SOURCES: + break; + + default: + continue; + } + + err = br_ip6_multicast_add_group(br, port, &grec->grec_mca); + if (!err) + break; + } + + return err; +} +#endif + +/* + * Add port to rotuer_list + * list is maintained ordered by pointer value + * and locked by br->multicast_lock and RCU + */ +static void br_multicast_add_router(struct net_bridge *br, + struct net_bridge_port *port) +{ + struct net_bridge_port *p; + struct hlist_node *n, *slot = NULL; + + hlist_for_each_entry(p, n, &br->router_list, rlist) { + if ((unsigned long) port >= (unsigned long) p) + break; + slot = n; + } + + if (slot) + hlist_add_after_rcu(slot, &port->rlist); + else + hlist_add_head_rcu(&port->rlist, &br->router_list); +} + +static void br_multicast_mark_router(struct net_bridge *br, + struct net_bridge_port *port) +{ + unsigned long now = jiffies; + + if (!port) { + if (br->multicast_router == 1) + mod_timer(&br->multicast_router_timer, + now + br->multicast_querier_interval); + return; + } + + if (port->multicast_router != 1) + return; + + if (!hlist_unhashed(&port->rlist)) + goto timer; + + br_multicast_add_router(br, port); + +timer: + mod_timer(&port->multicast_router_timer, + now + br->multicast_querier_interval); +} + +static void br_multicast_query_received(struct net_bridge *br, + struct net_bridge_port *port, + int saddr) +{ + if (saddr) + mod_timer(&br->multicast_querier_timer, + jiffies + br->multicast_querier_interval); + else if (timer_pending(&br->multicast_querier_timer)) + return; + + br_multicast_mark_router(br, port); +} + +static int br_ip4_multicast_query(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff 
*skb) +{ + struct iphdr *iph = ip_hdr(skb); + struct igmphdr *ih = igmp_hdr(skb); + struct net_bridge_mdb_entry *mp; + struct igmpv3_query *ih3; + struct net_bridge_port_group *p; + struct net_bridge_port_group **pp; + unsigned long max_delay; + unsigned long now = jiffies; + __be32 group; + int err = 0; + + spin_lock(&br->multicast_lock); + if (!netif_running(br->dev) || + (port && port->state == BR_STATE_DISABLED)) + goto out; + + br_multicast_query_received(br, port, !!iph->saddr); + + group = ih->group; + + if (skb->len == sizeof(*ih)) { + max_delay = ih->code * (HZ / IGMP_TIMER_SCALE); + + if (!max_delay) { + max_delay = 10 * HZ; + group = 0; + } + } else { + if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) { + err = -EINVAL; + goto out; + } + + ih3 = igmpv3_query_hdr(skb); + if (ih3->nsrcs) + goto out; + + max_delay = ih3->code ? + IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE) : 1; + } + + if (!group) + goto out; + + mp = br_mdb_ip4_get(br->mdb, group); + if (!mp) + goto out; + + max_delay *= br->multicast_last_member_count; + + if (!hlist_unhashed(&mp->mglist) && + (timer_pending(&mp->timer) ? + time_after(mp->timer.expires, now + max_delay) : + try_to_del_timer_sync(&mp->timer) >= 0)) + mod_timer(&mp->timer, now + max_delay); + + for (pp = &mp->ports; (p = *pp); pp = &p->next) { + if (timer_pending(&p->timer) ? 
+ time_after(p->timer.expires, now + max_delay) : + try_to_del_timer_sync(&p->timer) >= 0) + /* shorten this port group's own timer, not the bridge group's */ + mod_timer(&p->timer, now + max_delay); + } + +out: + spin_unlock(&br->multicast_lock); + return err; +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +/* + * Handle a received MLD query. Notes the querier via + * br_multicast_query_received() and, when the query names a group that is + * in the hash table, shortens the group's and its port groups' timers to + * max_delay * multicast_last_member_count from now. Returns 0, or -EINVAL + * if the header cannot be pulled. + */ +static int br_ip6_multicast_query(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb) +{ + struct ipv6hdr *ip6h = ipv6_hdr(skb); + struct mld_msg *mld = (struct mld_msg *) icmp6_hdr(skb); + struct net_bridge_mdb_entry *mp; + struct mld2_query *mld2q; + struct net_bridge_port_group *p, **pp; + unsigned long max_delay; + unsigned long now = jiffies; + struct in6_addr *group = NULL; + int err = 0; + + spin_lock(&br->multicast_lock); + if (!netif_running(br->dev) || + (port && port->state == BR_STATE_DISABLED)) + goto out; + + br_multicast_query_received(br, port, !ipv6_addr_any(&ip6h->saddr)); + + if (skb->len == sizeof(*mld)) { + if (!pskb_may_pull(skb, sizeof(*mld))) { + err = -EINVAL; + goto out; + } + mld = (struct mld_msg *) icmp6_hdr(skb); + /* mld_maxdelay comes off the wire: network to host order */ + max_delay = msecs_to_jiffies(ntohs(mld->mld_maxdelay)); + if (max_delay) + group = &mld->mld_mca; + } else if (skb->len >= sizeof(*mld2q)) { + if (!pskb_may_pull(skb, sizeof(*mld2q))) { + err = -EINVAL; + goto out; + } + mld2q = (struct mld2_query *)icmp6_hdr(skb); + if (!mld2q->mld2q_nsrcs) + group = &mld2q->mld2q_mca; + max_delay = mld2q->mld2q_mrc ? MLDV2_MRC(mld2q->mld2q_mrc) : 1; + } + + /* group stays NULL for queries that are not acted on here */ + if (!group) + goto out; + + mp = br_mdb_ip6_get(br->mdb, group); + if (!mp) + goto out; + + max_delay *= br->multicast_last_member_count; + if (!hlist_unhashed(&mp->mglist) && + (timer_pending(&mp->timer) ? + time_after(mp->timer.expires, now + max_delay) : + try_to_del_timer_sync(&mp->timer) >= 0)) + mod_timer(&mp->timer, now + max_delay); + + for (pp = &mp->ports; (p = *pp); pp = &p->next) { + if (timer_pending(&p->timer) ? 
+ time_after(p->timer.expires, now + max_delay) : + try_to_del_timer_sync(&p->timer) >= 0) + /* shorten this port group's own timer, not the bridge group's */ + mod_timer(&p->timer, now + max_delay); + } + +out: + spin_unlock(&br->multicast_lock); + return err; +} +#endif + +static void br_multicast_leave_group(struct net_bridge *br, + struct net_bridge_port *port, + struct br_ip *group) +{ + struct net_bridge_mdb_htable *mdb; + struct net_bridge_mdb_entry *mp; + struct net_bridge_port_group *p; + unsigned long now; + unsigned long time; + + spin_lock(&br->multicast_lock); + if (!netif_running(br->dev) || + (port && port->state == BR_STATE_DISABLED) || + timer_pending(&br->multicast_querier_timer)) + goto out; + + mdb = br->mdb; + mp = br_mdb_ip_get(mdb, group); + if (!mp) + goto out; + + now = jiffies; + time = now + br->multicast_last_member_count * + br->multicast_last_member_interval; + + if (!port) { + if (!hlist_unhashed(&mp->mglist) && + (timer_pending(&mp->timer) ? + time_after(mp->timer.expires, time) : + try_to_del_timer_sync(&mp->timer) >= 0)) { + mod_timer(&mp->timer, time); + + mp->queries_sent = 0; + mod_timer(&mp->query_timer, now); + } + + goto out; + } + + for (p = mp->ports; p; p = p->next) { + if (p->port != port) + continue; + + if (!hlist_unhashed(&p->mglist) && + (timer_pending(&p->timer) ? 
+ time_after(p->timer.expires, time) : + try_to_del_timer_sync(&p->timer) >= 0)) { + mod_timer(&p->timer, time); + + p->queries_sent = 0; + mod_timer(&p->query_timer, now); + } + + break; + } + +out: + spin_unlock(&br->multicast_lock); +} + +static void br_ip4_multicast_leave_group(struct net_bridge *br, + struct net_bridge_port *port, + __be32 group) +{ + struct br_ip br_group; + + if (ipv4_is_local_multicast(group)) + return; + + br_group.u.ip4 = group; + br_group.proto = htons(ETH_P_IP); + + br_multicast_leave_group(br, port, &br_group); +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static void br_ip6_multicast_leave_group(struct net_bridge *br, + struct net_bridge_port *port, + const struct in6_addr *group) +{ + struct br_ip br_group; + + if (ipv6_is_local_multicast(group)) + return; + + ipv6_addr_copy(&br_group.u.ip6, group); + br_group.proto = htons(ETH_P_IPV6); + + br_multicast_leave_group(br, port, &br_group); +} +#endif + +static int br_multicast_ipv4_rcv(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb) +{ + struct sk_buff *skb2 = skb; + struct iphdr *iph; + struct igmphdr *ih; + unsigned len; + unsigned offset; + int err; + + /* We treat OOM as packet loss for now. 
*/ + if (!pskb_may_pull(skb, sizeof(*iph))) + return -EINVAL; + + iph = ip_hdr(skb); + + if (iph->ihl < 5 || iph->version != 4) + return -EINVAL; + + if (!pskb_may_pull(skb, ip_hdrlen(skb))) + return -EINVAL; + + iph = ip_hdr(skb); + + if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) + return -EINVAL; + + if (iph->protocol != IPPROTO_IGMP) + return 0; + + len = ntohs(iph->tot_len); + if (skb->len < len || len < ip_hdrlen(skb)) + return -EINVAL; + + /* work on a trimmed clone when the frame is longer than the IP datagram */ + if (skb->len > len) { + skb2 = skb_clone(skb, GFP_ATOMIC); + if (!skb2) + return -ENOMEM; + + err = pskb_trim_rcsum(skb2, len); + if (err) + goto err_out; + } + + len -= ip_hdrlen(skb2); + offset = skb_network_offset(skb2) + ip_hdrlen(skb2); + __skb_pull(skb2, offset); + skb_reset_transport_header(skb2); + + err = -EINVAL; + if (!pskb_may_pull(skb2, sizeof(*ih))) + goto out; + + switch (skb2->ip_summed) { + case CHECKSUM_COMPLETE: + if (!csum_fold(skb2->csum)) + break; + /* fall through */ + case CHECKSUM_NONE: + skb2->csum = 0; + if (skb_checksum_complete(skb2)) + goto out; + } + + err = 0; + + BR_INPUT_SKB_CB(skb)->igmp = 1; + ih = igmp_hdr(skb2); + + switch (ih->type) { + case IGMP_HOST_MEMBERSHIP_REPORT: + case IGMPV2_HOST_MEMBERSHIP_REPORT: + /* flag the original skb: skb2 may be a clone that is freed below */ + BR_INPUT_SKB_CB(skb)->mrouters_only = 1; + err = br_ip4_multicast_add_group(br, port, ih->group); + break; + case IGMPV3_HOST_MEMBERSHIP_REPORT: + err = br_ip4_multicast_igmp3_report(br, port, skb2); + break; + case IGMP_HOST_MEMBERSHIP_QUERY: + err = br_ip4_multicast_query(br, port, skb2); + break; + case IGMP_HOST_LEAVE_MESSAGE: + br_ip4_multicast_leave_group(br, port, ih->group); + break; + } + +out: + __skb_push(skb2, offset); +err_out: + if (skb2 != skb) + kfree_skb(skb2); + return err; +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static int br_multicast_ipv6_rcv(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb) +{ + struct sk_buff *skb2 = skb; + struct ipv6hdr *ip6h; + struct icmp6hdr *icmp6h; + u8 nexthdr; + unsigned len; + 
int offset; /* signed: ipv6_skip_exthdr() reports failure with a negative value */ + int err; + + if (!pskb_may_pull(skb, sizeof(*ip6h))) + return -EINVAL; + + ip6h = ipv6_hdr(skb); + + /* + * We're interested in MLD messages only. + * - Version is 6 + * - MLD has always Router Alert hop-by-hop option + * - But we do not support jumbograms. + */ + if (ip6h->version != 6 || + ip6h->nexthdr != IPPROTO_HOPOPTS || + ip6h->payload_len == 0) + return 0; + + len = ntohs(ip6h->payload_len); + if (skb->len < len) + return -EINVAL; + + nexthdr = ip6h->nexthdr; + offset = ipv6_skip_exthdr(skb, sizeof(*ip6h), &nexthdr); + + if (offset < 0 || nexthdr != IPPROTO_ICMPV6) + return 0; + + /* Okay, we found ICMPv6 header */ + skb2 = skb_clone(skb, GFP_ATOMIC); + if (!skb2) + return -ENOMEM; + + len -= offset - skb_network_offset(skb2); + + __skb_pull(skb2, offset); + skb_reset_transport_header(skb2); + + err = -EINVAL; + if (!pskb_may_pull(skb2, sizeof(*icmp6h))) + goto out; + + icmp6h = icmp6_hdr(skb2); + + switch (icmp6h->icmp6_type) { + case ICMPV6_MGM_QUERY: + case ICMPV6_MGM_REPORT: + case ICMPV6_MGM_REDUCTION: + case ICMPV6_MLD2_REPORT: + break; + default: + err = 0; + goto out; + } + + /* Okay, we found MLD message. Check further. 
*/ + if (skb2->len > len) { + err = pskb_trim_rcsum(skb2, len); + if (err) + goto out; + /* trimming a clone may move its header; re-read the pointer */ + icmp6h = icmp6_hdr(skb2); + } + + switch (skb2->ip_summed) { + case CHECKSUM_COMPLETE: + if (!csum_fold(skb2->csum)) + break; + /*FALLTHROUGH*/ + case CHECKSUM_NONE: + skb2->csum = 0; + if (skb_checksum_complete(skb2)) + goto out; + } + + err = 0; + + BR_INPUT_SKB_CB(skb)->igmp = 1; + + switch (icmp6h->icmp6_type) { + case ICMPV6_MGM_REPORT: + { + struct mld_msg *mld = (struct mld_msg *)icmp6h; + /* flag the original skb: skb2 is a clone that is freed below */ + BR_INPUT_SKB_CB(skb)->mrouters_only = 1; + err = br_ip6_multicast_add_group(br, port, &mld->mld_mca); + break; + } + case ICMPV6_MLD2_REPORT: + err = br_ip6_multicast_mld2_report(br, port, skb2); + break; + case ICMPV6_MGM_QUERY: + err = br_ip6_multicast_query(br, port, skb2); + break; + case ICMPV6_MGM_REDUCTION: + { + struct mld_msg *mld = (struct mld_msg *)icmp6h; + br_ip6_multicast_leave_group(br, port, &mld->mld_mca); + } + } + +out: + __skb_push(skb2, offset); + if (skb2 != skb) + kfree_skb(skb2); + return err; +} +#endif + +int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, + struct sk_buff *skb) +{ + BR_INPUT_SKB_CB(skb)->igmp = 0; + BR_INPUT_SKB_CB(skb)->mrouters_only = 0; + + if (br->multicast_disabled) + return 0; + + switch (skb->protocol) { + case htons(ETH_P_IP): + return br_multicast_ipv4_rcv(br, port, skb); +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case htons(ETH_P_IPV6): + return br_multicast_ipv6_rcv(br, port, skb); +#endif + } + + return 0; +} + +static void br_multicast_query_expired(unsigned long data) +{ + struct net_bridge *br = (void *)data; + + spin_lock(&br->multicast_lock); + if (br->multicast_startup_queries_sent < + br->multicast_startup_query_count) + br->multicast_startup_queries_sent++; + + br_multicast_send_query(br, NULL, br->multicast_startup_queries_sent); + + spin_unlock(&br->multicast_lock); +} + +void br_multicast_init(struct net_bridge *br) +{ + br->hash_elasticity = 4; + br->hash_max = 512; + + br->multicast_router = 1; + 
br->multicast_last_member_count = 2; + br->multicast_startup_query_count = 2; + + br->multicast_last_member_interval = HZ; + br->multicast_query_response_interval = 10 * HZ; + br->multicast_startup_query_interval = 125 * HZ / 4; + br->multicast_query_interval = 125 * HZ; + br->multicast_querier_interval = 255 * HZ; + br->multicast_membership_interval = 260 * HZ; + + spin_lock_init(&br->multicast_lock); + setup_timer(&br->multicast_router_timer, + br_multicast_local_router_expired, 0); + setup_timer(&br->multicast_querier_timer, + br_multicast_local_router_expired, 0); + setup_timer(&br->multicast_query_timer, br_multicast_query_expired, + (unsigned long)br); +} + +void br_multicast_open(struct net_bridge *br) +{ + br->multicast_startup_queries_sent = 0; + + if (br->multicast_disabled) + return; + + mod_timer(&br->multicast_query_timer, jiffies); +} + +void br_multicast_stop(struct net_bridge *br) +{ + struct net_bridge_mdb_htable *mdb; + struct net_bridge_mdb_entry *mp; + struct hlist_node *p, *n; + u32 ver; + int i; + + del_timer_sync(&br->multicast_router_timer); + del_timer_sync(&br->multicast_querier_timer); + del_timer_sync(&br->multicast_query_timer); + + spin_lock_bh(&br->multicast_lock); + mdb = br->mdb; + if (!mdb) + goto out; + + br->mdb = NULL; + + ver = mdb->ver; + for (i = 0; i < mdb->max; i++) { + hlist_for_each_entry_safe(mp, p, n, &mdb->mhash[i], + hlist[ver]) { + del_timer(&mp->timer); + del_timer(&mp->query_timer); + call_rcu_bh(&mp->rcu, br_multicast_free_group); + } + } + + if (mdb->old) { + spin_unlock_bh(&br->multicast_lock); + rcu_barrier_bh(); + spin_lock_bh(&br->multicast_lock); + WARN_ON(mdb->old); + } + + mdb->old = mdb; + call_rcu_bh(&mdb->rcu, br_mdb_free); + +out: + spin_unlock_bh(&br->multicast_lock); +} + +int br_multicast_set_router(struct net_bridge *br, unsigned long val) +{ + int err = -ENOENT; + + spin_lock_bh(&br->multicast_lock); + if (!netif_running(br->dev)) + goto unlock; + + switch (val) { + case 0: + case 2: + 
del_timer(&br->multicast_router_timer); + /* fall through */ + case 1: + br->multicast_router = val; + err = 0; + break; + + default: + err = -EINVAL; + break; + } + +unlock: + spin_unlock_bh(&br->multicast_lock); + + return err; +} + +int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val) +{ + struct net_bridge *br = p->br; + int err = -ENOENT; + + spin_lock(&br->multicast_lock); + if (!netif_running(br->dev) || p->state == BR_STATE_DISABLED) + goto unlock; + + switch (val) { + case 0: + case 1: + case 2: + p->multicast_router = val; + err = 0; + + if (val < 2 && !hlist_unhashed(&p->rlist)) + hlist_del_init_rcu(&p->rlist); + + if (val == 1) + break; + + del_timer(&p->multicast_router_timer); + + if (val == 0) + break; + + br_multicast_add_router(br, p); + break; + + default: + err = -EINVAL; + break; + } + +unlock: + spin_unlock(&br->multicast_lock); + + return err; +} + +int br_multicast_toggle(struct net_bridge *br, unsigned long val) +{ + struct net_bridge_port *port; + int err = -ENOENT; + + spin_lock(&br->multicast_lock); + if (!netif_running(br->dev)) + goto unlock; + + err = 0; + if (br->multicast_disabled == !val) + goto unlock; + + br->multicast_disabled = !val; + if (br->multicast_disabled) + goto unlock; + + if (br->mdb) { + if (br->mdb->old) { + err = -EEXIST; +rollback: + br->multicast_disabled = !!val; + goto unlock; + } + + err = br_mdb_rehash(&br->mdb, br->mdb->max, + br->hash_elasticity); + if (err) + goto rollback; + } + + br_multicast_open(br); + list_for_each_entry(port, &br->port_list, list) { + if (port->state == BR_STATE_DISABLED || + port->state == BR_STATE_BLOCKING) + continue; + + __br_multicast_enable_port(port); + } + +unlock: + spin_unlock(&br->multicast_lock); + + return err; +} + +int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val) +{ + int err = -ENOENT; + u32 old; + + spin_lock(&br->multicast_lock); + if (!netif_running(br->dev)) + goto unlock; + + err = -EINVAL; + if 
(!is_power_of_2(val)) + goto unlock; + if (br->mdb && val < br->mdb->size) + goto unlock; + + err = 0; + + old = br->hash_max; + br->hash_max = val; + + if (br->mdb) { + if (br->mdb->old) { + err = -EEXIST; +rollback: + br->hash_max = old; + goto unlock; + } + + err = br_mdb_rehash(&br->mdb, br->hash_max, + br->hash_elasticity); + if (err) + goto rollback; + } + +unlock: + spin_unlock(&br->multicast_lock); + + return err; +} diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 268e2e72588..8fb75f89c4a 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -3,15 +3,8 @@ * Linux ethernet bridge * * Authors: - * Lennert Buytenhek <buytenh@gnu.org> - * Bart De Schuymer (maintainer) <bdschuym@pandora.be> - * - * Changes: - * Apr 29 2003: physdev module support (bdschuym) - * Jun 19 2003: let arptables see bridged ARP traffic (bdschuym) - * Oct 06 2003: filter encapsulated IP/ARP VLAN traffic on untagged bridge - * (bdschuym) - * Sep 01 2004: add IPv6 filtering (bdschuym) + * Lennert Buytenhek <buytenh@gnu.org> + * Bart De Schuymer <bdschuym@pandora.be> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -23,6 +16,7 @@ #include <linux/module.h> #include <linux/kernel.h> +#include <linux/slab.h> #include <linux/ip.h> #include <linux/netdevice.h> #include <linux/skbuff.h> @@ -203,15 +197,24 @@ static inline void nf_bridge_save_header(struct sk_buff *skb) skb->nf_bridge->data, header_size); } -/* - * When forwarding bridge frames, we save a copy of the original - * header before processing. +static inline void nf_bridge_update_protocol(struct sk_buff *skb) +{ + if (skb->nf_bridge->mask & BRNF_8021Q) + skb->protocol = htons(ETH_P_8021Q); + else if (skb->nf_bridge->mask & BRNF_PPPoE) + skb->protocol = htons(ETH_P_PPP_SES); +} + +/* Fill in the header for fragmented IP packets handled by + * the IPv4 connection tracking code. 
*/ int nf_bridge_copy_header(struct sk_buff *skb) { int err; - int header_size = ETH_HLEN + nf_bridge_encap_header_len(skb); + unsigned int header_size; + nf_bridge_update_protocol(skb); + header_size = ETH_HLEN + nf_bridge_encap_header_len(skb); err = skb_cow_head(skb, header_size); if (err) return err; @@ -245,27 +248,48 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) skb_dst_set(skb, &rt->u.dst); skb->dev = nf_bridge->physindev; + nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, + NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, br_handle_frame_finish, 1); return 0; } -static void __br_dnat_complain(void) +/* Obtain the correct destination MAC address, while preserving the original + * source MAC address. If we already know this address, we just copy it. If we + * don't, we use the neighbour framework to find out. In both cases, we make + * sure that br_handle_frame_finish() is called afterwards. + */ +static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb) { - static unsigned long last_complaint; + struct nf_bridge_info *nf_bridge = skb->nf_bridge; + struct dst_entry *dst; - if (jiffies - last_complaint >= 5 * HZ) { - printk(KERN_WARNING "Performing cross-bridge DNAT requires IP " - "forwarding to be enabled\n"); - last_complaint = jiffies; + skb->dev = bridge_parent(skb->dev); + if (!skb->dev) + goto free_skb; + dst = skb_dst(skb); + if (dst->hh) { + neigh_hh_bridge(dst->hh, skb); + skb->dev = nf_bridge->physindev; + return br_handle_frame_finish(skb); + } else if (dst->neighbour) { + /* the neighbour function below overwrites the complete + * MAC header, so we save the Ethernet source address and + * protocol number. 
*/ + skb_copy_from_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN), skb->nf_bridge->data, ETH_HLEN-ETH_ALEN); + /* tell br_dev_xmit to continue with forwarding */ + nf_bridge->mask |= BRNF_BRIDGED_DNAT; + return dst->neighbour->output(skb); } +free_skb: + kfree_skb(skb); + return 0; } /* This requires some explaining. If DNAT has taken place, - * we will need to fix up the destination Ethernet address, - * and this is a tricky process. + * we will need to fix up the destination Ethernet address. * * There are two cases to consider: * 1. The packet was DNAT'ed to a device in the same bridge @@ -279,62 +303,29 @@ static void __br_dnat_complain(void) * call ip_route_input() and to look at skb->dst->dev, which is * changed to the destination device if ip_route_input() succeeds. * - * Let us first consider the case that ip_route_input() succeeds: - * - * If skb->dst->dev equals the logical bridge device the packet - * came in on, we can consider this bridging. The packet is passed - * through the neighbour output function to build a new destination - * MAC address, which will make the packet enter br_nf_local_out() - * not much later. In that function it is assured that the iptables - * FORWARD chain is traversed for the packet. + * Let's first consider the case that ip_route_input() succeeds: * + * If the output device equals the logical bridge device the packet + * came in on, we can consider this bridging. The corresponding MAC + * address will be obtained in br_nf_pre_routing_finish_bridge. * Otherwise, the packet is considered to be routed and we just * change the destination MAC address so that the packet will * later be passed up to the IP stack to be routed. For a redirected * packet, ip_route_input() will give back the localhost as output device, * which differs from the bridge device. 
* - * Let us now consider the case that ip_route_input() fails: + * Let's now consider the case that ip_route_input() fails: * * This can be because the destination address is martian, in which case * the packet will be dropped. - * After a "echo '0' > /proc/sys/net/ipv4/ip_forward" ip_route_input() - * will fail, while __ip_route_output_key() will return success. The source - * address for __ip_route_output_key() is set to zero, so __ip_route_output_key + * If IP forwarding is disabled, ip_route_input() will fail, while + * ip_route_output_key() can return success. The source + * address for ip_route_output_key() is set to zero, so ip_route_output_key() * thinks we're handling a locally generated packet and won't care - * if IP forwarding is allowed. We send a warning message to the users's - * log telling her to put IP forwarding on. - * - * ip_route_input() will also fail if there is no route available. - * In that case we just drop the packet. - * - * --Lennert, 20020411 - * --Bart, 20020416 (updated) - * --Bart, 20021007 (updated) - * --Bart, 20062711 (updated) */ -static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb) -{ - if (skb->pkt_type == PACKET_OTHERHOST) { - skb->pkt_type = PACKET_HOST; - skb->nf_bridge->mask |= BRNF_PKT_TYPE; - } - skb->nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; - - skb->dev = bridge_parent(skb->dev); - if (skb->dev) { - struct dst_entry *dst = skb_dst(skb); - - nf_bridge_pull_encap_header(skb); - - if (dst->hh) - return neigh_hh_output(dst->hh, skb); - else if (dst->neighbour) - return dst->neighbour->output(skb); - } - kfree_skb(skb); - return 0; -} - + * if IP forwarding is enabled. If the output device equals the logical bridge + * device, we proceed as if ip_route_input() succeeded. If it differs from the + * logical bridge port or if ip_route_output_key() fails we drop the packet. 
+ */ static int br_nf_pre_routing_finish(struct sk_buff *skb) { struct net_device *dev = skb->dev; @@ -378,11 +369,6 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb) skb_dst_set(skb, (struct dst_entry *)rt); goto bridged_dnat; } - /* we are sure that forwarding is disabled, so printing - * this message is no problem. Note that the packet could - * still have a martian destination address, in which case - * the packet could be dropped even if forwarding were enabled */ - __br_dnat_complain(); dst_release((struct dst_entry *)rt); } free_skb: @@ -391,12 +377,11 @@ free_skb: } else { if (skb_dst(skb)->dev == dev) { bridged_dnat: - /* Tell br_nf_local_out this is a - * bridged frame */ - nf_bridge->mask |= BRNF_BRIDGED_DNAT; skb->dev = nf_bridge->physindev; + nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, + NF_HOOK_THRESH(NFPROTO_BRIDGE, + NF_BR_PRE_ROUTING, skb, skb->dev, NULL, br_nf_pre_routing_finish_bridge, 1); @@ -416,8 +401,9 @@ bridged_dnat: } skb->dev = nf_bridge->physindev; + nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, + NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, br_handle_frame_finish, 1); return 0; @@ -436,6 +422,10 @@ static struct net_device *setup_pre_routing(struct sk_buff *skb) nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING; nf_bridge->physindev = skb->dev; skb->dev = bridge_parent(skb->dev); + if (skb->protocol == htons(ETH_P_8021Q)) + nf_bridge->mask |= BRNF_8021Q; + else if (skb->protocol == htons(ETH_P_PPP_SES)) + nf_bridge->mask |= BRNF_PPPoE; return skb->dev; } @@ -534,7 +524,8 @@ static unsigned int br_nf_pre_routing_ipv6(unsigned int hook, if (!setup_pre_routing(skb)) return NF_DROP; - NF_HOOK(PF_INET6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, + skb->protocol = htons(ETH_P_IPV6); + NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, 
br_nf_pre_routing_finish_ipv6); return NF_STOLEN; @@ -600,14 +591,18 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb, pskb_trim_rcsum(skb, len); + /* BUG: Should really parse the IP options here. */ + memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); + nf_bridge_put(skb->nf_bridge); if (!nf_bridge_alloc(skb)) return NF_DROP; if (!setup_pre_routing(skb)) return NF_DROP; store_orig_dstaddr(skb); + skb->protocol = htons(ETH_P_IP); - NF_HOOK(PF_INET, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, + NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, br_nf_pre_routing_finish); return NF_STOLEN; @@ -651,11 +646,13 @@ static int br_nf_forward_finish(struct sk_buff *skb) skb->pkt_type = PACKET_OTHERHOST; nf_bridge->mask ^= BRNF_PKT_TYPE; } + nf_bridge_update_protocol(skb); } else { in = *((struct net_device **)(skb->cb)); } nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(PF_BRIDGE, NF_BR_FORWARD, skb, in, + + NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, skb, in, skb->dev, br_forward_finish, 1); return 0; } @@ -706,6 +703,10 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb, /* The physdev module checks on this */ nf_bridge->mask |= BRNF_BRIDGED; nf_bridge->physoutdev = skb->dev; + if (pf == PF_INET) + skb->protocol = htons(ETH_P_IP); + else + skb->protocol = htons(ETH_P_IPV6); NF_HOOK(pf, NF_INET_FORWARD, skb, bridge_parent(in), parent, br_nf_forward_finish); @@ -743,60 +744,11 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb, return NF_STOLEN; } -/* PF_BRIDGE/LOCAL_OUT *********************************************** - * - * This function sees both locally originated IP packets and forwarded - * IP packets (in both cases the destination device is a bridge - * device). It also sees bridged-and-DNAT'ed packets. - * - * If (nf_bridge->mask & BRNF_BRIDGED_DNAT) then the packet is bridged - * and we fake the PF_BRIDGE/FORWARD hook. 
The function br_nf_forward() - * will then fake the PF_INET/FORWARD hook. br_nf_local_out() has priority - * NF_BR_PRI_FIRST, so no relevant PF_BRIDGE/INPUT functions have been nor - * will be executed. - */ -static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - struct net_device *realindev; - struct nf_bridge_info *nf_bridge; - - if (!skb->nf_bridge) - return NF_ACCEPT; - - /* Need exclusive nf_bridge_info since we might have multiple - * different physoutdevs. */ - if (!nf_bridge_unshare(skb)) - return NF_DROP; - - nf_bridge = skb->nf_bridge; - if (!(nf_bridge->mask & BRNF_BRIDGED_DNAT)) - return NF_ACCEPT; - - /* Bridged, take PF_BRIDGE/FORWARD. - * (see big note in front of br_nf_pre_routing_finish) */ - nf_bridge->physoutdev = skb->dev; - realindev = nf_bridge->physindev; - - if (nf_bridge->mask & BRNF_PKT_TYPE) { - skb->pkt_type = PACKET_OTHERHOST; - nf_bridge->mask ^= BRNF_PKT_TYPE; - } - nf_bridge_push_encap_header(skb); - - NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev, skb->dev, - br_forward_finish); - return NF_STOLEN; -} - #if defined(CONFIG_NF_CONNTRACK_IPV4) || defined(CONFIG_NF_CONNTRACK_IPV4_MODULE) static int br_nf_dev_queue_xmit(struct sk_buff *skb) { - if (skb->nfct != NULL && - (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb)) && - skb->len > skb->dev->mtu && + if (skb->nfct != NULL && skb->protocol == htons(ETH_P_IP) && + skb->len + nf_bridge_mtu_reduction(skb) > skb->dev->mtu && !skb_is_gso(skb)) return ip_fragment(skb, br_dev_queue_push_xmit); else @@ -819,21 +771,7 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, struct net_device *realoutdev = bridge_parent(skb->dev); u_int8_t pf; -#ifdef CONFIG_NETFILTER_DEBUG - /* Be very paranoid. This probably won't happen anymore, but let's - * keep the check just to be sure... 
*/ - if (skb_mac_header(skb) < skb->head || - skb_mac_header(skb) + ETH_HLEN > skb->data) { - printk(KERN_CRIT "br_netfilter: Argh!! br_nf_post_routing: " - "bad mac.raw pointer.\n"); - goto print_error; - } -#endif - - if (!nf_bridge) - return NF_ACCEPT; - - if (!(nf_bridge->mask & (BRNF_BRIDGED | BRNF_BRIDGED_DNAT))) + if (!nf_bridge || !(nf_bridge->mask & BRNF_BRIDGED)) return NF_ACCEPT; if (!realoutdev) @@ -848,13 +786,6 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, else return NF_ACCEPT; -#ifdef CONFIG_NETFILTER_DEBUG - if (skb_dst(skb) == NULL) { - printk(KERN_INFO "br_netfilter post_routing: skb->dst == NULL\n"); - goto print_error; - } -#endif - /* We assume any code from br_dev_queue_push_xmit onwards doesn't care * about the value of skb->pkt_type. */ if (skb->pkt_type == PACKET_OTHERHOST) { @@ -864,24 +795,15 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, nf_bridge_pull_encap_header(skb); nf_bridge_save_header(skb); + if (pf == PF_INET) + skb->protocol = htons(ETH_P_IP); + else + skb->protocol = htons(ETH_P_IPV6); NF_HOOK(pf, NF_INET_POST_ROUTING, skb, NULL, realoutdev, br_nf_dev_queue_xmit); return NF_STOLEN; - -#ifdef CONFIG_NETFILTER_DEBUG -print_error: - if (skb->dev != NULL) { - printk("[%s]", skb->dev->name); - if (realoutdev) - printk("[%s]", realoutdev->name); - } - printk(" head:%p, raw:%p, data:%p\n", skb->head, skb_mac_header(skb), - skb->data); - dump_stack(); - return NF_ACCEPT; -#endif } /* IP/SABOTAGE *****************************************************/ @@ -900,10 +822,8 @@ static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff *skb, return NF_ACCEPT; } -/* For br_nf_local_out we need (prio = NF_BR_PRI_FIRST), to insure that innocent - * PF_BRIDGE/NF_BR_LOCAL_OUT functions don't get bridged traffic as input. - * For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because - * ip_refrag() can return NF_STOLEN. 
*/ +/* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because + * br_dev_queue_push_xmit is called afterwards */ static struct nf_hook_ops br_nf_ops[] __read_mostly = { { .hook = br_nf_pre_routing, @@ -934,13 +854,6 @@ static struct nf_hook_ops br_nf_ops[] __read_mostly = { .priority = NF_BR_PRI_BRNF, }, { - .hook = br_nf_local_out, - .owner = THIS_MODULE, - .pf = PF_BRIDGE, - .hooknum = NF_BR_LOCAL_OUT, - .priority = NF_BR_PRI_FIRST, - }, - { .hook = br_nf_post_routing, .owner = THIS_MODULE, .pf = PF_BRIDGE, diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index fcffb3fb117..fe0a79018ab 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -11,6 +11,7 @@ */ #include <linux/kernel.h> +#include <linux/slab.h> #include <net/rtnetlink.h> #include <net/net_namespace.h> #include <net/sock.h> @@ -41,8 +42,8 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *por struct nlmsghdr *nlh; u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; - pr_debug("br_fill_info event %d port %s master %s\n", - event, dev->name, br->dev->name); + br_debug(br, "br_fill_info event %d port %s master %s\n", + event, dev->name, br->dev->name); nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags); if (nlh == NULL) @@ -86,7 +87,9 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port) struct sk_buff *skb; int err = -ENOBUFS; - pr_debug("bridge notify event=%d\n", event); + br_debug(port->br, "port %u(%s) event %d\n", + (unsigned)port->port_no, port->dev->name, event); + skb = nlmsg_new(br_nlmsg_size(), GFP_ATOMIC); if (skb == NULL) goto errout; diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c index 763a3ec292e..717e1fd6133 100644 --- a/net/bridge/br_notify.c +++ b/net/bridge/br_notify.c @@ -34,6 +34,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v struct net_device *dev = ptr; struct net_bridge_port *p = dev->br_port; struct net_bridge *br; + int err; 
/* not a port of a bridge */ if (p == NULL) @@ -82,6 +83,16 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v case NETDEV_UNREGISTER: br_del_if(br, dev); break; + + case NETDEV_CHANGENAME: + err = br_sysfs_renameif(p); + if (err) + return notifier_from_errno(err); + break; + + case NETDEV_PRE_TYPE_CHANGE: + /* Forbid underlying device to change its type. */ + return NOTIFY_BAD; } /* Events that may cause spanning tree to refresh */ diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 2114e45682e..0f4a74bc6a9 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -45,6 +45,17 @@ struct mac_addr unsigned char addr[6]; }; +struct br_ip +{ + union { + __be32 ip4; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct in6_addr ip6; +#endif + } u; + __be16 proto; +}; + struct net_bridge_fdb_entry { struct hlist_node hlist; @@ -57,6 +68,41 @@ struct net_bridge_fdb_entry unsigned char is_static; }; +struct net_bridge_port_group { + struct net_bridge_port *port; + struct net_bridge_port_group *next; + struct hlist_node mglist; + struct rcu_head rcu; + struct timer_list timer; + struct timer_list query_timer; + struct br_ip addr; + u32 queries_sent; +}; + +struct net_bridge_mdb_entry +{ + struct hlist_node hlist[2]; + struct hlist_node mglist; + struct net_bridge *br; + struct net_bridge_port_group *ports; + struct rcu_head rcu; + struct timer_list timer; + struct timer_list query_timer; + struct br_ip addr; + u32 queries_sent; +}; + +struct net_bridge_mdb_htable +{ + struct hlist_head *mhash; + struct rcu_head rcu; + struct net_bridge_mdb_htable *old; + u32 size; + u32 max; + u32 secret; + u32 ver; +}; + struct net_bridge_port { struct net_bridge *br; @@ -84,6 +130,26 @@ struct net_bridge_port unsigned long flags; #define BR_HAIRPIN_MODE 0x00000001 + +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + u32 multicast_startup_queries_sent; + unsigned char multicast_router; + struct timer_list
multicast_router_timer; + struct timer_list multicast_query_timer; + struct hlist_head mglist; + struct hlist_node rlist; +#endif + +#ifdef CONFIG_SYSFS + char sysfs_name[IFNAMSIZ]; +#endif +}; + +struct br_cpu_netstats { + unsigned long rx_packets; + unsigned long rx_bytes; + unsigned long tx_packets; + unsigned long tx_bytes; }; struct net_bridge @@ -91,9 +157,10 @@ struct net_bridge spinlock_t lock; struct list_head port_list; struct net_device *dev; + + struct br_cpu_netstats __percpu *stats; spinlock_t hash_lock; struct hlist_head hash[BR_HASH_SIZE]; - struct list_head age_list; unsigned long feature_mask; #ifdef CONFIG_BRIDGE_NETFILTER struct rtable fake_rtable; @@ -125,6 +192,35 @@ struct net_bridge unsigned char topology_change; unsigned char topology_change_detected; +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + unsigned char multicast_router; + + u8 multicast_disabled:1; + + u32 hash_elasticity; + u32 hash_max; + + u32 multicast_last_member_count; + u32 multicast_startup_queries_sent; + u32 multicast_startup_query_count; + + unsigned long multicast_last_member_interval; + unsigned long multicast_membership_interval; + unsigned long multicast_querier_interval; + unsigned long multicast_query_interval; + unsigned long multicast_query_response_interval; + unsigned long multicast_startup_query_interval; + + spinlock_t multicast_lock; + struct net_bridge_mdb_htable *mdb; + struct hlist_head router_list; + struct hlist_head mglist; + + struct timer_list multicast_router_timer; + struct timer_list multicast_querier_timer; + struct timer_list multicast_query_timer; +#endif + struct timer_list hello_timer; struct timer_list tcn_timer; struct timer_list topology_change_timer; @@ -132,6 +228,37 @@ struct net_bridge struct kobject *ifobj; }; +struct br_input_skb_cb { + struct net_device *brdev; +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + int igmp; + int mrouters_only; +#endif +}; + +#define BR_INPUT_SKB_CB(__skb) ((struct br_input_skb_cb *)(__skb)->cb) + +#ifdef 
CONFIG_BRIDGE_IGMP_SNOOPING +# define BR_INPUT_SKB_CB_MROUTERS_ONLY(__skb) (BR_INPUT_SKB_CB(__skb)->mrouters_only) +#else +# define BR_INPUT_SKB_CB_MROUTERS_ONLY(__skb) (0) +#endif + +#define br_printk(level, br, format, args...) \ + printk(level "%s: " format, (br)->dev->name, ##args) + +#define br_err(__br, format, args...) \ + br_printk(KERN_ERR, __br, format, ##args) +#define br_warn(__br, format, args...) \ + br_printk(KERN_WARNING, __br, format, ##args) +#define br_notice(__br, format, args...) \ + br_printk(KERN_NOTICE, __br, format, ##args) +#define br_info(__br, format, args...) \ + br_printk(KERN_INFO, __br, format, ##args) + +#define br_debug(br, format, args...) \ + pr_debug("%s: " format, (br)->dev->name, ##args) + extern struct notifier_block br_device_notifier; extern const u8 br_group_address[ETH_ALEN]; @@ -145,6 +272,18 @@ static inline int br_is_root_bridge(const struct net_bridge *br) extern void br_dev_setup(struct net_device *dev); extern netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev); +#ifdef CONFIG_NET_POLL_CONTROLLER +extern void br_netpoll_cleanup(struct net_device *dev); +extern void br_netpoll_enable(struct net_bridge *br, + struct net_device *dev); +extern void br_netpoll_disable(struct net_bridge *br, + struct net_device *dev); +#else +#define br_netpoll_cleanup(br) +#define br_netpoll_enable(br, dev) +#define br_netpoll_disable(br, dev) + +#endif /* br_fdb.c */ extern int br_fdb_init(void); @@ -172,10 +311,11 @@ extern void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb); extern int br_dev_queue_push_xmit(struct sk_buff *skb); extern void br_forward(const struct net_bridge_port *to, - struct sk_buff *skb); + struct sk_buff *skb, struct sk_buff *skb0); extern int br_forward_finish(struct sk_buff *skb); extern void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb); -extern void br_flood_forward(struct net_bridge *br, struct sk_buff *skb); +extern void br_flood_forward(struct net_bridge 
*br, struct sk_buff *skb, + struct sk_buff *skb2); /* br_if.c */ extern void br_port_carrier_check(struct net_bridge_port *p); @@ -198,6 +338,94 @@ extern struct sk_buff *br_handle_frame(struct net_bridge_port *p, extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); extern int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *arg); +/* br_multicast.c */ +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING +extern int br_multicast_rcv(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb); +extern struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, + struct sk_buff *skb); +extern void br_multicast_add_port(struct net_bridge_port *port); +extern void br_multicast_del_port(struct net_bridge_port *port); +extern void br_multicast_enable_port(struct net_bridge_port *port); +extern void br_multicast_disable_port(struct net_bridge_port *port); +extern void br_multicast_init(struct net_bridge *br); +extern void br_multicast_open(struct net_bridge *br); +extern void br_multicast_stop(struct net_bridge *br); +extern void br_multicast_deliver(struct net_bridge_mdb_entry *mdst, + struct sk_buff *skb); +extern void br_multicast_forward(struct net_bridge_mdb_entry *mdst, + struct sk_buff *skb, struct sk_buff *skb2); +extern int br_multicast_set_router(struct net_bridge *br, unsigned long val); +extern int br_multicast_set_port_router(struct net_bridge_port *p, + unsigned long val); +extern int br_multicast_toggle(struct net_bridge *br, unsigned long val); +extern int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val); + +static inline bool br_multicast_is_router(struct net_bridge *br) +{ + return br->multicast_router == 2 || + (br->multicast_router == 1 && + timer_pending(&br->multicast_router_timer)); +} +#else +static inline int br_multicast_rcv(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb) +{ + return 0; +} + +static inline struct net_bridge_mdb_entry 
*br_mdb_get(struct net_bridge *br, + struct sk_buff *skb) +{ + return NULL; +} + +static inline void br_multicast_add_port(struct net_bridge_port *port) +{ +} + +static inline void br_multicast_del_port(struct net_bridge_port *port) +{ +} + +static inline void br_multicast_enable_port(struct net_bridge_port *port) +{ +} + +static inline void br_multicast_disable_port(struct net_bridge_port *port) +{ +} + +static inline void br_multicast_init(struct net_bridge *br) +{ +} + +static inline void br_multicast_open(struct net_bridge *br) +{ +} + +static inline void br_multicast_stop(struct net_bridge *br) +{ +} + +static inline void br_multicast_deliver(struct net_bridge_mdb_entry *mdst, + struct sk_buff *skb) +{ +} + +static inline void br_multicast_forward(struct net_bridge_mdb_entry *mdst, + struct sk_buff *skb, + struct sk_buff *skb2) +{ +} +static inline bool br_multicast_is_router(struct net_bridge *br) +{ + return 0; +} +#endif + /* br_netfilter.c */ #ifdef CONFIG_BRIDGE_NETFILTER extern int br_netfilter_init(void); @@ -254,8 +482,9 @@ extern void br_ifinfo_notify(int event, struct net_bridge_port *port); #ifdef CONFIG_SYSFS /* br_sysfs_if.c */ -extern struct sysfs_ops brport_sysfs_ops; +extern const struct sysfs_ops brport_sysfs_ops; extern int br_sysfs_addif(struct net_bridge_port *p); +extern int br_sysfs_renameif(struct net_bridge_port *p); /* br_sysfs_br.c */ extern int br_sysfs_addbr(struct net_device *dev); @@ -264,6 +493,7 @@ extern void br_sysfs_delbr(struct net_device *dev); #else #define br_sysfs_addif(p) (0) +#define br_sysfs_renameif(p) (0) #define br_sysfs_addbr(dev) (0) #define br_sysfs_delbr(dev) do { } while(0) #endif /* CONFIG_SYSFS */ diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index fd3f8d6c099..57186d84d2b 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -31,10 +31,9 @@ static const char *const br_port_state_names[] = { void br_log_state(const struct net_bridge_port *p) { - pr_info("%s: port %d(%s) entering %s state\n", 
- p->br->dev->name, p->port_no, p->dev->name, + br_info(p->br, "port %u(%s) entering %s state\n", + (unsigned) p->port_no, p->dev->name, br_port_state_names[p->state]); - } /* called under bridge lock */ @@ -300,7 +299,7 @@ void br_topology_change_detection(struct net_bridge *br) if (br->stp_enabled != BR_KERNEL_STP) return; - pr_info("%s: topology change detected, %s\n", br->dev->name, + br_info(br, "topology change detected, %s\n", isroot ? "propagating" : "sending tcn bpdu"); if (isroot) { @@ -386,6 +385,8 @@ static void br_make_forwarding(struct net_bridge_port *p) else p->state = BR_STATE_LEARNING; + br_multicast_enable_port(p); + br_log_state(p); if (br->forward_delay != 0) @@ -467,8 +468,8 @@ void br_received_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *b void br_received_tcn_bpdu(struct net_bridge_port *p) { if (br_is_designated_port(p)) { - pr_info("%s: received tcn bpdu on port %i(%s)\n", - p->br->dev->name, p->port_no, p->dev->name); + br_info(p->br, "port %u(%s) received tcn bpdu\n", + (unsigned) p->port_no, p->dev->name); br_topology_change_detection(p->br); br_topology_change_acknowledge(p); diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index 81ae40b3f65..217bd225a42 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -15,6 +15,7 @@ #include <linux/netfilter_bridge.h> #include <linux/etherdevice.h> #include <linux/llc.h> +#include <linux/slab.h> #include <net/net_namespace.h> #include <net/llc.h> #include <net/llc_pdu.h> @@ -49,7 +50,7 @@ static void br_send_bpdu(struct net_bridge_port *p, llc_mac_hdr_init(skb, p->dev->dev_addr, p->br->group_addr); - NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, + NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, dev_queue_xmit); } diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 9a52ac5b452..1d8826914cb 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -85,17 +85,16 @@ void br_stp_enable_port(struct 
net_bridge_port *p) { br_init_port(p); br_port_state_selection(p->br); + br_log_state(p); } /* called under bridge lock */ void br_stp_disable_port(struct net_bridge_port *p) { - struct net_bridge *br; + struct net_bridge *br = p->br; int wasroot; - br = p->br; - printk(KERN_INFO "%s: port %i(%s) entering %s state\n", - br->dev->name, p->port_no, p->dev->name, "disabled"); + br_log_state(p); wasroot = br_is_root_bridge(br); br_become_designated_port(p); @@ -108,6 +107,7 @@ void br_stp_disable_port(struct net_bridge_port *p) del_timer(&p->hold_timer); br_fdb_delete_by_port(br, p, 0); + br_multicast_disable_port(p); br_configuration_update(br); @@ -126,11 +126,10 @@ static void br_stp_start(struct net_bridge *br) r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC); if (r == 0) { br->stp_enabled = BR_USER_STP; - printk(KERN_INFO "%s: userspace STP started\n", br->dev->name); + br_debug(br, "userspace STP started\n"); } else { br->stp_enabled = BR_KERNEL_STP; - printk(KERN_INFO "%s: starting userspace STP failed, " - "starting kernel STP\n", br->dev->name); + br_debug(br, "using kernel STP\n"); /* To start timers on any ports left in blocking */ spin_lock_bh(&br->lock); @@ -147,9 +146,7 @@ static void br_stp_stop(struct net_bridge *br) if (br->stp_enabled == BR_USER_STP) { r = call_usermodehelper(BR_STP_PROG, argv, envp, 1); - printk(KERN_INFO "%s: userspace STP stopped, return code %d\n", - br->dev->name, r); - + br_info(br, "userspace STP stopped, return code %d\n", r); /* To start timers on any ports left in blocking */ spin_lock_bh(&br->lock); diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c index 772a140bfdf..7b22456023c 100644 --- a/net/bridge/br_stp_timer.c +++ b/net/bridge/br_stp_timer.c @@ -35,7 +35,7 @@ static void br_hello_timer_expired(unsigned long arg) { struct net_bridge *br = (struct net_bridge *)arg; - pr_debug("%s: hello timer expired\n", br->dev->name); + br_debug(br, "hello timer expired\n"); spin_lock(&br->lock); if 
(br->dev->flags & IFF_UP) { br_config_bpdu_generation(br); @@ -55,13 +55,9 @@ static void br_message_age_timer_expired(unsigned long arg) if (p->state == BR_STATE_DISABLED) return; - - pr_info("%s: neighbor %.2x%.2x.%.2x:%.2x:%.2x:%.2x:%.2x:%.2x lost on port %d(%s)\n", - br->dev->name, - id->prio[0], id->prio[1], - id->addr[0], id->addr[1], id->addr[2], - id->addr[3], id->addr[4], id->addr[5], - p->port_no, p->dev->name); + br_info(br, "port %u(%s) neighbor %.2x%.2x.%pM lost\n", + (unsigned) p->port_no, p->dev->name, + id->prio[0], id->prio[1], &id->addr); /* * According to the spec, the message age timer cannot be @@ -87,8 +83,8 @@ static void br_forward_delay_timer_expired(unsigned long arg) struct net_bridge_port *p = (struct net_bridge_port *) arg; struct net_bridge *br = p->br; - pr_debug("%s: %d(%s) forward delay timer\n", - br->dev->name, p->port_no, p->dev->name); + br_debug(br, "port %u(%s) forward delay timer\n", + (unsigned) p->port_no, p->dev->name); spin_lock(&br->lock); if (p->state == BR_STATE_LISTENING) { p->state = BR_STATE_LEARNING; @@ -107,7 +103,7 @@ static void br_tcn_timer_expired(unsigned long arg) { struct net_bridge *br = (struct net_bridge *) arg; - pr_debug("%s: tcn timer expired\n", br->dev->name); + br_debug(br, "tcn timer expired\n"); spin_lock(&br->lock); if (br->dev->flags & IFF_UP) { br_transmit_tcn(br); @@ -121,7 +117,7 @@ static void br_topology_change_timer_expired(unsigned long arg) { struct net_bridge *br = (struct net_bridge *) arg; - pr_debug("%s: topo change timer expired\n", br->dev->name); + br_debug(br, "topo change timer expired\n"); spin_lock(&br->lock); br->topology_change_detected = 0; br->topology_change = 0; @@ -132,8 +128,8 @@ static void br_hold_timer_expired(unsigned long arg) { struct net_bridge_port *p = (struct net_bridge_port *) arg; - pr_debug("%s: %d(%s) hold timer expired\n", - p->br->dev->name, p->port_no, p->dev->name); + br_debug(p->br, "port %u(%s) hold timer expired\n", + (unsigned) p->port_no, 
p->dev->name); spin_lock(&p->br->lock); if (p->config_pending) diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index bee4f300d0c..486b8f3861d 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -345,6 +345,273 @@ static ssize_t store_flush(struct device *d, } static DEVICE_ATTR(flush, S_IWUSR, NULL, store_flush); +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING +static ssize_t show_multicast_router(struct device *d, + struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%d\n", br->multicast_router); +} + +static ssize_t store_multicast_router(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, br_multicast_set_router); +} +static DEVICE_ATTR(multicast_router, S_IRUGO | S_IWUSR, show_multicast_router, + store_multicast_router); + +static ssize_t show_multicast_snooping(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%d\n", !br->multicast_disabled); +} + +static ssize_t store_multicast_snooping(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, br_multicast_toggle); +} +static DEVICE_ATTR(multicast_snooping, S_IRUGO | S_IWUSR, + show_multicast_snooping, store_multicast_snooping); + +static ssize_t show_hash_elasticity(struct device *d, + struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%u\n", br->hash_elasticity); +} + +static int set_elasticity(struct net_bridge *br, unsigned long val) +{ + br->hash_elasticity = val; + return 0; +} + +static ssize_t store_hash_elasticity(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, set_elasticity); +} +static DEVICE_ATTR(hash_elasticity, S_IRUGO | S_IWUSR, show_hash_elasticity, + 
store_hash_elasticity); + +static ssize_t show_hash_max(struct device *d, struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%u\n", br->hash_max); +} + +static ssize_t store_hash_max(struct device *d, struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, br_multicast_set_hash_max); +} +static DEVICE_ATTR(hash_max, S_IRUGO | S_IWUSR, show_hash_max, + store_hash_max); + +static ssize_t show_multicast_last_member_count(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%u\n", br->multicast_last_member_count); +} + +static int set_last_member_count(struct net_bridge *br, unsigned long val) +{ + br->multicast_last_member_count = val; + return 0; +} + +static ssize_t store_multicast_last_member_count(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, set_last_member_count); +} +static DEVICE_ATTR(multicast_last_member_count, S_IRUGO | S_IWUSR, + show_multicast_last_member_count, + store_multicast_last_member_count); + +static ssize_t show_multicast_startup_query_count( + struct device *d, struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%u\n", br->multicast_startup_query_count); +} + +static int set_startup_query_count(struct net_bridge *br, unsigned long val) +{ + br->multicast_startup_query_count = val; + return 0; +} + +static ssize_t store_multicast_startup_query_count( + struct device *d, struct device_attribute *attr, const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_startup_query_count); +} +static DEVICE_ATTR(multicast_startup_query_count, S_IRUGO | S_IWUSR, + show_multicast_startup_query_count, + store_multicast_startup_query_count); + +static ssize_t show_multicast_last_member_interval( + struct device *d, struct 
device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%lu\n", + jiffies_to_clock_t(br->multicast_last_member_interval)); +} + +static int set_last_member_interval(struct net_bridge *br, unsigned long val) +{ + br->multicast_last_member_interval = clock_t_to_jiffies(val); + return 0; +} + +static ssize_t store_multicast_last_member_interval( + struct device *d, struct device_attribute *attr, const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_last_member_interval); +} +static DEVICE_ATTR(multicast_last_member_interval, S_IRUGO | S_IWUSR, + show_multicast_last_member_interval, + store_multicast_last_member_interval); + +static ssize_t show_multicast_membership_interval( + struct device *d, struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%lu\n", + jiffies_to_clock_t(br->multicast_membership_interval)); +} + +static int set_membership_interval(struct net_bridge *br, unsigned long val) +{ + br->multicast_membership_interval = clock_t_to_jiffies(val); + return 0; +} + +static ssize_t store_multicast_membership_interval( + struct device *d, struct device_attribute *attr, const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_membership_interval); +} +static DEVICE_ATTR(multicast_membership_interval, S_IRUGO | S_IWUSR, + show_multicast_membership_interval, + store_multicast_membership_interval); + +static ssize_t show_multicast_querier_interval(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%lu\n", + jiffies_to_clock_t(br->multicast_querier_interval)); +} + +static int set_querier_interval(struct net_bridge *br, unsigned long val) +{ + br->multicast_querier_interval = clock_t_to_jiffies(val); + return 0; +} + +static ssize_t store_multicast_querier_interval(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) 
+{ + return store_bridge_parm(d, buf, len, set_querier_interval); +} +static DEVICE_ATTR(multicast_querier_interval, S_IRUGO | S_IWUSR, + show_multicast_querier_interval, + store_multicast_querier_interval); + +static ssize_t show_multicast_query_interval(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%lu\n", + jiffies_to_clock_t(br->multicast_query_interval)); +} + +static int set_query_interval(struct net_bridge *br, unsigned long val) +{ + br->multicast_query_interval = clock_t_to_jiffies(val); + return 0; +} + +static ssize_t store_multicast_query_interval(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, set_query_interval); +} +static DEVICE_ATTR(multicast_query_interval, S_IRUGO | S_IWUSR, + show_multicast_query_interval, + store_multicast_query_interval); + +static ssize_t show_multicast_query_response_interval( + struct device *d, struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf( + buf, "%lu\n", + jiffies_to_clock_t(br->multicast_query_response_interval)); +} + +static int set_query_response_interval(struct net_bridge *br, unsigned long val) +{ + br->multicast_query_response_interval = clock_t_to_jiffies(val); + return 0; +} + +static ssize_t store_multicast_query_response_interval( + struct device *d, struct device_attribute *attr, const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_query_response_interval); +} +static DEVICE_ATTR(multicast_query_response_interval, S_IRUGO | S_IWUSR, + show_multicast_query_response_interval, + store_multicast_query_response_interval); + +static ssize_t show_multicast_startup_query_interval( + struct device *d, struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf( + buf, "%lu\n", + jiffies_to_clock_t(br->multicast_startup_query_interval)); +} 
+ +static int set_startup_query_interval(struct net_bridge *br, unsigned long val) +{ + br->multicast_startup_query_interval = clock_t_to_jiffies(val); + return 0; +} + +static ssize_t store_multicast_startup_query_interval( + struct device *d, struct device_attribute *attr, const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_startup_query_interval); +} +static DEVICE_ATTR(multicast_startup_query_interval, S_IRUGO | S_IWUSR, + show_multicast_startup_query_interval, + store_multicast_startup_query_interval); +#endif + static struct attribute *bridge_attrs[] = { &dev_attr_forward_delay.attr, &dev_attr_hello_time.attr, @@ -364,6 +631,20 @@ static struct attribute *bridge_attrs[] = { &dev_attr_gc_timer.attr, &dev_attr_group_addr.attr, &dev_attr_flush.attr, +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + &dev_attr_multicast_router.attr, + &dev_attr_multicast_snooping.attr, + &dev_attr_hash_elasticity.attr, + &dev_attr_hash_max.attr, + &dev_attr_multicast_last_member_count.attr, + &dev_attr_multicast_startup_query_count.attr, + &dev_attr_multicast_last_member_interval.attr, + &dev_attr_multicast_membership_interval.attr, + &dev_attr_multicast_querier_interval.attr, + &dev_attr_multicast_query_interval.attr, + &dev_attr_multicast_query_response_interval.attr, + &dev_attr_multicast_startup_query_interval.attr, +#endif NULL }; @@ -378,7 +659,7 @@ static struct attribute_group bridge_group = { * * Returns the number of bytes read. 
*/ -static ssize_t brforward_read(struct kobject *kobj, +static ssize_t brforward_read(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 820643a3ba9..fd5799c9bc8 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -159,6 +159,21 @@ static ssize_t store_hairpin_mode(struct net_bridge_port *p, unsigned long v) static BRPORT_ATTR(hairpin_mode, S_IRUGO | S_IWUSR, show_hairpin_mode, store_hairpin_mode); +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING +static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) +{ + return sprintf(buf, "%d\n", p->multicast_router); +} + +static ssize_t store_multicast_router(struct net_bridge_port *p, + unsigned long v) +{ + return br_multicast_set_port_router(p, v); +} +static BRPORT_ATTR(multicast_router, S_IRUGO | S_IWUSR, show_multicast_router, + store_multicast_router); +#endif + static struct brport_attribute *brport_attrs[] = { &brport_attr_path_cost, &brport_attr_priority, @@ -176,6 +191,9 @@ static struct brport_attribute *brport_attrs[] = { &brport_attr_hold_timer, &brport_attr_flush, &brport_attr_hairpin_mode, +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + &brport_attr_multicast_router, +#endif NULL }; @@ -220,7 +238,7 @@ static ssize_t brport_store(struct kobject * kobj, return ret; } -struct sysfs_ops brport_sysfs_ops = { +const struct sysfs_ops brport_sysfs_ops = { .show = brport_show, .store = brport_store, }; @@ -228,7 +246,7 @@ struct sysfs_ops brport_sysfs_ops = { /* * Add sysfs entries to ethernet device added to a bridge. * Creates a brport subdirectory with bridge attributes. 
- * Puts symlink in bridge's brport subdirectory + * Puts symlink in bridge's brif subdirectory */ int br_sysfs_addif(struct net_bridge_port *p) { @@ -239,15 +257,37 @@ int br_sysfs_addif(struct net_bridge_port *p) err = sysfs_create_link(&p->kobj, &br->dev->dev.kobj, SYSFS_BRIDGE_PORT_LINK); if (err) - goto out2; + return err; for (a = brport_attrs; *a; ++a) { err = sysfs_create_file(&p->kobj, &((*a)->attr)); if (err) - goto out2; + return err; } - err = sysfs_create_link(br->ifobj, &p->kobj, p->dev->name); -out2: + strlcpy(p->sysfs_name, p->dev->name, IFNAMSIZ); + return sysfs_create_link(br->ifobj, &p->kobj, p->sysfs_name); +} + +/* Rename bridge's brif symlink */ +int br_sysfs_renameif(struct net_bridge_port *p) +{ + struct net_bridge *br = p->br; + int err; + + /* If a rename fails, the rollback will cause another + * rename call with the existing name. + */ + if (!strncmp(p->sysfs_name, p->dev->name, IFNAMSIZ)) + return 0; + + err = sysfs_rename_link(br->ifobj, &p->kobj, + p->sysfs_name, p->dev->name); + if (err) + netdev_notice(br->dev, "unable to rename link %s to %s", + p->sysfs_name, p->dev->name); + else + strlcpy(p->sysfs_name, p->dev->name, IFNAMSIZ); + return err; } diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c index bd91dc58d49..2a449b7ab8f 100644 --- a/net/bridge/netfilter/ebt_802_3.c +++ b/net/bridge/netfilter/ebt_802_3.c @@ -13,7 +13,7 @@ #include <linux/netfilter_bridge/ebt_802_3.h> static bool -ebt_802_3_mt(const struct sk_buff *skb, const struct xt_match_param *par) +ebt_802_3_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct ebt_802_3_info *info = par->matchinfo; const struct ebt_802_3_hdr *hdr = ebt_802_3_hdr(skb); @@ -36,14 +36,14 @@ ebt_802_3_mt(const struct sk_buff *skb, const struct xt_match_param *par) return true; } -static bool ebt_802_3_mt_check(const struct xt_mtchk_param *par) +static int ebt_802_3_mt_check(const struct xt_mtchk_param *par) { const struct ebt_802_3_info 
*info = par->matchinfo; if (info->bitmask & ~EBT_802_3_MASK || info->invflags & ~EBT_802_3_MASK) - return false; + return -EINVAL; - return true; + return 0; } static struct xt_match ebt_802_3_mt_reg __read_mostly = { @@ -52,7 +52,7 @@ static struct xt_match ebt_802_3_mt_reg __read_mostly = { .family = NFPROTO_BRIDGE, .match = ebt_802_3_mt, .checkentry = ebt_802_3_mt_check, - .matchsize = XT_ALIGN(sizeof(struct ebt_802_3_info)), + .matchsize = sizeof(struct ebt_802_3_info), .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c index b595f091f35..8b84c581be3 100644 --- a/net/bridge/netfilter/ebt_among.c +++ b/net/bridge/netfilter/ebt_among.c @@ -7,6 +7,7 @@ * August, 2003 * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/ip.h> #include <linux/if_arp.h> #include <linux/module.h> @@ -128,7 +129,7 @@ static int get_ip_src(const struct sk_buff *skb, __be32 *addr) } static bool -ebt_among_mt(const struct sk_buff *skb, const struct xt_match_param *par) +ebt_among_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct ebt_among_info *info = par->matchinfo; const char *dmac, *smac; @@ -171,7 +172,7 @@ ebt_among_mt(const struct sk_buff *skb, const struct xt_match_param *par) return true; } -static bool ebt_among_mt_check(const struct xt_mtchk_param *par) +static int ebt_among_mt_check(const struct xt_mtchk_param *par) { const struct ebt_among_info *info = par->matchinfo; const struct ebt_entry_match *em = @@ -186,24 +187,20 @@ static bool ebt_among_mt_check(const struct xt_mtchk_param *par) expected_length += ebt_mac_wormhash_size(wh_src); if (em->match_size != EBT_ALIGN(expected_length)) { - printk(KERN_WARNING - "ebtables: among: wrong size: %d " - "against expected %d, rounded to %Zd\n", - em->match_size, expected_length, - EBT_ALIGN(expected_length)); - return false; + pr_info("wrong size: %d against expected %d, rounded to %Zd\n", + em->match_size, expected_length, + 
EBT_ALIGN(expected_length)); + return -EINVAL; } if (wh_dst && (err = ebt_mac_wormhash_check_integrity(wh_dst))) { - printk(KERN_WARNING - "ebtables: among: dst integrity fail: %x\n", -err); - return false; + pr_info("dst integrity fail: %x\n", -err); + return -EINVAL; } if (wh_src && (err = ebt_mac_wormhash_check_integrity(wh_src))) { - printk(KERN_WARNING - "ebtables: among: src integrity fail: %x\n", -err); - return false; + pr_info("src integrity fail: %x\n", -err); + return -EINVAL; } - return true; + return 0; } static struct xt_match ebt_among_mt_reg __read_mostly = { diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c index b7ad60419f9..cd457b891b2 100644 --- a/net/bridge/netfilter/ebt_arp.c +++ b/net/bridge/netfilter/ebt_arp.c @@ -16,7 +16,7 @@ #include <linux/netfilter_bridge/ebt_arp.h> static bool -ebt_arp_mt(const struct sk_buff *skb, const struct xt_match_param *par) +ebt_arp_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct ebt_arp_info *info = par->matchinfo; const struct arphdr *ah; @@ -100,7 +100,7 @@ ebt_arp_mt(const struct sk_buff *skb, const struct xt_match_param *par) return true; } -static bool ebt_arp_mt_check(const struct xt_mtchk_param *par) +static int ebt_arp_mt_check(const struct xt_mtchk_param *par) { const struct ebt_arp_info *info = par->matchinfo; const struct ebt_entry *e = par->entryinfo; @@ -108,10 +108,10 @@ static bool ebt_arp_mt_check(const struct xt_mtchk_param *par) if ((e->ethproto != htons(ETH_P_ARP) && e->ethproto != htons(ETH_P_RARP)) || e->invflags & EBT_IPROTO) - return false; + return -EINVAL; if (info->bitmask & ~EBT_ARP_MASK || info->invflags & ~EBT_ARP_MASK) - return false; - return true; + return -EINVAL; + return 0; } static struct xt_match ebt_arp_mt_reg __read_mostly = { @@ -120,7 +120,7 @@ static struct xt_match ebt_arp_mt_reg __read_mostly = { .family = NFPROTO_BRIDGE, .match = ebt_arp_mt, .checkentry = ebt_arp_mt_check, - .matchsize = XT_ALIGN(sizeof(struct 
ebt_arp_info)), + .matchsize = sizeof(struct ebt_arp_info), .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c index 76584cd72e5..070cf134a22 100644 --- a/net/bridge/netfilter/ebt_arpreply.c +++ b/net/bridge/netfilter/ebt_arpreply.c @@ -16,7 +16,7 @@ #include <linux/netfilter_bridge/ebt_arpreply.h> static unsigned int -ebt_arpreply_tg(struct sk_buff *skb, const struct xt_target_param *par) +ebt_arpreply_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_arpreply_info *info = par->targinfo; const __be32 *siptr, *diptr; @@ -57,17 +57,17 @@ ebt_arpreply_tg(struct sk_buff *skb, const struct xt_target_param *par) return info->target; } -static bool ebt_arpreply_tg_check(const struct xt_tgchk_param *par) +static int ebt_arpreply_tg_check(const struct xt_tgchk_param *par) { const struct ebt_arpreply_info *info = par->targinfo; const struct ebt_entry *e = par->entryinfo; if (BASE_CHAIN && info->target == EBT_RETURN) - return false; + return -EINVAL; if (e->ethproto != htons(ETH_P_ARP) || e->invflags & EBT_IPROTO) - return false; - return true; + return -EINVAL; + return 0; } static struct xt_target ebt_arpreply_tg_reg __read_mostly = { @@ -78,7 +78,7 @@ static struct xt_target ebt_arpreply_tg_reg __read_mostly = { .hooks = (1 << NF_BR_NUMHOOKS) | (1 << NF_BR_PRE_ROUTING), .target = ebt_arpreply_tg, .checkentry = ebt_arpreply_tg_check, - .targetsize = XT_ALIGN(sizeof(struct ebt_arpreply_info)), + .targetsize = sizeof(struct ebt_arpreply_info), .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c index 6b49ea9e31f..c59f7bfae6e 100644 --- a/net/bridge/netfilter/ebt_dnat.c +++ b/net/bridge/netfilter/ebt_dnat.c @@ -15,7 +15,7 @@ #include <linux/netfilter_bridge/ebt_nat.h> static unsigned int -ebt_dnat_tg(struct sk_buff *skb, const struct xt_target_param *par) +ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct 
ebt_nat_info *info = par->targinfo; @@ -26,13 +26,13 @@ ebt_dnat_tg(struct sk_buff *skb, const struct xt_target_param *par) return info->target; } -static bool ebt_dnat_tg_check(const struct xt_tgchk_param *par) +static int ebt_dnat_tg_check(const struct xt_tgchk_param *par) { const struct ebt_nat_info *info = par->targinfo; unsigned int hook_mask; if (BASE_CHAIN && info->target == EBT_RETURN) - return false; + return -EINVAL; hook_mask = par->hook_mask & ~(1 << NF_BR_NUMHOOKS); if ((strcmp(par->table, "nat") != 0 || @@ -40,10 +40,10 @@ static bool ebt_dnat_tg_check(const struct xt_tgchk_param *par) (1 << NF_BR_LOCAL_OUT)))) && (strcmp(par->table, "broute") != 0 || hook_mask & ~(1 << NF_BR_BROUTING))) - return false; + return -EINVAL; if (INVALID_TARGET) - return false; - return true; + return -EINVAL; + return 0; } static struct xt_target ebt_dnat_tg_reg __read_mostly = { @@ -54,7 +54,7 @@ static struct xt_target ebt_dnat_tg_reg __read_mostly = { (1 << NF_BR_LOCAL_OUT) | (1 << NF_BR_BROUTING), .target = ebt_dnat_tg, .checkentry = ebt_dnat_tg_check, - .targetsize = XT_ALIGN(sizeof(struct ebt_nat_info)), + .targetsize = sizeof(struct ebt_nat_info), .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c index d771bbfbcbe..23bca62d58d 100644 --- a/net/bridge/netfilter/ebt_ip.c +++ b/net/bridge/netfilter/ebt_ip.c @@ -25,7 +25,7 @@ struct tcpudphdr { }; static bool -ebt_ip_mt(const struct sk_buff *skb, const struct xt_match_param *par) +ebt_ip_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct ebt_ip_info *info = par->matchinfo; const struct iphdr *ih; @@ -77,31 +77,31 @@ ebt_ip_mt(const struct sk_buff *skb, const struct xt_match_param *par) return true; } -static bool ebt_ip_mt_check(const struct xt_mtchk_param *par) +static int ebt_ip_mt_check(const struct xt_mtchk_param *par) { const struct ebt_ip_info *info = par->matchinfo; const struct ebt_entry *e = par->entryinfo; if (e->ethproto != htons(ETH_P_IP) 
|| e->invflags & EBT_IPROTO) - return false; + return -EINVAL; if (info->bitmask & ~EBT_IP_MASK || info->invflags & ~EBT_IP_MASK) - return false; + return -EINVAL; if (info->bitmask & (EBT_IP_DPORT | EBT_IP_SPORT)) { if (info->invflags & EBT_IP_PROTO) - return false; + return -EINVAL; if (info->protocol != IPPROTO_TCP && info->protocol != IPPROTO_UDP && info->protocol != IPPROTO_UDPLITE && info->protocol != IPPROTO_SCTP && info->protocol != IPPROTO_DCCP) - return false; + return -EINVAL; } if (info->bitmask & EBT_IP_DPORT && info->dport[0] > info->dport[1]) - return false; + return -EINVAL; if (info->bitmask & EBT_IP_SPORT && info->sport[0] > info->sport[1]) - return false; - return true; + return -EINVAL; + return 0; } static struct xt_match ebt_ip_mt_reg __read_mostly = { @@ -110,7 +110,7 @@ static struct xt_match ebt_ip_mt_reg __read_mostly = { .family = NFPROTO_BRIDGE, .match = ebt_ip_mt, .checkentry = ebt_ip_mt_check, - .matchsize = XT_ALIGN(sizeof(struct ebt_ip_info)), + .matchsize = sizeof(struct ebt_ip_info), .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c index 784a6573876..50a46afc2bc 100644 --- a/net/bridge/netfilter/ebt_ip6.c +++ b/net/bridge/netfilter/ebt_ip6.c @@ -4,7 +4,7 @@ * Authors: * Manohar Castelino <manohar.r.castelino@intel.com> * Kuo-Lang Tseng <kuo-lang.tseng@intel.com> - * Jan Engelhardt <jengelh@computergmbh.de> + * Jan Engelhardt <jengelh@medozas.de> * * Summary: * This is just a modification of the IPv4 code written by @@ -28,15 +28,13 @@ struct tcpudphdr { }; static bool -ebt_ip6_mt(const struct sk_buff *skb, const struct xt_match_param *par) +ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct ebt_ip6_info *info = par->matchinfo; const struct ipv6hdr *ih6; struct ipv6hdr _ip6h; const struct tcpudphdr *pptr; struct tcpudphdr _ports; - struct in6_addr tmp_addr; - int i; ih6 = skb_header_pointer(skb, 0, sizeof(_ip6h), &_ip6h); if (ih6 == NULL) @@ -44,18 
+42,10 @@ ebt_ip6_mt(const struct sk_buff *skb, const struct xt_match_param *par) if (info->bitmask & EBT_IP6_TCLASS && FWINV(info->tclass != ipv6_get_dsfield(ih6), EBT_IP6_TCLASS)) return false; - for (i = 0; i < 4; i++) - tmp_addr.in6_u.u6_addr32[i] = ih6->saddr.in6_u.u6_addr32[i] & - info->smsk.in6_u.u6_addr32[i]; - if (info->bitmask & EBT_IP6_SOURCE && - FWINV((ipv6_addr_cmp(&tmp_addr, &info->saddr) != 0), - EBT_IP6_SOURCE)) - return false; - for (i = 0; i < 4; i++) - tmp_addr.in6_u.u6_addr32[i] = ih6->daddr.in6_u.u6_addr32[i] & - info->dmsk.in6_u.u6_addr32[i]; - if (info->bitmask & EBT_IP6_DEST && - FWINV((ipv6_addr_cmp(&tmp_addr, &info->daddr) != 0), EBT_IP6_DEST)) + if (FWINV(ipv6_masked_addr_cmp(&ih6->saddr, &info->smsk, + &info->saddr), EBT_IP6_SOURCE) || + FWINV(ipv6_masked_addr_cmp(&ih6->daddr, &info->dmsk, + &info->daddr), EBT_IP6_DEST)) return false; if (info->bitmask & EBT_IP6_PROTO) { uint8_t nexthdr = ih6->nexthdr; @@ -90,30 +80,30 @@ ebt_ip6_mt(const struct sk_buff *skb, const struct xt_match_param *par) return true; } -static bool ebt_ip6_mt_check(const struct xt_mtchk_param *par) +static int ebt_ip6_mt_check(const struct xt_mtchk_param *par) { const struct ebt_entry *e = par->entryinfo; struct ebt_ip6_info *info = par->matchinfo; if (e->ethproto != htons(ETH_P_IPV6) || e->invflags & EBT_IPROTO) - return false; + return -EINVAL; if (info->bitmask & ~EBT_IP6_MASK || info->invflags & ~EBT_IP6_MASK) - return false; + return -EINVAL; if (info->bitmask & (EBT_IP6_DPORT | EBT_IP6_SPORT)) { if (info->invflags & EBT_IP6_PROTO) - return false; + return -EINVAL; if (info->protocol != IPPROTO_TCP && info->protocol != IPPROTO_UDP && info->protocol != IPPROTO_UDPLITE && info->protocol != IPPROTO_SCTP && info->protocol != IPPROTO_DCCP) - return false; + return -EINVAL; } if (info->bitmask & EBT_IP6_DPORT && info->dport[0] > info->dport[1]) - return false; + return -EINVAL; if (info->bitmask & EBT_IP6_SPORT && info->sport[0] > info->sport[1]) - return false; - 
return true; + return -EINVAL; + return 0; } static struct xt_match ebt_ip6_mt_reg __read_mostly = { @@ -122,7 +112,7 @@ static struct xt_match ebt_ip6_mt_reg __read_mostly = { .family = NFPROTO_BRIDGE, .match = ebt_ip6_mt, .checkentry = ebt_ip6_mt_check, - .matchsize = XT_ALIGN(sizeof(struct ebt_ip6_info)), + .matchsize = sizeof(struct ebt_ip6_info), .me = THIS_MODULE, }; @@ -139,4 +129,5 @@ static void __exit ebt_ip6_fini(void) module_init(ebt_ip6_init); module_exit(ebt_ip6_fini); MODULE_DESCRIPTION("Ebtables: IPv6 protocol packet match"); +MODULE_AUTHOR("Kuo-Lang Tseng <kuo-lang.tseng@intel.com>"); MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c index f7bd9192ff0..517e78befcb 100644 --- a/net/bridge/netfilter/ebt_limit.c +++ b/net/bridge/netfilter/ebt_limit.c @@ -10,6 +10,7 @@ * September, 2003 * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/netdevice.h> #include <linux/spinlock.h> @@ -31,7 +32,7 @@ static DEFINE_SPINLOCK(limit_lock); #define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ) static bool -ebt_limit_mt(const struct sk_buff *skb, const struct xt_match_param *par) +ebt_limit_mt(const struct sk_buff *skb, struct xt_action_param *par) { struct ebt_limit_info *info = (void *)par->matchinfo; unsigned long now = jiffies; @@ -64,16 +65,16 @@ user2credits(u_int32_t user) return (user * HZ * CREDITS_PER_JIFFY) / EBT_LIMIT_SCALE; } -static bool ebt_limit_mt_check(const struct xt_mtchk_param *par) +static int ebt_limit_mt_check(const struct xt_mtchk_param *par) { struct ebt_limit_info *info = par->matchinfo; /* Check for overflow. */ if (info->burst == 0 || user2credits(info->avg * info->burst) < user2credits(info->avg)) { - printk("Overflow in ebt_limit, try lower: %u/%u\n", + pr_info("overflow, try lower: %u/%u\n", info->avg, info->burst); - return false; + return -EINVAL; } /* User avg in seconds * EBT_LIMIT_SCALE: convert to jiffies * 128. 
*/ @@ -81,16 +82,32 @@ static bool ebt_limit_mt_check(const struct xt_mtchk_param *par) info->credit = user2credits(info->avg * info->burst); info->credit_cap = user2credits(info->avg * info->burst); info->cost = user2credits(info->avg); - return true; + return 0; } + +#ifdef CONFIG_COMPAT +/* + * no conversion function needed -- + * only avg/burst have meaningful values in userspace. + */ +struct ebt_compat_limit_info { + compat_uint_t avg, burst; + compat_ulong_t prev; + compat_uint_t credit, credit_cap, cost; +}; +#endif + static struct xt_match ebt_limit_mt_reg __read_mostly = { .name = "limit", .revision = 0, .family = NFPROTO_BRIDGE, .match = ebt_limit_mt, .checkentry = ebt_limit_mt_check, - .matchsize = XT_ALIGN(sizeof(struct ebt_limit_info)), + .matchsize = sizeof(struct ebt_limit_info), +#ifdef CONFIG_COMPAT + .compatsize = sizeof(struct ebt_compat_limit_info), +#endif .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index e4ea3fdd1d4..6e5a8bb9b94 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -24,16 +24,16 @@ static DEFINE_SPINLOCK(ebt_log_lock); -static bool ebt_log_tg_check(const struct xt_tgchk_param *par) +static int ebt_log_tg_check(const struct xt_tgchk_param *par) { struct ebt_log_info *info = par->targinfo; if (info->bitmask & ~EBT_LOG_MASK) - return false; + return -EINVAL; if (info->loglevel >= 8) - return false; + return -EINVAL; info->prefix[EBT_LOG_PREFIX_SIZE - 1] = '\0'; - return true; + return 0; } struct tcpudphdr @@ -171,7 +171,7 @@ out: } static unsigned int -ebt_log_tg(struct sk_buff *skb, const struct xt_target_param *par) +ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_log_info *info = par->targinfo; struct nf_loginfo li; @@ -195,7 +195,7 @@ static struct xt_target ebt_log_tg_reg __read_mostly = { .family = NFPROTO_BRIDGE, .target = ebt_log_tg, .checkentry = ebt_log_tg_check, - .targetsize = 
XT_ALIGN(sizeof(struct ebt_log_info)), + .targetsize = sizeof(struct ebt_log_info), .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c index 2fee7e8e2e9..66697cbd0a8 100644 --- a/net/bridge/netfilter/ebt_mark.c +++ b/net/bridge/netfilter/ebt_mark.c @@ -19,7 +19,7 @@ #include <linux/netfilter_bridge/ebt_mark_t.h> static unsigned int -ebt_mark_tg(struct sk_buff *skb, const struct xt_target_param *par) +ebt_mark_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_mark_t_info *info = par->targinfo; int action = info->target & -16; @@ -36,22 +36,48 @@ ebt_mark_tg(struct sk_buff *skb, const struct xt_target_param *par) return info->target | ~EBT_VERDICT_BITS; } -static bool ebt_mark_tg_check(const struct xt_tgchk_param *par) +static int ebt_mark_tg_check(const struct xt_tgchk_param *par) { const struct ebt_mark_t_info *info = par->targinfo; int tmp; tmp = info->target | ~EBT_VERDICT_BITS; if (BASE_CHAIN && tmp == EBT_RETURN) - return false; + return -EINVAL; if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0) - return false; + return -EINVAL; tmp = info->target & ~EBT_VERDICT_BITS; if (tmp != MARK_SET_VALUE && tmp != MARK_OR_VALUE && tmp != MARK_AND_VALUE && tmp != MARK_XOR_VALUE) - return false; - return true; + return -EINVAL; + return 0; } +#ifdef CONFIG_COMPAT +struct compat_ebt_mark_t_info { + compat_ulong_t mark; + compat_uint_t target; +}; + +static void mark_tg_compat_from_user(void *dst, const void *src) +{ + const struct compat_ebt_mark_t_info *user = src; + struct ebt_mark_t_info *kern = dst; + + kern->mark = user->mark; + kern->target = user->target; +} + +static int mark_tg_compat_to_user(void __user *dst, const void *src) +{ + struct compat_ebt_mark_t_info __user *user = dst; + const struct ebt_mark_t_info *kern = src; + + if (put_user(kern->mark, &user->mark) || + put_user(kern->target, &user->target)) + return -EFAULT; + return 0; +} +#endif static struct xt_target ebt_mark_tg_reg 
__read_mostly = { .name = "mark", @@ -59,7 +85,12 @@ static struct xt_target ebt_mark_tg_reg __read_mostly = { .family = NFPROTO_BRIDGE, .target = ebt_mark_tg, .checkentry = ebt_mark_tg_check, - .targetsize = XT_ALIGN(sizeof(struct ebt_mark_t_info)), + .targetsize = sizeof(struct ebt_mark_t_info), +#ifdef CONFIG_COMPAT + .compatsize = sizeof(struct compat_ebt_mark_t_info), + .compat_from_user = mark_tg_compat_from_user, + .compat_to_user = mark_tg_compat_to_user, +#endif .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebt_mark_m.c b/net/bridge/netfilter/ebt_mark_m.c index ea570f214b1..d98baefc4c7 100644 --- a/net/bridge/netfilter/ebt_mark_m.c +++ b/net/bridge/netfilter/ebt_mark_m.c @@ -13,7 +13,7 @@ #include <linux/netfilter_bridge/ebt_mark_m.h> static bool -ebt_mark_mt(const struct sk_buff *skb, const struct xt_match_param *par) +ebt_mark_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct ebt_mark_m_info *info = par->matchinfo; @@ -22,26 +22,63 @@ ebt_mark_mt(const struct sk_buff *skb, const struct xt_match_param *par) return ((skb->mark & info->mask) == info->mark) ^ info->invert; } -static bool ebt_mark_mt_check(const struct xt_mtchk_param *par) +static int ebt_mark_mt_check(const struct xt_mtchk_param *par) { const struct ebt_mark_m_info *info = par->matchinfo; if (info->bitmask & ~EBT_MARK_MASK) - return false; + return -EINVAL; if ((info->bitmask & EBT_MARK_OR) && (info->bitmask & EBT_MARK_AND)) - return false; + return -EINVAL; if (!info->bitmask) - return false; - return true; + return -EINVAL; + return 0; } + +#ifdef CONFIG_COMPAT +struct compat_ebt_mark_m_info { + compat_ulong_t mark, mask; + uint8_t invert, bitmask; +}; + +static void mark_mt_compat_from_user(void *dst, const void *src) +{ + const struct compat_ebt_mark_m_info *user = src; + struct ebt_mark_m_info *kern = dst; + + kern->mark = user->mark; + kern->mask = user->mask; + kern->invert = user->invert; + kern->bitmask = user->bitmask; +} + +static int 
mark_mt_compat_to_user(void __user *dst, const void *src) +{ + struct compat_ebt_mark_m_info __user *user = dst; + const struct ebt_mark_m_info *kern = src; + + if (put_user(kern->mark, &user->mark) || + put_user(kern->mask, &user->mask) || + put_user(kern->invert, &user->invert) || + put_user(kern->bitmask, &user->bitmask)) + return -EFAULT; + return 0; +} +#endif + static struct xt_match ebt_mark_mt_reg __read_mostly = { .name = "mark_m", .revision = 0, .family = NFPROTO_BRIDGE, .match = ebt_mark_mt, .checkentry = ebt_mark_mt_check, - .matchsize = XT_ALIGN(sizeof(struct ebt_mark_m_info)), + .matchsize = sizeof(struct ebt_mark_m_info), +#ifdef CONFIG_COMPAT + .compatsize = sizeof(struct compat_ebt_mark_m_info), + .compat_from_user = mark_mt_compat_from_user, + .compat_to_user = mark_mt_compat_to_user, +#endif .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c index 2a63d996dd4..5be68bbcc34 100644 --- a/net/bridge/netfilter/ebt_nflog.c +++ b/net/bridge/netfilter/ebt_nflog.c @@ -20,7 +20,7 @@ #include <net/netfilter/nf_log.h> static unsigned int -ebt_nflog_tg(struct sk_buff *skb, const struct xt_target_param *par) +ebt_nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_nflog_info *info = par->targinfo; struct nf_loginfo li; @@ -35,14 +35,14 @@ ebt_nflog_tg(struct sk_buff *skb, const struct xt_target_param *par) return EBT_CONTINUE; } -static bool ebt_nflog_tg_check(const struct xt_tgchk_param *par) +static int ebt_nflog_tg_check(const struct xt_tgchk_param *par) { struct ebt_nflog_info *info = par->targinfo; if (info->flags & ~EBT_NFLOG_MASK) - return false; + return -EINVAL; info->prefix[EBT_NFLOG_PREFIX_SIZE - 1] = '\0'; - return true; + return 0; } static struct xt_target ebt_nflog_tg_reg __read_mostly = { @@ -51,7 +51,7 @@ static struct xt_target ebt_nflog_tg_reg __read_mostly = { .family = NFPROTO_BRIDGE, .target = ebt_nflog_tg, .checkentry = ebt_nflog_tg_check, - .targetsize = 
XT_ALIGN(sizeof(struct ebt_nflog_info)), + .targetsize = sizeof(struct ebt_nflog_info), .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebt_pkttype.c b/net/bridge/netfilter/ebt_pkttype.c index 883e96e2a54..496a5651530 100644 --- a/net/bridge/netfilter/ebt_pkttype.c +++ b/net/bridge/netfilter/ebt_pkttype.c @@ -13,21 +13,21 @@ #include <linux/netfilter_bridge/ebt_pkttype.h> static bool -ebt_pkttype_mt(const struct sk_buff *skb, const struct xt_match_param *par) +ebt_pkttype_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct ebt_pkttype_info *info = par->matchinfo; return (skb->pkt_type == info->pkt_type) ^ info->invert; } -static bool ebt_pkttype_mt_check(const struct xt_mtchk_param *par) +static int ebt_pkttype_mt_check(const struct xt_mtchk_param *par) { const struct ebt_pkttype_info *info = par->matchinfo; if (info->invert != 0 && info->invert != 1) - return false; + return -EINVAL; /* Allow any pkt_type value */ - return true; + return 0; } static struct xt_match ebt_pkttype_mt_reg __read_mostly = { @@ -36,7 +36,7 @@ static struct xt_match ebt_pkttype_mt_reg __read_mostly = { .family = NFPROTO_BRIDGE, .match = ebt_pkttype_mt, .checkentry = ebt_pkttype_mt_check, - .matchsize = XT_ALIGN(sizeof(struct ebt_pkttype_info)), + .matchsize = sizeof(struct ebt_pkttype_info), .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c index c8a49f7a57b..9e19166ba45 100644 --- a/net/bridge/netfilter/ebt_redirect.c +++ b/net/bridge/netfilter/ebt_redirect.c @@ -16,7 +16,7 @@ #include <linux/netfilter_bridge/ebt_redirect.h> static unsigned int -ebt_redirect_tg(struct sk_buff *skb, const struct xt_target_param *par) +ebt_redirect_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_redirect_info *info = par->targinfo; @@ -32,23 +32,23 @@ ebt_redirect_tg(struct sk_buff *skb, const struct xt_target_param *par) return info->target; } -static bool ebt_redirect_tg_check(const 
struct xt_tgchk_param *par) +static int ebt_redirect_tg_check(const struct xt_tgchk_param *par) { const struct ebt_redirect_info *info = par->targinfo; unsigned int hook_mask; if (BASE_CHAIN && info->target == EBT_RETURN) - return false; + return -EINVAL; hook_mask = par->hook_mask & ~(1 << NF_BR_NUMHOOKS); if ((strcmp(par->table, "nat") != 0 || hook_mask & ~(1 << NF_BR_PRE_ROUTING)) && (strcmp(par->table, "broute") != 0 || hook_mask & ~(1 << NF_BR_BROUTING))) - return false; + return -EINVAL; if (INVALID_TARGET) - return false; - return true; + return -EINVAL; + return 0; } static struct xt_target ebt_redirect_tg_reg __read_mostly = { @@ -59,7 +59,7 @@ static struct xt_target ebt_redirect_tg_reg __read_mostly = { (1 << NF_BR_BROUTING), .target = ebt_redirect_tg, .checkentry = ebt_redirect_tg_check, - .targetsize = XT_ALIGN(sizeof(struct ebt_redirect_info)), + .targetsize = sizeof(struct ebt_redirect_info), .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c index 8d04d4c302b..f8f0bd1a1d5 100644 --- a/net/bridge/netfilter/ebt_snat.c +++ b/net/bridge/netfilter/ebt_snat.c @@ -17,7 +17,7 @@ #include <linux/netfilter_bridge/ebt_nat.h> static unsigned int -ebt_snat_tg(struct sk_buff *skb, const struct xt_target_param *par) +ebt_snat_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_nat_info *info = par->targinfo; @@ -42,21 +42,21 @@ out: return info->target | ~EBT_VERDICT_BITS; } -static bool ebt_snat_tg_check(const struct xt_tgchk_param *par) +static int ebt_snat_tg_check(const struct xt_tgchk_param *par) { const struct ebt_nat_info *info = par->targinfo; int tmp; tmp = info->target | ~EBT_VERDICT_BITS; if (BASE_CHAIN && tmp == EBT_RETURN) - return false; + return -EINVAL; if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0) - return false; + return -EINVAL; tmp = info->target | EBT_VERDICT_BITS; if ((tmp & ~NAT_ARP_BIT) != ~NAT_ARP_BIT) - return false; - return true; + return -EINVAL; + return 0; } 
static struct xt_target ebt_snat_tg_reg __read_mostly = { @@ -67,7 +67,7 @@ static struct xt_target ebt_snat_tg_reg __read_mostly = { .hooks = (1 << NF_BR_NUMHOOKS) | (1 << NF_BR_POST_ROUTING), .target = ebt_snat_tg, .checkentry = ebt_snat_tg_check, - .targetsize = XT_ALIGN(sizeof(struct ebt_nat_info)), + .targetsize = sizeof(struct ebt_nat_info), .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c index 75e29a9cebd..5b33a2e634a 100644 --- a/net/bridge/netfilter/ebt_stp.c +++ b/net/bridge/netfilter/ebt_stp.c @@ -120,7 +120,7 @@ static bool ebt_filter_config(const struct ebt_stp_info *info, } static bool -ebt_stp_mt(const struct sk_buff *skb, const struct xt_match_param *par) +ebt_stp_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct ebt_stp_info *info = par->matchinfo; const struct stp_header *sp; @@ -153,7 +153,7 @@ ebt_stp_mt(const struct sk_buff *skb, const struct xt_match_param *par) return true; } -static bool ebt_stp_mt_check(const struct xt_mtchk_param *par) +static int ebt_stp_mt_check(const struct xt_mtchk_param *par) { const struct ebt_stp_info *info = par->matchinfo; const uint8_t bridge_ula[6] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x00}; @@ -162,13 +162,13 @@ static bool ebt_stp_mt_check(const struct xt_mtchk_param *par) if (info->bitmask & ~EBT_STP_MASK || info->invflags & ~EBT_STP_MASK || !(info->bitmask & EBT_STP_MASK)) - return false; + return -EINVAL; /* Make sure the match only receives stp frames */ if (compare_ether_addr(e->destmac, bridge_ula) || compare_ether_addr(e->destmsk, msk) || !(e->bitmask & EBT_DESTMAC)) - return false; + return -EINVAL; - return true; + return 0; } static struct xt_match ebt_stp_mt_reg __read_mostly = { @@ -177,7 +177,7 @@ static struct xt_match ebt_stp_mt_reg __read_mostly = { .family = NFPROTO_BRIDGE, .match = ebt_stp_mt, .checkentry = ebt_stp_mt_check, - .matchsize = XT_ALIGN(sizeof(struct ebt_stp_info)), + .matchsize = sizeof(struct ebt_stp_info), 
.me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index ce50688a643..ae3c7cef148 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -27,8 +27,9 @@ * flushed even if it is not full yet. * */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> +#include <linux/slab.h> #include <linux/spinlock.h> #include <linux/socket.h> #include <linux/skbuff.h> @@ -43,9 +44,6 @@ #include <net/sock.h> #include "../br_private.h" -#define PRINTR(format, args...) do { if (net_ratelimit()) \ - printk(format , ## args); } while (0) - static unsigned int nlbufsiz = NLMSG_GOODSIZE; module_param(nlbufsiz, uint, 0600); MODULE_PARM_DESC(nlbufsiz, "netlink buffer size (number of bytes) " @@ -106,15 +104,14 @@ static struct sk_buff *ulog_alloc_skb(unsigned int size) n = max(size, nlbufsiz); skb = alloc_skb(n, GFP_ATOMIC); if (!skb) { - PRINTR(KERN_ERR "ebt_ulog: can't alloc whole buffer " - "of size %ub!\n", n); + pr_debug("cannot alloc whole buffer of size %ub!\n", n); if (n > size) { /* try to allocate only as much as we need for * current packet */ skb = alloc_skb(size, GFP_ATOMIC); if (!skb) - PRINTR(KERN_ERR "ebt_ulog: can't even allocate " - "buffer of size %ub\n", size); + pr_debug("cannot even allocate " + "buffer of size %ub\n", size); } } @@ -141,8 +138,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb, size = NLMSG_SPACE(sizeof(*pm) + copy_len); if (size > nlbufsiz) { - PRINTR("ebt_ulog: Size %Zd needed, but nlbufsiz=%d\n", - size, nlbufsiz); + pr_debug("Size %Zd needed, but nlbufsiz=%d\n", size, nlbufsiz); return; } @@ -216,8 +212,8 @@ unlock: return; nlmsg_failure: - printk(KERN_CRIT "ebt_ulog: error during NLMSG_PUT. This should " - "not happen, please report to author.\n"); + pr_debug("error during NLMSG_PUT. 
This should " + "not happen, please report to author.\n"); goto unlock; alloc_failure: goto unlock; @@ -247,26 +243,26 @@ static void ebt_log_packet(u_int8_t pf, unsigned int hooknum, } static unsigned int -ebt_ulog_tg(struct sk_buff *skb, const struct xt_target_param *par) +ebt_ulog_tg(struct sk_buff *skb, const struct xt_action_param *par) { ebt_ulog_packet(par->hooknum, skb, par->in, par->out, par->targinfo, NULL); return EBT_CONTINUE; } -static bool ebt_ulog_tg_check(const struct xt_tgchk_param *par) +static int ebt_ulog_tg_check(const struct xt_tgchk_param *par) { struct ebt_ulog_info *uloginfo = par->targinfo; if (uloginfo->nlgroup > 31) - return false; + return -EINVAL; uloginfo->prefix[EBT_ULOG_PREFIX_LEN - 1] = '\0'; if (uloginfo->qthreshold > EBT_ULOG_MAX_QLEN) uloginfo->qthreshold = EBT_ULOG_MAX_QLEN; - return true; + return 0; } static struct xt_target ebt_ulog_tg_reg __read_mostly = { @@ -275,7 +271,7 @@ static struct xt_target ebt_ulog_tg_reg __read_mostly = { .family = NFPROTO_BRIDGE, .target = ebt_ulog_tg, .checkentry = ebt_ulog_tg_check, - .targetsize = XT_ALIGN(sizeof(struct ebt_ulog_info)), + .targetsize = sizeof(struct ebt_ulog_info), .me = THIS_MODULE, }; @@ -291,8 +287,8 @@ static int __init ebt_ulog_init(void) int i; if (nlbufsiz >= 128*1024) { - printk(KERN_NOTICE "ebt_ulog: Netlink buffer has to be <= 128kB," - " please try a smaller nlbufsiz parameter.\n"); + pr_warning("Netlink buffer has to be <= 128kB," + " please try a smaller nlbufsiz parameter.\n"); return -EINVAL; } @@ -305,13 +301,10 @@ static int __init ebt_ulog_init(void) ebtulognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, EBT_ULOG_MAXNLGROUPS, NULL, NULL, THIS_MODULE); - if (!ebtulognl) { - printk(KERN_WARNING KBUILD_MODNAME ": out of memory trying to " - "call netlink_kernel_create\n"); + if (!ebtulognl) ret = -ENOMEM; - } else if ((ret = xt_register_target(&ebt_ulog_tg_reg)) != 0) { + else if ((ret = xt_register_target(&ebt_ulog_tg_reg)) != 0) 
netlink_kernel_release(ebtulognl); - } if (ret == 0) nf_log_register(NFPROTO_BRIDGE, &ebt_ulog_logger); diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c index 3dddd489328..87b53b3a921 100644 --- a/net/bridge/netfilter/ebt_vlan.c +++ b/net/bridge/netfilter/ebt_vlan.c @@ -26,22 +26,17 @@ #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_vlan.h> -static int debug; #define MODULE_VERS "0.6" -module_param(debug, int, 0); -MODULE_PARM_DESC(debug, "debug=1 is turn on debug messages"); MODULE_AUTHOR("Nick Fedchik <nick@fedchik.org.ua>"); MODULE_DESCRIPTION("Ebtables: 802.1Q VLAN tag match"); MODULE_LICENSE("GPL"); - -#define DEBUG_MSG(args...) if (debug) printk (KERN_DEBUG "ebt_vlan: " args) #define GET_BITMASK(_BIT_MASK_) info->bitmask & _BIT_MASK_ #define EXIT_ON_MISMATCH(_MATCH_,_MASK_) {if (!((info->_MATCH_ == _MATCH_)^!!(info->invflags & _MASK_))) return false; } static bool -ebt_vlan_mt(const struct sk_buff *skb, const struct xt_match_param *par) +ebt_vlan_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct ebt_vlan_info *info = par->matchinfo; const struct vlan_hdr *fp; @@ -84,32 +79,31 @@ ebt_vlan_mt(const struct sk_buff *skb, const struct xt_match_param *par) return true; } -static bool ebt_vlan_mt_check(const struct xt_mtchk_param *par) +static int ebt_vlan_mt_check(const struct xt_mtchk_param *par) { struct ebt_vlan_info *info = par->matchinfo; const struct ebt_entry *e = par->entryinfo; /* Is it 802.1Q frame checked? 
*/ if (e->ethproto != htons(ETH_P_8021Q)) { - DEBUG_MSG - ("passed entry proto %2.4X is not 802.1Q (8100)\n", - (unsigned short) ntohs(e->ethproto)); - return false; + pr_debug("passed entry proto %2.4X is not 802.1Q (8100)\n", + ntohs(e->ethproto)); + return -EINVAL; } /* Check for bitmask range * True if even one bit is out of mask */ if (info->bitmask & ~EBT_VLAN_MASK) { - DEBUG_MSG("bitmask %2X is out of mask (%2X)\n", - info->bitmask, EBT_VLAN_MASK); - return false; + pr_debug("bitmask %2X is out of mask (%2X)\n", + info->bitmask, EBT_VLAN_MASK); + return -EINVAL; } /* Check for inversion flags range */ if (info->invflags & ~EBT_VLAN_MASK) { - DEBUG_MSG("inversion flags %2X is out of mask (%2X)\n", - info->invflags, EBT_VLAN_MASK); - return false; + pr_debug("inversion flags %2X is out of mask (%2X)\n", + info->invflags, EBT_VLAN_MASK); + return -EINVAL; } /* Reserved VLAN ID (VID) values @@ -121,10 +115,9 @@ static bool ebt_vlan_mt_check(const struct xt_mtchk_param *par) if (GET_BITMASK(EBT_VLAN_ID)) { if (!!info->id) { /* if id!=0 => check vid range */ if (info->id > VLAN_GROUP_ARRAY_LEN) { - DEBUG_MSG - ("id %d is out of range (1-4096)\n", - info->id); - return false; + pr_debug("id %d is out of range (1-4096)\n", + info->id); + return -EINVAL; } /* Note: This is valid VLAN-tagged frame point. * Any value of user_priority are acceptable, @@ -137,9 +130,9 @@ static bool ebt_vlan_mt_check(const struct xt_mtchk_param *par) if (GET_BITMASK(EBT_VLAN_PRIO)) { if ((unsigned char) info->prio > 7) { - DEBUG_MSG("prio %d is out of range (0-7)\n", - info->prio); - return false; + pr_debug("prio %d is out of range (0-7)\n", + info->prio); + return -EINVAL; } } /* Check for encapsulated proto range - it is possible to be @@ -147,14 +140,13 @@ static bool ebt_vlan_mt_check(const struct xt_mtchk_param *par) * if_ether.h: ETH_ZLEN 60 - Min. 
octets in frame sans FCS */ if (GET_BITMASK(EBT_VLAN_ENCAP)) { if ((unsigned short) ntohs(info->encap) < ETH_ZLEN) { - DEBUG_MSG - ("encap frame length %d is less than minimal\n", - ntohs(info->encap)); - return false; + pr_debug("encap frame length %d is less than " + "minimal\n", ntohs(info->encap)); + return -EINVAL; } } - return true; + return 0; } static struct xt_match ebt_vlan_mt_reg __read_mostly = { @@ -163,15 +155,13 @@ static struct xt_match ebt_vlan_mt_reg __read_mostly = { .family = NFPROTO_BRIDGE, .match = ebt_vlan_mt, .checkentry = ebt_vlan_mt_check, - .matchsize = XT_ALIGN(sizeof(struct ebt_vlan_info)), + .matchsize = sizeof(struct ebt_vlan_info), .me = THIS_MODULE, }; static int __init ebt_vlan_init(void) { - DEBUG_MSG("ebtables 802.1Q extension module v" - MODULE_VERS "\n"); - DEBUG_MSG("module debug=%d\n", !!debug); + pr_debug("ebtables 802.1Q extension module v" MODULE_VERS "\n"); return xt_register_match(&ebt_vlan_mt_reg); } diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index d32ab13e728..ae3f106c390 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -71,7 +71,7 @@ static int __net_init broute_net_init(struct net *net) static void __net_exit broute_net_exit(struct net *net) { - ebt_unregister_table(net->xt.broute_table); + ebt_unregister_table(net, net->xt.broute_table); } static struct pernet_operations broute_net_ops = { diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 60b1a6ca718..42e6bd09457 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -107,7 +107,7 @@ static int __net_init frame_filter_net_init(struct net *net) static void __net_exit frame_filter_net_exit(struct net *net) { - ebt_unregister_table(net->xt.frame_filter); + ebt_unregister_table(net, net->xt.frame_filter); } static struct pernet_operations frame_filter_net_ops = { diff --git 
a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 4a98804203b..6dc2f878ae0 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -107,7 +107,7 @@ static int __net_init frame_nat_net_init(struct net *net) static void __net_exit frame_nat_net_exit(struct net *net) { - ebt_unregister_table(net->xt.frame_nat); + ebt_unregister_table(net, net->xt.frame_nat); } static struct pernet_operations frame_nat_net_ops = { diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 0b7f262cd14..59ca00e40de 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -14,8 +14,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ - - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kmod.h> #include <linux/module.h> #include <linux/vmalloc.h> @@ -23,6 +22,7 @@ #include <linux/netfilter_bridge/ebtables.h> #include <linux/spinlock.h> #include <linux/mutex.h> +#include <linux/slab.h> #include <asm/uaccess.h> #include <linux/smp.h> #include <linux/cpumask.h> @@ -33,11 +33,6 @@ #define BUGPRINT(format, args...) printk("kernel msg: ebtables bug: please "\ "report to author: "format, ## args) /* #define BUGPRINT(format, args...) */ -#define MEMPRINT(format, args...) printk("kernel msg: ebtables "\ - ": out of memory: "format, ## args) -/* #define MEMPRINT(format, args...) 
*/ - - /* * Each cpu has its own set of counters, so there is no need for write_lock in @@ -56,16 +51,42 @@ static DEFINE_MUTEX(ebt_mutex); +#ifdef CONFIG_COMPAT +static void ebt_standard_compat_from_user(void *dst, const void *src) +{ + int v = *(compat_int_t *)src; + + if (v >= 0) + v += xt_compat_calc_jump(NFPROTO_BRIDGE, v); + memcpy(dst, &v, sizeof(v)); +} + +static int ebt_standard_compat_to_user(void __user *dst, const void *src) +{ + compat_int_t cv = *(int *)src; + + if (cv >= 0) + cv -= xt_compat_calc_jump(NFPROTO_BRIDGE, cv); + return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; +} +#endif + + static struct xt_target ebt_standard_target = { .name = "standard", .revision = 0, .family = NFPROTO_BRIDGE, .targetsize = sizeof(int), +#ifdef CONFIG_COMPAT + .compatsize = sizeof(compat_int_t), + .compat_from_user = ebt_standard_compat_from_user, + .compat_to_user = ebt_standard_compat_to_user, +#endif }; static inline int ebt_do_watcher(const struct ebt_entry_watcher *w, struct sk_buff *skb, - struct xt_target_param *par) + struct xt_action_param *par) { par->target = w->u.watcher; par->targinfo = w->data; @@ -74,15 +95,17 @@ ebt_do_watcher(const struct ebt_entry_watcher *w, struct sk_buff *skb, return 0; } -static inline int ebt_do_match (struct ebt_entry_match *m, - const struct sk_buff *skb, struct xt_match_param *par) +static inline int +ebt_do_match(struct ebt_entry_match *m, const struct sk_buff *skb, + struct xt_action_param *par) { par->match = m->u.match; par->matchinfo = m->data; return m->u.match->match(skb, par) ? 
EBT_MATCH : EBT_NOMATCH; } -static inline int ebt_dev_check(char *entry, const struct net_device *device) +static inline int +ebt_dev_check(const char *entry, const struct net_device *device) { int i = 0; const char *devname; @@ -100,8 +123,9 @@ static inline int ebt_dev_check(char *entry, const struct net_device *device) #define FWINV2(bool,invflg) ((bool) ^ !!(e->invflags & invflg)) /* process standard matches */ -static inline int ebt_basic_match(struct ebt_entry *e, struct ethhdr *h, - const struct net_device *in, const struct net_device *out) +static inline int +ebt_basic_match(const struct ebt_entry *e, const struct ethhdr *h, + const struct net_device *in, const struct net_device *out) { int verdict, i; @@ -156,21 +180,19 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, int i, nentries; struct ebt_entry *point; struct ebt_counter *counter_base, *cb_base; - struct ebt_entry_target *t; + const struct ebt_entry_target *t; int verdict, sp = 0; struct ebt_chainstack *cs; struct ebt_entries *chaininfo; - char *base; - struct ebt_table_info *private; - bool hotdrop = false; - struct xt_match_param mtpar; - struct xt_target_param tgpar; - - mtpar.family = tgpar.family = NFPROTO_BRIDGE; - mtpar.in = tgpar.in = in; - mtpar.out = tgpar.out = out; - mtpar.hotdrop = &hotdrop; - mtpar.hooknum = tgpar.hooknum = hook; + const char *base; + const struct ebt_table_info *private; + struct xt_action_param acpar; + + acpar.family = NFPROTO_BRIDGE; + acpar.in = in; + acpar.out = out; + acpar.hotdrop = false; + acpar.hooknum = hook; read_lock_bh(&table->lock); private = table->private; @@ -191,9 +213,9 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, if (ebt_basic_match(point, eth_hdr(skb), in, out)) goto letscontinue; - if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, &mtpar) != 0) + if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, &acpar) != 0) goto letscontinue; - if (hotdrop) { + if (acpar.hotdrop) { read_unlock_bh(&table->lock); return 
NF_DROP; } @@ -204,7 +226,7 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, /* these should only watch: not modify, nor tell us what to do with the packet */ - EBT_WATCHER_ITERATE(point, ebt_do_watcher, skb, &tgpar); + EBT_WATCHER_ITERATE(point, ebt_do_watcher, skb, &acpar); t = (struct ebt_entry_target *) (((char *)point) + point->target_offset); @@ -212,9 +234,9 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, if (!t->u.target->target) verdict = ((struct ebt_standard_target *)t)->verdict; else { - tgpar.target = t->u.target; - tgpar.targinfo = t->data; - verdict = t->u.target->target(skb, &tgpar); + acpar.target = t->u.target; + acpar.targinfo = t->data; + verdict = t->u.target->target(skb, &acpar); } if (verdict == EBT_ACCEPT) { read_unlock_bh(&table->lock); @@ -339,12 +361,9 @@ ebt_check_match(struct ebt_entry_match *m, struct xt_mtchk_param *par, left - sizeof(struct ebt_entry_match) < m->match_size) return -EINVAL; - match = try_then_request_module(xt_find_match(NFPROTO_BRIDGE, - m->u.name, 0), "ebt_%s", m->u.name); + match = xt_request_find_match(NFPROTO_BRIDGE, m->u.name, 0); if (IS_ERR(match)) return PTR_ERR(match); - if (match == NULL) - return -ENOENT; m->u.match = match; par->match = match; @@ -373,13 +392,9 @@ ebt_check_watcher(struct ebt_entry_watcher *w, struct xt_tgchk_param *par, left - sizeof(struct ebt_entry_watcher) < w->watcher_size) return -EINVAL; - watcher = try_then_request_module( - xt_find_target(NFPROTO_BRIDGE, w->u.name, 0), - "ebt_%s", w->u.name); + watcher = xt_request_find_target(NFPROTO_BRIDGE, w->u.name, 0); if (IS_ERR(watcher)) return PTR_ERR(watcher); - if (watcher == NULL) - return -ENOENT; w->u.watcher = watcher; par->target = watcher; @@ -395,7 +410,7 @@ ebt_check_watcher(struct ebt_entry_watcher *w, struct xt_tgchk_param *par, return 0; } -static int ebt_verify_pointers(struct ebt_replace *repl, +static int ebt_verify_pointers(const struct ebt_replace *repl, struct ebt_table_info 
*newinfo) { unsigned int limit = repl->entries_size; @@ -442,6 +457,8 @@ static int ebt_verify_pointers(struct ebt_replace *repl, break; if (left < e->next_offset) break; + if (e->next_offset < sizeof(struct ebt_entry)) + return -EINVAL; offset += e->next_offset; } } @@ -466,8 +483,8 @@ static int ebt_verify_pointers(struct ebt_replace *repl, * to parse the userspace data */ static inline int -ebt_check_entry_size_and_hooks(struct ebt_entry *e, - struct ebt_table_info *newinfo, +ebt_check_entry_size_and_hooks(const struct ebt_entry *e, + const struct ebt_table_info *newinfo, unsigned int *n, unsigned int *cnt, unsigned int *totalcnt, unsigned int *udc_cnt) { @@ -561,13 +578,14 @@ ebt_get_udc_positions(struct ebt_entry *e, struct ebt_table_info *newinfo, } static inline int -ebt_cleanup_match(struct ebt_entry_match *m, unsigned int *i) +ebt_cleanup_match(struct ebt_entry_match *m, struct net *net, unsigned int *i) { struct xt_mtdtor_param par; if (i && (*i)-- == 0) return 1; + par.net = net; par.match = m->u.match; par.matchinfo = m->data; par.family = NFPROTO_BRIDGE; @@ -578,13 +596,14 @@ ebt_cleanup_match(struct ebt_entry_match *m, unsigned int *i) } static inline int -ebt_cleanup_watcher(struct ebt_entry_watcher *w, unsigned int *i) +ebt_cleanup_watcher(struct ebt_entry_watcher *w, struct net *net, unsigned int *i) { struct xt_tgdtor_param par; if (i && (*i)-- == 0) return 1; + par.net = net; par.target = w->u.watcher; par.targinfo = w->data; par.family = NFPROTO_BRIDGE; @@ -595,7 +614,7 @@ ebt_cleanup_watcher(struct ebt_entry_watcher *w, unsigned int *i) } static inline int -ebt_cleanup_entry(struct ebt_entry *e, unsigned int *cnt) +ebt_cleanup_entry(struct ebt_entry *e, struct net *net, unsigned int *cnt) { struct xt_tgdtor_param par; struct ebt_entry_target *t; @@ -605,10 +624,11 @@ ebt_cleanup_entry(struct ebt_entry *e, unsigned int *cnt) /* we're done */ if (cnt && (*cnt)-- == 0) return 1; - EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, NULL); - 
EBT_MATCH_ITERATE(e, ebt_cleanup_match, NULL); + EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, net, NULL); + EBT_MATCH_ITERATE(e, ebt_cleanup_match, net, NULL); t = (struct ebt_entry_target *)(((char *)e) + e->target_offset); + par.net = net; par.target = t->u.target; par.targinfo = t->data; par.family = NFPROTO_BRIDGE; @@ -619,7 +639,8 @@ ebt_cleanup_entry(struct ebt_entry *e, unsigned int *cnt) } static inline int -ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo, +ebt_check_entry(struct ebt_entry *e, struct net *net, + const struct ebt_table_info *newinfo, const char *name, unsigned int *cnt, struct ebt_cl_stack *cl_s, unsigned int udc_cnt) { @@ -671,6 +692,7 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo, } i = 0; + mtpar.net = tgpar.net = net; mtpar.table = tgpar.table = name; mtpar.entryinfo = tgpar.entryinfo = e; mtpar.hook_mask = tgpar.hook_mask = hookmask; @@ -685,15 +707,10 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo, t = (struct ebt_entry_target *)(((char *)e) + e->target_offset); gap = e->next_offset - e->target_offset; - target = try_then_request_module( - xt_find_target(NFPROTO_BRIDGE, t->u.name, 0), - "ebt_%s", t->u.name); + target = xt_request_find_target(NFPROTO_BRIDGE, t->u.name, 0); if (IS_ERR(target)) { ret = PTR_ERR(target); goto cleanup_watchers; - } else if (target == NULL) { - ret = -ENOENT; - goto cleanup_watchers; } t->u.target = target; @@ -726,9 +743,9 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo, (*cnt)++; return 0; cleanup_watchers: - EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, &j); + EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, net, &j); cleanup_matches: - EBT_MATCH_ITERATE(e, ebt_cleanup_match, &i); + EBT_MATCH_ITERATE(e, ebt_cleanup_match, net, &i); return ret; } @@ -737,12 +754,12 @@ cleanup_matches: * the hook mask for udc tells us from which base chains the udc can be * accessed. 
This mask is a parameter to the check() functions of the extensions */ -static int check_chainloops(struct ebt_entries *chain, struct ebt_cl_stack *cl_s, +static int check_chainloops(const struct ebt_entries *chain, struct ebt_cl_stack *cl_s, unsigned int udc_cnt, unsigned int hooknr, char *base) { int i, chain_nr = -1, pos = 0, nentries = chain->nentries, verdict; - struct ebt_entry *e = (struct ebt_entry *)chain->data; - struct ebt_entry_target *t; + const struct ebt_entry *e = (struct ebt_entry *)chain->data; + const struct ebt_entry_target *t; while (pos < nentries || chain_nr != -1) { /* end of udc, go back one 'recursion' step */ @@ -808,7 +825,8 @@ letscontinue: } /* do the parsing of the table/chains/entries/matches/watchers/targets, heh */ -static int translate_table(char *name, struct ebt_table_info *newinfo) +static int translate_table(struct net *net, const char *name, + struct ebt_table_info *newinfo) { unsigned int i, j, k, udc_cnt; int ret; @@ -917,17 +935,17 @@ static int translate_table(char *name, struct ebt_table_info *newinfo) /* used to know what we need to clean up if something goes wrong */ i = 0; ret = EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, - ebt_check_entry, newinfo, name, &i, cl_s, udc_cnt); + ebt_check_entry, net, newinfo, name, &i, cl_s, udc_cnt); if (ret != 0) { EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, - ebt_cleanup_entry, &i); + ebt_cleanup_entry, net, &i); } vfree(cl_s); return ret; } /* called under write_lock */ -static void get_counters(struct ebt_counter *oldcounters, +static void get_counters(const struct ebt_counter *oldcounters, struct ebt_counter *counters, unsigned int nentries) { int i, cpu; @@ -949,90 +967,45 @@ static void get_counters(struct ebt_counter *oldcounters, } } -/* replace the table */ -static int do_replace(struct net *net, void __user *user, unsigned int len) +static int do_replace_finish(struct net *net, struct ebt_replace *repl, + struct ebt_table_info *newinfo) { - int 
ret, i, countersize; - struct ebt_table_info *newinfo; - struct ebt_replace tmp; - struct ebt_table *t; + int ret, i; struct ebt_counter *counterstmp = NULL; /* used to be able to unlock earlier */ struct ebt_table_info *table; - - if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) - return -EFAULT; - - if (len != sizeof(tmp) + tmp.entries_size) { - BUGPRINT("Wrong len argument\n"); - return -EINVAL; - } - - if (tmp.entries_size == 0) { - BUGPRINT("Entries_size never zero\n"); - return -EINVAL; - } - /* overflow check */ - if (tmp.nentries >= ((INT_MAX - sizeof(struct ebt_table_info)) / NR_CPUS - - SMP_CACHE_BYTES) / sizeof(struct ebt_counter)) - return -ENOMEM; - if (tmp.num_counters >= INT_MAX / sizeof(struct ebt_counter)) - return -ENOMEM; - - countersize = COUNTER_OFFSET(tmp.nentries) * nr_cpu_ids; - newinfo = vmalloc(sizeof(*newinfo) + countersize); - if (!newinfo) - return -ENOMEM; - - if (countersize) - memset(newinfo->counters, 0, countersize); - - newinfo->entries = vmalloc(tmp.entries_size); - if (!newinfo->entries) { - ret = -ENOMEM; - goto free_newinfo; - } - if (copy_from_user( - newinfo->entries, tmp.entries, tmp.entries_size) != 0) { - BUGPRINT("Couldn't copy entries from userspace\n"); - ret = -EFAULT; - goto free_entries; - } + struct ebt_table *t; /* the user wants counters back the check on the size is done later, when we have the lock */ - if (tmp.num_counters) { - counterstmp = vmalloc(tmp.num_counters * sizeof(*counterstmp)); - if (!counterstmp) { - ret = -ENOMEM; - goto free_entries; - } + if (repl->num_counters) { + unsigned long size = repl->num_counters * sizeof(*counterstmp); + counterstmp = vmalloc(size); + if (!counterstmp) + return -ENOMEM; } - else - counterstmp = NULL; - /* this can get initialized by translate_table() */ newinfo->chainstack = NULL; - ret = ebt_verify_pointers(&tmp, newinfo); + ret = ebt_verify_pointers(repl, newinfo); if (ret != 0) goto free_counterstmp; - ret = translate_table(tmp.name, newinfo); + ret = 
translate_table(net, repl->name, newinfo); if (ret != 0) goto free_counterstmp; - t = find_table_lock(net, tmp.name, &ret, &ebt_mutex); + t = find_table_lock(net, repl->name, &ret, &ebt_mutex); if (!t) { ret = -ENOENT; goto free_iterate; } /* the table doesn't like it */ - if (t->check && (ret = t->check(newinfo, tmp.valid_hooks))) + if (t->check && (ret = t->check(newinfo, repl->valid_hooks))) goto free_unlock; - if (tmp.num_counters && tmp.num_counters != t->private->nentries) { + if (repl->num_counters && repl->num_counters != t->private->nentries) { BUGPRINT("Wrong nr. of counters requested\n"); ret = -EINVAL; goto free_unlock; @@ -1048,7 +1021,7 @@ static int do_replace(struct net *net, void __user *user, unsigned int len) module_put(t->me); /* we need an atomic snapshot of the counters */ write_lock_bh(&t->lock); - if (tmp.num_counters) + if (repl->num_counters) get_counters(t->private->counters, counterstmp, t->private->nentries); @@ -1059,10 +1032,9 @@ static int do_replace(struct net *net, void __user *user, unsigned int len) allocation. Only reason why this is done is because this way the lock is held only once, while this doesn't bring the kernel into a dangerous state. 
*/ - if (tmp.num_counters && - copy_to_user(tmp.counters, counterstmp, - tmp.num_counters * sizeof(struct ebt_counter))) { - BUGPRINT("Couldn't copy counters to userspace\n"); + if (repl->num_counters && + copy_to_user(repl->counters, counterstmp, + repl->num_counters * sizeof(struct ebt_counter))) { ret = -EFAULT; } else @@ -1070,7 +1042,7 @@ static int do_replace(struct net *net, void __user *user, unsigned int len) /* decrease module count and free resources */ EBT_ENTRY_ITERATE(table->entries, table->entries_size, - ebt_cleanup_entry, NULL); + ebt_cleanup_entry, net, NULL); vfree(table->entries); if (table->chainstack) { @@ -1087,7 +1059,7 @@ free_unlock: mutex_unlock(&ebt_mutex); free_iterate: EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, - ebt_cleanup_entry, NULL); + ebt_cleanup_entry, net, NULL); free_counterstmp: vfree(counterstmp); /* can be initialized in translate_table() */ @@ -1096,6 +1068,59 @@ free_counterstmp: vfree(newinfo->chainstack[i]); vfree(newinfo->chainstack); } + return ret; +} + +/* replace the table */ +static int do_replace(struct net *net, const void __user *user, + unsigned int len) +{ + int ret, countersize; + struct ebt_table_info *newinfo; + struct ebt_replace tmp; + + if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) + return -EFAULT; + + if (len != sizeof(tmp) + tmp.entries_size) { + BUGPRINT("Wrong len argument\n"); + return -EINVAL; + } + + if (tmp.entries_size == 0) { + BUGPRINT("Entries_size never zero\n"); + return -EINVAL; + } + /* overflow check */ + if (tmp.nentries >= ((INT_MAX - sizeof(struct ebt_table_info)) / + NR_CPUS - SMP_CACHE_BYTES) / sizeof(struct ebt_counter)) + return -ENOMEM; + if (tmp.num_counters >= INT_MAX / sizeof(struct ebt_counter)) + return -ENOMEM; + + countersize = COUNTER_OFFSET(tmp.nentries) * nr_cpu_ids; + newinfo = vmalloc(sizeof(*newinfo) + countersize); + if (!newinfo) + return -ENOMEM; + + if (countersize) + memset(newinfo->counters, 0, countersize); + + newinfo->entries = 
vmalloc(tmp.entries_size); + if (!newinfo->entries) { + ret = -ENOMEM; + goto free_newinfo; + } + if (copy_from_user( + newinfo->entries, tmp.entries, tmp.entries_size) != 0) { + BUGPRINT("Couldn't copy entries from userspace\n"); + ret = -EFAULT; + goto free_entries; + } + + ret = do_replace_finish(net, &tmp, newinfo); + if (ret == 0) + return ret; free_entries: vfree(newinfo->entries); free_newinfo: @@ -1154,7 +1179,7 @@ ebt_register_table(struct net *net, const struct ebt_table *input_table) newinfo->hook_entry[i] = p + ((char *)repl->hook_entry[i] - repl->entries); } - ret = translate_table(repl->name, newinfo); + ret = translate_table(net, repl->name, newinfo); if (ret != 0) { BUGPRINT("Translate_table failed\n"); goto free_chainstack; @@ -1204,7 +1229,7 @@ out: return ERR_PTR(ret); } -void ebt_unregister_table(struct ebt_table *table) +void ebt_unregister_table(struct net *net, struct ebt_table *table) { int i; @@ -1216,7 +1241,7 @@ void ebt_unregister_table(struct ebt_table *table) list_del(&table->list); mutex_unlock(&ebt_mutex); EBT_ENTRY_ITERATE(table->private->entries, table->private->entries_size, - ebt_cleanup_entry, NULL); + ebt_cleanup_entry, net, NULL); if (table->private->nentries) module_put(table->me); vfree(table->private->entries); @@ -1230,39 +1255,33 @@ void ebt_unregister_table(struct ebt_table *table) } /* userspace just supplied us with counters */ -static int update_counters(struct net *net, void __user *user, unsigned int len) +static int do_update_counters(struct net *net, const char *name, + struct ebt_counter __user *counters, + unsigned int num_counters, + const void __user *user, unsigned int len) { int i, ret; struct ebt_counter *tmp; - struct ebt_replace hlp; struct ebt_table *t; - if (copy_from_user(&hlp, user, sizeof(hlp))) - return -EFAULT; - - if (len != sizeof(hlp) + hlp.num_counters * sizeof(struct ebt_counter)) - return -EINVAL; - if (hlp.num_counters == 0) + if (num_counters == 0) return -EINVAL; - if (!(tmp = 
vmalloc(hlp.num_counters * sizeof(*tmp)))) { - MEMPRINT("Update_counters && nomemory\n"); + tmp = vmalloc(num_counters * sizeof(*tmp)); + if (!tmp) return -ENOMEM; - } - t = find_table_lock(net, hlp.name, &ret, &ebt_mutex); + t = find_table_lock(net, name, &ret, &ebt_mutex); if (!t) goto free_tmp; - if (hlp.num_counters != t->private->nentries) { + if (num_counters != t->private->nentries) { BUGPRINT("Wrong nr of counters\n"); ret = -EINVAL; goto unlock_mutex; } - if ( copy_from_user(tmp, hlp.counters, - hlp.num_counters * sizeof(struct ebt_counter)) ) { - BUGPRINT("Updata_counters && !cfu\n"); + if (copy_from_user(tmp, counters, num_counters * sizeof(*counters))) { ret = -EFAULT; goto unlock_mutex; } @@ -1271,7 +1290,7 @@ static int update_counters(struct net *net, void __user *user, unsigned int len) write_lock_bh(&t->lock); /* we add to the counters of the first cpu */ - for (i = 0; i < hlp.num_counters; i++) { + for (i = 0; i < num_counters; i++) { t->private->counters[i].pcnt += tmp[i].pcnt; t->private->counters[i].bcnt += tmp[i].bcnt; } @@ -1285,8 +1304,23 @@ free_tmp: return ret; } -static inline int ebt_make_matchname(struct ebt_entry_match *m, - char *base, char __user *ubase) +static int update_counters(struct net *net, const void __user *user, + unsigned int len) +{ + struct ebt_replace hlp; + + if (copy_from_user(&hlp, user, sizeof(hlp))) + return -EFAULT; + + if (len != sizeof(hlp) + hlp.num_counters * sizeof(struct ebt_counter)) + return -EINVAL; + + return do_update_counters(net, hlp.name, hlp.counters, + hlp.num_counters, user, len); +} + +static inline int ebt_make_matchname(const struct ebt_entry_match *m, + const char *base, char __user *ubase) { char __user *hlp = ubase + ((char *)m - base); if (copy_to_user(hlp, m->u.match->name, EBT_FUNCTION_MAXNAMELEN)) @@ -1294,8 +1328,8 @@ static inline int ebt_make_matchname(struct ebt_entry_match *m, return 0; } -static inline int ebt_make_watchername(struct ebt_entry_watcher *w, - char *base, char __user 
*ubase) +static inline int ebt_make_watchername(const struct ebt_entry_watcher *w, + const char *base, char __user *ubase) { char __user *hlp = ubase + ((char *)w - base); if (copy_to_user(hlp , w->u.watcher->name, EBT_FUNCTION_MAXNAMELEN)) @@ -1303,11 +1337,12 @@ static inline int ebt_make_watchername(struct ebt_entry_watcher *w, return 0; } -static inline int ebt_make_names(struct ebt_entry *e, char *base, char __user *ubase) +static inline int +ebt_make_names(struct ebt_entry *e, const char *base, char __user *ubase) { int ret; char __user *hlp; - struct ebt_entry_target *t; + const struct ebt_entry_target *t; if (e->bitmask == 0) return 0; @@ -1326,13 +1361,46 @@ static inline int ebt_make_names(struct ebt_entry *e, char *base, char __user *u return 0; } +static int copy_counters_to_user(struct ebt_table *t, + const struct ebt_counter *oldcounters, + void __user *user, unsigned int num_counters, + unsigned int nentries) +{ + struct ebt_counter *counterstmp; + int ret = 0; + + /* userspace might not need the counters */ + if (num_counters == 0) + return 0; + + if (num_counters != nentries) { + BUGPRINT("Num_counters wrong\n"); + return -EINVAL; + } + + counterstmp = vmalloc(nentries * sizeof(*counterstmp)); + if (!counterstmp) + return -ENOMEM; + + write_lock_bh(&t->lock); + get_counters(oldcounters, counterstmp, nentries); + write_unlock_bh(&t->lock); + + if (copy_to_user(user, counterstmp, + nentries * sizeof(struct ebt_counter))) + ret = -EFAULT; + vfree(counterstmp); + return ret; +} + /* called with ebt_mutex locked */ static int copy_everything_to_user(struct ebt_table *t, void __user *user, - int *len, int cmd) + const int *len, int cmd) { struct ebt_replace tmp; - struct ebt_counter *counterstmp, *oldcounters; + const struct ebt_counter *oldcounters; unsigned int entries_size, nentries; + int ret; char *entries; if (cmd == EBT_SO_GET_ENTRIES) { @@ -1347,16 +1415,12 @@ static int copy_everything_to_user(struct ebt_table *t, void __user *user, oldcounters 
= t->table->counters; } - if (copy_from_user(&tmp, user, sizeof(tmp))) { - BUGPRINT("Cfu didn't work\n"); + if (copy_from_user(&tmp, user, sizeof(tmp))) return -EFAULT; - } if (*len != sizeof(struct ebt_replace) + entries_size + - (tmp.num_counters? nentries * sizeof(struct ebt_counter): 0)) { - BUGPRINT("Wrong size\n"); + (tmp.num_counters? nentries * sizeof(struct ebt_counter): 0)) return -EINVAL; - } if (tmp.nentries != nentries) { BUGPRINT("Nentries wrong\n"); @@ -1368,29 +1432,10 @@ static int copy_everything_to_user(struct ebt_table *t, void __user *user, return -EINVAL; } - /* userspace might not need the counters */ - if (tmp.num_counters) { - if (tmp.num_counters != nentries) { - BUGPRINT("Num_counters wrong\n"); - return -EINVAL; - } - counterstmp = vmalloc(nentries * sizeof(*counterstmp)); - if (!counterstmp) { - MEMPRINT("Couldn't copy counters, out of memory\n"); - return -ENOMEM; - } - write_lock_bh(&t->lock); - get_counters(oldcounters, counterstmp, nentries); - write_unlock_bh(&t->lock); - - if (copy_to_user(tmp.counters, counterstmp, - nentries * sizeof(struct ebt_counter))) { - BUGPRINT("Couldn't copy counters to userspace\n"); - vfree(counterstmp); - return -EFAULT; - } - vfree(counterstmp); - } + ret = copy_counters_to_user(t, oldcounters, tmp.counters, + tmp.num_counters, nentries); + if (ret) + return ret; if (copy_to_user(tmp.entries, entries, entries_size)) { BUGPRINT("Couldn't copy entries to userspace\n"); @@ -1418,7 +1463,7 @@ static int do_ebt_set_ctl(struct sock *sk, break; default: ret = -EINVAL; - } + } return ret; } @@ -1478,15 +1523,892 @@ static int do_ebt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) return ret; } +#ifdef CONFIG_COMPAT +/* 32 bit-userspace compatibility definitions. 
*/ +struct compat_ebt_replace { + char name[EBT_TABLE_MAXNAMELEN]; + compat_uint_t valid_hooks; + compat_uint_t nentries; + compat_uint_t entries_size; + /* start of the chains */ + compat_uptr_t hook_entry[NF_BR_NUMHOOKS]; + /* nr of counters userspace expects back */ + compat_uint_t num_counters; + /* where the kernel will put the old counters. */ + compat_uptr_t counters; + compat_uptr_t entries; +}; + +/* struct ebt_entry_match, _target and _watcher have same layout */ +struct compat_ebt_entry_mwt { + union { + char name[EBT_FUNCTION_MAXNAMELEN]; + compat_uptr_t ptr; + } u; + compat_uint_t match_size; + compat_uint_t data[0]; +}; + +/* account for possible padding between match_size and ->data */ +static int ebt_compat_entry_padsize(void) +{ + BUILD_BUG_ON(XT_ALIGN(sizeof(struct ebt_entry_match)) < + COMPAT_XT_ALIGN(sizeof(struct compat_ebt_entry_mwt))); + return (int) XT_ALIGN(sizeof(struct ebt_entry_match)) - + COMPAT_XT_ALIGN(sizeof(struct compat_ebt_entry_mwt)); +} + +static int ebt_compat_match_offset(const struct xt_match *match, + unsigned int userlen) +{ + /* + * ebt_among needs special handling. The kernel .matchsize is + * set to -1 at registration time; at runtime an EBT_ALIGN()ed + * value is expected. + * Example: userspace sends 4500, ebt_among.c wants 4504. 
+ */ + if (unlikely(match->matchsize == -1)) + return XT_ALIGN(userlen) - COMPAT_XT_ALIGN(userlen); + return xt_compat_match_offset(match); +} + +static int compat_match_to_user(struct ebt_entry_match *m, void __user **dstptr, + unsigned int *size) +{ + const struct xt_match *match = m->u.match; + struct compat_ebt_entry_mwt __user *cm = *dstptr; + int off = ebt_compat_match_offset(match, m->match_size); + compat_uint_t msize = m->match_size - off; + + BUG_ON(off >= m->match_size); + + if (copy_to_user(cm->u.name, match->name, + strlen(match->name) + 1) || put_user(msize, &cm->match_size)) + return -EFAULT; + + if (match->compat_to_user) { + if (match->compat_to_user(cm->data, m->data)) + return -EFAULT; + } else if (copy_to_user(cm->data, m->data, msize)) + return -EFAULT; + + *size -= ebt_compat_entry_padsize() + off; + *dstptr = cm->data; + *dstptr += msize; + return 0; +} + +static int compat_target_to_user(struct ebt_entry_target *t, + void __user **dstptr, + unsigned int *size) +{ + const struct xt_target *target = t->u.target; + struct compat_ebt_entry_mwt __user *cm = *dstptr; + int off = xt_compat_target_offset(target); + compat_uint_t tsize = t->target_size - off; + + BUG_ON(off >= t->target_size); + + if (copy_to_user(cm->u.name, target->name, + strlen(target->name) + 1) || put_user(tsize, &cm->match_size)) + return -EFAULT; + + if (target->compat_to_user) { + if (target->compat_to_user(cm->data, t->data)) + return -EFAULT; + } else if (copy_to_user(cm->data, t->data, tsize)) + return -EFAULT; + + *size -= ebt_compat_entry_padsize() + off; + *dstptr = cm->data; + *dstptr += tsize; + return 0; +} + +static int compat_watcher_to_user(struct ebt_entry_watcher *w, + void __user **dstptr, + unsigned int *size) +{ + return compat_target_to_user((struct ebt_entry_target *)w, + dstptr, size); +} + +static int compat_copy_entry_to_user(struct ebt_entry *e, void __user **dstptr, + unsigned int *size) +{ + struct ebt_entry_target *t; + struct ebt_entry __user *ce; 
+ u32 watchers_offset, target_offset, next_offset; + compat_uint_t origsize; + int ret; + + if (e->bitmask == 0) { + if (*size < sizeof(struct ebt_entries)) + return -EINVAL; + if (copy_to_user(*dstptr, e, sizeof(struct ebt_entries))) + return -EFAULT; + + *dstptr += sizeof(struct ebt_entries); + *size -= sizeof(struct ebt_entries); + return 0; + } + + if (*size < sizeof(*ce)) + return -EINVAL; + + ce = (struct ebt_entry __user *)*dstptr; + if (copy_to_user(ce, e, sizeof(*ce))) + return -EFAULT; + + origsize = *size; + *dstptr += sizeof(*ce); + + ret = EBT_MATCH_ITERATE(e, compat_match_to_user, dstptr, size); + if (ret) + return ret; + watchers_offset = e->watchers_offset - (origsize - *size); + + ret = EBT_WATCHER_ITERATE(e, compat_watcher_to_user, dstptr, size); + if (ret) + return ret; + target_offset = e->target_offset - (origsize - *size); + + t = (struct ebt_entry_target *) ((char *) e + e->target_offset); + + ret = compat_target_to_user(t, dstptr, size); + if (ret) + return ret; + next_offset = e->next_offset - (origsize - *size); + + if (put_user(watchers_offset, &ce->watchers_offset) || + put_user(target_offset, &ce->target_offset) || + put_user(next_offset, &ce->next_offset)) + return -EFAULT; + + *size -= sizeof(*ce); + return 0; +} + +static int compat_calc_match(struct ebt_entry_match *m, int *off) +{ + *off += ebt_compat_match_offset(m->u.match, m->match_size); + *off += ebt_compat_entry_padsize(); + return 0; +} + +static int compat_calc_watcher(struct ebt_entry_watcher *w, int *off) +{ + *off += xt_compat_target_offset(w->u.watcher); + *off += ebt_compat_entry_padsize(); + return 0; +} + +static int compat_calc_entry(const struct ebt_entry *e, + const struct ebt_table_info *info, + const void *base, + struct compat_ebt_replace *newinfo) +{ + const struct ebt_entry_target *t; + unsigned int entry_offset; + int off, ret, i; + + if (e->bitmask == 0) + return 0; + + off = 0; + entry_offset = (void *)e - base; + + EBT_MATCH_ITERATE(e, compat_calc_match, 
&off); + EBT_WATCHER_ITERATE(e, compat_calc_watcher, &off); + + t = (const struct ebt_entry_target *) ((char *) e + e->target_offset); + + off += xt_compat_target_offset(t->u.target); + off += ebt_compat_entry_padsize(); + + newinfo->entries_size -= off; + + ret = xt_compat_add_offset(NFPROTO_BRIDGE, entry_offset, off); + if (ret) + return ret; + + for (i = 0; i < NF_BR_NUMHOOKS; i++) { + const void *hookptr = info->hook_entry[i]; + if (info->hook_entry[i] && + (e < (struct ebt_entry *)(base - hookptr))) { + newinfo->hook_entry[i] -= off; + pr_debug("0x%08X -> 0x%08X\n", + newinfo->hook_entry[i] + off, + newinfo->hook_entry[i]); + } + } + + return 0; +} + + +static int compat_table_info(const struct ebt_table_info *info, + struct compat_ebt_replace *newinfo) +{ + unsigned int size = info->entries_size; + const void *entries = info->entries; + + newinfo->entries_size = size; + + return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info, + entries, newinfo); +} + +static int compat_copy_everything_to_user(struct ebt_table *t, + void __user *user, int *len, int cmd) +{ + struct compat_ebt_replace repl, tmp; + struct ebt_counter *oldcounters; + struct ebt_table_info tinfo; + int ret; + void __user *pos; + + memset(&tinfo, 0, sizeof(tinfo)); + + if (cmd == EBT_SO_GET_ENTRIES) { + tinfo.entries_size = t->private->entries_size; + tinfo.nentries = t->private->nentries; + tinfo.entries = t->private->entries; + oldcounters = t->private->counters; + } else { + tinfo.entries_size = t->table->entries_size; + tinfo.nentries = t->table->nentries; + tinfo.entries = t->table->entries; + oldcounters = t->table->counters; + } + + if (copy_from_user(&tmp, user, sizeof(tmp))) + return -EFAULT; + + if (tmp.nentries != tinfo.nentries || + (tmp.num_counters && tmp.num_counters != tinfo.nentries)) + return -EINVAL; + + memcpy(&repl, &tmp, sizeof(repl)); + if (cmd == EBT_SO_GET_ENTRIES) + ret = compat_table_info(t->private, &repl); + else + ret = compat_table_info(&tinfo, &repl); + if 
(ret) + return ret; + + if (*len != sizeof(tmp) + repl.entries_size + + (tmp.num_counters? tinfo.nentries * sizeof(struct ebt_counter): 0)) { + pr_err("wrong size: *len %d, entries_size %u, replsz %d\n", + *len, tinfo.entries_size, repl.entries_size); + return -EINVAL; + } + + /* userspace might not need the counters */ + ret = copy_counters_to_user(t, oldcounters, compat_ptr(tmp.counters), + tmp.num_counters, tinfo.nentries); + if (ret) + return ret; + + pos = compat_ptr(tmp.entries); + return EBT_ENTRY_ITERATE(tinfo.entries, tinfo.entries_size, + compat_copy_entry_to_user, &pos, &tmp.entries_size); +} + +struct ebt_entries_buf_state { + char *buf_kern_start; /* kernel buffer to copy (translated) data to */ + u32 buf_kern_len; /* total size of kernel buffer */ + u32 buf_kern_offset; /* amount of data copied so far */ + u32 buf_user_offset; /* read position in userspace buffer */ +}; + +static int ebt_buf_count(struct ebt_entries_buf_state *state, unsigned int sz) +{ + state->buf_kern_offset += sz; + return state->buf_kern_offset >= sz ? 
0 : -EINVAL; +} + +static int ebt_buf_add(struct ebt_entries_buf_state *state, + void *data, unsigned int sz) +{ + if (state->buf_kern_start == NULL) + goto count_only; + + BUG_ON(state->buf_kern_offset + sz > state->buf_kern_len); + + memcpy(state->buf_kern_start + state->buf_kern_offset, data, sz); + + count_only: + state->buf_user_offset += sz; + return ebt_buf_count(state, sz); +} + +static int ebt_buf_add_pad(struct ebt_entries_buf_state *state, unsigned int sz) +{ + char *b = state->buf_kern_start; + + BUG_ON(b && state->buf_kern_offset > state->buf_kern_len); + + if (b != NULL && sz > 0) + memset(b + state->buf_kern_offset, 0, sz); + /* do not adjust ->buf_user_offset here, we added kernel-side padding */ + return ebt_buf_count(state, sz); +} + +enum compat_mwt { + EBT_COMPAT_MATCH, + EBT_COMPAT_WATCHER, + EBT_COMPAT_TARGET, +}; + +static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt, + enum compat_mwt compat_mwt, + struct ebt_entries_buf_state *state, + const unsigned char *base) +{ + char name[EBT_FUNCTION_MAXNAMELEN]; + struct xt_match *match; + struct xt_target *wt; + void *dst = NULL; + int off, pad = 0, ret = 0; + unsigned int size_kern, entry_offset, match_size = mwt->match_size; + + strlcpy(name, mwt->u.name, sizeof(name)); + + if (state->buf_kern_start) + dst = state->buf_kern_start + state->buf_kern_offset; + + entry_offset = (unsigned char *) mwt - base; + switch (compat_mwt) { + case EBT_COMPAT_MATCH: + match = try_then_request_module(xt_find_match(NFPROTO_BRIDGE, + name, 0), "ebt_%s", name); + if (match == NULL) + return -ENOENT; + if (IS_ERR(match)) + return PTR_ERR(match); + + off = ebt_compat_match_offset(match, match_size); + if (dst) { + if (match->compat_from_user) + match->compat_from_user(dst, mwt->data); + else + memcpy(dst, mwt->data, match_size); + } + + size_kern = match->matchsize; + if (unlikely(size_kern == -1)) + size_kern = match_size; + module_put(match->me); + break; + case EBT_COMPAT_WATCHER: /* fallthrough */ + 
case EBT_COMPAT_TARGET: + wt = try_then_request_module(xt_find_target(NFPROTO_BRIDGE, + name, 0), "ebt_%s", name); + if (wt == NULL) + return -ENOENT; + if (IS_ERR(wt)) + return PTR_ERR(wt); + off = xt_compat_target_offset(wt); + + if (dst) { + if (wt->compat_from_user) + wt->compat_from_user(dst, mwt->data); + else + memcpy(dst, mwt->data, match_size); + } + + size_kern = wt->targetsize; + module_put(wt->me); + break; + } + + if (!dst) { + ret = xt_compat_add_offset(NFPROTO_BRIDGE, entry_offset, + off + ebt_compat_entry_padsize()); + if (ret < 0) + return ret; + } + + state->buf_kern_offset += match_size + off; + state->buf_user_offset += match_size; + pad = XT_ALIGN(size_kern) - size_kern; + + if (pad > 0 && dst) { + BUG_ON(state->buf_kern_len <= pad); + BUG_ON(state->buf_kern_offset - (match_size + off) + size_kern > state->buf_kern_len - pad); + memset(dst + size_kern, 0, pad); + } + return off + match_size; +} + +/* + * return size of all matches, watchers or target, including necessary + * alignment and padding. 
+ */ +static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32, + unsigned int size_left, enum compat_mwt type, + struct ebt_entries_buf_state *state, const void *base) +{ + int growth = 0; + char *buf; + + if (size_left == 0) + return 0; + + buf = (char *) match32; + + while (size_left >= sizeof(*match32)) { + struct ebt_entry_match *match_kern; + int ret; + + match_kern = (struct ebt_entry_match *) state->buf_kern_start; + if (match_kern) { + char *tmp; + tmp = state->buf_kern_start + state->buf_kern_offset; + match_kern = (struct ebt_entry_match *) tmp; + } + ret = ebt_buf_add(state, buf, sizeof(*match32)); + if (ret < 0) + return ret; + size_left -= sizeof(*match32); + + /* add padding before match->data (if any) */ + ret = ebt_buf_add_pad(state, ebt_compat_entry_padsize()); + if (ret < 0) + return ret; + + if (match32->match_size > size_left) + return -EINVAL; + + size_left -= match32->match_size; + + ret = compat_mtw_from_user(match32, type, state, base); + if (ret < 0) + return ret; + + BUG_ON(ret < match32->match_size); + growth += ret - match32->match_size; + growth += ebt_compat_entry_padsize(); + + buf += sizeof(*match32); + buf += match32->match_size; + + if (match_kern) + match_kern->match_size = ret; + + WARN_ON(type == EBT_COMPAT_TARGET && size_left); + match32 = (struct compat_ebt_entry_mwt *) buf; + } + + return growth; +} + +#define EBT_COMPAT_WATCHER_ITERATE(e, fn, args...) \ +({ \ + unsigned int __i; \ + int __ret = 0; \ + struct compat_ebt_entry_mwt *__watcher; \ + \ + for (__i = e->watchers_offset; \ + __i < (e)->target_offset; \ + __i += __watcher->watcher_size + \ + sizeof(struct compat_ebt_entry_mwt)) { \ + __watcher = (void *)(e) + __i; \ + __ret = fn(__watcher , ## args); \ + if (__ret != 0) \ + break; \ + } \ + if (__ret == 0) { \ + if (__i != (e)->target_offset) \ + __ret = -EINVAL; \ + } \ + __ret; \ +}) + +#define EBT_COMPAT_MATCH_ITERATE(e, fn, args...) 
\ +({ \ + unsigned int __i; \ + int __ret = 0; \ + struct compat_ebt_entry_mwt *__match; \ + \ + for (__i = sizeof(struct ebt_entry); \ + __i < (e)->watchers_offset; \ + __i += __match->match_size + \ + sizeof(struct compat_ebt_entry_mwt)) { \ + __match = (void *)(e) + __i; \ + __ret = fn(__match , ## args); \ + if (__ret != 0) \ + break; \ + } \ + if (__ret == 0) { \ + if (__i != (e)->watchers_offset) \ + __ret = -EINVAL; \ + } \ + __ret; \ +}) + +/* called for all ebt_entry structures. */ +static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base, + unsigned int *total, + struct ebt_entries_buf_state *state) +{ + unsigned int i, j, startoff, new_offset = 0; + /* stores match/watchers/targets & offset of next struct ebt_entry: */ + unsigned int offsets[4]; + unsigned int *offsets_update = NULL; + int ret; + char *buf_start; + + if (*total < sizeof(struct ebt_entries)) + return -EINVAL; + + if (!entry->bitmask) { + *total -= sizeof(struct ebt_entries); + return ebt_buf_add(state, entry, sizeof(struct ebt_entries)); + } + if (*total < sizeof(*entry) || entry->next_offset < sizeof(*entry)) + return -EINVAL; + + startoff = state->buf_user_offset; + /* pull in most part of ebt_entry, it does not need to be changed. */ + ret = ebt_buf_add(state, entry, + offsetof(struct ebt_entry, watchers_offset)); + if (ret < 0) + return ret; + + offsets[0] = sizeof(struct ebt_entry); /* matches come first */ + memcpy(&offsets[1], &entry->watchers_offset, + sizeof(offsets) - sizeof(offsets[0])); + + if (state->buf_kern_start) { + buf_start = state->buf_kern_start + state->buf_kern_offset; + offsets_update = (unsigned int *) buf_start; + } + ret = ebt_buf_add(state, &offsets[1], + sizeof(offsets) - sizeof(offsets[0])); + if (ret < 0) + return ret; + buf_start = (char *) entry; + /* + * 0: matches offset, always follows ebt_entry. 
+ * 1: watchers offset, from ebt_entry structure + * 2: target offset, from ebt_entry structure + * 3: next ebt_entry offset, from ebt_entry structure + * + * offsets are relative to beginning of struct ebt_entry (i.e., 0). + */ + for (i = 0, j = 1 ; j < 4 ; j++, i++) { + struct compat_ebt_entry_mwt *match32; + unsigned int size; + char *buf = buf_start; + + buf = buf_start + offsets[i]; + if (offsets[i] > offsets[j]) + return -EINVAL; + + match32 = (struct compat_ebt_entry_mwt *) buf; + size = offsets[j] - offsets[i]; + ret = ebt_size_mwt(match32, size, i, state, base); + if (ret < 0) + return ret; + new_offset += ret; + if (offsets_update && new_offset) { + pr_debug("change offset %d to %d\n", + offsets_update[i], offsets[j] + new_offset); + offsets_update[i] = offsets[j] + new_offset; + } + } + + startoff = state->buf_user_offset - startoff; + + BUG_ON(*total < startoff); + *total -= startoff; + return 0; +} + +/* + * repl->entries_size is the size of the ebt_entry blob in userspace. + * It might need more memory when copied to a 64 bit kernel in case + * userspace is 32-bit. So, first task: find out how much memory is needed. + * + * Called before validation is performed. 
+ */ +static int compat_copy_entries(unsigned char *data, unsigned int size_user, + struct ebt_entries_buf_state *state) +{ + unsigned int size_remaining = size_user; + int ret; + + ret = EBT_ENTRY_ITERATE(data, size_user, size_entry_mwt, data, + &size_remaining, state); + if (ret < 0) + return ret; + + WARN_ON(size_remaining); + return state->buf_kern_offset; +} + + +static int compat_copy_ebt_replace_from_user(struct ebt_replace *repl, + void __user *user, unsigned int len) +{ + struct compat_ebt_replace tmp; + int i; + + if (len < sizeof(tmp)) + return -EINVAL; + + if (copy_from_user(&tmp, user, sizeof(tmp))) + return -EFAULT; + + if (len != sizeof(tmp) + tmp.entries_size) + return -EINVAL; + + if (tmp.entries_size == 0) + return -EINVAL; + + if (tmp.nentries >= ((INT_MAX - sizeof(struct ebt_table_info)) / + NR_CPUS - SMP_CACHE_BYTES) / sizeof(struct ebt_counter)) + return -ENOMEM; + if (tmp.num_counters >= INT_MAX / sizeof(struct ebt_counter)) + return -ENOMEM; + + memcpy(repl, &tmp, offsetof(struct ebt_replace, hook_entry)); + + /* starting with hook_entry, 32 vs. 
64 bit structures are different */ + for (i = 0; i < NF_BR_NUMHOOKS; i++) + repl->hook_entry[i] = compat_ptr(tmp.hook_entry[i]); + + repl->num_counters = tmp.num_counters; + repl->counters = compat_ptr(tmp.counters); + repl->entries = compat_ptr(tmp.entries); + return 0; +} + +static int compat_do_replace(struct net *net, void __user *user, + unsigned int len) +{ + int ret, i, countersize, size64; + struct ebt_table_info *newinfo; + struct ebt_replace tmp; + struct ebt_entries_buf_state state; + void *entries_tmp; + + ret = compat_copy_ebt_replace_from_user(&tmp, user, len); + if (ret) { + /* try real handler in case userland supplied needed padding */ + if (ret == -EINVAL && do_replace(net, user, len) == 0) + ret = 0; + return ret; + } + + countersize = COUNTER_OFFSET(tmp.nentries) * nr_cpu_ids; + newinfo = vmalloc(sizeof(*newinfo) + countersize); + if (!newinfo) + return -ENOMEM; + + if (countersize) + memset(newinfo->counters, 0, countersize); + + memset(&state, 0, sizeof(state)); + + newinfo->entries = vmalloc(tmp.entries_size); + if (!newinfo->entries) { + ret = -ENOMEM; + goto free_newinfo; + } + if (copy_from_user( + newinfo->entries, tmp.entries, tmp.entries_size) != 0) { + ret = -EFAULT; + goto free_entries; + } + + entries_tmp = newinfo->entries; + + xt_compat_lock(NFPROTO_BRIDGE); + + ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state); + if (ret < 0) + goto out_unlock; + + pr_debug("tmp.entries_size %d, kern off %d, user off %d delta %d\n", + tmp.entries_size, state.buf_kern_offset, state.buf_user_offset, + xt_compat_calc_jump(NFPROTO_BRIDGE, tmp.entries_size)); + + size64 = ret; + newinfo->entries = vmalloc(size64); + if (!newinfo->entries) { + vfree(entries_tmp); + ret = -ENOMEM; + goto out_unlock; + } + + memset(&state, 0, sizeof(state)); + state.buf_kern_start = newinfo->entries; + state.buf_kern_len = size64; + + ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state); + BUG_ON(ret < 0); /* parses same data again */ + + 
vfree(entries_tmp); + tmp.entries_size = size64; + + for (i = 0; i < NF_BR_NUMHOOKS; i++) { + char __user *usrptr; + if (tmp.hook_entry[i]) { + unsigned int delta; + usrptr = (char __user *) tmp.hook_entry[i]; + delta = usrptr - tmp.entries; + usrptr += xt_compat_calc_jump(NFPROTO_BRIDGE, delta); + tmp.hook_entry[i] = (struct ebt_entries __user *)usrptr; + } + } + + xt_compat_flush_offsets(NFPROTO_BRIDGE); + xt_compat_unlock(NFPROTO_BRIDGE); + + ret = do_replace_finish(net, &tmp, newinfo); + if (ret == 0) + return ret; +free_entries: + vfree(newinfo->entries); +free_newinfo: + vfree(newinfo); + return ret; +out_unlock: + xt_compat_flush_offsets(NFPROTO_BRIDGE); + xt_compat_unlock(NFPROTO_BRIDGE); + goto free_entries; +} + +static int compat_update_counters(struct net *net, void __user *user, + unsigned int len) +{ + struct compat_ebt_replace hlp; + + if (copy_from_user(&hlp, user, sizeof(hlp))) + return -EFAULT; + + /* try real handler in case userland supplied needed padding */ + if (len != sizeof(hlp) + hlp.num_counters * sizeof(struct ebt_counter)) + return update_counters(net, user, len); + + return do_update_counters(net, hlp.name, compat_ptr(hlp.counters), + hlp.num_counters, user, len); +} + +static int compat_do_ebt_set_ctl(struct sock *sk, + int cmd, void __user *user, unsigned int len) +{ + int ret; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + switch (cmd) { + case EBT_SO_SET_ENTRIES: + ret = compat_do_replace(sock_net(sk), user, len); + break; + case EBT_SO_SET_COUNTERS: + ret = compat_update_counters(sock_net(sk), user, len); + break; + default: + ret = -EINVAL; + } + return ret; +} + +static int compat_do_ebt_get_ctl(struct sock *sk, int cmd, + void __user *user, int *len) +{ + int ret; + struct compat_ebt_replace tmp; + struct ebt_table *t; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + /* try real handler in case userland supplied needed padding */ + if ((cmd == EBT_SO_GET_INFO || + cmd == EBT_SO_GET_INIT_INFO) && *len != sizeof(tmp)) 
+ return do_ebt_get_ctl(sk, cmd, user, len); + + if (copy_from_user(&tmp, user, sizeof(tmp))) + return -EFAULT; + + t = find_table_lock(sock_net(sk), tmp.name, &ret, &ebt_mutex); + if (!t) + return ret; + + xt_compat_lock(NFPROTO_BRIDGE); + switch (cmd) { + case EBT_SO_GET_INFO: + tmp.nentries = t->private->nentries; + ret = compat_table_info(t->private, &tmp); + if (ret) + goto out; + tmp.valid_hooks = t->valid_hooks; + + if (copy_to_user(user, &tmp, *len) != 0) { + ret = -EFAULT; + break; + } + ret = 0; + break; + case EBT_SO_GET_INIT_INFO: + tmp.nentries = t->table->nentries; + tmp.entries_size = t->table->entries_size; + tmp.valid_hooks = t->table->valid_hooks; + + if (copy_to_user(user, &tmp, *len) != 0) { + ret = -EFAULT; + break; + } + ret = 0; + break; + case EBT_SO_GET_ENTRIES: + case EBT_SO_GET_INIT_ENTRIES: + /* + * try real handler first in case of userland-side padding. + * in case we are dealing with an 'ordinary' 32 bit binary + * without 64bit compatibility padding, this will fail right + * after copy_from_user when the *len argument is validated. + * + * the compat_ variant needs to do one pass over the kernel + * data set to adjust for size differences before it the check. + */ + if (copy_everything_to_user(t, user, len, cmd) == 0) + ret = 0; + else + ret = compat_copy_everything_to_user(t, user, len, cmd); + break; + default: + ret = -EINVAL; + } + out: + xt_compat_flush_offsets(NFPROTO_BRIDGE); + xt_compat_unlock(NFPROTO_BRIDGE); + mutex_unlock(&ebt_mutex); + return ret; +} +#endif + static struct nf_sockopt_ops ebt_sockopts = { .pf = PF_INET, .set_optmin = EBT_BASE_CTL, .set_optmax = EBT_SO_SET_MAX + 1, .set = do_ebt_set_ctl, +#ifdef CONFIG_COMPAT + .compat_set = compat_do_ebt_set_ctl, +#endif .get_optmin = EBT_BASE_CTL, .get_optmax = EBT_SO_GET_MAX + 1, .get = do_ebt_get_ctl, +#ifdef CONFIG_COMPAT + .compat_get = compat_do_ebt_get_ctl, +#endif .owner = THIS_MODULE, }; |