From 80f8f1027b99660897bdeaeae73002185d829906 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 18 Jan 2011 07:46:52 +0000 Subject: net: filter: dont block softirqs in sk_run_filter() Packet filter (BPF) doesnt need to disable softirqs, being fully re-entrant and lock-less. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/filter.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/core') diff --git a/net/core/filter.c b/net/core/filter.c index afc58374ca9..232b1873bb2 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -142,14 +142,14 @@ int sk_filter(struct sock *sk, struct sk_buff *skb) if (err) return err; - rcu_read_lock_bh(); - filter = rcu_dereference_bh(sk->sk_filter); + rcu_read_lock(); + filter = rcu_dereference(sk->sk_filter); if (filter) { unsigned int pkt_len = sk_run_filter(skb, filter->insns); err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; } - rcu_read_unlock_bh(); + rcu_read_unlock(); return err; } -- cgit v1.2.3-70-g09d2 From cbda10fa97d72c7a1923be4426171aa90e8c6dab Mon Sep 17 00:00:00 2001 From: Vlad Dogaru Date: Thu, 13 Jan 2011 23:38:30 +0000 Subject: net_device: add support for network device groups Net devices can now be grouped, enabling simpler manipulation from userspace. This patch adds a group field to the net_device structure, as well as rtnetlink support to query and modify it. Signed-off-by: Vlad Dogaru Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/linux/if_link.h | 1 + include/linux/netdevice.h | 7 +++++++ net/core/dev.c | 12 ++++++++++++ net/core/rtnetlink.c | 6 ++++++ 4 files changed, 26 insertions(+) (limited to 'net/core') diff --git a/include/linux/if_link.h b/include/linux/if_link.h index 6485d2a89be..f4a2e6b1b86 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -135,6 +135,7 @@ enum { IFLA_VF_PORTS, IFLA_PORT_SELF, IFLA_AF_SPEC, + IFLA_GROUP, /* Group the device belongs to */ __IFLA_MAX }; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d971346b034..68a4627b74f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -75,6 +75,9 @@ struct wireless_dev; #define NET_RX_SUCCESS 0 /* keep 'em coming, baby */ #define NET_RX_DROP 1 /* packet dropped */ +/* Initial net device group. All devices belong to group 0 by default. */ +#define INIT_NETDEV_GROUP 0 + /* * Transmit return codes: transmit return codes originate from three different * namespaces: @@ -1153,6 +1156,9 @@ struct net_device { /* phy device may attach itself for hardware timestamping */ struct phy_device *phydev; + + /* group the device belongs to */ + int group; }; #define to_net_dev(d) container_of(d, struct net_device, dev) @@ -1844,6 +1850,7 @@ extern int dev_set_alias(struct net_device *, const char *, size_t); extern int dev_change_net_namespace(struct net_device *, struct net *, const char *); extern int dev_set_mtu(struct net_device *, int); +extern void dev_set_group(struct net_device *, int); extern int dev_set_mac_address(struct net_device *, struct sockaddr *); extern int dev_hard_start_xmit(struct sk_buff *skb, diff --git a/net/core/dev.c b/net/core/dev.c index 7741507429f..2b85d4ae981 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4571,6 +4571,17 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) } EXPORT_SYMBOL(dev_set_mtu); +/** + * dev_set_group - Change group this device belongs to + * @dev: device + * @new_group: group this device should belong to + */ +void dev_set_group(struct net_device *dev, int new_group) +{ + dev->group = new_group; +} +EXPORT_SYMBOL(dev_set_group); + /** * dev_set_mac_address - Change Media Access Control Address * @dev: device @@ -5678,6 +5689,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->priv_flags = IFF_XMIT_DST_RELEASE; setup(dev); strcpy(dev->name, name); + dev->group = INIT_NETDEV_GROUP; return dev; free_pcpu: diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a5f7535aab5..09062b07bf7 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -868,6 +868,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, netif_running(dev) ? dev->operstate : IF_OPER_DOWN); NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode); NLA_PUT_U32(skb, IFLA_MTU, dev->mtu); + NLA_PUT_U32(skb, IFLA_GROUP, dev->group); if (dev->ifindex != dev->iflink) NLA_PUT_U32(skb, IFLA_LINK, dev->iflink); @@ -1265,6 +1266,11 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, modified = 1; } + if (tb[IFLA_GROUP]) { + dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP])); + modified = 1; + } + /* * Interface selected by interface index but interface * name provided implies that a name change has been -- cgit v1.2.3-70-g09d2 From e7ed828f10bd89a28f821ae7f20e691704d61923 Mon Sep 17 00:00:00 2001 From: Vlad Dogaru Date: Thu, 13 Jan 2011 23:38:31 +0000 Subject: netlink: support setting devgroup parameters If a rtnetlink request specifies a negative or zero ifindex and has no interface name attribute, but has a group attribute, then the chenges are made to all the interfaces belonging to the specified group. Signed-off-by: Vlad Dogaru Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) (limited to 'net/core') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 09062b07bf7..a0b2eeb3b61 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1558,6 +1558,24 @@ err: } EXPORT_SYMBOL(rtnl_create_link); +static int rtnl_group_changelink(struct net *net, int group, + struct ifinfomsg *ifm, + struct nlattr **tb) +{ + struct net_device *dev; + int err; + + for_each_netdev(net, dev) { + if (dev->group == group) { + err = do_setlink(dev, ifm, tb, NULL, 0); + if (err < 0) + return err; + } + } + + return 0; +} + static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(skb->sk); @@ -1585,10 +1603,16 @@ replay: ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) dev = __dev_get_by_index(net, ifm->ifi_index); - else if (ifname[0]) - dev = __dev_get_by_name(net, ifname); - else - dev = NULL; + else { + if (ifname[0]) + dev = __dev_get_by_name(net, ifname); + else if (tb[IFLA_GROUP]) + return rtnl_group_changelink(net, + nla_get_u32(tb[IFLA_GROUP]), + ifm, tb); + else + dev = NULL; + } err = validate_linkmsg(dev, tb); if (err < 0) -- cgit v1.2.3-70-g09d2 From 4f57c087de9b46182545676d2c594120a20f2e58 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 17 Jan 2011 08:06:04 +0000 Subject: net: implement mechanism for HW based QOS This patch provides a mechanism for lower layer devices to steer traffic using skb->priority to tx queues. This allows for hardware based QOS schemes to use the default qdisc without incurring the penalties related to global state and the qdisc lock. While reliably receiving skbs on the correct tx ring to avoid head of line blocking resulting from shuffling in the LLD. Finally, all the goodness from txq caching and xps/rps can still be leveraged. Many drivers and hardware exist with the ability to implement QOS schemes in the hardware but currently these drivers tend to rely on firmware to reroute specific traffic, a driver specific select_queue or the queue_mapping action in the qdisc. By using select_queue for this drivers need to be updated for each and every traffic type and we lose the goodness of much of the upstream work. Firmware solutions are inherently inflexible. And finally if admins are expected to build a qdisc and filter rules to steer traffic this requires knowledge of how the hardware is currently configured. The number of tx queues and the queue offsets may change depending on resources. Also this approach incurs all the overhead of a qdisc with filters. With the mechanism in this patch users can set skb priority using expected methods ie setsockopt() or the stack can set the priority directly. Then the skb will be steered to the correct tx queues aligned with hardware QOS traffic classes. In the normal case with single traffic class and all queues in this class everything works as is until the LLD enables multiple tcs. To steer the skb we mask out the lower 4 bits of the priority and allow the hardware to configure upto 15 distinct classes of traffic. This is expected to be sufficient for most applications at any rate it is more then the 8021Q spec designates and is equal to the number of prio bands currently implemented in the default qdisc. This in conjunction with a userspace application such as lldpad can be used to implement 8021Q transmission selection algorithms one of these algorithms being the extended transmission selection algorithm currently being used for DCB. Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- include/linux/netdevice.h | 68 +++++++++++++++++++++++++++++++++++++++++++++++ net/core/dev.c | 55 +++++++++++++++++++++++++++++++++++++- 2 files changed, 122 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 68a4627b74f..371fa8839d5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -646,6 +646,14 @@ struct xps_dev_maps { (nr_cpu_ids * sizeof(struct xps_map *))) #endif /* CONFIG_XPS */ +#define TC_MAX_QUEUE 16 +#define TC_BITMASK 15 +/* HW offloaded queuing disciplines txq count and offset maps */ +struct netdev_tc_txq { + u16 count; + u16 offset; +}; + /* * This structure defines the management hooks for network devices. * The following hooks can be defined; unless noted otherwise, they are @@ -756,6 +764,11 @@ struct xps_dev_maps { * int (*ndo_set_vf_port)(struct net_device *dev, int vf, * struct nlattr *port[]); * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb); + * int (*ndo_setup_tc)(struct net_device *dev, u8 tc) + * Called to setup 'tc' number of traffic classes in the net device. This + * is always called from the stack with the rtnl lock held and netif tx + * queues stopped. This allows the netdevice to perform queue management + * safely. */ #define HAVE_NET_DEVICE_OPS struct net_device_ops { @@ -814,6 +827,7 @@ struct net_device_ops { struct nlattr *port[]); int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb); + int (*ndo_setup_tc)(struct net_device *dev, u8 tc); #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) int (*ndo_fcoe_enable)(struct net_device *dev); int (*ndo_fcoe_disable)(struct net_device *dev); @@ -1146,6 +1160,9 @@ struct net_device { /* Data Center Bridging netlink ops */ const struct dcbnl_rtnl_ops *dcbnl_ops; #endif + u8 num_tc; + struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE]; + u8 prio_tc_map[TC_BITMASK + 1]; #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) /* max exchange id for FCoE LRO by ddp */ @@ -1164,6 +1181,57 @@ struct net_device { #define NETDEV_ALIGN 32 +static inline +int netdev_get_prio_tc_map(const struct net_device *dev, u32 prio) +{ + return dev->prio_tc_map[prio & TC_BITMASK]; +} + +static inline +int netdev_set_prio_tc_map(struct net_device *dev, u8 prio, u8 tc) +{ + if (tc >= dev->num_tc) + return -EINVAL; + + dev->prio_tc_map[prio & TC_BITMASK] = tc & TC_BITMASK; + return 0; +} + +static inline +void netdev_reset_tc(struct net_device *dev) +{ + dev->num_tc = 0; + memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq)); + memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map)); +} + +static inline +int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset) +{ + if (tc >= dev->num_tc) + return -EINVAL; + + dev->tc_to_txq[tc].count = count; + dev->tc_to_txq[tc].offset = offset; + return 0; +} + +static inline +int netdev_set_num_tc(struct net_device *dev, u8 num_tc) +{ + if (num_tc > TC_MAX_QUEUE) + return -EINVAL; + + dev->num_tc = num_tc; + return 0; +} + +static inline +int netdev_get_num_tc(struct net_device *dev) +{ + return dev->num_tc; +} + static inline struct netdev_queue *netdev_get_tx_queue(const struct net_device *dev, unsigned int index) diff --git a/net/core/dev.c b/net/core/dev.c index 2b85d4ae981..8b1d886ed23 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1593,6 +1593,48 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) rcu_read_unlock(); } +/* netif_setup_tc - Handle tc mappings on real_num_tx_queues change + * @dev: Network device + * @txq: number of queues available + * + * If real_num_tx_queues is changed the tc mappings may no longer be + * valid. To resolve this verify the tc mapping remains valid and if + * not NULL the mapping. With no priorities mapping to this + * offset/count pair it will no longer be used. In the worst case TC0 + * is invalid nothing can be done so disable priority mappings. If is + * expected that drivers will fix this mapping if they can before + * calling netif_set_real_num_tx_queues. + */ +void netif_setup_tc(struct net_device *dev, unsigned int txq) +{ + int i; + struct netdev_tc_txq *tc = &dev->tc_to_txq[0]; + + /* If TC0 is invalidated disable TC mapping */ + if (tc->offset + tc->count > txq) { + pr_warning("Number of in use tx queues changed " + "invalidating tc mappings. Priority " + "traffic classification disabled!\n"); + dev->num_tc = 0; + return; + } + + /* Invalidated prio to tc mappings set to TC0 */ + for (i = 1; i < TC_BITMASK + 1; i++) { + int q = netdev_get_prio_tc_map(dev, i); + + tc = &dev->tc_to_txq[q]; + if (tc->offset + tc->count > txq) { + pr_warning("Number of in use tx queues " + "changed. Priority %i to tc " + "mapping %i is no longer valid " + "setting map to 0\n", + i, q); + netdev_set_prio_tc_map(dev, i, 0); + } + } +} + /* * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues * greater then real_num_tx_queues stale skbs on the qdisc must be flushed. @@ -1612,6 +1654,9 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) if (rc) return rc; + if (dev->num_tc) + netif_setup_tc(dev, txq); + if (txq < dev->real_num_tx_queues) qdisc_reset_all_tx_gt(dev, txq); } @@ -2161,6 +2206,8 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, unsigned int num_tx_queues) { u32 hash; + u16 qoffset = 0; + u16 qcount = num_tx_queues; if (skb_rx_queue_recorded(skb)) { hash = skb_get_rx_queue(skb); @@ -2169,13 +2216,19 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, return hash; } + if (dev->num_tc) { + u8 tc = netdev_get_prio_tc_map(dev, skb->priority); + qoffset = dev->tc_to_txq[tc].offset; + qcount = dev->tc_to_txq[tc].count; + } + if (skb->sk && skb->sk->sk_hash) hash = skb->sk->sk_hash; else hash = (__force u16) skb->protocol ^ skb->rxhash; hash = jhash_1word(hash, hashrnd); - return (u16) (((u64) hash * num_tx_queues) >> 32); + return (u16) (((u64) hash * qcount) >> 32) + qoffset; } EXPORT_SYMBOL(__skb_tx_hash); -- cgit v1.2.3-70-g09d2 From 3fbd8758b027995b677046dae46f9b41ea88c88f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 19 Jan 2011 21:23:22 +0000 Subject: net: dev_close_many() is static Signed-off-by: Eric Dumazet CC: Octavian Purdila Reviewed-by: Octavian Purdila Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 8b1d886ed23..a4ccd47f319 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1285,7 +1285,7 @@ static int __dev_close(struct net_device *dev) return __dev_close_many(&single); } -int dev_close_many(struct list_head *head) +static int dev_close_many(struct list_head *head) { struct net_device *dev, *tmp; LIST_HEAD(tmp_list); -- cgit v1.2.3-70-g09d2 From a2da570d62fcb9e8816f6920e1ec02c706b289fa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Jan 2011 03:48:19 +0000 Subject: net_sched: RCU conversion of stab This patch converts stab qdisc management to RCU, so that we can perform the qdisc_calculate_pkt_len() call before getting qdisc lock. This shortens the lock's held time in __dev_xmit_skb(). This permits more qdiscs to get TCQ_F_CAN_BYPASS status, avoiding lot of cache misses and so reducing latencies. Signed-off-by: Eric Dumazet CC: Patrick McHardy CC: Jesper Dangaard Brouer CC: Jarek Poplawski CC: Jamal Hadi Salim CC: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/sch_generic.h | 21 +++++++++++++++------ net/core/dev.c | 8 +++++--- net/sched/sch_api.c | 26 +++++++++++++++++--------- net/sched/sch_generic.c | 2 +- 4 files changed, 38 insertions(+), 19 deletions(-) (limited to 'net/core') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index f6345f55041..d531baa2506 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -36,6 +36,7 @@ enum qdisc___state_t { }; struct qdisc_size_table { + struct rcu_head rcu; struct list_head list; struct tc_sizespec szopts; int refcnt; @@ -53,7 +54,7 @@ struct Qdisc { #define TCQ_F_WARN_NONWC (1 << 16) int padded; struct Qdisc_ops *ops; - struct qdisc_size_table *stab; + struct qdisc_size_table __rcu *stab; struct list_head list; u32 handle; u32 parent; @@ -349,8 +350,8 @@ extern struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, struct Qdisc_ops *ops); extern struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue, struct Qdisc_ops *ops, u32 parentid); -extern void qdisc_calculate_pkt_len(struct sk_buff *skb, - struct qdisc_size_table *stab); +extern void __qdisc_calculate_pkt_len(struct sk_buff *skb, + const struct qdisc_size_table *stab); extern void tcf_destroy(struct tcf_proto *tp); extern void tcf_destroy_chain(struct tcf_proto **fl); @@ -429,12 +430,20 @@ enum net_xmit_qdisc_t { #define net_xmit_drop_count(e) (1) #endif -static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch) +static inline void qdisc_calculate_pkt_len(struct sk_buff *skb, + const struct Qdisc *sch) { #ifdef CONFIG_NET_SCHED - if (sch->stab) - qdisc_calculate_pkt_len(skb, sch->stab); + struct qdisc_size_table *stab = rcu_dereference_bh(sch->stab); + + if (stab) + __qdisc_calculate_pkt_len(skb, stab); #endif +} + +static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch) +{ + qdisc_calculate_pkt_len(skb, sch); return sch->enqueue(skb, sch); } diff --git a/net/core/dev.c b/net/core/dev.c index a4ccd47f319..2730352d2cc 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2325,15 +2325,18 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, struct netdev_queue *txq) { spinlock_t *root_lock = qdisc_lock(q); - bool contended = qdisc_is_running(q); + bool contended; int rc; + qdisc_skb_cb(skb)->pkt_len = skb->len; + qdisc_calculate_pkt_len(skb, q); /* * Heuristic to force contended enqueues to serialize on a * separate lock before trying to get qdisc main lock. * This permits __QDISC_STATE_RUNNING owner to get the lock more often * and dequeue packets faster. */ + contended = qdisc_is_running(q); if (unlikely(contended)) spin_lock(&q->busylock); @@ -2351,7 +2354,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) skb_dst_force(skb); - qdisc_skb_cb(skb)->pkt_len = skb->len; qdisc_bstats_update(q, skb); if (sch_direct_xmit(skb, q, dev, txq, root_lock)) { @@ -2366,7 +2368,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, rc = NET_XMIT_SUCCESS; } else { skb_dst_force(skb); - rc = qdisc_enqueue_root(skb, q); + rc = q->enqueue(skb, q) & NET_XMIT_MASK; if (qdisc_run_begin(q)) { if (unlikely(contended)) { spin_unlock(&q->busylock); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 374fcbef80e..15074157940 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -398,6 +398,11 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt) return stab; } +static void stab_kfree_rcu(struct rcu_head *head) +{ + kfree(container_of(head, struct qdisc_size_table, rcu)); +} + void qdisc_put_stab(struct qdisc_size_table *tab) { if (!tab) @@ -407,7 +412,7 @@ void qdisc_put_stab(struct qdisc_size_table *tab) if (--tab->refcnt == 0) { list_del(&tab->list); - kfree(tab); + call_rcu_bh(&tab->rcu, stab_kfree_rcu); } spin_unlock(&qdisc_stab_lock); @@ -430,7 +435,7 @@ nla_put_failure: return -1; } -void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab) +void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab) { int pkt_len, slot; @@ -456,7 +461,7 @@ out: pkt_len = 1; qdisc_skb_cb(skb)->pkt_len = pkt_len; } -EXPORT_SYMBOL(qdisc_calculate_pkt_len); +EXPORT_SYMBOL(__qdisc_calculate_pkt_len); void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc) { @@ -835,7 +840,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, err = PTR_ERR(stab); goto err_out4; } - sch->stab = stab; + rcu_assign_pointer(sch->stab, stab); } if (tca[TCA_RATE]) { spinlock_t *root_lock; @@ -875,7 +880,7 @@ err_out4: * Any broken qdiscs that would require a ops->reset() here? * The qdisc was never in action so it shouldn't be necessary. */ - qdisc_put_stab(sch->stab); + qdisc_put_stab(rtnl_dereference(sch->stab)); if (ops->destroy) ops->destroy(sch); goto err_out3; @@ -883,7 +888,7 @@ err_out4: static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) { - struct qdisc_size_table *stab = NULL; + struct qdisc_size_table *ostab, *stab = NULL; int err = 0; if (tca[TCA_OPTIONS]) { @@ -900,8 +905,9 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) return PTR_ERR(stab); } - qdisc_put_stab(sch->stab); - sch->stab = stab; + ostab = rtnl_dereference(sch->stab); + rcu_assign_pointer(sch->stab, stab); + qdisc_put_stab(ostab); if (tca[TCA_RATE]) { /* NB: ignores errors from replace_estimator @@ -1180,6 +1186,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, struct nlmsghdr *nlh; unsigned char *b = skb_tail_pointer(skb); struct gnet_dump d; + struct qdisc_size_table *stab; nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags); tcm = NLMSG_DATA(nlh); @@ -1195,7 +1202,8 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, goto nla_put_failure; q->qstats.qlen = q->q.qlen; - if (q->stab && qdisc_dump_stab(skb, q->stab) < 0) + stab = rtnl_dereference(q->stab); + if (stab && qdisc_dump_stab(skb, stab) < 0) goto nla_put_failure; if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 2f1cb62130d..cc17e794c41 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -632,7 +632,7 @@ void qdisc_destroy(struct Qdisc *qdisc) #ifdef CONFIG_NET_SCHED qdisc_list_del(qdisc); - qdisc_put_stab(qdisc->stab); + qdisc_put_stab(rtnl_dereference(qdisc->stab)); #endif gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); if (ops->reset) -- cgit v1.2.3-70-g09d2 From 6193d2be290990b789021e06fa770ecb45319f2d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 19 Jan 2011 22:02:47 +0000 Subject: neigh: __rcu annotations fix some minor issues and sparse (__rcu) warnings Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/neighbour.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'net/core') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 60a90291342..799f06e03a2 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -316,7 +316,7 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries) { size_t size = entries * sizeof(struct neighbour *); struct neigh_hash_table *ret; - struct neighbour **buckets; + struct neighbour __rcu **buckets; ret = kmalloc(sizeof(*ret), GFP_ATOMIC); if (!ret) @@ -324,14 +324,14 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries) if (size <= PAGE_SIZE) buckets = kzalloc(size, GFP_ATOMIC); else - buckets = (struct neighbour **) + buckets = (struct neighbour __rcu **) __get_free_pages(GFP_ATOMIC | __GFP_ZERO, get_order(size)); if (!buckets) { kfree(ret); return NULL; } - rcu_assign_pointer(ret->hash_buckets, buckets); + ret->hash_buckets = buckets; ret->hash_mask = entries - 1; get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd)); return ret; @@ -343,7 +343,7 @@ static void neigh_hash_free_rcu(struct rcu_head *head) struct neigh_hash_table, rcu); size_t size = (nht->hash_mask + 1) * sizeof(struct neighbour *); - struct neighbour **buckets = nht->hash_buckets; + struct neighbour __rcu **buckets = nht->hash_buckets; if (size <= PAGE_SIZE) kfree(buckets); @@ -1540,7 +1540,7 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) panic("cannot create neighbour proc dir entry"); #endif - tbl->nht = neigh_hash_alloc(8); + RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(8)); phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *); tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL); @@ -1602,7 +1602,8 @@ int neigh_table_clear(struct neigh_table *tbl) } write_unlock(&neigh_tbl_lock); - call_rcu(&tbl->nht->rcu, neigh_hash_free_rcu); + call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu, + neigh_hash_free_rcu); tbl->nht = NULL; kfree(tbl->phash_buckets); -- cgit v1.2.3-70-g09d2 From ffa934f192c8381061242eb170419266ef229902 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 20 Jan 2011 03:00:42 +0000 Subject: rtnetlink: fix link attribute validation with IFLA_GROUP rtnl_group_changelink() is invoked by rtnl_newlink() before the link attributes have been validated. Additionally the group changes are performed even if NLM_F_CREATE is specified and a new link is created, while more reasonable semantics would be to set the group value on the newly created link. Fix both problems by moving the rtnl_group_changelink() invocation down to the handling of non-existant links without NLM_F_CREATE() and add a dev_set_group() call to rtnl_create_link(). Signed-off-by: Patrick McHardy Acked-by: Vlad Dogaru Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'net/core') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a0b2eeb3b61..310eb804e09 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1548,6 +1548,8 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net, set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); if (tb[IFLA_LINKMODE]) dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); + if (tb[IFLA_GROUP]) + dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP])); return dev; @@ -1606,10 +1608,6 @@ replay: else { if (ifname[0]) dev = __dev_get_by_name(net, ifname); - else if (tb[IFLA_GROUP]) - return rtnl_group_changelink(net, - nla_get_u32(tb[IFLA_GROUP]), - ifm, tb); else dev = NULL; } @@ -1676,8 +1674,13 @@ replay: return do_setlink(dev, ifm, tb, ifname, modified); } - if (!(nlh->nlmsg_flags & NLM_F_CREATE)) + if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { + if (ifm->ifi_index == 0 && tb[IFLA_GROUP]) + return rtnl_group_changelink(net, + nla_get_u32(tb[IFLA_GROUP]), + ifm, tb); return -ENODEV; + } if (ifm->ifi_index) return -EOPNOTSUPP; -- cgit v1.2.3-70-g09d2 From bb134d2298b49f50cf6d9388410fba96272905dc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Jan 2011 19:18:08 +0000 Subject: net: netif_setup_tc() is static Signed-off-by: Eric Dumazet Acked-by: John Fastabend Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 2730352d2cc..47d3d78d541 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1605,7 +1605,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) * expected that drivers will fix this mapping if they can before * calling netif_set_real_num_tx_queues. */ -void netif_setup_tc(struct net_device *dev, unsigned int txq) +static void netif_setup_tc(struct net_device *dev, unsigned int txq) { int i; struct netdev_tc_txq *tc = &dev->tc_to_txq[0]; -- cgit v1.2.3-70-g09d2 From c445477d74ab3779d1386ab797fbb9b628eb9f64 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 19 Jan 2011 11:03:53 +0000 Subject: net: RPS: Enable hardware acceleration of RFS Allow drivers for multiqueue hardware with flow filter tables to accelerate RFS. The driver must: 1. Set net_device::rx_cpu_rmap to a cpu_rmap of the RX completion IRQs (in queue order). This will provide a mapping from CPUs to the queues for which completions are handled nearest to them. 2. Implement net_device_ops::ndo_rx_flow_steer. This operation adds or replaces a filter steering the given flow to the given RX queue, if possible. 3. Periodically remove filters for which rps_may_expire_flow() returns true. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/netdevice.h | 33 ++++++++++++++-- net/Kconfig | 6 +++ net/core/dev.c | 97 ++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 127 insertions(+), 9 deletions(-) (limited to 'net/core') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 371fa8839d5..a335f202269 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -554,14 +554,16 @@ struct rps_map { #define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + (_num * sizeof(u16))) /* - * The rps_dev_flow structure contains the mapping of a flow to a CPU and the - * tail pointer for that CPU's input queue at the time of last enqueue. + * The rps_dev_flow structure contains the mapping of a flow to a CPU, the + * tail pointer for that CPU's input queue at the time of last enqueue, and + * a hardware filter index. */ struct rps_dev_flow { u16 cpu; - u16 fill; + u16 filter; unsigned int last_qtail; }; +#define RPS_NO_FILTER 0xffff /* * The rps_dev_flow_table structure contains a table of flow mappings. @@ -611,6 +613,11 @@ static inline void rps_reset_sock_flow(struct rps_sock_flow_table *table, extern struct rps_sock_flow_table __rcu *rps_sock_flow_table; +#ifdef CONFIG_RFS_ACCEL +extern bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, + u32 flow_id, u16 filter_id); +#endif + /* This structure contains an instance of an RX queue. */ struct netdev_rx_queue { struct rps_map __rcu *rps_map; @@ -769,6 +776,13 @@ struct netdev_tc_txq { * is always called from the stack with the rtnl lock held and netif tx * queues stopped. This allows the netdevice to perform queue management * safely. + * + * RFS acceleration. + * int (*ndo_rx_flow_steer)(struct net_device *dev, const struct sk_buff *skb, + * u16 rxq_index, u32 flow_id); + * Set hardware filter for RFS. rxq_index is the target queue index; + * flow_id is a flow ID to be passed to rps_may_expire_flow() later. + * Return the filter ID on success, or a negative error code. */ #define HAVE_NET_DEVICE_OPS struct net_device_ops { @@ -842,6 +856,12 @@ struct net_device_ops { int (*ndo_fcoe_get_wwn)(struct net_device *dev, u64 *wwn, int type); #endif +#ifdef CONFIG_RFS_ACCEL + int (*ndo_rx_flow_steer)(struct net_device *dev, + const struct sk_buff *skb, + u16 rxq_index, + u32 flow_id); +#endif }; /* @@ -1056,6 +1076,13 @@ struct net_device { /* Number of RX queues currently active in device */ unsigned int real_num_rx_queues; + +#ifdef CONFIG_RFS_ACCEL + /* CPU reverse-mapping for RX completion interrupts, indexed + * by RX queue number. Assigned by driver. This must only be + * set if the ndo_rx_flow_steer operation is defined. */ + struct cpu_rmap *rx_cpu_rmap; +#endif #endif rx_handler_func_t __rcu *rx_handler; diff --git a/net/Kconfig b/net/Kconfig index 72840626284..79cabf1ee68 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -221,6 +221,12 @@ config RPS depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS default y +config RFS_ACCEL + boolean + depends on RPS && GENERIC_HARDIRQS + select CPU_RMAP + default y + config XPS boolean depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS diff --git a/net/core/dev.c b/net/core/dev.c index d162ba8d622..aa761472f9e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -132,6 +132,7 @@ #include #include #include +#include #include "net-sysfs.h" @@ -2588,6 +2589,53 @@ EXPORT_SYMBOL(__skb_get_rxhash); struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly; EXPORT_SYMBOL(rps_sock_flow_table); +static struct rps_dev_flow * +set_rps_cpu(struct net_device *dev, struct sk_buff *skb, + struct rps_dev_flow *rflow, u16 next_cpu) +{ + u16 tcpu; + + tcpu = rflow->cpu = next_cpu; + if (tcpu != RPS_NO_CPU) { +#ifdef CONFIG_RFS_ACCEL + struct netdev_rx_queue *rxqueue; + struct rps_dev_flow_table *flow_table; + struct rps_dev_flow *old_rflow; + u32 flow_id; + u16 rxq_index; + int rc; + + /* Should we steer this flow to a different hardware queue? */ + if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap) + goto out; + rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu); + if (rxq_index == skb_get_rx_queue(skb)) + goto out; + + rxqueue = dev->_rx + rxq_index; + flow_table = rcu_dereference(rxqueue->rps_flow_table); + if (!flow_table) + goto out; + flow_id = skb->rxhash & flow_table->mask; + rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb, + rxq_index, flow_id); + if (rc < 0) + goto out; + old_rflow = rflow; + rflow = &flow_table->flows[flow_id]; + rflow->cpu = next_cpu; + rflow->filter = rc; + if (old_rflow->filter == rflow->filter) + old_rflow->filter = RPS_NO_FILTER; + out: +#endif + rflow->last_qtail = + per_cpu(softnet_data, tcpu).input_queue_head; + } + + return rflow; +} + /* * get_rps_cpu is called from netif_receive_skb and returns the target * CPU from the RPS map of the receiving queue for a given skb. @@ -2658,12 +2706,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, if (unlikely(tcpu != next_cpu) && (tcpu == RPS_NO_CPU || !cpu_online(tcpu) || ((int)(per_cpu(softnet_data, tcpu).input_queue_head - - rflow->last_qtail)) >= 0)) { - tcpu = rflow->cpu = next_cpu; - if (tcpu != RPS_NO_CPU) - rflow->last_qtail = per_cpu(softnet_data, - tcpu).input_queue_head; - } + rflow->last_qtail)) >= 0)) + rflow = set_rps_cpu(dev, skb, rflow, next_cpu); + if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) { *rflowp = rflow; cpu = tcpu; @@ -2684,6 +2729,46 @@ done: return cpu; } +#ifdef CONFIG_RFS_ACCEL + +/** + * rps_may_expire_flow - check whether an RFS hardware filter may be removed + * @dev: Device on which the filter was set + * @rxq_index: RX queue index + * @flow_id: Flow ID passed to ndo_rx_flow_steer() + * @filter_id: Filter ID returned by ndo_rx_flow_steer() + * + * Drivers that implement ndo_rx_flow_steer() should periodically call + * this function for each installed filter and remove the filters for + * which it returns %true. + */ +bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, + u32 flow_id, u16 filter_id) +{ + struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index; + struct rps_dev_flow_table *flow_table; + struct rps_dev_flow *rflow; + bool expire = true; + int cpu; + + rcu_read_lock(); + flow_table = rcu_dereference(rxqueue->rps_flow_table); + if (flow_table && flow_id <= flow_table->mask) { + rflow = &flow_table->flows[flow_id]; + cpu = ACCESS_ONCE(rflow->cpu); + if (rflow->filter == filter_id && cpu != RPS_NO_CPU && + ((int)(per_cpu(softnet_data, cpu).input_queue_head - + rflow->last_qtail) < + (int)(10 * flow_table->mask))) + expire = false; + } + rcu_read_unlock(); + return expire; +} +EXPORT_SYMBOL(rps_may_expire_flow); + +#endif /* CONFIG_RFS_ACCEL */ + /* Called from hardirq (IPI) context */ static void rps_trigger_softirq(void *data) { -- cgit v1.2.3-70-g09d2 From 57422dc530115e427dff464cc0a32bcd0efb5008 Mon Sep 17 00:00:00 2001 From: Michał Mirosław Date: Sat, 22 Jan 2011 12:14:12 +0000 Subject: net: Move check of checksum features to netdev_fix_features() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- net/core/dev.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index aa761472f9e..ad374189858 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5215,6 +5215,23 @@ static void rollback_registered(struct net_device *dev) unsigned long netdev_fix_features(unsigned long features, const char *name) { + /* Fix illegal checksum combinations */ + if ((features & NETIF_F_HW_CSUM) && + (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { + if (name) + printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n", + name); + features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); + } + + if ((features & NETIF_F_NO_CSUM) && + (features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { + if (name) + printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n", + name); + features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM); + } + /* Fix illegal SG+CSUM combinations. */ if ((features & NETIF_F_SG) && !(features & NETIF_F_ALL_CSUM)) { @@ -5390,21 +5407,6 @@ int register_netdevice(struct net_device *dev) if (dev->iflink == -1) dev->iflink = dev->ifindex; - /* Fix illegal checksum combinations */ - if ((dev->features & NETIF_F_HW_CSUM) && - (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { - printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n", - dev->name); - dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); - } - - if ((dev->features & NETIF_F_NO_CSUM) && - (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { - printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n", - dev->name); - dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM); - } - dev->features = netdev_fix_features(dev->features, dev->name); /* Enable software GSO if SG is supported. */ -- cgit v1.2.3-70-g09d2 From 04ed3e741d0f133e02bed7fa5c98edba128f90e7 Mon Sep 17 00:00:00 2001 From: Michał Mirosław Date: Mon, 24 Jan 2011 15:32:47 -0800 Subject: net: change netdev->features to u32 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Quoting Ben Hutchings: we presumably won't be defining features that can only be enabled on 64-bit architectures. Occurences found by `grep -r` on net/, drivers/net, include/ [ Move features and vlan_features next to each other in struct netdev, as per Eric Dumazet's suggestion -DaveM ] Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- drivers/net/bnx2.c | 2 +- drivers/net/bonding/bond_main.c | 4 ++-- drivers/net/myri10ge/myri10ge.c | 4 ++-- drivers/net/sfc/ethtool.c | 4 ++-- drivers/net/sfc/net_driver.h | 2 +- drivers/net/tun.c | 2 +- include/linux/netdevice.h | 24 ++++++++++++------------ include/linux/skbuff.h | 2 +- include/net/protocol.h | 4 ++-- include/net/tcp.h | 2 +- include/net/udp.h | 2 +- net/8021q/vlan.c | 2 +- net/bridge/br_if.c | 2 +- net/bridge/br_private.h | 2 +- net/core/dev.c | 15 +++++++-------- net/core/ethtool.c | 2 +- net/core/net-sysfs.c | 2 +- net/core/skbuff.c | 4 ++-- net/ipv4/af_inet.c | 2 +- net/ipv4/tcp.c | 2 +- net/ipv4/udp.c | 2 +- net/ipv6/af_inet6.c | 2 +- net/ipv6/udp.c | 2 +- 23 files changed, 45 insertions(+), 46 deletions(-) (limited to 'net/core') diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index df99edf3464..cab96fa4cd3 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -8312,7 +8312,7 @@ static const struct net_device_ops bnx2_netdev_ops = { #endif }; -static void inline vlan_features_add(struct net_device *dev, unsigned long flags) +static void inline vlan_features_add(struct net_device *dev, u32 flags) { dev->vlan_features |= flags; } diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 163e0b06eaa..7047b406b8b 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1372,8 +1372,8 @@ static int bond_compute_features(struct bonding *bond) { struct slave *slave; struct net_device *bond_dev = bond->dev; - unsigned long features = bond_dev->features; - unsigned long vlan_features = 0; + u32 features = bond_dev->features; + u32 vlan_features = 0; unsigned short max_hard_header_len = max((u16)ETH_HLEN, bond_dev->hard_header_len); int i; diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c index ea5cfe2c3a0..a7f2eed9a08 100644 --- a/drivers/net/myri10ge/myri10ge.c +++ b/drivers/net/myri10ge/myri10ge.c @@ -253,7 +253,7 @@ struct myri10ge_priv { unsigned long serial_number; int vendor_specific_offset; int fw_multicast_support; - unsigned long features; + u32 features; u32 max_tso6; u32 read_dma; u32 write_dma; @@ -1776,7 +1776,7 @@ static int myri10ge_set_rx_csum(struct net_device *netdev, u32 csum_enabled) static int myri10ge_set_tso(struct net_device *netdev, u32 tso_enabled) { struct myri10ge_priv *mgp = netdev_priv(netdev); - unsigned long flags = mgp->features & (NETIF_F_TSO6 | NETIF_F_TSO); + u32 flags = mgp->features & (NETIF_F_TSO6 | NETIF_F_TSO); if (tso_enabled) netdev->features |= flags; diff --git a/drivers/net/sfc/ethtool.c b/drivers/net/sfc/ethtool.c index 0e8bb19ed60..713969accdb 100644 --- a/drivers/net/sfc/ethtool.c +++ b/drivers/net/sfc/ethtool.c @@ -502,7 +502,7 @@ static void efx_ethtool_get_stats(struct net_device *net_dev, static int efx_ethtool_set_tso(struct net_device *net_dev, u32 enable) { struct efx_nic *efx __attribute__ ((unused)) = netdev_priv(net_dev); - unsigned long features; + u32 features; features = NETIF_F_TSO; if (efx->type->offload_features & NETIF_F_V6_CSUM) @@ -519,7 +519,7 @@ static int efx_ethtool_set_tso(struct net_device *net_dev, u32 enable) static int efx_ethtool_set_tx_csum(struct net_device *net_dev, u32 enable) { struct efx_nic *efx = netdev_priv(net_dev); - unsigned long features = efx->type->offload_features & NETIF_F_ALL_CSUM; + u32 features = efx->type->offload_features & NETIF_F_ALL_CSUM; if (enable) net_dev->features |= features; diff --git a/drivers/net/sfc/net_driver.h b/drivers/net/sfc/net_driver.h index 28df8665256..c65270241d2 100644 --- a/drivers/net/sfc/net_driver.h +++ b/drivers/net/sfc/net_driver.h @@ -906,7 +906,7 @@ struct efx_nic_type { unsigned int phys_addr_channels; unsigned int tx_dc_base; unsigned int rx_dc_base; - unsigned long offload_features; + u32 offload_features; u32 reset_world_flags; }; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index b100bd50a0d..55786a0efc4 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1142,7 +1142,7 @@ static int tun_get_iff(struct net *net, struct tun_struct *tun, * privs required. */ static int set_offload(struct net_device *dev, unsigned long arg) { - unsigned int old_features, features; + u32 old_features, features; old_features = dev->features; /* Unset features, set them as we chew on the arg. */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a335f202269..0de3c59720f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -914,7 +914,11 @@ struct net_device { struct list_head unreg_list; /* Net device features */ - unsigned long features; + u32 features; + + /* VLAN feature mask */ + u32 vlan_features; + #define NETIF_F_SG 1 /* Scatter/gather IO. */ #define NETIF_F_IP_CSUM 2 /* Can checksum TCP/UDP over IPv4. */ #define NETIF_F_NO_CSUM 4 /* Does not require checksum. F.e. loopack. */ @@ -1176,9 +1180,6 @@ struct net_device { /* rtnetlink link ops */ const struct rtnl_link_ops *rtnl_link_ops; - /* VLAN feature mask */ - unsigned long vlan_features; - /* for setting kernel sock attribute on TCP connection setup */ #define GSO_MAX_SIZE 65536 unsigned int gso_max_size; @@ -1401,7 +1402,7 @@ struct packet_type { struct packet_type *, struct net_device *); struct sk_buff *(*gso_segment)(struct sk_buff *skb, - int features); + u32 features); int (*gso_send_check)(struct sk_buff *skb); struct sk_buff **(*gro_receive)(struct sk_buff **head, struct sk_buff *skb); @@ -2370,7 +2371,7 @@ extern int netdev_tstamp_prequeue; extern int weight_p; extern int netdev_set_master(struct net_device *dev, struct net_device *master); extern int skb_checksum_help(struct sk_buff *skb); -extern struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features); +extern struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features); #ifdef CONFIG_BUG extern void netdev_rx_csum_fault(struct net_device *dev); #else @@ -2397,22 +2398,21 @@ extern char *netdev_drivername(const struct net_device *dev, char *buffer, int l extern void linkwatch_run_queue(void); -unsigned long netdev_increment_features(unsigned long all, unsigned long one, - unsigned long mask); -unsigned long netdev_fix_features(unsigned long features, const char *name); +u32 netdev_increment_features(u32 all, u32 one, u32 mask); +u32 netdev_fix_features(u32 features, const char *name); void netif_stacked_transfer_operstate(const struct net_device *rootdev, struct net_device *dev); -int netif_skb_features(struct sk_buff *skb); +u32 netif_skb_features(struct sk_buff *skb); -static inline int net_gso_ok(int features, int gso_type) +static inline int net_gso_ok(u32 features, int gso_type) { int feature = gso_type << NETIF_F_GSO_SHIFT; return (features & feature) == feature; } -static inline int skb_gso_ok(struct sk_buff *skb, int features) +static inline int skb_gso_ok(struct sk_buff *skb, u32 features) { return net_gso_ok(features, skb_shinfo(skb)->gso_type) && (!skb_has_frag_list(skb) || (features & NETIF_F_FRAGLIST)); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6e946da9d1d..31f02d0b46a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1877,7 +1877,7 @@ extern void skb_split(struct sk_buff *skb, extern int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); -extern struct sk_buff *skb_segment(struct sk_buff *skb, int features); +extern struct sk_buff *skb_segment(struct sk_buff *skb, u32 features); static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, int len, void *buffer) diff --git a/include/net/protocol.h b/include/net/protocol.h index dc07495bce4..6f7eb800974 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -38,7 +38,7 @@ struct net_protocol { void (*err_handler)(struct sk_buff *skb, u32 info); int (*gso_send_check)(struct sk_buff *skb); struct sk_buff *(*gso_segment)(struct sk_buff *skb, - int features); + u32 features); struct sk_buff **(*gro_receive)(struct sk_buff **head, struct sk_buff *skb); int (*gro_complete)(struct sk_buff *skb); @@ -57,7 +57,7 @@ struct inet6_protocol { int (*gso_send_check)(struct sk_buff *skb); struct sk_buff *(*gso_segment)(struct sk_buff *skb, - int features); + u32 features); struct sk_buff **(*gro_receive)(struct sk_buff **head, struct sk_buff *skb); int (*gro_complete)(struct sk_buff *skb); diff --git a/include/net/tcp.h b/include/net/tcp.h index 38509f04738..917911165e3 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1404,7 +1404,7 @@ extern struct request_sock_ops tcp6_request_sock_ops; extern void tcp_v4_destroy_sock(struct sock *sk); extern int tcp_v4_gso_send_check(struct sk_buff *skb); -extern struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features); +extern struct sk_buff *tcp_tso_segment(struct sk_buff *skb, u32 features); extern struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb); extern struct sk_buff **tcp4_gro_receive(struct sk_buff **head, diff --git a/include/net/udp.h b/include/net/udp.h index bb967dd59bf..e82f3a8c0f8 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -245,5 +245,5 @@ extern void udp4_proc_exit(void); extern void udp_init(void); extern int udp4_ufo_send_check(struct sk_buff *skb); -extern struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, int features); +extern struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, u32 features); #endif /* _UDP_H */ diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 6e64f7c6a2e..7850412f52b 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -327,7 +327,7 @@ static void vlan_sync_address(struct net_device *dev, static void vlan_transfer_features(struct net_device *dev, struct net_device *vlandev) { - unsigned long old_features = vlandev->features; + u32 old_features = vlandev->features; vlandev->features &= ~dev->vlan_features; vlandev->features |= dev->features & dev->vlan_features; diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index d9d1e2bac1d..52ce4a30f8b 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -365,7 +365,7 @@ int br_min_mtu(const struct net_bridge *br) void br_features_recompute(struct net_bridge *br) { struct net_bridge_port *p; - unsigned long features, mask; + u32 features, mask; features = mask = br->feature_mask; if (list_empty(&br->port_list)) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 84aac7734bf..9f22898c535 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -182,7 +182,7 @@ struct net_bridge struct br_cpu_netstats __percpu *stats; spinlock_t hash_lock; struct hlist_head hash[BR_HASH_SIZE]; - unsigned long feature_mask; + u32 feature_mask; #ifdef CONFIG_BRIDGE_NETFILTER struct rtable fake_rtable; bool nf_call_iptables; diff --git a/net/core/dev.c b/net/core/dev.c index ad374189858..7103f89fde0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1858,7 +1858,7 @@ EXPORT_SYMBOL(skb_checksum_help); * It may return NULL if the skb requires no segmentation. This is * only possible when GSO is used for verifying header integrity. */ -struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) +struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features) { struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); struct packet_type *ptype; @@ -2046,7 +2046,7 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol) protocol == htons(ETH_P_FCOE))); } -static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features) +static u32 harmonize_features(struct sk_buff *skb, __be16 protocol, u32 features) { if (!can_checksum_protocol(features, protocol)) { features &= ~NETIF_F_ALL_CSUM; @@ -2058,10 +2058,10 @@ static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features return features; } -int netif_skb_features(struct sk_buff *skb) +u32 netif_skb_features(struct sk_buff *skb) { __be16 protocol = skb->protocol; - int features = skb->dev->features; + u32 features = skb->dev->features; if (protocol == htons(ETH_P_8021Q)) { struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; @@ -2106,7 +2106,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, int rc = NETDEV_TX_OK; if (likely(!skb->next)) { - int features; + u32 features; /* * If device doesnt need skb->dst, release it right now while @@ -5213,7 +5213,7 @@ static void rollback_registered(struct net_device *dev) rollback_registered_many(&single); } -unsigned long netdev_fix_features(unsigned long features, const char *name) +u32 netdev_fix_features(u32 features, const char *name) { /* Fix illegal checksum combinations */ if ((features & NETIF_F_HW_CSUM) && @@ -6143,8 +6143,7 @@ static int dev_cpu_callback(struct notifier_block *nfb, * @one to the master device with current feature set @all. Will not * enable anything that is off in @mask. Returns the new feature set. */ -unsigned long netdev_increment_features(unsigned long all, unsigned long one, - unsigned long mask) +u32 netdev_increment_features(u32 all, u32 one, u32 mask) { /* If device needs checksumming, downgrade to it. */ if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM)) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 17741782a34..bd1af99e112 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1458,7 +1458,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) void __user *useraddr = ifr->ifr_data; u32 ethcmd; int rc; - unsigned long old_features; + u32 old_features; if (!dev || !netif_device_present(dev)) return -ENODEV; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index e23c01be5a5..81367ccf330 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -99,7 +99,7 @@ NETDEVICE_SHOW(addr_assign_type, fmt_dec); NETDEVICE_SHOW(addr_len, fmt_dec); NETDEVICE_SHOW(iflink, fmt_dec); NETDEVICE_SHOW(ifindex, fmt_dec); -NETDEVICE_SHOW(features, fmt_long_hex); +NETDEVICE_SHOW(features, fmt_hex); NETDEVICE_SHOW(type, fmt_dec); NETDEVICE_SHOW(link_mode, fmt_dec); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d31bb36ae0d..436c4c43924 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2497,7 +2497,7 @@ EXPORT_SYMBOL_GPL(skb_pull_rcsum); * a pointer to the first in a list of new skbs for the segments. * In case of error it returns ERR_PTR(err). */ -struct sk_buff *skb_segment(struct sk_buff *skb, int features) +struct sk_buff *skb_segment(struct sk_buff *skb, u32 features) { struct sk_buff *segs = NULL; struct sk_buff *tail = NULL; @@ -2507,7 +2507,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) unsigned int offset = doffset; unsigned int headroom; unsigned int len; - int sg = features & NETIF_F_SG; + int sg = !!(features & NETIF_F_SG); int nfrags = skb_shinfo(skb)->nr_frags; int err = -ENOMEM; int i = 0; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index f2b61107df6..e5e2d9d64ab 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1215,7 +1215,7 @@ out: return err; } -static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features) +static struct sk_buff *inet_gso_segment(struct sk_buff *skb, u32 features) { struct sk_buff *segs = ERR_PTR(-EINVAL); struct iphdr *iph; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6c11eece262..f9867d2dbef 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2653,7 +2653,7 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname, EXPORT_SYMBOL(compat_tcp_getsockopt); #endif -struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) +struct sk_buff *tcp_tso_segment(struct sk_buff *skb, u32 features) { struct sk_buff *segs = ERR_PTR(-EINVAL); struct tcphdr *th; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 8157b17959e..d37baaa1dbe 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2199,7 +2199,7 @@ int udp4_ufo_send_check(struct sk_buff *skb) return 0; } -struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, int features) +struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, u32 features) { struct sk_buff *segs = ERR_PTR(-EINVAL); unsigned int mss; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 978e80e2c4a..3194aa90987 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -772,7 +772,7 @@ out: return err; } -static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features) +static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, u32 features) { struct sk_buff *segs = ERR_PTR(-EINVAL); struct ipv6hdr *ipv6h; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 9a009c66c8a..a419a787eb6 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1299,7 +1299,7 @@ static int udp6_ufo_send_check(struct sk_buff *skb) return 0; } -static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, int features) +static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, u32 features) { struct sk_buff *segs = ERR_PTR(-EINVAL); unsigned int mss; -- cgit v1.2.3-70-g09d2 From acd1130e8793fb150fb522da8ec51675839eb4b1 Mon Sep 17 00:00:00 2001 From: Michał Mirosław Date: Mon, 24 Jan 2011 15:45:15 -0800 Subject: net: reduce and unify printk level in netdev_fix_features() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reduce printk() levels to KERN_INFO in netdev_fix_features() as this will be used by ethtool and might spam dmesg unnecessarily. This converts the function to use netdev_info() instead of plain printk(). As a side effect, bonding and bridge devices will now log dropped features on every slave device change. Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 4 ++-- include/linux/netdevice.h | 2 +- net/bridge/br_if.c | 2 +- net/core/dev.c | 33 ++++++++++++--------------------- 4 files changed, 16 insertions(+), 25 deletions(-) (limited to 'net/core') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 7047b406b8b..1df9f0ea918 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1400,8 +1400,8 @@ static int bond_compute_features(struct bonding *bond) done: features |= (bond_dev->features & BOND_VLAN_FEATURES); - bond_dev->features = netdev_fix_features(features, NULL); - bond_dev->vlan_features = netdev_fix_features(vlan_features, NULL); + bond_dev->features = netdev_fix_features(bond_dev, features); + bond_dev->vlan_features = netdev_fix_features(bond_dev, vlan_features); bond_dev->hard_header_len = max_hard_header_len; return 0; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0de3c59720f..8858422c5c5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2399,7 +2399,7 @@ extern char *netdev_drivername(const struct net_device *dev, char *buffer, int l extern void linkwatch_run_queue(void); u32 netdev_increment_features(u32 all, u32 one, u32 mask); -u32 netdev_fix_features(u32 features, const char *name); +u32 netdev_fix_features(struct net_device *dev, u32 features); void netif_stacked_transfer_operstate(const struct net_device *rootdev, struct net_device *dev); diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 52ce4a30f8b..2a6801d8b72 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -379,7 +379,7 @@ void br_features_recompute(struct net_bridge *br) } done: - br->dev->features = netdev_fix_features(features, NULL); + br->dev->features = netdev_fix_features(br->dev, features); } /* called with RTNL */ diff --git a/net/core/dev.c b/net/core/dev.c index 7103f89fde0..1b4c07fe295 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5213,58 +5213,49 @@ static void rollback_registered(struct net_device *dev) rollback_registered_many(&single); } -u32 netdev_fix_features(u32 features, const char *name) +u32 netdev_fix_features(struct net_device *dev, u32 features) { /* Fix illegal checksum combinations */ if ((features & NETIF_F_HW_CSUM) && (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { - if (name) - printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n", - name); + netdev_info(dev, "mixed HW and IP checksum settings.\n"); features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); } if ((features & NETIF_F_NO_CSUM) && (features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { - if (name) - printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n", - name); + netdev_info(dev, "mixed no checksumming and other settings.\n"); features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM); } /* Fix illegal SG+CSUM combinations. */ if ((features & NETIF_F_SG) && !(features & NETIF_F_ALL_CSUM)) { - if (name) - printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no " - "checksum feature.\n", name); + netdev_info(dev, + "Dropping NETIF_F_SG since no checksum feature.\n"); features &= ~NETIF_F_SG; } /* TSO requires that SG is present as well. */ if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) { - if (name) - printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no " - "SG feature.\n", name); + netdev_info(dev, "Dropping NETIF_F_TSO since no SG feature.\n"); features &= ~NETIF_F_TSO; } + /* UFO needs SG and checksumming */ if (features & NETIF_F_UFO) { /* maybe split UFO into V4 and V6? */ if (!((features & NETIF_F_GEN_CSUM) || (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { - if (name) - printk(KERN_ERR "%s: Dropping NETIF_F_UFO " - "since no checksum offload features.\n", - name); + netdev_info(dev, + "Dropping NETIF_F_UFO since no checksum offload features.\n"); features &= ~NETIF_F_UFO; } if (!(features & NETIF_F_SG)) { - if (name) - printk(KERN_ERR "%s: Dropping NETIF_F_UFO " - "since no NETIF_F_SG feature.\n", name); + netdev_info(dev, + "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n"); features &= ~NETIF_F_UFO; } } @@ -5407,7 +5398,7 @@ int register_netdevice(struct net_device *dev) if (dev->iflink == -1) dev->iflink = dev->ifindex; - dev->features = netdev_fix_features(dev->features, dev->name); + dev->features = netdev_fix_features(dev, dev->features); /* Enable software GSO if SG is supported. */ if (dev->features & NETIF_F_SG) -- cgit v1.2.3-70-g09d2 From a512b92b3af4b03fc6834617a042dc85fbd4e34e Mon Sep 17 00:00:00 2001 From: Vlad Dogaru Date: Mon, 24 Jan 2011 03:37:29 +0000 Subject: net: add sysfs entry for device group The group of a network device can be queried or changed from userspace using sysfs. For example, considering sysfs mounted in /sys, one can change the group that interface lo belongs to: echo 1 > /sys/class/net/lo/group Signed-off-by: Vlad Dogaru Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'net/core') diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 81367ccf330..2e4a393dfc3 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -295,6 +295,20 @@ static ssize_t show_ifalias(struct device *dev, return ret; } +NETDEVICE_SHOW(group, fmt_dec); + +static int change_group(struct net_device *net, unsigned long new_group) +{ + dev_set_group(net, (int) new_group); + return 0; +} + +static ssize_t store_group(struct device *dev, struct device_attribute *attr, + const char *buf, size_t len) +{ + return netdev_store(dev, attr, buf, len, change_group); +} + static struct device_attribute net_class_attributes[] = { __ATTR(addr_assign_type, S_IRUGO, show_addr_assign_type, NULL), __ATTR(addr_len, S_IRUGO, show_addr_len, NULL), @@ -316,6 +330,7 @@ static struct device_attribute net_class_attributes[] = { __ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags), __ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len, store_tx_queue_len), + __ATTR(group, S_IRUGO | S_IWUSR, show_group, store_group), {} }; -- cgit v1.2.3-70-g09d2 From 26ad787962ef84677a48c56039d3c9769b84f847 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Jan 2011 13:26:05 -0800 Subject: pktgen: speedup fragmented skbs We spend lot of time clearing pages in pktgen. (Or not clearing them on ipv6 and leaking kernel memory) Since we dont modify them, we can use one zeroed page, and get references on it. This page can use NUMA affinity as well. Define pktgen_finalize_skb() helper, used both in ipv4 and ipv6 Results using skbs with one frag : Before patch : Result: OK: 608980458(c608978520+d1938) nsec, 1000000000 (100byte,1frags) 1642088pps 1313Mb/sec (1313670400bps) errors: 0 After patch : Result: OK: 345285014(c345283891+d1123) nsec, 1000000000 (100byte,1frags) 2896158pps 2316Mb/sec (2316926400bps) errors: 0 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/pktgen.c | 234 ++++++++++++++++++++++-------------------------------- 1 file changed, 93 insertions(+), 141 deletions(-) (limited to 'net/core') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index a9e7fc4c461..d73b77adb67 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -251,6 +251,7 @@ struct pktgen_dev { int max_pkt_size; /* = ETH_ZLEN; */ int pkt_overhead; /* overhead for MPLS, VLANs, IPSEC etc */ int nfrags; + struct page *page; u64 delay; /* nano-seconds */ __u64 count; /* Default No packets to send */ @@ -1134,6 +1135,10 @@ static ssize_t pktgen_if_write(struct file *file, if (node_possible(value)) { pkt_dev->node = value; sprintf(pg_result, "OK: node=%d", pkt_dev->node); + if (pkt_dev->page) { + put_page(pkt_dev->page); + pkt_dev->page = NULL; + } } else sprintf(pg_result, "ERROR: node not possible"); @@ -2605,6 +2610,90 @@ static inline __be16 build_tci(unsigned int id, unsigned int cfi, return htons(id | (cfi << 12) | (prio << 13)); } +static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, + int datalen) +{ + struct timeval timestamp; + struct pktgen_hdr *pgh; + + pgh = (struct pktgen_hdr *)skb_put(skb, sizeof(*pgh)); + datalen -= sizeof(*pgh); + + if (pkt_dev->nfrags <= 0) { + pgh = (struct pktgen_hdr *)skb_put(skb, datalen); + memset(pgh + 1, 0, datalen); + } else { + int frags = pkt_dev->nfrags; + int i, len; + + + if (frags > MAX_SKB_FRAGS) + frags = MAX_SKB_FRAGS; + len = datalen - frags * PAGE_SIZE; + if (len > 0) { + memset(skb_put(skb, len), 0, len); + datalen = frags * PAGE_SIZE; + } + + i = 0; + while (datalen > 0) { + if (unlikely(!pkt_dev->page)) { + int node = numa_node_id(); + + if (pkt_dev->node >= 0 && (pkt_dev->flags & F_NODE)) + node = pkt_dev->node; + pkt_dev->page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); + if (!pkt_dev->page) + break; + } + skb_shinfo(skb)->frags[i].page = pkt_dev->page; + get_page(pkt_dev->page); + skb_shinfo(skb)->frags[i].page_offset = 0; + skb_shinfo(skb)->frags[i].size = + (datalen < PAGE_SIZE ? datalen : PAGE_SIZE); + datalen -= skb_shinfo(skb)->frags[i].size; + skb->len += skb_shinfo(skb)->frags[i].size; + skb->data_len += skb_shinfo(skb)->frags[i].size; + i++; + skb_shinfo(skb)->nr_frags = i; + } + + while (i < frags) { + int rem; + + if (i == 0) + break; + + rem = skb_shinfo(skb)->frags[i - 1].size / 2; + if (rem == 0) + break; + + skb_shinfo(skb)->frags[i - 1].size -= rem; + + skb_shinfo(skb)->frags[i] = + skb_shinfo(skb)->frags[i - 1]; + get_page(skb_shinfo(skb)->frags[i].page); + skb_shinfo(skb)->frags[i].page = + skb_shinfo(skb)->frags[i - 1].page; + skb_shinfo(skb)->frags[i].page_offset += + skb_shinfo(skb)->frags[i - 1].size; + skb_shinfo(skb)->frags[i].size = rem; + i++; + skb_shinfo(skb)->nr_frags = i; + } + } + + /* Stamp the time, and sequence number, + * convert them to network byte order + */ + pgh->pgh_magic = htonl(PKTGEN_MAGIC); + pgh->seq_num = htonl(pkt_dev->seq_num); + + do_gettimeofday(×tamp); + pgh->tv_sec = htonl(timestamp.tv_sec); + pgh->tv_usec = htonl(timestamp.tv_usec); +} + static struct sk_buff *fill_packet_ipv4(struct net_device *odev, struct pktgen_dev *pkt_dev) { @@ -2613,7 +2702,6 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, struct udphdr *udph; int datalen, iplen; struct iphdr *iph; - struct pktgen_hdr *pgh = NULL; __be16 protocol = htons(ETH_P_IP); __be32 *mpls; __be16 *vlan_tci = NULL; /* Encapsulates priority and VLAN ID */ @@ -2729,76 +2817,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, pkt_dev->pkt_overhead); skb->dev = odev; skb->pkt_type = PACKET_HOST; - - if (pkt_dev->nfrags <= 0) { - pgh = (struct pktgen_hdr *)skb_put(skb, datalen); - memset(pgh + 1, 0, datalen - sizeof(struct pktgen_hdr)); - } else { - int frags = pkt_dev->nfrags; - int i, len; - - pgh = (struct pktgen_hdr *)(((char *)(udph)) + 8); - - if (frags > MAX_SKB_FRAGS) - frags = MAX_SKB_FRAGS; - if (datalen > frags * PAGE_SIZE) { - len = datalen - frags * PAGE_SIZE; - memset(skb_put(skb, len), 0, len); - datalen = frags * PAGE_SIZE; - } - - i = 0; - while (datalen > 0) { - struct page *page = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0); - skb_shinfo(skb)->frags[i].page = page; - skb_shinfo(skb)->frags[i].page_offset = 0; - skb_shinfo(skb)->frags[i].size = - (datalen < PAGE_SIZE ? datalen : PAGE_SIZE); - datalen -= skb_shinfo(skb)->frags[i].size; - skb->len += skb_shinfo(skb)->frags[i].size; - skb->data_len += skb_shinfo(skb)->frags[i].size; - i++; - skb_shinfo(skb)->nr_frags = i; - } - - while (i < frags) { - int rem; - - if (i == 0) - break; - - rem = skb_shinfo(skb)->frags[i - 1].size / 2; - if (rem == 0) - break; - - skb_shinfo(skb)->frags[i - 1].size -= rem; - - skb_shinfo(skb)->frags[i] = - skb_shinfo(skb)->frags[i - 1]; - get_page(skb_shinfo(skb)->frags[i].page); - skb_shinfo(skb)->frags[i].page = - skb_shinfo(skb)->frags[i - 1].page; - skb_shinfo(skb)->frags[i].page_offset += - skb_shinfo(skb)->frags[i - 1].size; - skb_shinfo(skb)->frags[i].size = rem; - i++; - skb_shinfo(skb)->nr_frags = i; - } - } - - /* Stamp the time, and sequence number, - * convert them to network byte order - */ - if (pgh) { - struct timeval timestamp; - - pgh->pgh_magic = htonl(PKTGEN_MAGIC); - pgh->seq_num = htonl(pkt_dev->seq_num); - - do_gettimeofday(×tamp); - pgh->tv_sec = htonl(timestamp.tv_sec); - pgh->tv_usec = htonl(timestamp.tv_usec); - } + pktgen_finalize_skb(pkt_dev, skb, datalen); #ifdef CONFIG_XFRM if (!process_ipsec(pkt_dev, skb, protocol)) @@ -2980,7 +2999,6 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, struct udphdr *udph; int datalen; struct ipv6hdr *iph; - struct pktgen_hdr *pgh = NULL; __be16 protocol = htons(ETH_P_IPV6); __be32 *mpls; __be16 *vlan_tci = NULL; /* Encapsulates priority and VLAN ID */ @@ -3083,75 +3101,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, skb->dev = odev; skb->pkt_type = PACKET_HOST; - if (pkt_dev->nfrags <= 0) - pgh = (struct pktgen_hdr *)skb_put(skb, datalen); - else { - int frags = pkt_dev->nfrags; - int i; - - pgh = (struct pktgen_hdr *)(((char *)(udph)) + 8); - - if (frags > MAX_SKB_FRAGS) - frags = MAX_SKB_FRAGS; - if (datalen > frags * PAGE_SIZE) { - skb_put(skb, datalen - frags * PAGE_SIZE); - datalen = frags * PAGE_SIZE; - } - - i = 0; - while (datalen > 0) { - struct page *page = alloc_pages(GFP_KERNEL, 0); - skb_shinfo(skb)->frags[i].page = page; - skb_shinfo(skb)->frags[i].page_offset = 0; - skb_shinfo(skb)->frags[i].size = - (datalen < PAGE_SIZE ? datalen : PAGE_SIZE); - datalen -= skb_shinfo(skb)->frags[i].size; - skb->len += skb_shinfo(skb)->frags[i].size; - skb->data_len += skb_shinfo(skb)->frags[i].size; - i++; - skb_shinfo(skb)->nr_frags = i; - } - - while (i < frags) { - int rem; - - if (i == 0) - break; - - rem = skb_shinfo(skb)->frags[i - 1].size / 2; - if (rem == 0) - break; - - skb_shinfo(skb)->frags[i - 1].size -= rem; - - skb_shinfo(skb)->frags[i] = - skb_shinfo(skb)->frags[i - 1]; - get_page(skb_shinfo(skb)->frags[i].page); - skb_shinfo(skb)->frags[i].page = - skb_shinfo(skb)->frags[i - 1].page; - skb_shinfo(skb)->frags[i].page_offset += - skb_shinfo(skb)->frags[i - 1].size; - skb_shinfo(skb)->frags[i].size = rem; - i++; - skb_shinfo(skb)->nr_frags = i; - } - } - - /* Stamp the time, and sequence number, - * convert them to network byte order - * should we update cloned packets too ? - */ - if (pgh) { - struct timeval timestamp; - - pgh->pgh_magic = htonl(PKTGEN_MAGIC); - pgh->seq_num = htonl(pkt_dev->seq_num); - - do_gettimeofday(×tamp); - pgh->tv_sec = htonl(timestamp.tv_sec); - pgh->tv_usec = htonl(timestamp.tv_usec); - } - /* pkt_dev->seq_num++; FF: you really mean this? */ + pktgen_finalize_skb(pkt_dev, skb, datalen); return skb; } @@ -3884,6 +3834,8 @@ static int pktgen_remove_device(struct pktgen_thread *t, free_SAs(pkt_dev); #endif vfree(pkt_dev->flows); + if (pkt_dev->page) + put_page(pkt_dev->page); kfree(pkt_dev); return 0; } -- cgit v1.2.3-70-g09d2 From 62fa8a846d7de4b299232e330c74b7783539df76 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 26 Jan 2011 20:51:05 -0800 Subject: net: Implement read-only protection and COW'ing of metrics. Routing metrics are now copy-on-write. Initially a route entry points it's metrics at a read-only location. If a routing table entry exists, it will point there. Else it will point at the all zero metric place-holder called 'dst_default_metrics'. The writeability state of the metrics is stored in the low bits of the metrics pointer, we have two bits left to spare if we want to store more states. For the initial implementation, COW is implemented simply via kmalloc. However future enhancements will change this to place the writable metrics somewhere else, in order to increase sharing. Very likely this "somewhere else" will be the inetpeer cache. Note also that this means that metrics updates may transiently fail if we cannot COW the metrics successfully. But even by itself, this patch should decrease memory usage and increase cache locality especially for routing workloads. In those cases the read-only metric copies stay in place and never get written to. TCP workloads where metrics get updated, and those rare cases where PMTU triggers occur, will take a very slight performance hit. But that hit will be alleviated when the long-term writable metrics move to a more sharable location. Since the metrics storage went from a u32 array of RTAX_MAX entries to what is essentially a pointer, some retooling of the dst_entry layout was necessary. Most importantly, we need to preserve the alignment of the reference count so that it doesn't share cache lines with the read-mostly state, as per Eric Dumazet's alignment assertion checks. The only non-trivial bit here is the move of the 'flags' member into the writeable cacheline. This is OK since we are always accessing the flags around the same moment when we made a modification to the reference count. Signed-off-by: David S. Miller --- include/net/dst.h | 114 ++++++++++++++++++++++++++++++++---------------- include/net/dst_ops.h | 1 + include/net/route.h | 2 + net/core/dst.c | 39 +++++++++++++++++ net/decnet/dn_route.c | 18 +++++--- net/ipv4/route.c | 45 ++++++++++++++++++- net/ipv4/xfrm4_policy.c | 4 ++ net/ipv6/route.c | 15 +++++-- net/ipv6/xfrm6_policy.c | 2 + 9 files changed, 194 insertions(+), 46 deletions(-) (limited to 'net/core') diff --git a/include/net/dst.h b/include/net/dst.h index be5a0d4c491..94a8c234ea2 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -40,24 +40,10 @@ struct dst_entry { struct rcu_head rcu_head; struct dst_entry *child; struct net_device *dev; - short error; - short obsolete; - int flags; -#define DST_HOST 0x0001 -#define DST_NOXFRM 0x0002 -#define DST_NOPOLICY 0x0004 -#define DST_NOHASH 0x0008 -#define DST_NOCACHE 0x0010 + struct dst_ops *ops; + unsigned long _metrics; unsigned long expires; - - unsigned short header_len; /* more space at head required */ - unsigned short trailer_len; /* space to reserve at tail */ - - unsigned int rate_tokens; - unsigned long rate_last; /* rate limiting for ICMP */ - struct dst_entry *path; - struct neighbour *neighbour; struct hh_cache *hh; #ifdef CONFIG_XFRM @@ -68,17 +54,16 @@ struct dst_entry { int (*input)(struct sk_buff*); int (*output)(struct sk_buff*); - struct dst_ops *ops; - - u32 _metrics[RTAX_MAX]; - + short error; + short obsolete; + unsigned short header_len; /* more space at head required */ + unsigned short trailer_len; /* space to reserve at tail */ #ifdef CONFIG_IP_ROUTE_CLASSID __u32 tclassid; #else __u32 __pad2; #endif - /* * Align __refcnt to a 64 bytes alignment * (L1_CACHE_SIZE would be too much) @@ -93,6 +78,14 @@ struct dst_entry { atomic_t __refcnt; /* client references */ int __use; unsigned long lastuse; + unsigned long rate_last; /* rate limiting for ICMP */ + unsigned int rate_tokens; + int flags; +#define DST_HOST 0x0001 +#define DST_NOXFRM 0x0002 +#define DST_NOPOLICY 0x0004 +#define DST_NOHASH 0x0008 +#define DST_NOCACHE 0x0010 union { struct dst_entry *next; struct rtable __rcu *rt_next; @@ -103,10 +96,69 @@ struct dst_entry { #ifdef __KERNEL__ +extern u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old); + +#define DST_METRICS_READ_ONLY 0x1UL +#define __DST_METRICS_PTR(Y) \ + ((u32 *)((Y) & ~DST_METRICS_READ_ONLY)) +#define DST_METRICS_PTR(X) __DST_METRICS_PTR((X)->_metrics) + +static inline bool dst_metrics_read_only(const struct dst_entry *dst) +{ + return dst->_metrics & DST_METRICS_READ_ONLY; +} + +extern void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old); + +static inline void dst_destroy_metrics_generic(struct dst_entry *dst) +{ + unsigned long val = dst->_metrics; + if (!(val & DST_METRICS_READ_ONLY)) + __dst_destroy_metrics_generic(dst, val); +} + +static inline u32 *dst_metrics_write_ptr(struct dst_entry *dst) +{ + unsigned long p = dst->_metrics; + + if (p & DST_METRICS_READ_ONLY) + return dst->ops->cow_metrics(dst, p); + return __DST_METRICS_PTR(p); +} + +/* This may only be invoked before the entry has reached global + * visibility. + */ +static inline void dst_init_metrics(struct dst_entry *dst, + const u32 *src_metrics, + bool read_only) +{ + dst->_metrics = ((unsigned long) src_metrics) | + (read_only ? DST_METRICS_READ_ONLY : 0); +} + +static inline void dst_copy_metrics(struct dst_entry *dest, const struct dst_entry *src) +{ + u32 *dst_metrics = dst_metrics_write_ptr(dest); + + if (dst_metrics) { + u32 *src_metrics = DST_METRICS_PTR(src); + + memcpy(dst_metrics, src_metrics, RTAX_MAX * sizeof(u32)); + } +} + +static inline u32 *dst_metrics_ptr(struct dst_entry *dst) +{ + return DST_METRICS_PTR(dst); +} + static inline u32 dst_metric_raw(const struct dst_entry *dst, const int metric) { - return dst->_metrics[metric-1]; + u32 *p = DST_METRICS_PTR(dst); + + return p[metric-1]; } static inline u32 @@ -131,22 +183,10 @@ dst_metric_advmss(const struct dst_entry *dst) static inline void dst_metric_set(struct dst_entry *dst, int metric, u32 val) { - dst->_metrics[metric-1] = val; -} - -static inline void dst_import_metrics(struct dst_entry *dst, const u32 *src_metrics) -{ - memcpy(dst->_metrics, src_metrics, RTAX_MAX * sizeof(u32)); -} + u32 *p = dst_metrics_write_ptr(dst); -static inline void dst_copy_metrics(struct dst_entry *dest, const struct dst_entry *src) -{ - dst_import_metrics(dest, src->_metrics); -} - -static inline u32 *dst_metrics_ptr(struct dst_entry *dst) -{ - return dst->_metrics; + if (p) + p[metric-1] = val; } static inline u32 diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index 21a320b8708..dc074632894 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -18,6 +18,7 @@ struct dst_ops { struct dst_entry * (*check)(struct dst_entry *, __u32 cookie); unsigned int (*default_advmss)(const struct dst_entry *); unsigned int (*default_mtu)(const struct dst_entry *); + u32 * (*cow_metrics)(struct dst_entry *, unsigned long); void (*destroy)(struct dst_entry *); void (*ifdown)(struct dst_entry *, struct net_device *dev, int how); diff --git a/include/net/route.h b/include/net/route.h index 93e10c453f6..5677cbf0c6e 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -49,6 +49,7 @@ struct fib_nh; struct inet_peer; +struct fib_info; struct rtable { struct dst_entry dst; @@ -69,6 +70,7 @@ struct rtable { /* Miscellaneous cached information */ __be32 rt_spec_dst; /* RFC1122 specific destination */ struct inet_peer *peer; /* long-living peer info */ + struct fib_info *fi; /* for client ref to shared metrics */ }; static inline bool rt_is_input_route(struct rtable *rt) diff --git a/net/core/dst.c b/net/core/dst.c index b99c7c7ffce..57889350570 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -164,6 +164,8 @@ int dst_discard(struct sk_buff *skb) } EXPORT_SYMBOL(dst_discard); +static const u32 dst_default_metrics[RTAX_MAX]; + void *dst_alloc(struct dst_ops *ops) { struct dst_entry *dst; @@ -180,6 +182,7 @@ void *dst_alloc(struct dst_ops *ops) dst->lastuse = jiffies; dst->path = dst; dst->input = dst->output = dst_discard; + dst_init_metrics(dst, dst_default_metrics, true); #if RT_CACHE_DEBUG >= 2 atomic_inc(&dst_total); #endif @@ -282,6 +285,42 @@ void dst_release(struct dst_entry *dst) } EXPORT_SYMBOL(dst_release); +u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old) +{ + u32 *p = kmalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC); + + if (p) { + u32 *old_p = __DST_METRICS_PTR(old); + unsigned long prev, new; + + memcpy(p, old_p, sizeof(u32) * RTAX_MAX); + + new = (unsigned long) p; + prev = cmpxchg(&dst->_metrics, old, new); + + if (prev != old) { + kfree(p); + p = __DST_METRICS_PTR(prev); + if (prev & DST_METRICS_READ_ONLY) + p = NULL; + } + } + return p; +} +EXPORT_SYMBOL(dst_cow_metrics_generic); + +/* Caller asserts that dst_metrics_read_only(dst) is false. */ +void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old) +{ + unsigned long prev, new; + + new = (unsigned long) dst_default_metrics; + prev = cmpxchg(&dst->_metrics, old, new); + if (prev == old) + kfree(__DST_METRICS_PTR(old)); +} +EXPORT_SYMBOL(__dst_destroy_metrics_generic); + /** * skb_dst_set_noref - sets skb dst, without a reference * @skb: buffer diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 5e636365d33..42c9c62d341 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -112,6 +112,7 @@ static int dn_dst_gc(struct dst_ops *ops); static struct dst_entry *dn_dst_check(struct dst_entry *, __u32); static unsigned int dn_dst_default_advmss(const struct dst_entry *dst); static unsigned int dn_dst_default_mtu(const struct dst_entry *dst); +static void dn_dst_destroy(struct dst_entry *); static struct dst_entry *dn_dst_negative_advice(struct dst_entry *); static void dn_dst_link_failure(struct sk_buff *); static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu); @@ -133,11 +134,18 @@ static struct dst_ops dn_dst_ops = { .check = dn_dst_check, .default_advmss = dn_dst_default_advmss, .default_mtu = dn_dst_default_mtu, + .cow_metrics = dst_cow_metrics_generic, + .destroy = dn_dst_destroy, .negative_advice = dn_dst_negative_advice, .link_failure = dn_dst_link_failure, .update_pmtu = dn_dst_update_pmtu, }; +static void dn_dst_destroy(struct dst_entry *dst) +{ + dst_destroy_metrics_generic(dst); +} + static __inline__ unsigned dn_hash(__le16 src, __le16 dst) { __u16 tmp = (__u16 __force)(src ^ dst); @@ -814,14 +822,14 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) { struct dn_fib_info *fi = res->fi; struct net_device *dev = rt->dst.dev; + unsigned int mss_metric; struct neighbour *n; - unsigned int metric; if (fi) { if (DN_FIB_RES_GW(*res) && DN_FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) rt->rt_gateway = DN_FIB_RES_GW(*res); - dst_import_metrics(&rt->dst, fi->fib_metrics); + dst_init_metrics(&rt->dst, fi->fib_metrics, true); } rt->rt_type = res->type; @@ -834,10 +842,10 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) if (dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu) dst_metric_set(&rt->dst, RTAX_MTU, rt->dst.dev->mtu); - metric = dst_metric_raw(&rt->dst, RTAX_ADVMSS); - if (metric) { + mss_metric = dst_metric_raw(&rt->dst, RTAX_ADVMSS); + if (mss_metric) { unsigned int mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst)); - if (metric > mss) + if (mss_metric > mss) dst_metric_set(&rt->dst, RTAX_ADVMSS, mss); } return 0; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 3e5b7cc2db4..980030d4e4a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -152,6 +152,36 @@ static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, { } +static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old) +{ + u32 *p = kmalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC); + + if (p) { + u32 *old_p = __DST_METRICS_PTR(old); + unsigned long prev, new; + + memcpy(p, old_p, sizeof(u32) * RTAX_MAX); + + new = (unsigned long) p; + prev = cmpxchg(&dst->_metrics, old, new); + + if (prev != old) { + kfree(p); + p = __DST_METRICS_PTR(prev); + if (prev & DST_METRICS_READ_ONLY) + p = NULL; + } else { + struct rtable *rt = (struct rtable *) dst; + + if (rt->fi) { + fib_info_put(rt->fi); + rt->fi = NULL; + } + } + } + return p; +} + static struct dst_ops ipv4_dst_ops = { .family = AF_INET, .protocol = cpu_to_be16(ETH_P_IP), @@ -159,6 +189,7 @@ static struct dst_ops ipv4_dst_ops = { .check = ipv4_dst_check, .default_advmss = ipv4_default_advmss, .default_mtu = ipv4_default_mtu, + .cow_metrics = ipv4_cow_metrics, .destroy = ipv4_dst_destroy, .ifdown = ipv4_dst_ifdown, .negative_advice = ipv4_negative_advice, @@ -1441,6 +1472,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, if (rt->peer) atomic_inc(&rt->peer->refcnt); + if (rt->fi) + atomic_inc(&rt->fi->fib_clntref); if (arp_bind_neighbour(&rt->dst) || !(rt->dst.neighbour->nud_state & @@ -1720,6 +1753,11 @@ static void ipv4_dst_destroy(struct dst_entry *dst) struct rtable *rt = (struct rtable *) dst; struct inet_peer *peer = rt->peer; + dst_destroy_metrics_generic(dst); + if (rt->fi) { + fib_info_put(rt->fi); + rt->fi = NULL; + } if (peer) { rt->peer = NULL; inet_putpeer(peer); @@ -1824,7 +1862,9 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) rt->rt_gateway = FIB_RES_GW(*res); - dst_import_metrics(dst, fi->fib_metrics); + rt->fi = fi; + atomic_inc(&fi->fib_clntref); + dst_init_metrics(dst, fi->fib_metrics, true); #ifdef CONFIG_IP_ROUTE_CLASSID dst->tclassid = FIB_RES_NH(*res).nh_tclassid; #endif @@ -2752,6 +2792,9 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi rt->peer = ort->peer; if (rt->peer) atomic_inc(&rt->peer->refcnt); + rt->fi = ort->fi; + if (rt->fi) + atomic_inc(&rt->fi->fib_clntref); dst_free(new); } diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index b057d40adde..19fbdec6baa 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -196,8 +196,11 @@ static void xfrm4_dst_destroy(struct dst_entry *dst) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; + dst_destroy_metrics_generic(dst); + if (likely(xdst->u.rt.peer)) inet_putpeer(xdst->u.rt.peer); + xfrm_dst_destroy(xdst); } @@ -215,6 +218,7 @@ static struct dst_ops xfrm4_dst_ops = { .protocol = cpu_to_be16(ETH_P_IP), .gc = xfrm4_garbage_collect, .update_pmtu = xfrm4_update_pmtu, + .cow_metrics = dst_cow_metrics_generic, .destroy = xfrm4_dst_destroy, .ifdown = xfrm4_dst_ifdown, .local_out = __ip_local_out, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 1534508f6c6..45fafa018f1 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -105,6 +105,7 @@ static struct dst_ops ip6_dst_ops_template = { .check = ip6_dst_check, .default_advmss = ip6_default_advmss, .default_mtu = ip6_default_mtu, + .cow_metrics = dst_cow_metrics_generic, .destroy = ip6_dst_destroy, .ifdown = ip6_dst_ifdown, .negative_advice = ip6_negative_advice, @@ -125,6 +126,10 @@ static struct dst_ops ip6_dst_blackhole_ops = { .update_pmtu = ip6_rt_blackhole_update_pmtu, }; +static const u32 ip6_template_metrics[RTAX_MAX] = { + [RTAX_HOPLIMIT - 1] = 255, +}; + static struct rt6_info ip6_null_entry_template = { .dst = { .__refcnt = ATOMIC_INIT(1), @@ -193,6 +198,7 @@ static void ip6_dst_destroy(struct dst_entry *dst) rt->rt6i_idev = NULL; in6_dev_put(idev); } + dst_destroy_metrics_generic(dst); if (peer) { BUG_ON(!(rt->rt6i_flags & RTF_CACHE)); rt->rt6i_peer = NULL; @@ -2681,7 +2687,8 @@ static int __net_init ip6_route_net_init(struct net *net) net->ipv6.ip6_null_entry->dst.path = (struct dst_entry *)net->ipv6.ip6_null_entry; net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; - dst_metric_set(&net->ipv6.ip6_null_entry->dst, RTAX_HOPLIMIT, 255); + dst_init_metrics(&net->ipv6.ip6_null_entry->dst, + ip6_template_metrics, true); #ifdef CONFIG_IPV6_MULTIPLE_TABLES net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, @@ -2692,7 +2699,8 @@ static int __net_init ip6_route_net_init(struct net *net) net->ipv6.ip6_prohibit_entry->dst.path = (struct dst_entry *)net->ipv6.ip6_prohibit_entry; net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops; - dst_metric_set(&net->ipv6.ip6_prohibit_entry->dst, RTAX_HOPLIMIT, 255); + dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst, + ip6_template_metrics, true); net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, sizeof(*net->ipv6.ip6_blk_hole_entry), @@ -2702,7 +2710,8 @@ static int __net_init ip6_route_net_init(struct net *net) net->ipv6.ip6_blk_hole_entry->dst.path = (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops; - dst_metric_set(&net->ipv6.ip6_blk_hole_entry->dst, RTAX_HOPLIMIT, 255); + dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst, + ip6_template_metrics, true); #endif net->ipv6.sysctl.flush_delay = 0; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index da87428681c..834dc02f1d4 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -220,6 +220,7 @@ static void xfrm6_dst_destroy(struct dst_entry *dst) if (likely(xdst->u.rt6.rt6i_idev)) in6_dev_put(xdst->u.rt6.rt6i_idev); + dst_destroy_metrics_generic(dst); if (likely(xdst->u.rt6.rt6i_peer)) inet_putpeer(xdst->u.rt6.rt6i_peer); xfrm_dst_destroy(xdst); @@ -257,6 +258,7 @@ static struct dst_ops xfrm6_dst_ops = { .protocol = cpu_to_be16(ETH_P_IPV6), .gc = xfrm6_garbage_collect, .update_pmtu = xfrm6_update_pmtu, + .cow_metrics = dst_cow_metrics_generic, .destroy = xfrm6_dst_destroy, .ifdown = xfrm6_dst_ifdown, .local_out = __ip6_local_out, -- cgit v1.2.3-70-g09d2 From ccf434380d1a67df2dcb9113206b77d0cb0a1cef Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 26 Jan 2011 18:08:02 +0000 Subject: net: fix dev_seq_next() Commit c6d14c84566d (net: Introduce for_each_netdev_rcu() iterator) added a race in dev_seq_next(). The rcu_dereference() call should be done _before_ testing the end of list, or we might return a wrong net_device if a concurrent thread changes net_device list under us. Note : discovered thanks to a sparse warning : net/core/dev.c:3919:9: error: incompatible types in comparison expression (different address spaces) Signed-off-by: Eric Dumazet CC: Paul E. McKenney Signed-off-by: David S. Miller --- include/linux/netdevice.h | 9 ++++++++- net/core/dev.c | 11 +++++++---- 2 files changed, 15 insertions(+), 5 deletions(-) (limited to 'net/core') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8858422c5c5..c7d70745222 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1447,7 +1447,7 @@ static inline struct net_device *next_net_device_rcu(struct net_device *dev) struct net *net; net = dev_net(dev); - lh = rcu_dereference(dev->dev_list.next); + lh = rcu_dereference(list_next_rcu(&dev->dev_list)); return lh == &net->dev_base_head ? NULL : net_device_entry(lh); } @@ -1457,6 +1457,13 @@ static inline struct net_device *first_net_device(struct net *net) net_device_entry(net->dev_base_head.next); } +static inline struct net_device *first_net_device_rcu(struct net *net) +{ + struct list_head *lh = rcu_dereference(list_next_rcu(&net->dev_base_head)); + + return lh == &net->dev_base_head ? NULL : net_device_entry(lh); +} + extern int netdev_boot_setup_check(struct net_device *dev); extern unsigned long netdev_boot_base(const char *prefix, int unit); extern struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type, diff --git a/net/core/dev.c b/net/core/dev.c index 1b4c07fe295..ddd5df2b61d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4051,12 +4051,15 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos) void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct net_device *dev = (v == SEQ_START_TOKEN) ? - first_net_device(seq_file_net(seq)) : - next_net_device((struct net_device *)v); + struct net_device *dev = v; + + if (v == SEQ_START_TOKEN) + dev = first_net_device_rcu(seq_file_net(seq)); + else + dev = next_net_device_rcu(dev); ++*pos; - return rcu_dereference(dev); + return dev; } void dev_seq_stop(struct seq_file *seq, void *v) -- cgit v1.2.3-70-g09d2 From 725d1e1b457dc2bbebb337677e73efe7c6d14da5 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 28 Jan 2011 14:05:05 -0800 Subject: ipv4: Attach FIB info to dst_default_metrics when possible If there are no explicit metrics attached to a route, hook fi->fib_info up to dst_default_metrics. Signed-off-by: David S. Miller --- include/net/dst.h | 1 + net/core/dst.c | 2 +- net/ipv4/fib_semantics.c | 12 ++++++++---- 3 files changed, 10 insertions(+), 5 deletions(-) (limited to 'net/core') diff --git a/include/net/dst.h b/include/net/dst.h index 94a8c234ea2..484f80b69ad 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -97,6 +97,7 @@ struct dst_entry { #ifdef __KERNEL__ extern u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old); +extern const u32 dst_default_metrics[RTAX_MAX]; #define DST_METRICS_READ_ONLY 0x1UL #define __DST_METRICS_PTR(Y) \ diff --git a/net/core/dst.c b/net/core/dst.c index 57889350570..c1674fde827 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -164,7 +164,7 @@ int dst_discard(struct sk_buff *skb) } EXPORT_SYMBOL(dst_discard); -static const u32 dst_default_metrics[RTAX_MAX]; +const u32 dst_default_metrics[RTAX_MAX]; void *dst_alloc(struct dst_ops *ops) { diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 363ec39228d..48e93a56007 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -152,7 +152,8 @@ static void free_fib_info_rcu(struct rcu_head *head) { struct fib_info *fi = container_of(head, struct fib_info, rcu); - kfree(fi->fib_metrics); + if (fi->fib_metrics != (u32 *) dst_default_metrics) + kfree(fi->fib_metrics); kfree(fi); } @@ -743,9 +744,12 @@ struct fib_info *fib_create_info(struct fib_config *cfg) fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); if (fi == NULL) goto failure; - fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); - if (!fi->fib_metrics) - goto failure; + if (cfg->fc_mx) { + fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); + if (!fi->fib_metrics) + goto failure; + } else + fi->fib_metrics = (u32 *) dst_default_metrics; fib_info_cnt++; fi->fib_net = hold_net(net); -- cgit v1.2.3-70-g09d2 From b6644cb706610874104dbf3359e3b67aa59cbc27 Mon Sep 17 00:00:00 2001 From: Xiaotian Feng Date: Wed, 9 Feb 2011 19:16:15 -0800 Subject: net: rename group sysfs entry to netdev_group commit a512b92 adds sysfs entry for net device group, but before this commit, tun also uses group sysfs, so after this commit checkin, kernel warns like this: sysfs: cannot create duplicate filename '/devices/virtual/net/vnet0/group' Since tun has used this for years, rename sysfs under tun might break existing userspace, so rename group sysfs entry for net device group is a better choice. Signed-off-by: Xiaotian Feng Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 2e4a393dfc3..5ceb257e860 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -330,7 +330,7 @@ static struct device_attribute net_class_attributes[] = { __ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags), __ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len, store_tx_queue_len), - __ATTR(group, S_IRUGO | S_IWUSR, show_group, store_group), + __ATTR(netdev_group, S_IRUGO | S_IWUSR, show_group, store_group), {} }; -- cgit v1.2.3-70-g09d2 From d59cfde2fb960b5970ccb5a38cea25d38b37a8e8 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 12 Feb 2011 00:46:06 +0000 Subject: net: remove the unnecessary dance around skb_bond_should_drop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No need to check (master) twice and to drive in and out the header file. Signed-off-by: Jiri Pirko Reviewed-by: Nicolas de Pesloüan Signed-off-by: David S. Miller --- include/linux/netdevice.h | 11 ----------- net/core/dev.c | 6 +++--- 2 files changed, 3 insertions(+), 14 deletions(-) (limited to 'net/core') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c7d70745222..5a5baeaaa50 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2437,17 +2437,6 @@ static inline void netif_set_gso_max_size(struct net_device *dev, dev->gso_max_size = size; } -extern int __skb_bond_should_drop(struct sk_buff *skb, - struct net_device *master); - -static inline int skb_bond_should_drop(struct sk_buff *skb, - struct net_device *master) -{ - if (master) - return __skb_bond_should_drop(skb, master); - return 0; -} - extern struct pernet_operations __net_initdata loopback_net_ops; static inline int dev_ethtool_get_settings(struct net_device *dev, diff --git a/net/core/dev.c b/net/core/dev.c index 6392ea0a591..d874fd1baf4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3105,7 +3105,8 @@ static inline void skb_bond_set_mac_by_master(struct sk_buff *skb, * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and * ARP on active-backup slaves with arp_validate enabled. */ -int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master) +static int __skb_bond_should_drop(struct sk_buff *skb, + struct net_device *master) { struct net_device *dev = skb->dev; @@ -3139,7 +3140,6 @@ int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master) } return 0; } -EXPORT_SYMBOL(__skb_bond_should_drop); static int __netif_receive_skb(struct sk_buff *skb) { @@ -3177,7 +3177,7 @@ static int __netif_receive_skb(struct sk_buff *skb) if (skb->deliver_no_wcard) null_or_orig = orig_dev; else if (master) { - if (skb_bond_should_drop(skb, master)) { + if (__skb_bond_should_drop(skb, master)) { skb->deliver_no_wcard = 1; null_or_orig = orig_dev; /* deliver only exact match */ } else -- cgit v1.2.3-70-g09d2 From 1765a575334f1a232c1478accdee5c7d19f4b3e3 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 12 Feb 2011 06:48:36 +0000 Subject: net: make dev->master general dev->master is now tightly connected to bonding driver. This patch makes this pointer more general and ready to be used by others. - netdev_set_master() - bond specifics moved to new function netdev_set_bond_master() - introduced netif_is_bond_slave() to check if device is a bonding slave Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/infiniband/hw/nes/nes.c | 3 ++- drivers/infiniband/hw/nes/nes_cm.c | 2 +- drivers/net/bonding/bond_main.c | 10 ++++---- drivers/net/cxgb3/cxgb3_offload.c | 3 ++- include/linux/netdevice.h | 7 ++++++ net/core/dev.c | 49 ++++++++++++++++++++++++++++---------- 6 files changed, 54 insertions(+), 20 deletions(-) (limited to 'net/core') diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index 3b4ec3238ce..3d7f3664b67 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -153,7 +153,8 @@ static int nes_inetaddr_event(struct notifier_block *notifier, nesdev, nesdev->netdev[0]->name); netdev = nesdev->netdev[0]; nesvnic = netdev_priv(netdev); - is_bonded = (netdev->master == event_netdev); + is_bonded = netif_is_bond_slave(netdev) && + (netdev->master == event_netdev); if ((netdev == event_netdev) || is_bonded) { if (nesvnic->rdma_enabled == 0) { nes_debug(NES_DBG_NETDEV, "Returning without processing event for %s since" diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 009ec814d51..ec3aa11c36c 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -1118,7 +1118,7 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi return rc; } - if (nesvnic->netdev->master) + if (netif_is_bond_slave(netdev)) netdev = nesvnic->netdev->master; else netdev = nesvnic->netdev; diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 1df9f0ea918..9f877878d63 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1594,9 +1594,9 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) } } - res = netdev_set_master(slave_dev, bond_dev); + res = netdev_set_bond_master(slave_dev, bond_dev); if (res) { - pr_debug("Error %d calling netdev_set_master\n", res); + pr_debug("Error %d calling netdev_set_bond_master\n", res); goto err_restore_mac; } /* open the slave since the application closed it */ @@ -1812,7 +1812,7 @@ err_close: dev_close(slave_dev); err_unset_master: - netdev_set_master(slave_dev, NULL); + netdev_set_bond_master(slave_dev, NULL); err_restore_mac: if (!bond->params.fail_over_mac) { @@ -1992,7 +1992,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) netif_addr_unlock_bh(bond_dev); } - netdev_set_master(slave_dev, NULL); + netdev_set_bond_master(slave_dev, NULL); #ifdef CONFIG_NET_POLL_CONTROLLER read_lock_bh(&bond->lock); @@ -2114,7 +2114,7 @@ static int bond_release_all(struct net_device *bond_dev) netif_addr_unlock_bh(bond_dev); } - netdev_set_master(slave_dev, NULL); + netdev_set_bond_master(slave_dev, NULL); /* close slave before restoring its mac address */ dev_close(slave_dev); diff --git a/drivers/net/cxgb3/cxgb3_offload.c b/drivers/net/cxgb3/cxgb3_offload.c index 7ea94b5205f..862804f32b6 100644 --- a/drivers/net/cxgb3/cxgb3_offload.c +++ b/drivers/net/cxgb3/cxgb3_offload.c @@ -186,9 +186,10 @@ static struct net_device *get_iff_from_mac(struct adapter *adapter, dev = NULL; if (grp) dev = vlan_group_get_device(grp, vlan); - } else + } else if (netif_is_bond_slave(dev)) { while (dev->master) dev = dev->master; + } return dev; } } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5a5baeaaa50..5a42b100376 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2377,6 +2377,8 @@ extern int netdev_max_backlog; extern int netdev_tstamp_prequeue; extern int weight_p; extern int netdev_set_master(struct net_device *dev, struct net_device *master); +extern int netdev_set_bond_master(struct net_device *dev, + struct net_device *master); extern int skb_checksum_help(struct sk_buff *skb); extern struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features); #ifdef CONFIG_BUG @@ -2437,6 +2439,11 @@ static inline void netif_set_gso_max_size(struct net_device *dev, dev->gso_max_size = size; } +static inline int netif_is_bond_slave(struct net_device *dev) +{ + return dev->flags & IFF_SLAVE && dev->priv_flags & IFF_BONDING; +} + extern struct pernet_operations __net_initdata loopback_net_ops; static inline int dev_ethtool_get_settings(struct net_device *dev, diff --git a/net/core/dev.c b/net/core/dev.c index d874fd1baf4..a4132766d36 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3146,7 +3146,6 @@ static int __netif_receive_skb(struct sk_buff *skb) struct packet_type *ptype, *pt_prev; rx_handler_func_t *rx_handler; struct net_device *orig_dev; - struct net_device *master; struct net_device *null_or_orig; struct net_device *orig_or_bond; int ret = NET_RX_DROP; @@ -3173,15 +3172,19 @@ static int __netif_receive_skb(struct sk_buff *skb) */ null_or_orig = NULL; orig_dev = skb->dev; - master = ACCESS_ONCE(orig_dev->master); if (skb->deliver_no_wcard) null_or_orig = orig_dev; - else if (master) { - if (__skb_bond_should_drop(skb, master)) { - skb->deliver_no_wcard = 1; - null_or_orig = orig_dev; /* deliver only exact match */ - } else - skb->dev = master; + else if (netif_is_bond_slave(orig_dev)) { + struct net_device *bond_master = ACCESS_ONCE(orig_dev->master); + + if (likely(bond_master)) { + if (__skb_bond_should_drop(skb, bond_master)) { + skb->deliver_no_wcard = 1; + /* deliver only exact match */ + null_or_orig = orig_dev; + } else + skb->dev = bond_master; + } } __this_cpu_inc(softnet_data.processed); @@ -4346,15 +4349,14 @@ static int __init dev_proc_init(void) /** - * netdev_set_master - set up master/slave pair + * netdev_set_master - set up master pointer * @slave: slave device * @master: new master device * * Changes the master device of the slave. Pass %NULL to break the * bonding. The caller must hold the RTNL semaphore. On a failure * a negative errno code is returned. On success the reference counts - * are adjusted, %RTM_NEWLINK is sent to the routing socket and the - * function returns zero. + * are adjusted and the function returns zero. */ int netdev_set_master(struct net_device *slave, struct net_device *master) { @@ -4374,6 +4376,29 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) synchronize_net(); dev_put(old); } + return 0; +} +EXPORT_SYMBOL(netdev_set_master); + +/** + * netdev_set_bond_master - set up bonding master/slave pair + * @slave: slave device + * @master: new master device + * + * Changes the master device of the slave. Pass %NULL to break the + * bonding. The caller must hold the RTNL semaphore. On a failure + * a negative errno code is returned. On success %RTM_NEWLINK is sent + * to the routing socket and the function returns zero. + */ +int netdev_set_bond_master(struct net_device *slave, struct net_device *master) +{ + int err; + + ASSERT_RTNL(); + + err = netdev_set_master(slave, master); + if (err) + return err; if (master) slave->flags |= IFF_SLAVE; else @@ -4382,7 +4407,7 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE); return 0; } -EXPORT_SYMBOL(netdev_set_master); +EXPORT_SYMBOL(netdev_set_bond_master); static void dev_change_rx_flags(struct net_device *dev, int flags) { -- cgit v1.2.3-70-g09d2 From fbaec0ea54f7d9131891ff98744e82c073ce03b1 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sun, 13 Feb 2011 10:15:37 +0000 Subject: rtnetlink: implement setting of master device This patch allows userspace to enslave/release slave devices via netlink interface using IFLA_MASTER. This introduces generic way to add/remove underling devices. Signed-off-by: Jiri Pirko Acked-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netdevice.h | 12 ++++++++++++ net/core/rtnetlink.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) (limited to 'net/core') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5a42b100376..d08ef653857 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -783,6 +783,14 @@ struct netdev_tc_txq { * Set hardware filter for RFS. rxq_index is the target queue index; * flow_id is a flow ID to be passed to rps_may_expire_flow() later. * Return the filter ID on success, or a negative error code. + * + * Slave management functions (for bridge, bonding, etc). User should + * call netdev_set_master() to set dev->master properly. + * int (*ndo_add_slave)(struct net_device *dev, struct net_device *slave_dev); + * Called to make another netdev an underling. + * + * int (*ndo_del_slave)(struct net_device *dev, struct net_device *slave_dev); + * Called to release previously enslaved netdev. */ #define HAVE_NET_DEVICE_OPS struct net_device_ops { @@ -862,6 +870,10 @@ struct net_device_ops { u16 rxq_index, u32 flow_id); #endif + int (*ndo_add_slave)(struct net_device *dev, + struct net_device *slave_dev); + int (*ndo_del_slave)(struct net_device *dev, + struct net_device *slave_dev); }; /* diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index da0fe457c85..49f7ea5b4c7 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1036,6 +1036,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_MAP] = { .len = sizeof(struct rtnl_link_ifmap) }, [IFLA_MTU] = { .type = NLA_U32 }, [IFLA_LINK] = { .type = NLA_U32 }, + [IFLA_MASTER] = { .type = NLA_U32 }, [IFLA_TXQLEN] = { .type = NLA_U32 }, [IFLA_WEIGHT] = { .type = NLA_U32 }, [IFLA_OPERSTATE] = { .type = NLA_U8 }, @@ -1178,6 +1179,41 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr) return err; } +static int do_set_master(struct net_device *dev, int ifindex) +{ + struct net_device *master_dev; + const struct net_device_ops *ops; + int err; + + if (dev->master) { + if (dev->master->ifindex == ifindex) + return 0; + ops = dev->master->netdev_ops; + if (ops->ndo_del_slave) { + err = ops->ndo_del_slave(dev->master, dev); + if (err) + return err; + } else { + return -EOPNOTSUPP; + } + } + + if (ifindex) { + master_dev = __dev_get_by_index(dev_net(dev), ifindex); + if (!master_dev) + return -EINVAL; + ops = master_dev->netdev_ops; + if (ops->ndo_add_slave) { + err = ops->ndo_add_slave(master_dev, dev); + if (err) + return err; + } else { + return -EOPNOTSUPP; + } + } + return 0; +} + static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, struct nlattr **tb, char *ifname, int modified) { @@ -1301,6 +1337,13 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, goto errout; } + if (tb[IFLA_MASTER]) { + err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER])); + if (err) + goto errout; + modified = 1; + } + if (tb[IFLA_TXQLEN]) dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); -- cgit v1.2.3-70-g09d2 From 5c56580b74e57e56f30e3c5bbc9d7ab487858497 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 15 Feb 2011 19:39:21 +0000 Subject: net: Adjust TX queue kobjects if number of queues changes during unregister If the root qdisc for a net device is mqprio, and the driver's ndo_setup_tc() operation dynamically adds and remvoes TX queues, netif_set_real_num_tx_queues() will be called during device unregistration to remove the extra TX queues when the qdisc is destroyed. Currently this causes the corresponding kobjects to be leaked, and the device's reference count never drops to 0. Signed-off-by: Ben Hutchings --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 6392ea0a591..30c71f9b041 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1648,7 +1648,8 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) if (txq < 1 || txq > dev->num_tx_queues) return -EINVAL; - if (dev->reg_state == NETREG_REGISTERED) { + if (dev->reg_state == NETREG_REGISTERED || + dev->reg_state == NETREG_UNREGISTERING) { ASSERT_RTNL(); rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues, -- cgit v1.2.3-70-g09d2 From 69a19ee60d5d5adc0addbdffd254f83b60660a07 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 15 Feb 2011 20:32:04 +0000 Subject: net: RPS: Make hardware-accelerated RFS conditional on NETIF_F_NTUPLE For testing and debugging purposes it is useful to be able to disable hardware acceleration of RFS without disabling RFS altogether. Since this is a similar feature to 'n-tuple' flow steering through the ethtool API, test the same feature flag that controls that. Signed-off-by: Ben Hutchings --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 30c71f9b041..54aaca69a02 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2607,7 +2607,8 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb, int rc; /* Should we steer this flow to a different hardware queue? */ - if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap) + if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap || + !(dev->features & NETIF_F_NTUPLE)) goto out; rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu); if (rxq_index == skb_get_rx_queue(skb)) -- cgit v1.2.3-70-g09d2 From 9a279ea3a77ebcc91b68f0546e7cfa5018a12513 Mon Sep 17 00:00:00 2001 From: Michał Mirosław Date: Tue, 15 Feb 2011 16:59:16 +0000 Subject: ethtool: move EXPORT_SYMBOL(ethtool_op_set_tx_csum) to correct place MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- net/core/ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 5984ee0c713..9eb82775a55 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -55,6 +55,7 @@ int ethtool_op_set_tx_csum(struct net_device *dev, u32 data) return 0; } +EXPORT_SYMBOL(ethtool_op_set_tx_csum); int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data) { @@ -1124,7 +1125,6 @@ static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr) return dev->ethtool_ops->set_tx_csum(dev, edata.data); } -EXPORT_SYMBOL(ethtool_op_set_tx_csum); static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr) { -- cgit v1.2.3-70-g09d2 From 212b573f5552c60265da721ff9ce32e3462a2cdd Mon Sep 17 00:00:00 2001 From: Michał Mirosław Date: Tue, 15 Feb 2011 16:59:16 +0000 Subject: ethtool: enable GSO and GRO by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ net/core/dev.c | 18 ++++++++++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) (limited to 'net/core') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d08ef653857..168e3ad14da 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -984,6 +984,9 @@ struct net_device { NETIF_F_SG | NETIF_F_HIGHDMA | \ NETIF_F_FRAGLIST) + /* changeable features with no special hardware requirements */ +#define NETIF_F_SOFT_FEATURES (NETIF_F_GSO | NETIF_F_GRO) + /* Interface index. Unique device identifier */ int ifindex; int iflink; diff --git a/net/core/dev.c b/net/core/dev.c index 4580460ebec..8686f6ffe7f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5274,6 +5274,12 @@ u32 netdev_fix_features(struct net_device *dev, u32 features) features &= ~NETIF_F_TSO; } + /* Software GSO depends on SG. */ + if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) { + netdev_info(dev, "Dropping NETIF_F_GSO since no SG feature.\n"); + features &= ~NETIF_F_GSO; + } + /* UFO needs SG and checksumming */ if (features & NETIF_F_UFO) { /* maybe split UFO into V4 and V6? */ @@ -5430,11 +5436,15 @@ int register_netdevice(struct net_device *dev) if (dev->iflink == -1) dev->iflink = dev->ifindex; - dev->features = netdev_fix_features(dev, dev->features); + /* Enable software offloads by default - will be stripped in + * netdev_fix_features() if not supported. */ + dev->features |= NETIF_F_SOFT_FEATURES; - /* Enable software GSO if SG is supported. */ - if (dev->features & NETIF_F_SG) - dev->features |= NETIF_F_GSO; + /* Avoid warning from netdev_fix_features() for GSO without SG */ + if (!(dev->features & NETIF_F_SG)) + dev->features &= ~NETIF_F_GSO; + + dev->features = netdev_fix_features(dev, dev->features); /* Enable GRO and NETIF_F_HIGHDMA for vlans by default, * vlan_dev_init() will do the dev->features check, so these features -- cgit v1.2.3-70-g09d2 From 340ae1654c0667e0cdd2a6d4dc16f7946e018881 Mon Sep 17 00:00:00 2001 From: Michał Mirosław Date: Tue, 15 Feb 2011 16:59:16 +0000 Subject: ethtool: factorize ethtool_get_strings() and ethtool_get_sset_count() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is needed for unified offloads patch. Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- net/core/ethtool.c | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) (limited to 'net/core') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 9eb82775a55..85aaeab862a 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -172,6 +172,25 @@ EXPORT_SYMBOL(ethtool_ntuple_flush); /* Handlers for each ethtool command */ +static int __ethtool_get_sset_count(struct net_device *dev, int sset) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + + if (ops && ops->get_sset_count && ops->get_strings) + return ops->get_sset_count(dev, sset); + else + return -EOPNOTSUPP; +} + +static void __ethtool_get_strings(struct net_device *dev, + u32 stringset, u8 *data) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + + /* ops->get_strings is valid because checked earlier */ + ops->get_strings(dev, stringset, data); +} + static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) { struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET }; @@ -252,14 +271,10 @@ static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev, void __user *useraddr) { struct ethtool_sset_info info; - const struct ethtool_ops *ops = dev->ethtool_ops; u64 sset_mask; int i, idx = 0, n_bits = 0, ret, rc; u32 *info_buf = NULL; - if (!ops->get_sset_count) - return -EOPNOTSUPP; - if (copy_from_user(&info, useraddr, sizeof(info))) return -EFAULT; @@ -286,7 +301,7 @@ static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev, if (!(sset_mask & (1ULL << i))) continue; - rc = ops->get_sset_count(dev, i); + rc = __ethtool_get_sset_count(dev, i); if (rc >= 0) { info.sset_mask |= (1ULL << i); info_buf[idx++] = rc; @@ -1287,17 +1302,13 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr) static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) { struct ethtool_gstrings gstrings; - const struct ethtool_ops *ops = dev->ethtool_ops; u8 *data; int ret; - if (!ops->get_strings || !ops->get_sset_count) - return -EOPNOTSUPP; - if (copy_from_user(&gstrings, useraddr, sizeof(gstrings))) return -EFAULT; - ret = ops->get_sset_count(dev, gstrings.string_set); + ret = __ethtool_get_sset_count(dev, gstrings.string_set); if (ret < 0) return ret; @@ -1307,7 +1318,7 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) if (!data) return -ENOMEM; - ops->get_strings(dev, gstrings.string_set, data); + __ethtool_get_strings(dev, gstrings.string_set, data); ret = -EFAULT; if (copy_to_user(useraddr, &gstrings, sizeof(gstrings))) @@ -1317,7 +1328,7 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) goto out; ret = 0; - out: +out: kfree(data); return ret; } -- cgit v1.2.3-70-g09d2 From 0a417704777ed29d0e8c72b7274a328e61248e75 Mon Sep 17 00:00:00 2001 From: Michał Mirosław Date: Tue, 15 Feb 2011 16:59:17 +0000 Subject: ethtool: factorize get/set_one_feature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows to enable GRO even if RX csum is disabled. GRO will not be used for packets without hardware checksum anyway. Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 + net/core/ethtool.c | 274 ++++++++++++++++++++++------------------------ 2 files changed, 138 insertions(+), 142 deletions(-) (limited to 'net/core') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 168e3ad14da..dede3fdbb4b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -976,6 +976,12 @@ struct net_device { #define NETIF_F_V6_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IPV6_CSUM) #define NETIF_F_ALL_CSUM (NETIF_F_V4_CSUM | NETIF_F_V6_CSUM) +#define NETIF_F_ALL_TSO (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN) + +#define NETIF_F_ALL_TX_OFFLOADS (NETIF_F_ALL_CSUM | NETIF_F_SG | \ + NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \ + NETIF_F_SCTP_CSUM | NETIF_F_FCOE_CRC) + /* * If one device supports one of these features, then enable them * for all in netdev_increment_features. diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 85aaeab862a..c3fb8f90de6 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -191,6 +191,109 @@ static void __ethtool_get_strings(struct net_device *dev, ops->get_strings(dev, stringset, data); } +static u32 ethtool_get_feature_mask(u32 eth_cmd) +{ + /* feature masks of legacy discrete ethtool ops */ + + switch (eth_cmd) { + case ETHTOOL_GTXCSUM: + case ETHTOOL_STXCSUM: + return NETIF_F_ALL_CSUM | NETIF_F_SCTP_CSUM; + case ETHTOOL_GSG: + case ETHTOOL_SSG: + return NETIF_F_SG; + case ETHTOOL_GTSO: + case ETHTOOL_STSO: + return NETIF_F_ALL_TSO; + case ETHTOOL_GUFO: + case ETHTOOL_SUFO: + return NETIF_F_UFO; + case ETHTOOL_GGSO: + case ETHTOOL_SGSO: + return NETIF_F_GSO; + case ETHTOOL_GGRO: + case ETHTOOL_SGRO: + return NETIF_F_GRO; + default: + BUG(); + } +} + +static void *__ethtool_get_one_feature_actor(struct net_device *dev, u32 ethcmd) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + + if (!ops) + return NULL; + + switch (ethcmd) { + case ETHTOOL_GTXCSUM: + return ops->get_tx_csum; + case ETHTOOL_SSG: + return ops->get_sg; + case ETHTOOL_STSO: + return ops->get_tso; + case ETHTOOL_SUFO: + return ops->get_ufo; + default: + return NULL; + } +} + +static int ethtool_get_one_feature(struct net_device *dev, + char __user *useraddr, u32 ethcmd) +{ + struct ethtool_value edata = { + .cmd = ethcmd, + .data = !!(dev->features & ethtool_get_feature_mask(ethcmd)), + }; + u32 (*actor)(struct net_device *); + + actor = __ethtool_get_one_feature_actor(dev, ethcmd); + if (actor) + edata.data = actor(dev); + + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; +} + +static int __ethtool_set_tx_csum(struct net_device *dev, u32 data); +static int __ethtool_set_sg(struct net_device *dev, u32 data); +static int __ethtool_set_tso(struct net_device *dev, u32 data); +static int __ethtool_set_ufo(struct net_device *dev, u32 data); + +static int ethtool_set_one_feature(struct net_device *dev, + void __user *useraddr, u32 ethcmd) +{ + struct ethtool_value edata; + u32 mask; + + if (copy_from_user(&edata, useraddr, sizeof(edata))) + return -EFAULT; + + switch (ethcmd) { + case ETHTOOL_STXCSUM: + return __ethtool_set_tx_csum(dev, edata.data); + case ETHTOOL_SSG: + return __ethtool_set_sg(dev, edata.data); + case ETHTOOL_STSO: + return __ethtool_set_tso(dev, edata.data); + case ETHTOOL_SUFO: + return __ethtool_set_ufo(dev, edata.data); + case ETHTOOL_SGSO: + case ETHTOOL_SGRO: + mask = ethtool_get_feature_mask(ethcmd); + if (edata.data) + dev->features |= mask; + else + dev->features &= ~mask; + return 0; + default: + return -EOPNOTSUPP; + } +} + static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) { struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET }; @@ -1107,6 +1210,9 @@ static int __ethtool_set_sg(struct net_device *dev, u32 data) { int err; + if (data && !(dev->features & NETIF_F_ALL_CSUM)) + return -EINVAL; + if (!data && dev->ethtool_ops->set_tso) { err = dev->ethtool_ops->set_tso(dev, 0); if (err) @@ -1121,24 +1227,20 @@ static int __ethtool_set_sg(struct net_device *dev, u32 data) return dev->ethtool_ops->set_sg(dev, data); } -static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr) +static int __ethtool_set_tx_csum(struct net_device *dev, u32 data) { - struct ethtool_value edata; int err; if (!dev->ethtool_ops->set_tx_csum) return -EOPNOTSUPP; - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - if (!edata.data && dev->ethtool_ops->set_sg) { + if (!data && dev->ethtool_ops->set_sg) { err = __ethtool_set_sg(dev, 0); if (err) return err; } - return dev->ethtool_ops->set_tx_csum(dev, edata.data); + return dev->ethtool_ops->set_tx_csum(dev, data); } static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr) @@ -1157,108 +1259,28 @@ static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr) return dev->ethtool_ops->set_rx_csum(dev, edata.data); } -static int ethtool_set_sg(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata; - - if (!dev->ethtool_ops->set_sg) - return -EOPNOTSUPP; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - if (edata.data && - !(dev->features & NETIF_F_ALL_CSUM)) - return -EINVAL; - - return __ethtool_set_sg(dev, edata.data); -} - -static int ethtool_set_tso(struct net_device *dev, char __user *useraddr) +static int __ethtool_set_tso(struct net_device *dev, u32 data) { - struct ethtool_value edata; - if (!dev->ethtool_ops->set_tso) return -EOPNOTSUPP; - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - if (edata.data && !(dev->features & NETIF_F_SG)) + if (data && !(dev->features & NETIF_F_SG)) return -EINVAL; - return dev->ethtool_ops->set_tso(dev, edata.data); + return dev->ethtool_ops->set_tso(dev, data); } -static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr) +static int __ethtool_set_ufo(struct net_device *dev, u32 data) { - struct ethtool_value edata; - if (!dev->ethtool_ops->set_ufo) return -EOPNOTSUPP; - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - if (edata.data && !(dev->features & NETIF_F_SG)) + if (data && !(dev->features & NETIF_F_SG)) return -EINVAL; - if (edata.data && !((dev->features & NETIF_F_GEN_CSUM) || + if (data && !((dev->features & NETIF_F_GEN_CSUM) || (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) return -EINVAL; - return dev->ethtool_ops->set_ufo(dev, edata.data); -} - -static int ethtool_get_gso(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GGSO }; - - edata.data = dev->features & NETIF_F_GSO; - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - -static int ethtool_set_gso(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - if (edata.data) - dev->features |= NETIF_F_GSO; - else - dev->features &= ~NETIF_F_GSO; - return 0; -} - -static int ethtool_get_gro(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GGRO }; - - edata.data = dev->features & NETIF_F_GRO; - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - -static int ethtool_set_gro(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - if (edata.data) { - u32 rxcsum = dev->ethtool_ops->get_rx_csum ? - dev->ethtool_ops->get_rx_csum(dev) : - ethtool_op_get_rx_csum(dev); - - if (!rxcsum) - return -EINVAL; - dev->features |= NETIF_F_GRO; - } else - dev->features &= ~NETIF_F_GRO; - - return 0; + return dev->ethtool_ops->set_ufo(dev, data); } static int ethtool_self_test(struct net_device *dev, char __user *useraddr) @@ -1590,33 +1612,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SRXCSUM: rc = ethtool_set_rx_csum(dev, useraddr); break; - case ETHTOOL_GTXCSUM: - rc = ethtool_get_value(dev, useraddr, ethcmd, - (dev->ethtool_ops->get_tx_csum ? - dev->ethtool_ops->get_tx_csum : - ethtool_op_get_tx_csum)); - break; - case ETHTOOL_STXCSUM: - rc = ethtool_set_tx_csum(dev, useraddr); - break; - case ETHTOOL_GSG: - rc = ethtool_get_value(dev, useraddr, ethcmd, - (dev->ethtool_ops->get_sg ? - dev->ethtool_ops->get_sg : - ethtool_op_get_sg)); - break; - case ETHTOOL_SSG: - rc = ethtool_set_sg(dev, useraddr); - break; - case ETHTOOL_GTSO: - rc = ethtool_get_value(dev, useraddr, ethcmd, - (dev->ethtool_ops->get_tso ? - dev->ethtool_ops->get_tso : - ethtool_op_get_tso)); - break; - case ETHTOOL_STSO: - rc = ethtool_set_tso(dev, useraddr); - break; case ETHTOOL_TEST: rc = ethtool_self_test(dev, useraddr); break; @@ -1632,21 +1627,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GPERMADDR: rc = ethtool_get_perm_addr(dev, useraddr); break; - case ETHTOOL_GUFO: - rc = ethtool_get_value(dev, useraddr, ethcmd, - (dev->ethtool_ops->get_ufo ? - dev->ethtool_ops->get_ufo : - ethtool_op_get_ufo)); - break; - case ETHTOOL_SUFO: - rc = ethtool_set_ufo(dev, useraddr); - break; - case ETHTOOL_GGSO: - rc = ethtool_get_gso(dev, useraddr); - break; - case ETHTOOL_SGSO: - rc = ethtool_set_gso(dev, useraddr); - break; case ETHTOOL_GFLAGS: rc = ethtool_get_value(dev, useraddr, ethcmd, (dev->ethtool_ops->get_flags ? @@ -1677,12 +1657,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SRXCLSRLINS: rc = ethtool_set_rxnfc(dev, ethcmd, useraddr); break; - case ETHTOOL_GGRO: - rc = ethtool_get_gro(dev, useraddr); - break; - case ETHTOOL_SGRO: - rc = ethtool_set_gro(dev, useraddr); - break; case ETHTOOL_FLASHDEV: rc = ethtool_flash_device(dev, useraddr); break; @@ -1704,6 +1678,22 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SRXFHINDIR: rc = ethtool_set_rxfh_indir(dev, useraddr); break; + case ETHTOOL_GTXCSUM: + case ETHTOOL_GSG: + case ETHTOOL_GTSO: + case ETHTOOL_GUFO: + case ETHTOOL_GGSO: + case ETHTOOL_GGRO: + rc = ethtool_get_one_feature(dev, useraddr, ethcmd); + break; + case ETHTOOL_STXCSUM: + case ETHTOOL_SSG: + case ETHTOOL_STSO: + case ETHTOOL_SUFO: + case ETHTOOL_SGSO: + case ETHTOOL_SGRO: + rc = ethtool_set_one_feature(dev, useraddr, ethcmd); + break; default: rc = -EOPNOTSUPP; } -- cgit v1.2.3-70-g09d2 From 5455c6998d34dc983a8693500e4dffefc3682dc5 Mon Sep 17 00:00:00 2001 From: Michał Mirosław Date: Tue, 15 Feb 2011 16:59:17 +0000 Subject: net: Introduce new feature setting ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This introduces a new framework to handle device features setting. It consists of: - new fields in struct net_device: + hw_features - features that hw/driver supports toggling + wanted_features - features that user wants enabled, when possible - new netdev_ops: + feat = ndo_fix_features(dev, feat) - API checking constraints for enabling features or their combinations + ndo_set_features(dev) - API updating hardware state to match changed dev->features - new ethtool commands: + ETHTOOL_GFEATURES/ETHTOOL_SFEATURES: get/set dev->wanted_features and trigger device reconfiguration if resulting dev->features changed + ETHTOOL_GSTRINGS(ETH_SS_FEATURES): get feature bits names (meaning) Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- include/linux/ethtool.h | 85 +++++++++++++++++++++++++++++++ include/linux/netdevice.h | 37 +++++++++++++- net/core/dev.c | 46 ++++++++++++++--- net/core/ethtool.c | 125 +++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 283 insertions(+), 10 deletions(-) (limited to 'net/core') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 1908929204a..806e716bb4f 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -251,6 +251,7 @@ enum ethtool_stringset { ETH_SS_STATS, ETH_SS_PRIV_FLAGS, ETH_SS_NTUPLE_FILTERS, + ETH_SS_FEATURES, }; /* for passing string sets for data tagging */ @@ -523,6 +524,87 @@ struct ethtool_flash { char data[ETHTOOL_FLASH_MAX_FILENAME]; }; +/* for returning and changing feature sets */ + +/** + * struct ethtool_get_features_block - block with state of 32 features + * @available: mask of changeable features + * @requested: mask of features requested to be enabled if possible + * @active: mask of currently enabled features + * @never_changed: mask of features not changeable for any device + */ +struct ethtool_get_features_block { + __u32 available; + __u32 requested; + __u32 active; + __u32 never_changed; +}; + +/** + * struct ethtool_gfeatures - command to get state of device's features + * @cmd: command number = %ETHTOOL_GFEATURES + * @size: in: number of elements in the features[] array; + * out: number of elements in features[] needed to hold all features + * @features: state of features + */ +struct ethtool_gfeatures { + __u32 cmd; + __u32 size; + struct ethtool_get_features_block features[0]; +}; + +/** + * struct ethtool_set_features_block - block with request for 32 features + * @valid: mask of features to be changed + * @requested: values of features to be changed + */ +struct ethtool_set_features_block { + __u32 valid; + __u32 requested; +}; + +/** + * struct ethtool_sfeatures - command to request change in device's features + * @cmd: command number = %ETHTOOL_SFEATURES + * @size: array size of the features[] array + * @features: feature change masks + */ +struct ethtool_sfeatures { + __u32 cmd; + __u32 size; + struct ethtool_set_features_block features[0]; +}; + +/* + * %ETHTOOL_SFEATURES changes features present in features[].valid to the + * values of corresponding bits in features[].requested. Bits in .requested + * not set in .valid or not changeable are ignored. + * + * Returns %EINVAL when .valid contains undefined or never-changable bits + * or size is not equal to required number of features words (32-bit blocks). + * Returns >= 0 if request was completed; bits set in the value mean: + * %ETHTOOL_F_UNSUPPORTED - there were bits set in .valid that are not + * changeable (not present in %ETHTOOL_GFEATURES' features[].available) + * those bits were ignored. + * %ETHTOOL_F_WISH - some or all changes requested were recorded but the + * resulting state of bits masked by .valid is not equal to .requested. + * Probably there are other device-specific constraints on some features + * in the set. When %ETHTOOL_F_UNSUPPORTED is set, .valid is considered + * here as though ignored bits were cleared. + * + * Meaning of bits in the masks are obtained by %ETHTOOL_GSSET_INFO (number of + * bits in the arrays - always multiple of 32) and %ETHTOOL_GSTRINGS commands + * for ETH_SS_FEATURES string set. First entry in the table corresponds to least + * significant bit in features[0] fields. Empty strings mark undefined features. + */ +enum ethtool_sfeatures_retval_bits { + ETHTOOL_F_UNSUPPORTED__BIT, + ETHTOOL_F_WISH__BIT, +}; + +#define ETHTOOL_F_UNSUPPORTED (1 << ETHTOOL_F_UNSUPPORTED__BIT) +#define ETHTOOL_F_WISH (1 << ETHTOOL_F_WISH__BIT) + #ifdef __KERNEL__ #include @@ -744,6 +826,9 @@ struct ethtool_ops { #define ETHTOOL_GRXFHINDIR 0x00000038 /* Get RX flow hash indir'n table */ #define ETHTOOL_SRXFHINDIR 0x00000039 /* Set RX flow hash indir'n table */ +#define ETHTOOL_GFEATURES 0x0000003a /* Get device offload settings */ +#define ETHTOOL_SFEATURES 0x0000003b /* Change device offload settings */ + /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET #define SPARC_ETH_SSET ETHTOOL_SSET diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index dede3fdbb4b..85f67e225f6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -791,6 +791,18 @@ struct netdev_tc_txq { * * int (*ndo_del_slave)(struct net_device *dev, struct net_device *slave_dev); * Called to release previously enslaved netdev. + * + * Feature/offload setting functions. + * u32 (*ndo_fix_features)(struct net_device *dev, u32 features); + * Adjusts the requested feature flags according to device-specific + * constraints, and returns the resulting flags. Must not modify + * the device state. + * + * int (*ndo_set_features)(struct net_device *dev, u32 features); + * Called to update device configuration to new features. Passed + * feature set might be less than what was returned by ndo_fix_features()). + * Must return >0 or -errno if it changed dev->features itself. + * */ #define HAVE_NET_DEVICE_OPS struct net_device_ops { @@ -874,6 +886,10 @@ struct net_device_ops { struct net_device *slave_dev); int (*ndo_del_slave)(struct net_device *dev, struct net_device *slave_dev); + u32 (*ndo_fix_features)(struct net_device *dev, + u32 features); + int (*ndo_set_features)(struct net_device *dev, + u32 features); }; /* @@ -925,12 +941,18 @@ struct net_device { struct list_head napi_list; struct list_head unreg_list; - /* Net device features */ + /* currently active device features */ u32 features; - + /* user-changeable features */ + u32 hw_features; + /* user-requested features */ + u32 wanted_features; /* VLAN feature mask */ u32 vlan_features; + /* Net device feature bits; if you change something, + * also update netdev_features_strings[] in ethtool.c */ + #define NETIF_F_SG 1 /* Scatter/gather IO. */ #define NETIF_F_IP_CSUM 2 /* Can checksum TCP/UDP over IPv4. */ #define NETIF_F_NO_CSUM 4 /* Does not require checksum. F.e. loopack. */ @@ -966,6 +988,12 @@ struct net_device { #define NETIF_F_TSO6 (SKB_GSO_TCPV6 << NETIF_F_GSO_SHIFT) #define NETIF_F_FSO (SKB_GSO_FCOE << NETIF_F_GSO_SHIFT) + /* Features valid for ethtool to change */ + /* = all defined minus driver/device-class-related */ +#define NETIF_F_NEVER_CHANGE (NETIF_F_HIGHDMA | NETIF_F_VLAN_CHALLENGED | \ + NETIF_F_LLTX | NETIF_F_NETNS_LOCAL) +#define NETIF_F_ETHTOOL_BITS (0x1f3fffff & ~NETIF_F_NEVER_CHANGE) + /* List of features with software fallbacks. */ #define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | \ NETIF_F_TSO6 | NETIF_F_UFO) @@ -2428,8 +2456,13 @@ extern char *netdev_drivername(const struct net_device *dev, char *buffer, int l extern void linkwatch_run_queue(void); +static inline u32 netdev_get_wanted_features(struct net_device *dev) +{ + return (dev->features & ~dev->hw_features) | dev->wanted_features; +} u32 netdev_increment_features(u32 all, u32 one, u32 mask); u32 netdev_fix_features(struct net_device *dev, u32 features); +void netdev_update_features(struct net_device *dev); void netif_stacked_transfer_operstate(const struct net_device *rootdev, struct net_device *dev); diff --git a/net/core/dev.c b/net/core/dev.c index 8686f6ffe7f..4f6943928fe 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5302,6 +5302,37 @@ u32 netdev_fix_features(struct net_device *dev, u32 features) } EXPORT_SYMBOL(netdev_fix_features); +void netdev_update_features(struct net_device *dev) +{ + u32 features; + int err = 0; + + features = netdev_get_wanted_features(dev); + + if (dev->netdev_ops->ndo_fix_features) + features = dev->netdev_ops->ndo_fix_features(dev, features); + + /* driver might be less strict about feature dependencies */ + features = netdev_fix_features(dev, features); + + if (dev->features == features) + return; + + netdev_info(dev, "Features changed: 0x%08x -> 0x%08x\n", + dev->features, features); + + if (dev->netdev_ops->ndo_set_features) + err = dev->netdev_ops->ndo_set_features(dev, features); + + if (!err) + dev->features = features; + else if (err < 0) + netdev_err(dev, + "set_features() failed (%d); wanted 0x%08x, left 0x%08x\n", + err, features, dev->features); +} +EXPORT_SYMBOL(netdev_update_features); + /** * netif_stacked_transfer_operstate - transfer operstate * @rootdev: the root or lower level device to transfer state from @@ -5436,15 +5467,18 @@ int register_netdevice(struct net_device *dev) if (dev->iflink == -1) dev->iflink = dev->ifindex; - /* Enable software offloads by default - will be stripped in - * netdev_fix_features() if not supported. */ - dev->features |= NETIF_F_SOFT_FEATURES; + /* Transfer changeable features to wanted_features and enable + * software offloads (GSO and GRO). + */ + dev->hw_features |= NETIF_F_SOFT_FEATURES; + dev->wanted_features = (dev->features & dev->hw_features) + | NETIF_F_SOFT_FEATURES; /* Avoid warning from netdev_fix_features() for GSO without SG */ - if (!(dev->features & NETIF_F_SG)) - dev->features &= ~NETIF_F_GSO; + if (!(dev->wanted_features & NETIF_F_SG)) + dev->wanted_features &= ~NETIF_F_GSO; - dev->features = netdev_fix_features(dev, dev->features); + netdev_update_features(dev); /* Enable GRO and NETIF_F_HIGHDMA for vlans by default, * vlan_dev_init() will do the dev->features check, so these features diff --git a/net/core/ethtool.c b/net/core/ethtool.c index c3fb8f90de6..95773960dc7 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -172,10 +172,120 @@ EXPORT_SYMBOL(ethtool_ntuple_flush); /* Handlers for each ethtool command */ +#define ETHTOOL_DEV_FEATURE_WORDS 1 + +static int ethtool_get_features(struct net_device *dev, void __user *useraddr) +{ + struct ethtool_gfeatures cmd = { + .cmd = ETHTOOL_GFEATURES, + .size = ETHTOOL_DEV_FEATURE_WORDS, + }; + struct ethtool_get_features_block features[ETHTOOL_DEV_FEATURE_WORDS] = { + { + .available = dev->hw_features, + .requested = dev->wanted_features, + .active = dev->features, + .never_changed = NETIF_F_NEVER_CHANGE, + }, + }; + u32 __user *sizeaddr; + u32 copy_size; + + sizeaddr = useraddr + offsetof(struct ethtool_gfeatures, size); + if (get_user(copy_size, sizeaddr)) + return -EFAULT; + + if (copy_size > ETHTOOL_DEV_FEATURE_WORDS) + copy_size = ETHTOOL_DEV_FEATURE_WORDS; + + if (copy_to_user(useraddr, &cmd, sizeof(cmd))) + return -EFAULT; + useraddr += sizeof(cmd); + if (copy_to_user(useraddr, features, copy_size * sizeof(*features))) + return -EFAULT; + + return 0; +} + +static int ethtool_set_features(struct net_device *dev, void __user *useraddr) +{ + struct ethtool_sfeatures cmd; + struct ethtool_set_features_block features[ETHTOOL_DEV_FEATURE_WORDS]; + int ret = 0; + + if (copy_from_user(&cmd, useraddr, sizeof(cmd))) + return -EFAULT; + useraddr += sizeof(cmd); + + if (cmd.size != ETHTOOL_DEV_FEATURE_WORDS) + return -EINVAL; + + if (copy_from_user(features, useraddr, sizeof(features))) + return -EFAULT; + + if (features[0].valid & ~NETIF_F_ETHTOOL_BITS) + return -EINVAL; + + if (features[0].valid & ~dev->hw_features) { + features[0].valid &= dev->hw_features; + ret |= ETHTOOL_F_UNSUPPORTED; + } + + dev->wanted_features &= ~features[0].valid; + dev->wanted_features |= features[0].valid & features[0].requested; + netdev_update_features(dev); + + if ((dev->wanted_features ^ dev->features) & features[0].valid) + ret |= ETHTOOL_F_WISH; + + return ret; +} + +static const char netdev_features_strings[ETHTOOL_DEV_FEATURE_WORDS * 32][ETH_GSTRING_LEN] = { + /* NETIF_F_SG */ "tx-scatter-gather", + /* NETIF_F_IP_CSUM */ "tx-checksum-ipv4", + /* NETIF_F_NO_CSUM */ "tx-checksum-unneeded", + /* NETIF_F_HW_CSUM */ "tx-checksum-ip-generic", + /* NETIF_F_IPV6_CSUM */ "tx_checksum-ipv6", + /* NETIF_F_HIGHDMA */ "highdma", + /* NETIF_F_FRAGLIST */ "tx-scatter-gather-fraglist", + /* NETIF_F_HW_VLAN_TX */ "tx-vlan-hw-insert", + + /* NETIF_F_HW_VLAN_RX */ "rx-vlan-hw-parse", + /* NETIF_F_HW_VLAN_FILTER */ "rx-vlan-filter", + /* NETIF_F_VLAN_CHALLENGED */ "vlan-challenged", + /* NETIF_F_GSO */ "tx-generic-segmentation", + /* NETIF_F_LLTX */ "tx-lockless", + /* NETIF_F_NETNS_LOCAL */ "netns-local", + /* NETIF_F_GRO */ "rx-gro", + /* NETIF_F_LRO */ "rx-lro", + + /* NETIF_F_TSO */ "tx-tcp-segmentation", + /* NETIF_F_UFO */ "tx-udp-fragmentation", + /* NETIF_F_GSO_ROBUST */ "tx-gso-robust", + /* NETIF_F_TSO_ECN */ "tx-tcp-ecn-segmentation", + /* NETIF_F_TSO6 */ "tx-tcp6-segmentation", + /* NETIF_F_FSO */ "tx-fcoe-segmentation", + "", + "", + + /* NETIF_F_FCOE_CRC */ "tx-checksum-fcoe-crc", + /* NETIF_F_SCTP_CSUM */ "tx-checksum-sctp", + /* NETIF_F_FCOE_MTU */ "fcoe-mtu", + /* NETIF_F_NTUPLE */ "rx-ntuple-filter", + /* NETIF_F_RXHASH */ "rx-hashing", + "", + "", + "", +}; + static int __ethtool_get_sset_count(struct net_device *dev, int sset) { const struct ethtool_ops *ops = dev->ethtool_ops; + if (sset == ETH_SS_FEATURES) + return ARRAY_SIZE(netdev_features_strings); + if (ops && ops->get_sset_count && ops->get_strings) return ops->get_sset_count(dev, sset); else @@ -187,8 +297,12 @@ static void __ethtool_get_strings(struct net_device *dev, { const struct ethtool_ops *ops = dev->ethtool_ops; - /* ops->get_strings is valid because checked earlier */ - ops->get_strings(dev, stringset, data); + if (stringset == ETH_SS_FEATURES) + memcpy(data, netdev_features_strings, + sizeof(netdev_features_strings)); + else + /* ops->get_strings is valid because checked earlier */ + ops->get_strings(dev, stringset, data); } static u32 ethtool_get_feature_mask(u32 eth_cmd) @@ -1533,6 +1647,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GRXCLSRLCNT: case ETHTOOL_GRXCLSRULE: case ETHTOOL_GRXCLSRLALL: + case ETHTOOL_GFEATURES: break; default: if (!capable(CAP_NET_ADMIN)) @@ -1678,6 +1793,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SRXFHINDIR: rc = ethtool_set_rxfh_indir(dev, useraddr); break; + case ETHTOOL_GFEATURES: + rc = ethtool_get_features(dev, useraddr); + break; + case ETHTOOL_SFEATURES: + rc = ethtool_set_features(dev, useraddr); + break; case ETHTOOL_GTXCSUM: case ETHTOOL_GSG: case ETHTOOL_GTSO: -- cgit v1.2.3-70-g09d2 From 86794881c29a7ea6271644b49ad81518cabda96b Mon Sep 17 00:00:00 2001 From: Michał Mirosław Date: Tue, 15 Feb 2011 16:59:17 +0000 Subject: net: ethtool: use ndo_fix_features for offload setting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- net/core/ethtool.c | 45 ++++++++++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 13 deletions(-) (limited to 'net/core') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 95773960dc7..65999974f74 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -357,15 +357,21 @@ static void *__ethtool_get_one_feature_actor(struct net_device *dev, u32 ethcmd) static int ethtool_get_one_feature(struct net_device *dev, char __user *useraddr, u32 ethcmd) { + u32 mask = ethtool_get_feature_mask(ethcmd); struct ethtool_value edata = { .cmd = ethcmd, - .data = !!(dev->features & ethtool_get_feature_mask(ethcmd)), + .data = !!(dev->features & mask), }; - u32 (*actor)(struct net_device *); - actor = __ethtool_get_one_feature_actor(dev, ethcmd); - if (actor) - edata.data = actor(dev); + /* compatibility with discrete get_ ops */ + if (!(dev->hw_features & mask)) { + u32 (*actor)(struct net_device *); + + actor = __ethtool_get_one_feature_actor(dev, ethcmd); + + if (actor) + edata.data = actor(dev); + } if (copy_to_user(useraddr, &edata, sizeof(edata))) return -EFAULT; @@ -386,6 +392,27 @@ static int ethtool_set_one_feature(struct net_device *dev, if (copy_from_user(&edata, useraddr, sizeof(edata))) return -EFAULT; + mask = ethtool_get_feature_mask(ethcmd); + mask &= dev->hw_features; + if (mask) { + if (edata.data) + dev->wanted_features |= mask; + else + dev->wanted_features &= ~mask; + + netdev_update_features(dev); + return 0; + } + + /* Driver is not converted to ndo_fix_features or does not + * support changing this offload. In the latter case it won't + * have corresponding ethtool_ops field set. + * + * Following part is to be removed after all drivers advertise + * their changeable features in netdev->hw_features and stop + * using discrete offload setting ops. + */ + switch (ethcmd) { case ETHTOOL_STXCSUM: return __ethtool_set_tx_csum(dev, edata.data); @@ -395,14 +422,6 @@ static int ethtool_set_one_feature(struct net_device *dev, return __ethtool_set_tso(dev, edata.data); case ETHTOOL_SUFO: return __ethtool_set_ufo(dev, edata.data); - case ETHTOOL_SGSO: - case ETHTOOL_SGRO: - mask = ethtool_get_feature_mask(ethcmd); - if (edata.data) - dev->features |= mask; - else - dev->features &= ~mask; - return 0; default: return -EOPNOTSUPP; } -- cgit v1.2.3-70-g09d2 From da8ac86c4a56a14bf8deea7d2f92d0a453c67f91 Mon Sep 17 00:00:00 2001 From: Michał Mirosław Date: Tue, 15 Feb 2011 16:59:18 +0000 Subject: net: use ndo_fix_features for ethtool_ops->set_flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- net/core/ethtool.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) (limited to 'net/core') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 65999974f74..65b3d50a6df 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -427,6 +427,34 @@ static int ethtool_set_one_feature(struct net_device *dev, } } +static int __ethtool_set_flags(struct net_device *dev, u32 data) +{ + u32 changed; + + if (data & ~flags_dup_features) + return -EINVAL; + + /* legacy set_flags() op */ + if (dev->ethtool_ops->set_flags) { + if (unlikely(dev->hw_features & flags_dup_features)) + netdev_warn(dev, + "driver BUG: mixed hw_features and set_flags()\n"); + return dev->ethtool_ops->set_flags(dev, data); + } + + /* allow changing only bits set in hw_features */ + changed = (data ^ dev->wanted_features) & flags_dup_features; + if (changed & ~dev->hw_features) + return (changed & dev->hw_features) ? -EINVAL : -EOPNOTSUPP; + + dev->wanted_features = + (dev->wanted_features & ~changed) | data; + + netdev_update_features(dev); + + return 0; +} + static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) { struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET }; @@ -1768,8 +1796,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) ethtool_op_get_flags)); break; case ETHTOOL_SFLAGS: - rc = ethtool_set_value(dev, useraddr, - dev->ethtool_ops->set_flags); + rc = ethtool_set_value(dev, useraddr, __ethtool_set_flags); break; case ETHTOOL_GPFLAGS: rc = ethtool_get_value(dev, useraddr, ethcmd, -- cgit v1.2.3-70-g09d2 From e83d360d9a7e5d71d55c13e96b19109a2ea23bf0 Mon Sep 17 00:00:00 2001 From: Michał Mirosław Date: Tue, 15 Feb 2011 16:59:18 +0000 Subject: net: introduce NETIF_F_RXCSUM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce NETIF_F_RXCSUM to replace device-private flags for RX checksum offload. Integrate it with ndo_fix_features. ethtool_op_get_rx_csum() is removed altogether as nothing in-tree uses it. Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- include/linux/ethtool.h | 1 - include/linux/netdevice.h | 5 ++++- net/core/ethtool.c | 47 +++++++++++++++++++++++------------------------ 3 files changed, 27 insertions(+), 26 deletions(-) (limited to 'net/core') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 806e716bb4f..54d776c2c1b 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -625,7 +625,6 @@ struct net_device; /* Some generic methods drivers may use in their ethtool_ops */ u32 ethtool_op_get_link(struct net_device *dev); -u32 ethtool_op_get_rx_csum(struct net_device *dev); u32 ethtool_op_get_tx_csum(struct net_device *dev); int ethtool_op_set_tx_csum(struct net_device *dev, u32 data); int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 85f67e225f6..ffe56c16df8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -977,6 +977,7 @@ struct net_device { #define NETIF_F_FCOE_MTU (1 << 26) /* Supports max FCoE MTU, 2158 bytes*/ #define NETIF_F_NTUPLE (1 << 27) /* N-tuple filters supported */ #define NETIF_F_RXHASH (1 << 28) /* Receive hashing offload */ +#define NETIF_F_RXCSUM (1 << 29) /* Receive checksumming offload */ /* Segmentation offload features */ #define NETIF_F_GSO_SHIFT 16 @@ -992,7 +993,7 @@ struct net_device { /* = all defined minus driver/device-class-related */ #define NETIF_F_NEVER_CHANGE (NETIF_F_HIGHDMA | NETIF_F_VLAN_CHALLENGED | \ NETIF_F_LLTX | NETIF_F_NETNS_LOCAL) -#define NETIF_F_ETHTOOL_BITS (0x1f3fffff & ~NETIF_F_NEVER_CHANGE) +#define NETIF_F_ETHTOOL_BITS (0x3f3fffff & ~NETIF_F_NEVER_CHANGE) /* List of features with software fallbacks. */ #define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | \ @@ -2510,6 +2511,8 @@ static inline int dev_ethtool_get_settings(struct net_device *dev, static inline u32 dev_ethtool_get_rx_csum(struct net_device *dev) { + if (dev->hw_features & NETIF_F_RXCSUM) + return !!(dev->features & NETIF_F_RXCSUM); if (!dev->ethtool_ops || !dev->ethtool_ops->get_rx_csum) return 0; return dev->ethtool_ops->get_rx_csum(dev); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 65b3d50a6df..66cdc76770c 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -34,12 +34,6 @@ u32 ethtool_op_get_link(struct net_device *dev) } EXPORT_SYMBOL(ethtool_op_get_link); -u32 ethtool_op_get_rx_csum(struct net_device *dev) -{ - return (dev->features & NETIF_F_ALL_CSUM) != 0; -} -EXPORT_SYMBOL(ethtool_op_get_rx_csum); - u32 ethtool_op_get_tx_csum(struct net_device *dev) { return (dev->features & NETIF_F_ALL_CSUM) != 0; @@ -274,7 +268,7 @@ static const char netdev_features_strings[ETHTOOL_DEV_FEATURE_WORDS * 32][ETH_GS /* NETIF_F_FCOE_MTU */ "fcoe-mtu", /* NETIF_F_NTUPLE */ "rx-ntuple-filter", /* NETIF_F_RXHASH */ "rx-hashing", - "", + /* NETIF_F_RXCSUM */ "rx-checksum", "", "", }; @@ -313,6 +307,9 @@ static u32 ethtool_get_feature_mask(u32 eth_cmd) case ETHTOOL_GTXCSUM: case ETHTOOL_STXCSUM: return NETIF_F_ALL_CSUM | NETIF_F_SCTP_CSUM; + case ETHTOOL_GRXCSUM: + case ETHTOOL_SRXCSUM: + return NETIF_F_RXCSUM; case ETHTOOL_GSG: case ETHTOOL_SSG: return NETIF_F_SG; @@ -343,6 +340,8 @@ static void *__ethtool_get_one_feature_actor(struct net_device *dev, u32 ethcmd) switch (ethcmd) { case ETHTOOL_GTXCSUM: return ops->get_tx_csum; + case ETHTOOL_GRXCSUM: + return ops->get_rx_csum; case ETHTOOL_SSG: return ops->get_sg; case ETHTOOL_STSO: @@ -354,6 +353,11 @@ static void *__ethtool_get_one_feature_actor(struct net_device *dev, u32 ethcmd) } } +static u32 __ethtool_get_rx_csum_oldbug(struct net_device *dev) +{ + return !!(dev->features & NETIF_F_ALL_CSUM); +} + static int ethtool_get_one_feature(struct net_device *dev, char __user *useraddr, u32 ethcmd) { @@ -369,6 +373,10 @@ static int ethtool_get_one_feature(struct net_device *dev, actor = __ethtool_get_one_feature_actor(dev, ethcmd); + /* bug compatibility with old get_rx_csum */ + if (ethcmd == ETHTOOL_GRXCSUM && !actor) + actor = __ethtool_get_rx_csum_oldbug; + if (actor) edata.data = actor(dev); } @@ -379,6 +387,7 @@ static int ethtool_get_one_feature(struct net_device *dev, } static int __ethtool_set_tx_csum(struct net_device *dev, u32 data); +static int __ethtool_set_rx_csum(struct net_device *dev, u32 data); static int __ethtool_set_sg(struct net_device *dev, u32 data); static int __ethtool_set_tso(struct net_device *dev, u32 data); static int __ethtool_set_ufo(struct net_device *dev, u32 data); @@ -416,6 +425,8 @@ static int ethtool_set_one_feature(struct net_device *dev, switch (ethcmd) { case ETHTOOL_STXCSUM: return __ethtool_set_tx_csum(dev, edata.data); + case ETHTOOL_SRXCSUM: + return __ethtool_set_rx_csum(dev, edata.data); case ETHTOOL_SSG: return __ethtool_set_sg(dev, edata.data); case ETHTOOL_STSO: @@ -1404,20 +1415,15 @@ static int __ethtool_set_tx_csum(struct net_device *dev, u32 data) return dev->ethtool_ops->set_tx_csum(dev, data); } -static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr) +static int __ethtool_set_rx_csum(struct net_device *dev, u32 data) { - struct ethtool_value edata; - if (!dev->ethtool_ops->set_rx_csum) return -EOPNOTSUPP; - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - if (!edata.data && dev->ethtool_ops->set_sg) + if (!data) dev->features &= ~NETIF_F_GRO; - return dev->ethtool_ops->set_rx_csum(dev, edata.data); + return dev->ethtool_ops->set_rx_csum(dev, data); } static int __ethtool_set_tso(struct net_device *dev, u32 data) @@ -1765,15 +1771,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SPAUSEPARAM: rc = ethtool_set_pauseparam(dev, useraddr); break; - case ETHTOOL_GRXCSUM: - rc = ethtool_get_value(dev, useraddr, ethcmd, - (dev->ethtool_ops->get_rx_csum ? - dev->ethtool_ops->get_rx_csum : - ethtool_op_get_rx_csum)); - break; - case ETHTOOL_SRXCSUM: - rc = ethtool_set_rx_csum(dev, useraddr); - break; case ETHTOOL_TEST: rc = ethtool_self_test(dev, useraddr); break; @@ -1846,6 +1843,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) rc = ethtool_set_features(dev, useraddr); break; case ETHTOOL_GTXCSUM: + case ETHTOOL_GRXCSUM: case ETHTOOL_GSG: case ETHTOOL_GTSO: case ETHTOOL_GUFO: @@ -1854,6 +1852,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) rc = ethtool_get_one_feature(dev, useraddr, ethcmd); break; case ETHTOOL_STXCSUM: + case ETHTOOL_SRXCSUM: case ETHTOOL_SSG: case ETHTOOL_STSO: case ETHTOOL_SUFO: -- cgit v1.2.3-70-g09d2 From 3c7bd1a14071b99d6535b710bc998ae5d3abbb66 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 16 Feb 2011 14:08:44 -0800 Subject: net: Add initial_ref arg to dst_alloc(). This allows avoiding multiple writes to the initial __refcnt. The most simplest cases of wanting an initial reference of "1" in ipv4 and ipv6 have been converted, the rest have been left along and kept at the existing "0". Signed-off-by: David S. Miller --- include/net/dst.h | 2 +- net/core/dst.c | 4 ++-- net/decnet/dn_route.c | 4 ++-- net/ipv4/route.c | 7 ++----- net/ipv6/route.c | 5 ++--- net/xfrm/xfrm_policy.c | 2 +- 6 files changed, 10 insertions(+), 14 deletions(-) (limited to 'net/core') diff --git a/include/net/dst.h b/include/net/dst.h index e01855de21e..23b564d3e11 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -352,7 +352,7 @@ static inline struct dst_entry *skb_dst_pop(struct sk_buff *skb) } extern int dst_discard(struct sk_buff *skb); -extern void * dst_alloc(struct dst_ops * ops); +extern void *dst_alloc(struct dst_ops * ops, int initial_ref); extern void __dst_free(struct dst_entry * dst); extern struct dst_entry *dst_destroy(struct dst_entry * dst); diff --git a/net/core/dst.c b/net/core/dst.c index c1674fde827..91104d35de7 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -166,7 +166,7 @@ EXPORT_SYMBOL(dst_discard); const u32 dst_default_metrics[RTAX_MAX]; -void *dst_alloc(struct dst_ops *ops) +void *dst_alloc(struct dst_ops *ops, int initial_ref) { struct dst_entry *dst; @@ -177,7 +177,7 @@ void *dst_alloc(struct dst_ops *ops) dst = kmem_cache_zalloc(ops->kmem_cachep, GFP_ATOMIC); if (!dst) return NULL; - atomic_set(&dst->__refcnt, 0); + atomic_set(&dst->__refcnt, initial_ref); dst->ops = ops; dst->lastuse = jiffies; dst->path = dst; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 42c9c62d341..06c054d5ccb 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1122,7 +1122,7 @@ make_route: if (dev_out->flags & IFF_LOOPBACK) flags |= RTCF_LOCAL; - rt = dst_alloc(&dn_dst_ops); + rt = dst_alloc(&dn_dst_ops, 0); if (rt == NULL) goto e_nobufs; @@ -1383,7 +1383,7 @@ static int dn_route_input_slow(struct sk_buff *skb) } make_route: - rt = dst_alloc(&dn_dst_ops); + rt = dst_alloc(&dn_dst_ops, 0); if (rt == NULL) goto e_nobufs; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 79a28718102..9841543c468 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1818,12 +1818,10 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) static struct rtable *rt_dst_alloc(bool nopolicy, bool noxfrm) { - struct rtable *rt = dst_alloc(&ipv4_dst_ops); + struct rtable *rt = dst_alloc(&ipv4_dst_ops, 1); if (rt) { rt->dst.obsolete = -1; - atomic_set(&rt->dst.__refcnt, 1); - rt->dst.flags = DST_HOST | (nopolicy ? DST_NOPOLICY : 0) | (noxfrm ? DST_NOXFRM : 0); @@ -2679,12 +2677,11 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi { struct rtable *ort = *rp; struct rtable *rt = (struct rtable *) - dst_alloc(&ipv4_dst_blackhole_ops); + dst_alloc(&ipv4_dst_blackhole_ops, 1); if (rt) { struct dst_entry *new = &rt->dst; - atomic_set(&new->__refcnt, 1); new->__use = 1; new->input = dst_discard; new->output = dst_discard; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ad8556e6fd4..7946b53692d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -221,7 +221,7 @@ static struct rt6_info ip6_blk_hole_entry_template = { /* allocate dst with ip6_dst_ops */ static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops) { - return (struct rt6_info *)dst_alloc(ops); + return (struct rt6_info *)dst_alloc(ops, 0); } static void ip6_dst_destroy(struct dst_entry *dst) @@ -873,13 +873,12 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl { struct rt6_info *ort = (struct rt6_info *) *dstp; struct rt6_info *rt = (struct rt6_info *) - dst_alloc(&ip6_dst_blackhole_ops); + dst_alloc(&ip6_dst_blackhole_ops, 1); struct dst_entry *new = NULL; if (rt) { new = &rt->dst; - atomic_set(&new->__refcnt, 1); new->__use = 1; new->input = dst_discard; new->output = dst_discard; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 8b3ef404c79..3f1257add4f 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1340,7 +1340,7 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) default: BUG(); } - xdst = dst_alloc(dst_ops) ?: ERR_PTR(-ENOBUFS); + xdst = dst_alloc(dst_ops, 0) ?: ERR_PTR(-ENOBUFS); xfrm_policy_put_afinfo(afinfo); xdst->flo.ops = &xfrm_bundle_fc_ops; -- cgit v1.2.3-70-g09d2 From dee9f4bceb5fd9dbfcc1567148fccdbf16d6a38a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 22 Feb 2011 18:44:31 -0800 Subject: net: Make flow cache paths use a const struct flowi. Signed-off-by: David S. Miller --- include/net/dst.h | 10 ++++++---- include/net/flow.h | 4 ++-- net/core/flow.c | 14 +++++++------- net/xfrm/xfrm_policy.c | 13 ++++++++----- 4 files changed, 23 insertions(+), 18 deletions(-) (limited to 'net/core') diff --git a/include/net/dst.h b/include/net/dst.h index 23b564d3e11..4fedffd7c56 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -428,20 +428,22 @@ enum { struct flowi; #ifndef CONFIG_XFRM static inline int xfrm_lookup(struct net *net, struct dst_entry **dst_p, - struct flowi *fl, struct sock *sk, int flags) + const struct flowi *fl, struct sock *sk, + int flags) { return 0; } static inline int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, - struct flowi *fl, struct sock *sk, int flags) + const struct flowi *fl, struct sock *sk, + int flags) { return 0; } #else extern int xfrm_lookup(struct net *net, struct dst_entry **dst_p, - struct flowi *fl, struct sock *sk, int flags); + const struct flowi *fl, struct sock *sk, int flags); extern int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, - struct flowi *fl, struct sock *sk, int flags); + const struct flowi *fl, struct sock *sk, int flags); #endif #endif diff --git a/include/net/flow.h b/include/net/flow.h index f4270d4b22c..f2080e65276 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -102,11 +102,11 @@ struct flow_cache_ops { }; typedef struct flow_cache_object *(*flow_resolve_t)( - struct net *net, struct flowi *key, u16 family, + struct net *net, const struct flowi *key, u16 family, u8 dir, struct flow_cache_object *oldobj, void *ctx); extern struct flow_cache_object *flow_cache_lookup( - struct net *net, struct flowi *key, u16 family, + struct net *net, const struct flowi *key, u16 family, u8 dir, flow_resolve_t resolver, void *ctx); extern void flow_cache_flush(void); diff --git a/net/core/flow.c b/net/core/flow.c index 127c8a7ffd6..990703b8863 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -172,9 +172,9 @@ static void flow_new_hash_rnd(struct flow_cache *fc, static u32 flow_hash_code(struct flow_cache *fc, struct flow_cache_percpu *fcp, - struct flowi *key) + const struct flowi *key) { - u32 *k = (u32 *) key; + const u32 *k = (const u32 *) key; return jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd) & (flow_cache_hash_size(fc) - 1); @@ -186,17 +186,17 @@ typedef unsigned long flow_compare_t; * important assumptions that we can here, such as alignment and * constant size. */ -static int flow_key_compare(struct flowi *key1, struct flowi *key2) +static int flow_key_compare(const struct flowi *key1, const struct flowi *key2) { - flow_compare_t *k1, *k1_lim, *k2; + const flow_compare_t *k1, *k1_lim, *k2; const int n_elem = sizeof(struct flowi) / sizeof(flow_compare_t); BUILD_BUG_ON(sizeof(struct flowi) % sizeof(flow_compare_t)); - k1 = (flow_compare_t *) key1; + k1 = (const flow_compare_t *) key1; k1_lim = k1 + n_elem; - k2 = (flow_compare_t *) key2; + k2 = (const flow_compare_t *) key2; do { if (*k1++ != *k2++) @@ -207,7 +207,7 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2) } struct flow_cache_object * -flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, +flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, flow_resolve_t resolver, void *ctx) { struct flow_cache *fc = &flow_cache_global; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ef899a8e33c..28c865adf60 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -954,7 +954,7 @@ __xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir } static struct flow_cache_object * -xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, +xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct flow_cache_object *old_obj, void *ctx) { struct xfrm_policy *pol; @@ -990,7 +990,8 @@ static inline int policy_to_flow_dir(int dir) } } -static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl) +static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, + const struct flowi *fl) { struct xfrm_policy *pol; @@ -1629,7 +1630,7 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, } static struct flow_cache_object * -xfrm_bundle_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir, +xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct flow_cache_object *oldflo, void *ctx) { struct dst_entry *dst_orig = (struct dst_entry *)ctx; @@ -1733,7 +1734,8 @@ error: * At the moment we eat a raw IP route. Mostly to speed up lookups * on interfaces with disabled IPsec. */ -int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl, +int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, + const struct flowi *fl, struct sock *sk, int flags) { struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; @@ -1889,7 +1891,8 @@ dropdst: } EXPORT_SYMBOL(__xfrm_lookup); -int xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl, +int xfrm_lookup(struct net *net, struct dst_entry **dst_p, + const struct flowi *fl, struct sock *sk, int flags) { int err = __xfrm_lookup(net, dst_p, fl, sk, flags); -- cgit v1.2.3-70-g09d2 From 8e9b59b219e520cfc2f80af471c6b0e67ad9dd75 Mon Sep 17 00:00:00 2001 From: Michał Mirosław Date: Tue, 22 Feb 2011 16:52:28 +0000 Subject: Fix "(unregistered net_device): Features changed" message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix netdev_update_features() messages on register time by moving the call further in register_netdevice(). When netdev->reg_state != NETREG_REGISTERED, netdev_name() returns "(unregistered netdevice)" even if the dev's name is already filled. Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 578415c1ef7..77e5edb724f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5483,8 +5483,6 @@ int register_netdevice(struct net_device *dev) if (!(dev->wanted_features & NETIF_F_SG)) dev->wanted_features &= ~NETIF_F_GSO; - netdev_update_features(dev); - /* Enable GRO and NETIF_F_HIGHDMA for vlans by default, * vlan_dev_init() will do the dev->features check, so these features * are enabled only if supported by underlying device. @@ -5501,6 +5499,8 @@ int register_netdevice(struct net_device *dev) goto err_uninit; dev->reg_state = NETREG_REGISTERED; + netdev_update_features(dev); + /* * Default initial state at registry is that the * device is present. -- cgit v1.2.3-70-g09d2 From 14d1232f490c1c696582909fb3b69e67a8d38a34 Mon Sep 17 00:00:00 2001 From: Michał Mirosław Date: Tue, 22 Feb 2011 16:52:28 +0000 Subject: net: avoid initial "Features changed" message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid "Features changed" message and ndo_set_features call on device registration caused by automatic enabling of GSO and GRO. Driver should have enabled hardware offloads it set in features, so the ndo_set_features() is not needed at registration time. Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- net/core/dev.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 77e5edb724f..69a3c0817d6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5476,12 +5476,14 @@ int register_netdevice(struct net_device *dev) * software offloads (GSO and GRO). */ dev->hw_features |= NETIF_F_SOFT_FEATURES; - dev->wanted_features = (dev->features & dev->hw_features) - | NETIF_F_SOFT_FEATURES; + dev->features |= NETIF_F_SOFT_FEATURES; + dev->wanted_features = dev->features & dev->hw_features; /* Avoid warning from netdev_fix_features() for GSO without SG */ - if (!(dev->wanted_features & NETIF_F_SG)) + if (!(dev->wanted_features & NETIF_F_SG)) { dev->wanted_features &= ~NETIF_F_GSO; + dev->features &= ~NETIF_F_GSO; + } /* Enable GRO and NETIF_F_HIGHDMA for vlans by default, * vlan_dev_init() will do the dev->features check, so these features -- cgit v1.2.3-70-g09d2 From 4e4db200541d49404ff39ac482efee072dd72144 Mon Sep 17 00:00:00 2001 From: Michał Mirosław Date: Tue, 22 Feb 2011 16:52:28 +0000 Subject: net: Fix ETHTOOL_GFEATURES compatibility MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement getting rx checksum state for not updated drivers. Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- net/core/ethtool.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'net/core') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 66cdc76770c..69a3edc182f 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -168,6 +168,18 @@ EXPORT_SYMBOL(ethtool_ntuple_flush); #define ETHTOOL_DEV_FEATURE_WORDS 1 +static void ethtool_get_features_compat(struct net_device *dev, + struct ethtool_get_features_block *features) +{ + if (!dev->ethtool_ops) + return; + + /* getting RX checksum */ + if (dev->ethtool_ops->get_rx_csum) + if (dev->ethtool_ops->get_rx_csum(dev)) + features[0].active |= NETIF_F_RXCSUM; +} + static int ethtool_get_features(struct net_device *dev, void __user *useraddr) { struct ethtool_gfeatures cmd = { @@ -185,6 +197,8 @@ static int ethtool_get_features(struct net_device *dev, void __user *useraddr) u32 __user *sizeaddr; u32 copy_size; + ethtool_get_features_compat(dev, features); + sizeaddr = useraddr + offsetof(struct ethtool_gfeatures, size); if (get_user(copy_size, sizeaddr)) return -EFAULT; -- cgit v1.2.3-70-g09d2 From 39fc0ce5710c53bad14aaba1a789eec810c556f9 Mon Sep 17 00:00:00 2001 From: Michał Mirosław Date: Tue, 22 Feb 2011 16:52:29 +0000 Subject: net: Implement SFEATURES compatibility for not updated drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use discrete setting ops for not updated drivers. This will not make them conform to full G/SFEATURES semantics, though. Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- include/linux/ethtool.h | 5 ++++ net/core/ethtool.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) (limited to 'net/core') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 54d776c2c1b..aac3e2eeb4f 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -591,6 +591,9 @@ struct ethtool_sfeatures { * Probably there are other device-specific constraints on some features * in the set. When %ETHTOOL_F_UNSUPPORTED is set, .valid is considered * here as though ignored bits were cleared. + * %ETHTOOL_F_COMPAT - some or all changes requested were made by calling + * compatibility functions. Requested offload state cannot be properly + * managed by kernel. * * Meaning of bits in the masks are obtained by %ETHTOOL_GSSET_INFO (number of * bits in the arrays - always multiple of 32) and %ETHTOOL_GSTRINGS commands @@ -600,10 +603,12 @@ struct ethtool_sfeatures { enum ethtool_sfeatures_retval_bits { ETHTOOL_F_UNSUPPORTED__BIT, ETHTOOL_F_WISH__BIT, + ETHTOOL_F_COMPAT__BIT, }; #define ETHTOOL_F_UNSUPPORTED (1 << ETHTOOL_F_UNSUPPORTED__BIT) #define ETHTOOL_F_WISH (1 << ETHTOOL_F_WISH__BIT) +#define ETHTOOL_F_COMPAT (1 << ETHTOOL_F_COMPAT__BIT) #ifdef __KERNEL__ diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 69a3edc182f..c1a71bb738d 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -178,6 +178,64 @@ static void ethtool_get_features_compat(struct net_device *dev, if (dev->ethtool_ops->get_rx_csum) if (dev->ethtool_ops->get_rx_csum(dev)) features[0].active |= NETIF_F_RXCSUM; + + /* mark legacy-changeable features */ + if (dev->ethtool_ops->set_sg) + features[0].available |= NETIF_F_SG; + if (dev->ethtool_ops->set_tx_csum) + features[0].available |= NETIF_F_ALL_CSUM; + if (dev->ethtool_ops->set_tso) + features[0].available |= NETIF_F_ALL_TSO; + if (dev->ethtool_ops->set_rx_csum) + features[0].available |= NETIF_F_RXCSUM; + if (dev->ethtool_ops->set_flags) + features[0].available |= flags_dup_features; +} + +static int ethtool_set_feature_compat(struct net_device *dev, + int (*legacy_set)(struct net_device *, u32), + struct ethtool_set_features_block *features, u32 mask) +{ + u32 do_set; + + if (!legacy_set) + return 0; + + if (!(features[0].valid & mask)) + return 0; + + features[0].valid &= ~mask; + + do_set = !!(features[0].requested & mask); + + if (legacy_set(dev, do_set) < 0) + netdev_info(dev, + "Legacy feature change (%s) failed for 0x%08x\n", + do_set ? "set" : "clear", mask); + + return 1; +} + +static int ethtool_set_features_compat(struct net_device *dev, + struct ethtool_set_features_block *features) +{ + int compat; + + if (!dev->ethtool_ops) + return 0; + + compat = ethtool_set_feature_compat(dev, dev->ethtool_ops->set_sg, + features, NETIF_F_SG); + compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_tx_csum, + features, NETIF_F_ALL_CSUM); + compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_tso, + features, NETIF_F_ALL_TSO); + compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_rx_csum, + features, NETIF_F_RXCSUM); + compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_flags, + features, flags_dup_features); + + return compat; } static int ethtool_get_features(struct net_device *dev, void __user *useraddr) @@ -234,6 +292,9 @@ static int ethtool_set_features(struct net_device *dev, void __user *useraddr) if (features[0].valid & ~NETIF_F_ETHTOOL_BITS) return -EINVAL; + if (ethtool_set_features_compat(dev, features)) + ret |= ETHTOOL_F_COMPAT; + if (features[0].valid & ~dev->hw_features) { features[0].valid &= dev->hw_features; ret |= ETHTOOL_F_UNSUPPORTED; -- cgit v1.2.3-70-g09d2 From 080e4130b1fb6a02e75149a1cccc8192e734713d Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Thu, 17 Feb 2011 23:43:33 +0000 Subject: netpoll: remove IFF_IN_NETPOLL flag V4: rebase to net-next-2.6 This patch removes the flag IFF_IN_NETPOLL, we don't need it any more since we have netpoll_tx_running() now. Signed-off-by: WANG Cong Acked-by: Neil Horman Cc: Herbert Xu Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 6 ++---- drivers/net/bonding/bonding.h | 2 +- include/linux/if.h | 9 ++++----- net/core/netpoll.c | 2 -- 4 files changed, 7 insertions(+), 12 deletions(-) (limited to 'net/core') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 2ed662464ca..c75126ddc64 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -423,11 +423,9 @@ int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, { skb->dev = slave_dev; skb->priority = 1; - if (unlikely(netpoll_tx_running(slave_dev))) { - slave_dev->priv_flags |= IFF_IN_NETPOLL; + if (unlikely(netpoll_tx_running(slave_dev))) bond_netpoll_send_skb(bond_get_slave_by_dev(bond, slave_dev), skb); - slave_dev->priv_flags &= ~IFF_IN_NETPOLL; - } else + else dev_queue_xmit(skb); return 0; diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h index 0a3e00b220b..a401b8df84f 100644 --- a/drivers/net/bonding/bonding.h +++ b/drivers/net/bonding/bonding.h @@ -133,7 +133,7 @@ static inline void unblock_netpoll_tx(void) static inline int is_netpoll_tx_blocked(struct net_device *dev) { - if (unlikely(dev->priv_flags & IFF_IN_NETPOLL)) + if (unlikely(netpoll_tx_running(dev))) return atomic_read(&netpoll_block_tx); return 0; } diff --git a/include/linux/if.h b/include/linux/if.h index 12395992774..3bc63e6a02f 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -71,11 +71,10 @@ * release skb->dst */ #define IFF_DONT_BRIDGE 0x800 /* disallow bridging this ether dev */ -#define IFF_IN_NETPOLL 0x1000 /* whether we are processing netpoll */ -#define IFF_DISABLE_NETPOLL 0x2000 /* disable netpoll at run-time */ -#define IFF_MACVLAN_PORT 0x4000 /* device used as macvlan port */ -#define IFF_BRIDGE_PORT 0x8000 /* device used as bridge port */ -#define IFF_OVS_DATAPATH 0x10000 /* device used as Open vSwitch +#define IFF_DISABLE_NETPOLL 0x1000 /* disable netpoll at run-time */ +#define IFF_MACVLAN_PORT 0x2000 /* device used as macvlan port */ +#define IFF_BRIDGE_PORT 0x4000 /* device used as bridge port */ +#define IFF_OVS_DATAPATH 0x8000 /* device used as Open vSwitch * datapath port */ #define IF_GET_IFACE 0x0001 /* for querying only */ diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 02dc2cbcbe8..f68e6949294 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -313,9 +313,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, tries > 0; --tries) { if (__netif_tx_trylock(txq)) { if (!netif_tx_queue_stopped(txq)) { - dev->priv_flags |= IFF_IN_NETPOLL; status = ops->ndo_start_xmit(skb, dev); - dev->priv_flags &= ~IFF_IN_NETPOLL; if (status == NETDEV_TX_OK) txq_trans_update(txq); } -- cgit v1.2.3-70-g09d2 From 5a698af53fb85b92d6462939a2c75ec4c7233bb9 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Thu, 17 Feb 2011 23:43:34 +0000 Subject: bond: service netpoll arp queue on master device Neil pointed out that we can't send ARP reply on behalf of slaves, we need to move the arp queue to their bond device. Signed-off-by: WANG Cong Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/core/netpoll.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'net/core') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index f68e6949294..06be2431753 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -193,6 +193,17 @@ void netpoll_poll_dev(struct net_device *dev) poll_napi(dev); + if (dev->priv_flags & IFF_SLAVE) { + if (dev->npinfo) { + struct net_device *bond_dev = dev->master; + struct sk_buff *skb; + while ((skb = skb_dequeue(&dev->npinfo->arp_tx))) { + skb->dev = bond_dev; + skb_queue_tail(&bond_dev->npinfo->arp_tx, skb); + } + } + } + service_arp_queue(dev->npinfo); zap_completion_queue(); -- cgit v1.2.3-70-g09d2 From 63d8ea7f93e1fb9d1aa9509ab3e1a71199245c80 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 28 Feb 2011 10:48:59 -0800 Subject: net: Forgot to commit net/core/dev.c part of Jiri's ->rx_handler patch. Signed-off-by: David S. Miller --- net/core/dev.c | 119 +++++++++++++++------------------------------------------ 1 file changed, 31 insertions(+), 88 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 69a3c0817d6..30440e7b296 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3096,63 +3096,31 @@ void netdev_rx_handler_unregister(struct net_device *dev) } EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); -static inline void skb_bond_set_mac_by_master(struct sk_buff *skb, - struct net_device *master) +static void vlan_on_bond_hook(struct sk_buff *skb) { - if (skb->pkt_type == PACKET_HOST) { - u16 *dest = (u16 *) eth_hdr(skb)->h_dest; + /* + * Make sure ARP frames received on VLAN interfaces stacked on + * bonding interfaces still make their way to any base bonding + * device that may have registered for a specific ptype. + */ + if (skb->dev->priv_flags & IFF_802_1Q_VLAN && + vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING && + skb->protocol == htons(ETH_P_ARP)) { + struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); - memcpy(dest, master->dev_addr, ETH_ALEN); + if (!skb2) + return; + skb2->dev = vlan_dev_real_dev(skb->dev); + netif_rx(skb2); } } -/* On bonding slaves other than the currently active slave, suppress - * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and - * ARP on active-backup slaves with arp_validate enabled. - */ -static int __skb_bond_should_drop(struct sk_buff *skb, - struct net_device *master) -{ - struct net_device *dev = skb->dev; - - if (master->priv_flags & IFF_MASTER_ARPMON) - dev->last_rx = jiffies; - - if ((master->priv_flags & IFF_MASTER_ALB) && - (master->priv_flags & IFF_BRIDGE_PORT)) { - /* Do address unmangle. The local destination address - * will be always the one master has. Provides the right - * functionality in a bridge. - */ - skb_bond_set_mac_by_master(skb, master); - } - - if (dev->priv_flags & IFF_SLAVE_INACTIVE) { - if ((dev->priv_flags & IFF_SLAVE_NEEDARP) && - skb->protocol == __cpu_to_be16(ETH_P_ARP)) - return 0; - - if (master->priv_flags & IFF_MASTER_ALB) { - if (skb->pkt_type != PACKET_BROADCAST && - skb->pkt_type != PACKET_MULTICAST) - return 0; - } - if (master->priv_flags & IFF_MASTER_8023AD && - skb->protocol == __cpu_to_be16(ETH_P_SLOW)) - return 0; - - return 1; - } - return 0; -} - static int __netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; rx_handler_func_t *rx_handler; struct net_device *orig_dev; - struct net_device *null_or_orig; - struct net_device *orig_or_bond; + struct net_device *null_or_dev; int ret = NET_RX_DROP; __be16 type; @@ -3167,32 +3135,8 @@ static int __netif_receive_skb(struct sk_buff *skb) if (!skb->skb_iif) skb->skb_iif = skb->dev->ifindex; - - /* - * bonding note: skbs received on inactive slaves should only - * be delivered to pkt handlers that are exact matches. Also - * the deliver_no_wcard flag will be set. If packet handlers - * are sensitive to duplicate packets these skbs will need to - * be dropped at the handler. - */ - null_or_orig = NULL; orig_dev = skb->dev; - if (skb->deliver_no_wcard) - null_or_orig = orig_dev; - else if (netif_is_bond_slave(orig_dev)) { - struct net_device *bond_master = ACCESS_ONCE(orig_dev->master); - - if (likely(bond_master)) { - if (__skb_bond_should_drop(skb, bond_master)) { - skb->deliver_no_wcard = 1; - /* deliver only exact match */ - null_or_orig = orig_dev; - } else - skb->dev = bond_master; - } - } - __this_cpu_inc(softnet_data.processed); skb_reset_network_header(skb); skb_reset_transport_header(skb); skb->mac_len = skb->network_header - skb->mac_header; @@ -3201,6 +3145,10 @@ static int __netif_receive_skb(struct sk_buff *skb) rcu_read_lock(); +another_round: + + __this_cpu_inc(softnet_data.processed); + #ifdef CONFIG_NET_CLS_ACT if (skb->tc_verd & TC_NCLS) { skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); @@ -3209,8 +3157,7 @@ static int __netif_receive_skb(struct sk_buff *skb) #endif list_for_each_entry_rcu(ptype, &ptype_all, list) { - if (ptype->dev == null_or_orig || ptype->dev == skb->dev || - ptype->dev == orig_dev) { + if (!ptype->dev || ptype->dev == skb->dev) { if (pt_prev) ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; @@ -3224,16 +3171,20 @@ static int __netif_receive_skb(struct sk_buff *skb) ncls: #endif - /* Handle special case of bridge or macvlan */ rx_handler = rcu_dereference(skb->dev->rx_handler); if (rx_handler) { + struct net_device *prev_dev; + if (pt_prev) { ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = NULL; } + prev_dev = skb->dev; skb = rx_handler(skb); if (!skb) goto out; + if (skb->dev != prev_dev) + goto another_round; } if (vlan_tx_tag_present(skb)) { @@ -3248,24 +3199,16 @@ ncls: goto out; } - /* - * Make sure frames received on VLAN interfaces stacked on - * bonding interfaces still make their way to any base bonding - * device that may have registered for a specific ptype. The - * handler may have to adjust skb->dev and orig_dev. - */ - orig_or_bond = orig_dev; - if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) && - (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) { - orig_or_bond = vlan_dev_real_dev(skb->dev); - } + vlan_on_bond_hook(skb); + + /* deliver only exact match when indicated */ + null_or_dev = skb->deliver_no_wcard ? skb->dev : NULL; type = skb->protocol; list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { - if (ptype->type == type && (ptype->dev == null_or_orig || - ptype->dev == skb->dev || ptype->dev == orig_dev || - ptype->dev == orig_or_bond)) { + if (ptype->type == type && + (ptype->dev == null_or_dev || ptype->dev == skb->dev)) { if (pt_prev) ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; -- cgit v1.2.3-70-g09d2 From 5a2ef92023506d4e9cd13617b5a46b4d0f1b6747 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 1 Mar 2011 02:36:47 +0000 Subject: inet: Remove unused sk_sndmsg_* from UFO UFO doesn't really use the sk_sndmsg_* parameters so touching them is pointless. It can't use them anyway since the whole point of UFO is to use the original pages without copying. Signed-off-by: Herbert Xu Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/skbuff.c | 3 --- net/ipv4/ip_output.c | 1 - net/ipv6/ip6_output.c | 1 - 3 files changed, 5 deletions(-) (limited to 'net/core') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 14cf560b4a3..1eb526a848f 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2434,8 +2434,6 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, return -ENOMEM; /* initialize the next frag */ - sk->sk_sndmsg_page = page; - sk->sk_sndmsg_off = 0; skb_fill_page_desc(skb, frg_cnt, page, 0, 0); skb->truesize += PAGE_SIZE; atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc); @@ -2455,7 +2453,6 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, return -EFAULT; /* copy was successful so update the size parameters */ - sk->sk_sndmsg_off += copy; frag->size += copy; skb->len += copy; skb->data_len += copy; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 04c7b3ba6b3..d3a4540cd30 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -767,7 +767,6 @@ static inline int ip_ufo_append_data(struct sock *sk, skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; - sk->sk_sndmsg_off = 0; /* specify the length of each IP datagram fragment */ skb_shinfo(skb)->gso_size = mtu - fragheaderlen; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 065b3f7614f..5c618f20523 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1061,7 +1061,6 @@ static inline int ip6_ufo_append_data(struct sock *sk, skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; - sk->sk_sndmsg_off = 0; } err = skb_append_datato_frags(sk,skb, getfrag, from, -- cgit v1.2.3-70-g09d2 From e3f48d37cf87a4a94e9f05fddc39b0e5f2307c27 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 28 Feb 2011 20:26:31 +0000 Subject: net: allow handlers to be processed for orig_dev MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was there before, I forgot about this. Allows deliveries to ptype_base handlers registered for orig_dev. I presume this is still desired. Signed-off-by: Jiri Pirko Reviewed-by: Nicolas de Pesloüan Signed-off-by: David S. Miller --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 30440e7b296..9f66de9c057 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3208,7 +3208,8 @@ ncls: list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { if (ptype->type == type && - (ptype->dev == null_or_dev || ptype->dev == skb->dev)) { + (ptype->dev == null_or_dev || ptype->dev == skb->dev || + ptype->dev == orig_dev)) { if (pt_prev) ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; -- cgit v1.2.3-70-g09d2 From 1d28f42c1bd4bb2363d88df74d0128b4da135b4a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 12 Mar 2011 00:29:39 -0500 Subject: net: Put flowi_* prefix on AF independent members of struct flowi I intend to turn struct flowi into a union of AF specific flowi structs. There will be a common structure that each variant includes first, much like struct sock_common. This is the first step to move in that direction. Signed-off-by: David S. Miller --- drivers/infiniband/core/addr.c | 2 +- drivers/net/cnic.c | 2 +- include/net/dn_route.h | 4 +- include/net/flow.h | 22 ++++----- include/net/route.h | 36 +++++++-------- include/net/xfrm.h | 4 +- net/core/fib_rules.c | 6 +-- net/dccp/ipv4.c | 17 +++---- net/dccp/ipv6.c | 20 ++++----- net/decnet/af_decnet.c | 4 +- net/decnet/dn_fib.c | 4 +- net/decnet/dn_nsp_out.c | 4 +- net/decnet/dn_route.c | 96 +++++++++++++++++++++------------------- net/ipv4/fib_frontend.c | 12 ++--- net/ipv4/fib_semantics.c | 2 +- net/ipv4/fib_trie.c | 2 +- net/ipv4/icmp.c | 12 ++--- net/ipv4/inet_connection_sock.c | 22 ++++----- net/ipv4/ip_output.c | 18 ++++---- net/ipv4/ipmr.c | 12 ++--- net/ipv4/netfilter.c | 6 +-- net/ipv4/raw.c | 10 ++--- net/ipv4/route.c | 72 +++++++++++++++--------------- net/ipv4/syncookies.c | 20 +++++---- net/ipv4/udp.c | 21 ++++----- net/ipv4/xfrm4_policy.c | 10 ++--- net/ipv4/xfrm4_state.c | 4 +- net/ipv6/af_inet6.c | 6 +-- net/ipv6/datagram.c | 20 ++++----- net/ipv6/icmp.c | 24 +++++----- net/ipv6/inet6_connection_sock.c | 12 ++--- net/ipv6/ip6_flowlabel.c | 2 +- net/ipv6/ip6_output.c | 10 ++--- net/ipv6/ip6_tunnel.c | 8 ++-- net/ipv6/ip6mr.c | 22 ++++----- net/ipv6/ipv6_sockglue.c | 4 +- net/ipv6/mip6.c | 6 +-- net/ipv6/netfilter.c | 4 +- net/ipv6/netfilter/ip6t_REJECT.c | 2 +- net/ipv6/raw.c | 20 ++++----- net/ipv6/route.c | 20 ++++----- net/ipv6/syncookies.c | 6 +-- net/ipv6/tcp_ipv6.c | 22 ++++----- net/ipv6/udp.c | 20 ++++----- net/ipv6/xfrm6_policy.c | 10 ++--- net/ipv6/xfrm6_state.c | 4 +- net/netfilter/ipvs/ip_vs_ctl.c | 2 +- net/netfilter/ipvs/ip_vs_xmit.c | 2 +- net/netfilter/xt_TEE.c | 4 +- net/sctp/ipv6.c | 8 ++-- net/sctp/protocol.c | 4 +- net/xfrm/xfrm_policy.c | 18 ++++---- net/xfrm/xfrm_state.c | 2 +- security/security.c | 4 +- security/selinux/hooks.c | 2 +- security/selinux/xfrm.c | 4 +- 56 files changed, 365 insertions(+), 351 deletions(-) (limited to 'net/core') diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 1742f72fbd5..3c2b309ab89 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -239,7 +239,7 @@ static int addr6_resolve(struct sockaddr_in6 *src_in, memset(&fl, 0, sizeof fl); ipv6_addr_copy(&fl.fl6_dst, &dst_in->sin6_addr); ipv6_addr_copy(&fl.fl6_src, &src_in->sin6_addr); - fl.oif = addr->bound_dev_if; + fl.flowi_oif = addr->bound_dev_if; dst = ip6_route_output(&init_net, NULL, &fl); if ((ret = dst->error)) diff --git a/drivers/net/cnic.c b/drivers/net/cnic.c index 65832951fe0..c8922f69705 100644 --- a/drivers/net/cnic.c +++ b/drivers/net/cnic.c @@ -3429,7 +3429,7 @@ static int cnic_get_v6_route(struct sockaddr_in6 *dst_addr, memset(&fl, 0, sizeof(fl)); ipv6_addr_copy(&fl.fl6_dst, &dst_addr->sin6_addr); if (ipv6_addr_type(&fl.fl6_dst) & IPV6_ADDR_LINKLOCAL) - fl.oif = dst_addr->sin6_scope_id; + fl.flowi_oif = dst_addr->sin6_scope_id; *dst = ip6_route_output(&init_net, NULL, &fl); if (*dst) diff --git a/include/net/dn_route.h b/include/net/dn_route.h index 9b185df265f..1f59005e497 100644 --- a/include/net/dn_route.h +++ b/include/net/dn_route.h @@ -82,12 +82,12 @@ struct dn_route { static inline bool dn_is_input_route(struct dn_route *rt) { - return rt->fl.iif != 0; + return rt->fl.flowi_iif != 0; } static inline bool dn_is_output_route(struct dn_route *rt) { - return rt->fl.iif == 0; + return rt->fl.flowi_iif == 0; } extern void dn_route_init(void); diff --git a/include/net/flow.h b/include/net/flow.h index a661fd6f76b..8c4dbd07849 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -11,17 +11,17 @@ #include struct flowi { - int oif; - int iif; - __u32 mark; - __u8 tos; - __u8 scope; - __u8 proto; - __u8 flags; + int flowi_oif; + int flowi_iif; + __u32 flowi_mark; + __u8 flowi_tos; + __u8 flowi_scope; + __u8 flowi_proto; + __u8 flowi_flags; #define FLOWI_FLAG_ANYSRC 0x01 #define FLOWI_FLAG_PRECOW_METRICS 0x02 #define FLOWI_FLAG_CAN_SLEEP 0x04 - __u32 secid; + __u32 flowi_secid; union { struct { @@ -49,8 +49,8 @@ struct flowi { #define fl6_flowlabel nl_u.ip6_u.flowlabel #define fl4_dst nl_u.ip4_u.daddr #define fl4_src nl_u.ip4_u.saddr -#define fl4_tos tos -#define fl4_scope scope +#define fl4_tos flowi_tos +#define fl4_scope flowi_scope union { struct { @@ -116,7 +116,7 @@ extern atomic_t flow_cache_genid; static inline int flow_cache_uli_match(const struct flowi *fl1, const struct flowi *fl2) { - return (fl1->proto == fl2->proto && + return (fl1->flowi_proto == fl2->flowi_proto && !memcmp(&fl1->uli_u, &fl2->uli_u, sizeof(fl1->uli_u))); } diff --git a/include/net/route.h b/include/net/route.h index f140f4130fe..3d814f84abd 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -136,7 +136,7 @@ static inline struct rtable *ip_route_output(struct net *net, __be32 daddr, __be32 saddr, u8 tos, int oif) { struct flowi fl = { - .oif = oif, + .flowi_oif = oif, .fl4_dst = daddr, .fl4_src = saddr, .fl4_tos = tos, @@ -150,13 +150,13 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct sock __u8 proto, __u8 tos, int oif) { struct flowi fl = { - .oif = oif, - .flags = sk ? inet_sk_flowi_flags(sk) : 0, - .mark = sk ? sk->sk_mark : 0, + .flowi_oif = oif, + .flowi_flags = sk ? inet_sk_flowi_flags(sk) : 0, + .flowi_mark = sk ? sk->sk_mark : 0, .fl4_dst = daddr, .fl4_src = saddr, .fl4_tos = tos, - .proto = proto, + .flowi_proto = proto, .fl_ip_dport = dport, .fl_ip_sport = sport, }; @@ -170,11 +170,11 @@ static inline struct rtable *ip_route_output_gre(struct net *net, __be32 gre_key, __u8 tos, int oif) { struct flowi fl = { - .oif = oif, + .flowi_oif = oif, .fl4_dst = daddr, .fl4_src = saddr, .fl4_tos = tos, - .proto = IPPROTO_GRE, + .flowi_proto = IPPROTO_GRE, .fl_gre_key = gre_key, }; return ip_route_output_key(net, &fl); @@ -228,23 +228,23 @@ static inline struct rtable *ip_route_connect(__be32 dst, __be32 src, u32 tos, __be16 sport, __be16 dport, struct sock *sk, bool can_sleep) { - struct flowi fl = { .oif = oif, - .mark = sk->sk_mark, + struct flowi fl = { .flowi_oif = oif, + .flowi_mark = sk->sk_mark, .fl4_dst = dst, .fl4_src = src, .fl4_tos = tos, - .proto = protocol, + .flowi_proto = protocol, .fl_ip_sport = sport, .fl_ip_dport = dport }; struct net *net = sock_net(sk); struct rtable *rt; if (inet_sk(sk)->transparent) - fl.flags |= FLOWI_FLAG_ANYSRC; + fl.flowi_flags |= FLOWI_FLAG_ANYSRC; if (protocol == IPPROTO_TCP) - fl.flags |= FLOWI_FLAG_PRECOW_METRICS; + fl.flowi_flags |= FLOWI_FLAG_PRECOW_METRICS; if (can_sleep) - fl.flags |= FLOWI_FLAG_CAN_SLEEP; + fl.flowi_flags |= FLOWI_FLAG_CAN_SLEEP; if (!dst || !src) { rt = __ip_route_output_key(net, &fl); @@ -264,19 +264,19 @@ static inline struct rtable *ip_route_newports(struct rtable *rt, __be16 dport, struct sock *sk) { if (sport != orig_sport || dport != orig_dport) { - struct flowi fl = { .oif = rt->rt_oif, - .mark = rt->rt_mark, + struct flowi fl = { .flowi_oif = rt->rt_oif, + .flowi_mark = rt->rt_mark, .fl4_dst = rt->rt_key_dst, .fl4_src = rt->rt_key_src, .fl4_tos = rt->rt_tos, - .proto = protocol, + .flowi_proto = protocol, .fl_ip_sport = sport, .fl_ip_dport = dport }; if (inet_sk(sk)->transparent) - fl.flags |= FLOWI_FLAG_ANYSRC; + fl.flowi_flags |= FLOWI_FLAG_ANYSRC; if (protocol == IPPROTO_TCP) - fl.flags |= FLOWI_FLAG_PRECOW_METRICS; + fl.flowi_flags |= FLOWI_FLAG_PRECOW_METRICS; ip_rt_put(rt); security_sk_classify_flow(sk, &fl); return ip_route_output_flow(sock_net(sk), &fl, sk); diff --git a/include/net/xfrm.h b/include/net/xfrm.h index d5dcf397463..d5a12d10a0d 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -803,7 +803,7 @@ static __inline__ __be16 xfrm_flowi_sport(const struct flowi *fl) { __be16 port; - switch(fl->proto) { + switch(fl->flowi_proto) { case IPPROTO_TCP: case IPPROTO_UDP: case IPPROTO_UDPLITE: @@ -830,7 +830,7 @@ static __inline__ __be16 xfrm_flowi_dport(const struct flowi *fl) { __be16 port; - switch(fl->proto) { + switch(fl->flowi_proto) { case IPPROTO_TCP: case IPPROTO_UDP: case IPPROTO_UDPLITE: diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index a20e5d3bbfa..8248ebb5891 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -181,13 +181,13 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, { int ret = 0; - if (rule->iifindex && (rule->iifindex != fl->iif)) + if (rule->iifindex && (rule->iifindex != fl->flowi_iif)) goto out; - if (rule->oifindex && (rule->oifindex != fl->oif)) + if (rule->oifindex && (rule->oifindex != fl->flowi_oif)) goto out; - if ((rule->mark ^ fl->mark) & rule->mark_mask) + if ((rule->mark ^ fl->flowi_mark) & rule->mark_mask) goto out; ret = ops->match(rule, fl, flags); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 7882377bc62..09a09911c5e 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -465,14 +465,15 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, struct sk_buff *skb) { struct rtable *rt; - struct flowi fl = { .oif = skb_rtable(skb)->rt_iif, - .fl4_dst = ip_hdr(skb)->saddr, - .fl4_src = ip_hdr(skb)->daddr, - .fl4_tos = RT_CONN_FLAGS(sk), - .proto = sk->sk_protocol, - .fl_ip_sport = dccp_hdr(skb)->dccph_dport, - .fl_ip_dport = dccp_hdr(skb)->dccph_sport - }; + struct flowi fl = { + .flowi_oif = skb_rtable(skb)->rt_iif, + .fl4_dst = ip_hdr(skb)->saddr, + .fl4_src = ip_hdr(skb)->daddr, + .fl4_tos = RT_CONN_FLAGS(sk), + .flowi_proto = sk->sk_protocol, + .fl_ip_sport = dccp_hdr(skb)->dccph_dport, + .fl_ip_dport = dccp_hdr(skb)->dccph_sport, + }; security_skb_classify_flow(skb, &fl); rt = ip_route_output_flow(net, &fl, sk); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 5efc57f5e60..5209ee7a3dc 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -154,10 +154,10 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, for now. */ memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_DCCP; + fl.flowi_proto = IPPROTO_DCCP; ipv6_addr_copy(&fl.fl6_dst, &np->daddr); ipv6_addr_copy(&fl.fl6_src, &np->saddr); - fl.oif = sk->sk_bound_dev_if; + fl.flowi_oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet->inet_dport; fl.fl_ip_sport = inet->inet_sport; security_sk_classify_flow(sk, &fl); @@ -248,11 +248,11 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, struct dst_entry *dst; memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_DCCP; + fl.flowi_proto = IPPROTO_DCCP; ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); fl.fl6_flowlabel = 0; - fl.oif = ireq6->iif; + fl.flowi_oif = ireq6->iif; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_rsk(req)->loc_port; security_req_classify_flow(req, &fl); @@ -321,8 +321,8 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) ipv6_addr_copy(&fl.fl6_dst, &rxip6h->saddr); ipv6_addr_copy(&fl.fl6_src, &rxip6h->daddr); - fl.proto = IPPROTO_DCCP; - fl.oif = inet6_iif(rxskb); + fl.flowi_proto = IPPROTO_DCCP; + fl.flowi_oif = inet6_iif(rxskb); fl.fl_ip_dport = dccp_hdr(skb)->dccph_dport; fl.fl_ip_sport = dccp_hdr(skb)->dccph_sport; security_skb_classify_flow(rxskb, &fl); @@ -530,11 +530,11 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, struct flowi fl; memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_DCCP; + fl.flowi_proto = IPPROTO_DCCP; ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); final_p = fl6_update_dst(&fl, opt, &final); ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); - fl.oif = sk->sk_bound_dev_if; + fl.flowi_oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_rsk(req)->loc_port; security_sk_classify_flow(sk, &fl); @@ -953,10 +953,10 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (!ipv6_addr_any(&np->rcv_saddr)) saddr = &np->rcv_saddr; - fl.proto = IPPROTO_DCCP; + fl.flowi_proto = IPPROTO_DCCP; ipv6_addr_copy(&fl.fl6_dst, &np->daddr); ipv6_addr_copy(&fl.fl6_src, saddr ? saddr : &np->saddr); - fl.oif = sk->sk_bound_dev_if; + fl.flowi_oif = sk->sk_bound_dev_if; fl.fl_ip_dport = usin->sin6_port; fl.fl_ip_sport = inet->inet_sport; security_sk_classify_flow(sk, &fl); diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 2af15b15d1f..aafd15a0157 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -948,11 +948,11 @@ static int __dn_connect(struct sock *sk, struct sockaddr_dn *addr, int addrlen, err = -EHOSTUNREACH; memset(&fl, 0, sizeof(fl)); - fl.oif = sk->sk_bound_dev_if; + fl.flowi_oif = sk->sk_bound_dev_if; fl.fld_dst = dn_saddr2dn(&scp->peer); fl.fld_src = dn_saddr2dn(&scp->addr); dn_sk_ports_copy(&fl, scp); - fl.proto = DNPROTO_NSP; + fl.flowi_proto = DNPROTO_NSP; if (dn_route_output_sock(&sk->sk_dst_cache, &fl, sk, flags) < 0) goto out; sk->sk_route_caps = sk->sk_dst_cache->dev->features; diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index 0ef0a81bcd7..4dfffa0b67a 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -223,7 +223,7 @@ static int dn_fib_check_nh(const struct rtmsg *r, struct dn_fib_info *fi, struct memset(&fl, 0, sizeof(fl)); fl.fld_dst = nh->nh_gw; - fl.oif = nh->nh_oif; + fl.flowi_oif = nh->nh_oif; fl.fld_scope = r->rtm_scope + 1; if (fl.fld_scope < RT_SCOPE_LINK) @@ -424,7 +424,7 @@ int dn_fib_semantic_match(int type, struct dn_fib_info *fi, const struct flowi * for_nexthops(fi) { if (nh->nh_flags & RTNH_F_DEAD) continue; - if (!fl->oif || fl->oif == nh->nh_oif) + if (!fl->flowi_oif || fl->flowi_oif == nh->nh_oif) break; } if (nhsel < fi->fib_nhs) { diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c index 2ef115277be..b3d66742a01 100644 --- a/net/decnet/dn_nsp_out.c +++ b/net/decnet/dn_nsp_out.c @@ -92,11 +92,11 @@ try_again: } memset(&fl, 0, sizeof(fl)); - fl.oif = sk->sk_bound_dev_if; + fl.flowi_oif = sk->sk_bound_dev_if; fl.fld_src = dn_saddr2dn(&scp->addr); fl.fld_dst = dn_saddr2dn(&scp->peer); dn_sk_ports_copy(&fl, scp); - fl.proto = DNPROTO_NSP; + fl.flowi_proto = DNPROTO_NSP; if (dn_route_output_sock(&sk->sk_dst_cache, &fl, sk, 0) == 0) { dst = sk_dst_get(sk); sk->sk_route_caps = dst->dev->features; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 484fdbf92bd..d74d34b93f8 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -286,10 +286,10 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) { return ((fl1->fld_dst ^ fl2->fld_dst) | (fl1->fld_src ^ fl2->fld_src) | - (fl1->mark ^ fl2->mark) | + (fl1->flowi_mark ^ fl2->flowi_mark) | (fl1->fld_scope ^ fl2->fld_scope) | - (fl1->oif ^ fl2->oif) | - (fl1->iif ^ fl2->iif)) == 0; + (fl1->flowi_oif ^ fl2->flowi_oif) | + (fl1->flowi_iif ^ fl2->flowi_iif)) == 0; } static int dn_insert_route(struct dn_route *rt, unsigned hash, struct dn_route **rp) @@ -905,12 +905,14 @@ static inline __le16 dn_fib_rules_map_destination(__le16 daddr, struct dn_fib_re static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *oldflp, int try_hard) { - struct flowi fl = { .fld_dst = oldflp->fld_dst, - .fld_src = oldflp->fld_src, - .fld_scope = RT_SCOPE_UNIVERSE, - .mark = oldflp->mark, - .iif = init_net.loopback_dev->ifindex, - .oif = oldflp->oif }; + struct flowi fl = { + .fld_dst = oldflp->fld_dst, + .fld_src = oldflp->fld_src, + .fld_scope = RT_SCOPE_UNIVERSE, + .flowi_mark = oldflp->flowi_mark, + .flowi_iif = init_net.loopback_dev->ifindex, + .flowi_oif = oldflp->flowi_oif, + }; struct dn_route *rt = NULL; struct net_device *dev_out = NULL, *dev; struct neighbour *neigh = NULL; @@ -926,11 +928,11 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old "dn_route_output_slow: dst=%04x src=%04x mark=%d" " iif=%d oif=%d\n", le16_to_cpu(oldflp->fld_dst), le16_to_cpu(oldflp->fld_src), - oldflp->mark, init_net.loopback_dev->ifindex, oldflp->oif); + oldflp->flowi_mark, init_net.loopback_dev->ifindex, oldflp->flowi_oif); /* If we have an output interface, verify its a DECnet device */ - if (oldflp->oif) { - dev_out = dev_get_by_index(&init_net, oldflp->oif); + if (oldflp->flowi_oif) { + dev_out = dev_get_by_index(&init_net, oldflp->flowi_oif); err = -ENODEV; if (dev_out && dev_out->dn_ptr == NULL) { dev_put(dev_out); @@ -988,7 +990,7 @@ source_ok: if (!fl.fld_dst) goto out; } - fl.oif = init_net.loopback_dev->ifindex; + fl.flowi_oif = init_net.loopback_dev->ifindex; res.type = RTN_LOCAL; goto make_route; } @@ -998,7 +1000,7 @@ source_ok: "dn_route_output_slow: initial checks complete." " dst=%o4x src=%04x oif=%d try_hard=%d\n", le16_to_cpu(fl.fld_dst), le16_to_cpu(fl.fld_src), - fl.oif, try_hard); + fl.flowi_oif, try_hard); /* * N.B. If the kernel is compiled without router support then @@ -1023,8 +1025,8 @@ source_ok: if (!try_hard) { neigh = neigh_lookup_nodev(&dn_neigh_table, &init_net, &fl.fld_dst); if (neigh) { - if ((oldflp->oif && - (neigh->dev->ifindex != oldflp->oif)) || + if ((oldflp->flowi_oif && + (neigh->dev->ifindex != oldflp->flowi_oif)) || (oldflp->fld_src && (!dn_dev_islocal(neigh->dev, oldflp->fld_src)))) { @@ -1078,7 +1080,7 @@ select_source: if (fl.fld_src == 0 && res.type != RTN_LOCAL) goto e_addr; } - fl.oif = dev_out->ifindex; + fl.flowi_oif = dev_out->ifindex; goto make_route; } free_res = 1; @@ -1093,14 +1095,14 @@ select_source: dev_put(dev_out); dev_out = init_net.loopback_dev; dev_hold(dev_out); - fl.oif = dev_out->ifindex; + fl.flowi_oif = dev_out->ifindex; if (res.fi) dn_fib_info_put(res.fi); res.fi = NULL; goto make_route; } - if (res.fi->fib_nhs > 1 && fl.oif == 0) + if (res.fi->fib_nhs > 1 && fl.flowi_oif == 0) dn_fib_select_multipath(&fl, &res); /* @@ -1115,7 +1117,7 @@ select_source: dev_put(dev_out); dev_out = DN_FIB_RES_DEV(res); dev_hold(dev_out); - fl.oif = dev_out->ifindex; + fl.flowi_oif = dev_out->ifindex; gateway = DN_FIB_RES_GW(res); make_route: @@ -1131,9 +1133,9 @@ make_route: rt->fl.fld_src = oldflp->fld_src; rt->fl.fld_dst = oldflp->fld_dst; - rt->fl.oif = oldflp->oif; - rt->fl.iif = 0; - rt->fl.mark = oldflp->mark; + rt->fl.flowi_oif = oldflp->flowi_oif; + rt->fl.flowi_iif = 0; + rt->fl.flowi_mark = oldflp->flowi_mark; rt->rt_saddr = fl.fld_src; rt->rt_daddr = fl.fld_dst; @@ -1201,9 +1203,9 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowi *fl rt = rcu_dereference_bh(rt->dst.dn_next)) { if ((flp->fld_dst == rt->fl.fld_dst) && (flp->fld_src == rt->fl.fld_src) && - (flp->mark == rt->fl.mark) && + (flp->flowi_mark == rt->fl.flowi_mark) && dn_is_output_route(rt) && - (rt->fl.oif == flp->oif)) { + (rt->fl.flowi_oif == flp->flowi_oif)) { dst_use(&rt->dst, jiffies); rcu_read_unlock_bh(); *pprt = &rt->dst; @@ -1221,7 +1223,7 @@ static int dn_route_output_key(struct dst_entry **pprt, struct flowi *flp, int f int err; err = __dn_route_output_key(pprt, flp, flags); - if (err == 0 && flp->proto) { + if (err == 0 && flp->flowi_proto) { *pprt = xfrm_lookup(&init_net, *pprt, flp, NULL, 0); if (IS_ERR(*pprt)) { err = PTR_ERR(*pprt); @@ -1236,9 +1238,9 @@ int dn_route_output_sock(struct dst_entry **pprt, struct flowi *fl, struct sock int err; err = __dn_route_output_key(pprt, fl, flags & MSG_TRYHARD); - if (err == 0 && fl->proto) { + if (err == 0 && fl->flowi_proto) { if (!(flags & MSG_DONTWAIT)) - fl->flags |= FLOWI_FLAG_CAN_SLEEP; + fl->flowi_flags |= FLOWI_FLAG_CAN_SLEEP; *pprt = xfrm_lookup(&init_net, *pprt, fl, sk, 0); if (IS_ERR(*pprt)) { err = PTR_ERR(*pprt); @@ -1260,11 +1262,13 @@ static int dn_route_input_slow(struct sk_buff *skb) int flags = 0; __le16 gateway = 0; __le16 local_src = 0; - struct flowi fl = { .fld_dst = cb->dst, - .fld_src = cb->src, - .fld_scope = RT_SCOPE_UNIVERSE, - .mark = skb->mark, - .iif = skb->dev->ifindex }; + struct flowi fl = { + .fld_dst = cb->dst, + .fld_src = cb->src, + .fld_scope = RT_SCOPE_UNIVERSE, + .flowi_mark = skb->mark, + .flowi_iif = skb->dev->ifindex, + }; struct dn_fib_res res = { .fi = NULL, .type = RTN_UNREACHABLE }; int err = -EINVAL; int free_res = 0; @@ -1343,7 +1347,7 @@ static int dn_route_input_slow(struct sk_buff *skb) if (dn_db->parms.forwarding == 0) goto e_inval; - if (res.fi->fib_nhs > 1 && fl.oif == 0) + if (res.fi->fib_nhs > 1 && fl.flowi_oif == 0) dn_fib_select_multipath(&fl, &res); /* @@ -1408,9 +1412,9 @@ make_route: rt->fl.fld_src = cb->src; rt->fl.fld_dst = cb->dst; - rt->fl.oif = 0; - rt->fl.iif = in_dev->ifindex; - rt->fl.mark = fl.mark; + rt->fl.flowi_oif = 0; + rt->fl.flowi_iif = in_dev->ifindex; + rt->fl.flowi_mark = fl.flowi_mark; rt->dst.flags = DST_HOST; rt->dst.neighbour = neigh; @@ -1482,9 +1486,9 @@ static int dn_route_input(struct sk_buff *skb) rt = rcu_dereference(rt->dst.dn_next)) { if ((rt->fl.fld_src == cb->src) && (rt->fl.fld_dst == cb->dst) && - (rt->fl.oif == 0) && - (rt->fl.mark == skb->mark) && - (rt->fl.iif == cb->iif)) { + (rt->fl.flowi_oif == 0) && + (rt->fl.flowi_mark == skb->mark) && + (rt->fl.flowi_iif == cb->iif)) { dst_use(&rt->dst, jiffies); rcu_read_unlock(); skb_dst_set(skb, (struct dst_entry *)rt); @@ -1541,7 +1545,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, rt->dst.error) < 0) goto rtattr_failure; if (dn_is_input_route(rt)) - RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif); + RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.flowi_iif); nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; @@ -1570,7 +1574,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void return -EINVAL; memset(&fl, 0, sizeof(fl)); - fl.proto = DNPROTO_NSP; + fl.flowi_proto = DNPROTO_NSP; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (skb == NULL) @@ -1583,11 +1587,11 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void if (rta[RTA_DST-1]) memcpy(&fl.fld_dst, RTA_DATA(rta[RTA_DST-1]), 2); if (rta[RTA_IIF-1]) - memcpy(&fl.iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int)); + memcpy(&fl.flowi_iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int)); - if (fl.iif) { + if (fl.flowi_iif) { struct net_device *dev; - if ((dev = dev_get_by_index(&init_net, fl.iif)) == NULL) { + if ((dev = dev_get_by_index(&init_net, fl.flowi_iif)) == NULL) { kfree_skb(skb); return -ENODEV; } @@ -1611,7 +1615,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void int oif = 0; if (rta[RTA_OIF - 1]) memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int)); - fl.oif = oif; + fl.flowi_oif = oif; err = dn_route_output_key((struct dst_entry **)&rt, &fl, 0); } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index fe10bcd0f30..76105284a81 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -200,9 +200,9 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, int ret; struct net *net; - fl.oif = 0; - fl.iif = oif; - fl.mark = mark; + fl.flowi_oif = 0; + fl.flowi_iif = oif; + fl.flowi_mark = mark; fl.fl4_dst = src; fl.fl4_src = dst; fl.fl4_tos = tos; @@ -215,7 +215,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, rpf = IN_DEV_RPFILTER(in_dev); accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); if (mark && !IN_DEV_SRC_VMARK(in_dev)) - fl.mark = 0; + fl.flowi_mark = 0; } if (in_dev == NULL) @@ -253,7 +253,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, goto last_resort; if (rpf == 1) goto e_rpf; - fl.oif = dev->ifindex; + fl.flowi_oif = dev->ifindex; ret = 0; if (fib_lookup(net, &fl, &res) == 0) { @@ -797,7 +797,7 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) struct fib_result res; struct flowi fl = { - .mark = frn->fl_mark, + .flowi_mark = frn->fl_mark, .fl4_dst = frn->fl_addr, .fl4_tos = frn->fl_tos, .fl4_scope = frn->fl_scope, diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index b5d523b911e..79179ade529 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -563,7 +563,7 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, struct flowi fl = { .fl4_dst = nh->nh_gw, .fl4_scope = cfg->fc_scope + 1, - .oif = nh->nh_oif, + .flowi_oif = nh->nh_oif, }; /* It is not necessary, but requires a bit of thinking */ diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index a4109a54477..d5ff80ef001 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1379,7 +1379,7 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, if (nh->nh_flags & RTNH_F_DEAD) continue; - if (flp->oif && flp->oif != nh->nh_oif) + if (flp->flowi_oif && flp->flowi_oif != nh->nh_oif) continue; #ifdef CONFIG_IP_FIB_TRIE_STATS diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 1771ce66254..3fde7f23c70 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -353,10 +353,12 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) daddr = icmp_param->replyopts.faddr; } { - struct flowi fl = { .fl4_dst= daddr, - .fl4_src = rt->rt_spec_dst, - .fl4_tos = RT_TOS(ip_hdr(skb)->tos), - .proto = IPPROTO_ICMP }; + struct flowi fl = { + .fl4_dst = daddr, + .fl4_src = rt->rt_spec_dst, + .fl4_tos = RT_TOS(ip_hdr(skb)->tos), + .flowi_proto = IPPROTO_ICMP, + }; security_skb_classify_flow(skb, &fl); rt = ip_route_output_key(net, &fl); if (IS_ERR(rt)) @@ -381,7 +383,7 @@ static struct rtable *icmp_route_lookup(struct net *net, struct sk_buff *skb_in, param->replyopts.faddr : iph->saddr), .fl4_src = saddr, .fl4_tos = RT_TOS(tos), - .proto = IPPROTO_ICMP, + .flowi_proto = IPPROTO_ICMP, .fl_icmp_type = type, .fl_icmp_code = code, }; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index e4e301a61c5..97081702dff 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -356,16 +356,18 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, struct rtable *rt; const struct inet_request_sock *ireq = inet_rsk(req); struct ip_options *opt = inet_rsk(req)->opt; - struct flowi fl = { .oif = sk->sk_bound_dev_if, - .mark = sk->sk_mark, - .fl4_dst = ((opt && opt->srr) ? - opt->faddr : ireq->rmt_addr), - .fl4_src = ireq->loc_addr, - .fl4_tos = RT_CONN_FLAGS(sk), - .proto = sk->sk_protocol, - .flags = inet_sk_flowi_flags(sk), - .fl_ip_sport = inet_sk(sk)->inet_sport, - .fl_ip_dport = ireq->rmt_port }; + struct flowi fl = { + .flowi_oif = sk->sk_bound_dev_if, + .flowi_mark = sk->sk_mark, + .fl4_dst = ((opt && opt->srr) ? + opt->faddr : ireq->rmt_addr), + .fl4_src = ireq->loc_addr, + .fl4_tos = RT_CONN_FLAGS(sk), + .flowi_proto = sk->sk_protocol, + .flowi_flags = inet_sk_flowi_flags(sk), + .fl_ip_sport = inet_sk(sk)->inet_sport, + .fl_ip_dport = ireq->rmt_port, + }; struct net *net = sock_net(sk); security_req_classify_flow(req, &fl); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 916152dbdce..e35ca40df03 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1474,14 +1474,16 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar } { - struct flowi fl = { .oif = arg->bound_dev_if, - .fl4_dst = daddr, - .fl4_src = rt->rt_spec_dst, - .fl4_tos = RT_TOS(ip_hdr(skb)->tos), - .fl_ip_sport = tcp_hdr(skb)->dest, - .fl_ip_dport = tcp_hdr(skb)->source, - .proto = sk->sk_protocol, - .flags = ip_reply_arg_flowi_flags(arg) }; + struct flowi fl = { + .flowi_oif = arg->bound_dev_if, + .fl4_dst = daddr, + .fl4_src = rt->rt_spec_dst, + .fl4_tos = RT_TOS(ip_hdr(skb)->tos), + .fl_ip_sport = tcp_hdr(skb)->dest, + .fl_ip_dport = tcp_hdr(skb)->source, + .flowi_proto = sk->sk_protocol, + .flowi_flags = ip_reply_arg_flowi_flags(arg), + }; security_skb_classify_flow(skb, &fl); rt = ip_route_output_key(sock_net(sk), &fl); if (IS_ERR(rt)) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 594a3004367..3b72b0a26d7 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -436,9 +436,9 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) struct net *net = dev_net(dev); struct mr_table *mrt; struct flowi fl = { - .oif = dev->ifindex, - .iif = skb->skb_iif, - .mark = skb->mark, + .flowi_oif = dev->ifindex, + .flowi_iif = skb->skb_iif, + .flowi_mark = skb->mark, }; int err; @@ -1793,9 +1793,9 @@ static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct rtable *rt) .fl4_dst = rt->rt_key_dst, .fl4_src = rt->rt_key_src, .fl4_tos = rt->rt_tos, - .oif = rt->rt_oif, - .iif = rt->rt_iif, - .mark = rt->rt_mark, + .flowi_oif = rt->rt_oif, + .flowi_iif = rt->rt_iif, + .flowi_mark = rt->rt_mark, }; struct mr_table *mrt; int err; diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 67bf709180d..6f40ba511c6 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -35,9 +35,9 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) if (type == RTN_LOCAL) fl.fl4_src = iph->saddr; fl.fl4_tos = RT_TOS(iph->tos); - fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; - fl.mark = skb->mark; - fl.flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; + fl.flowi_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; + fl.flowi_mark = skb->mark; + fl.flowi_flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; rt = ip_route_output_key(net, &fl); if (IS_ERR(rt)) return -1; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 467d570d087..b42b7cd56c0 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -418,7 +418,7 @@ static int raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg) if (!iov) continue; - switch (fl->proto) { + switch (fl->flowi_proto) { case IPPROTO_ICMP: /* check if one-byte field is readable or not. */ if (iov->iov_base && iov->iov_len < 1) @@ -548,14 +548,14 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, } { - struct flowi fl = { .oif = ipc.oif, - .mark = sk->sk_mark, + struct flowi fl = { .flowi_oif = ipc.oif, + .flowi_mark = sk->sk_mark, .fl4_dst = daddr, .fl4_src = saddr, .fl4_tos = tos, - .proto = inet->hdrincl ? IPPROTO_RAW : + .flowi_proto = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, - .flags = FLOWI_FLAG_CAN_SLEEP, + .flowi_flags = FLOWI_FLAG_CAN_SLEEP, }; if (!inet->hdrincl) { err = raw_probe_proto_opt(&fl, msg); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 9c17e32d562..c9aa4f9effe 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1701,9 +1701,9 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) .fl4_dst = rt->rt_key_dst, .fl4_src = rt->rt_key_src, .fl4_tos = rt->rt_tos, - .oif = rt->rt_oif, - .iif = rt->rt_iif, - .mark = rt->rt_mark, + .flowi_oif = rt->rt_oif, + .flowi_iif = rt->rt_iif, + .flowi_mark = rt->rt_mark, }; rcu_read_lock(); @@ -1766,7 +1766,7 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi *oldflp, /* If a peer entry exists for this destination, we must hook * it up in order to get at cached metrics. */ - if (oldflp && (oldflp->flags & FLOWI_FLAG_PRECOW_METRICS)) + if (oldflp && (oldflp->flowi_flags & FLOWI_FLAG_PRECOW_METRICS)) create = 1; rt->peer = peer = inet_getpeer_v4(rt->rt_dst, create); @@ -2057,9 +2057,9 @@ static int ip_mkroute_input(struct sk_buff *skb, return err; /* put it into the cache */ - hash = rt_hash(daddr, saddr, fl->iif, + hash = rt_hash(daddr, saddr, fl->flowi_iif, rt_genid(dev_net(rth->dst.dev))); - rth = rt_intern_hash(hash, rth, skb, fl->iif); + rth = rt_intern_hash(hash, rth, skb, fl->flowi_iif); if (IS_ERR(rth)) return PTR_ERR(rth); return 0; @@ -2118,9 +2118,9 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, /* * Now we are ready to route packet. */ - fl.oif = 0; - fl.iif = dev->ifindex; - fl.mark = skb->mark; + fl.flowi_oif = 0; + fl.flowi_iif = dev->ifindex; + fl.flowi_mark = skb->mark; fl.fl4_dst = daddr; fl.fl4_src = saddr; fl.fl4_tos = tos; @@ -2205,8 +2205,8 @@ local_input: rth->rt_flags &= ~RTCF_LOCAL; } rth->rt_type = res.type; - hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net)); - rth = rt_intern_hash(hash, rth, skb, fl.iif); + hash = rt_hash(daddr, saddr, fl.flowi_iif, rt_genid(net)); + rth = rt_intern_hash(hash, rth, skb, fl.flowi_iif); err = 0; if (IS_ERR(rth)) err = PTR_ERR(rth); @@ -2369,7 +2369,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, } else if (type == RTN_MULTICAST) { flags |= RTCF_MULTICAST | RTCF_LOCAL; if (!ip_check_mc_rcu(in_dev, oldflp->fl4_dst, oldflp->fl4_src, - oldflp->proto)) + oldflp->flowi_proto)) flags &= ~RTCF_LOCAL; /* If multicast route do not exist use * default one, but do not gateway in this case. @@ -2387,8 +2387,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_key_dst = oldflp->fl4_dst; rth->rt_tos = tos; rth->rt_key_src = oldflp->fl4_src; - rth->rt_oif = oldflp->oif; - rth->rt_mark = oldflp->mark; + rth->rt_oif = oldflp->flowi_oif; + rth->rt_mark = oldflp->flowi_mark; rth->rt_dst = fl->fl4_dst; rth->rt_src = fl->fl4_src; rth->rt_iif = 0; @@ -2452,9 +2452,9 @@ static struct rtable *ip_route_output_slow(struct net *net, res.r = NULL; #endif - fl.oif = oldflp->oif; - fl.iif = net->loopback_dev->ifindex; - fl.mark = oldflp->mark; + fl.flowi_oif = oldflp->flowi_oif; + fl.flowi_iif = net->loopback_dev->ifindex; + fl.flowi_mark = oldflp->flowi_mark; fl.fl4_dst = oldflp->fl4_dst; fl.fl4_src = oldflp->fl4_src; fl.fl4_tos = tos & IPTOS_RT_MASK; @@ -2477,7 +2477,7 @@ static struct rtable *ip_route_output_slow(struct net *net, of another iface. --ANK */ - if (oldflp->oif == 0 && + if (oldflp->flowi_oif == 0 && (ipv4_is_multicast(oldflp->fl4_dst) || ipv4_is_lbcast(oldflp->fl4_dst))) { /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ @@ -2500,11 +2500,11 @@ static struct rtable *ip_route_output_slow(struct net *net, Luckily, this hack is good workaround. */ - fl.oif = dev_out->ifindex; + fl.flowi_oif = dev_out->ifindex; goto make_route; } - if (!(oldflp->flags & FLOWI_FLAG_ANYSRC)) { + if (!(oldflp->flowi_flags & FLOWI_FLAG_ANYSRC)) { /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ if (!__ip_dev_find(net, oldflp->fl4_src, false)) goto out; @@ -2512,8 +2512,8 @@ static struct rtable *ip_route_output_slow(struct net *net, } - if (oldflp->oif) { - dev_out = dev_get_by_index_rcu(net, oldflp->oif); + if (oldflp->flowi_oif) { + dev_out = dev_get_by_index_rcu(net, oldflp->flowi_oif); rth = ERR_PTR(-ENODEV); if (dev_out == NULL) goto out; @@ -2545,7 +2545,7 @@ static struct rtable *ip_route_output_slow(struct net *net, if (!fl.fl4_dst) fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK); dev_out = net->loopback_dev; - fl.oif = net->loopback_dev->ifindex; + fl.flowi_oif = net->loopback_dev->ifindex; res.type = RTN_LOCAL; flags |= RTCF_LOCAL; goto make_route; @@ -2553,7 +2553,7 @@ static struct rtable *ip_route_output_slow(struct net *net, if (fib_lookup(net, &fl, &res)) { res.fi = NULL; - if (oldflp->oif) { + if (oldflp->flowi_oif) { /* Apparently, routing tables are wrong. Assume, that the destination is on link. @@ -2590,25 +2590,25 @@ static struct rtable *ip_route_output_slow(struct net *net, fl.fl4_src = fl.fl4_dst; } dev_out = net->loopback_dev; - fl.oif = dev_out->ifindex; + fl.flowi_oif = dev_out->ifindex; res.fi = NULL; flags |= RTCF_LOCAL; goto make_route; } #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (res.fi->fib_nhs > 1 && fl.oif == 0) + if (res.fi->fib_nhs > 1 && fl.flowi_oif == 0) fib_select_multipath(&res); else #endif - if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif) + if (!res.prefixlen && res.type == RTN_UNICAST && !fl.flowi_oif) fib_select_default(&res); if (!fl.fl4_src) fl.fl4_src = FIB_RES_PREFSRC(res); dev_out = FIB_RES_DEV(res); - fl.oif = dev_out->ifindex; + fl.flowi_oif = dev_out->ifindex; make_route: @@ -2616,9 +2616,9 @@ make_route: if (!IS_ERR(rth)) { unsigned int hash; - hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif, + hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->flowi_oif, rt_genid(dev_net(dev_out))); - rth = rt_intern_hash(hash, rth, NULL, oldflp->oif); + rth = rt_intern_hash(hash, rth, NULL, oldflp->flowi_oif); } out: @@ -2634,7 +2634,7 @@ struct rtable *__ip_route_output_key(struct net *net, const struct flowi *flp) if (!rt_caching(net)) goto slow_output; - hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net)); + hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->flowi_oif, rt_genid(net)); rcu_read_lock_bh(); for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth; @@ -2642,8 +2642,8 @@ struct rtable *__ip_route_output_key(struct net *net, const struct flowi *flp) if (rth->rt_key_dst == flp->fl4_dst && rth->rt_key_src == flp->fl4_src && rt_is_output_route(rth) && - rth->rt_oif == flp->oif && - rth->rt_mark == flp->mark && + rth->rt_oif == flp->flowi_oif && + rth->rt_mark == flp->flowi_mark && !((rth->rt_tos ^ flp->fl4_tos) & (IPTOS_RT_MASK | RTO_ONLINK)) && net_eq(dev_net(rth->dst.dev), net) && @@ -2741,7 +2741,7 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi *flp, if (IS_ERR(rt)) return rt; - if (flp->proto) { + if (flp->flowi_proto) { if (!flp->fl4_src) flp->fl4_src = rt->rt_src; if (!flp->fl4_dst) @@ -2917,8 +2917,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void .fl4_dst = dst, .fl4_src = src, .fl4_tos = rtm->rtm_tos, - .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, - .mark = mark, + .flowi_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, + .flowi_mark = mark, }; rt = ip_route_output_key(net, &fl); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 0ad6ddf638a..98d47dc60c8 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -345,15 +345,17 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, * no easy way to do this. */ { - struct flowi fl = { .mark = sk->sk_mark, - .fl4_dst = ((opt && opt->srr) ? - opt->faddr : ireq->rmt_addr), - .fl4_src = ireq->loc_addr, - .fl4_tos = RT_CONN_FLAGS(sk), - .proto = IPPROTO_TCP, - .flags = inet_sk_flowi_flags(sk), - .fl_ip_sport = th->dest, - .fl_ip_dport = th->source }; + struct flowi fl = { + .flowi_mark = sk->sk_mark, + .fl4_dst = ((opt && opt->srr) ? + opt->faddr : ireq->rmt_addr), + .fl4_src = ireq->loc_addr, + .fl4_tos = RT_CONN_FLAGS(sk), + .flowi_proto = IPPROTO_TCP, + .flowi_flags = inet_sk_flowi_flags(sk), + .fl_ip_sport = th->dest, + .fl_ip_dport = th->source, + }; security_req_classify_flow(req, &fl); rt = ip_route_output_key(sock_net(sk), &fl); if (IS_ERR(rt)) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c9a73e5b26a..e10f62e6c07 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -908,16 +908,17 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, rt = (struct rtable *)sk_dst_check(sk, 0); if (rt == NULL) { - struct flowi fl = { .oif = ipc.oif, - .mark = sk->sk_mark, - .fl4_dst = faddr, - .fl4_src = saddr, - .fl4_tos = tos, - .proto = sk->sk_protocol, - .flags = (inet_sk_flowi_flags(sk) | - FLOWI_FLAG_CAN_SLEEP), - .fl_ip_sport = inet->inet_sport, - .fl_ip_dport = dport + struct flowi fl = { + .flowi_oif = ipc.oif, + .flowi_mark = sk->sk_mark, + .fl4_dst = faddr, + .fl4_src = saddr, + .fl4_tos = tos, + .flowi_proto = sk->sk_protocol, + .flowi_flags = (inet_sk_flowi_flags(sk) | + FLOWI_FLAG_CAN_SLEEP), + .fl_ip_sport = inet->inet_sport, + .fl_ip_dport = dport, }; struct net *net = sock_net(sk); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index c70c42e7e77..4294f121a74 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -73,9 +73,9 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, rt->rt_key_dst = fl->fl4_dst; rt->rt_key_src = fl->fl4_src; rt->rt_tos = fl->fl4_tos; - rt->rt_iif = fl->iif; - rt->rt_oif = fl->oif; - rt->rt_mark = fl->mark; + rt->rt_iif = fl->flowi_iif; + rt->rt_oif = fl->flowi_oif; + rt->rt_mark = fl->flowi_mark; xdst->u.dst.dev = dev; dev_hold(dev); @@ -104,7 +104,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) u8 *xprth = skb_network_header(skb) + iph->ihl * 4; memset(fl, 0, sizeof(struct flowi)); - fl->mark = skb->mark; + fl->flowi_mark = skb->mark; if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) { switch (iph->protocol) { @@ -173,7 +173,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) break; } } - fl->proto = iph->protocol; + fl->flowi_proto = iph->protocol; fl->fl4_dst = reverse ? iph->saddr : iph->daddr; fl->fl4_src = reverse ? iph->daddr : iph->saddr; fl->fl4_tos = iph->tos; diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 983eff24898..d2314348dd2 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -32,8 +32,8 @@ __xfrm4_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl) sel->family = AF_INET; sel->prefixlen_d = 32; sel->prefixlen_s = 32; - sel->proto = fl->proto; - sel->ifindex = fl->oif; + sel->proto = fl->flowi_proto; + sel->ifindex = fl->flowi_oif; } static void diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index a88b2e9d25f..35b0be0463f 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -655,12 +655,12 @@ int inet6_sk_rebuild_header(struct sock *sk) struct flowi fl; memset(&fl, 0, sizeof(fl)); - fl.proto = sk->sk_protocol; + fl.flowi_proto = sk->sk_protocol; ipv6_addr_copy(&fl.fl6_dst, &np->daddr); ipv6_addr_copy(&fl.fl6_src, &np->saddr); fl.fl6_flowlabel = np->flow_label; - fl.oif = sk->sk_bound_dev_if; - fl.mark = sk->sk_mark; + fl.flowi_oif = sk->sk_bound_dev_if; + fl.flowi_mark = sk->sk_mark; fl.fl_ip_dport = inet->inet_dport; fl.fl_ip_sport = inet->inet_sport; security_sk_classify_flow(sk, &fl); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index be3a781c008..6c24b26f67e 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -146,16 +146,16 @@ ipv4_connected: * destination cache for it. */ - fl.proto = sk->sk_protocol; + fl.flowi_proto = sk->sk_protocol; ipv6_addr_copy(&fl.fl6_dst, &np->daddr); ipv6_addr_copy(&fl.fl6_src, &np->saddr); - fl.oif = sk->sk_bound_dev_if; - fl.mark = sk->sk_mark; + fl.flowi_oif = sk->sk_bound_dev_if; + fl.flowi_mark = sk->sk_mark; fl.fl_ip_dport = inet->inet_dport; fl.fl_ip_sport = inet->inet_sport; - if (!fl.oif && (addr_type&IPV6_ADDR_MULTICAST)) - fl.oif = np->mcast_oif; + if (!fl.flowi_oif && (addr_type&IPV6_ADDR_MULTICAST)) + fl.flowi_oif = np->mcast_oif; security_sk_classify_flow(sk, &fl); @@ -299,7 +299,7 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi *fl, u32 mtu) mtu_info->ip6m_addr.sin6_family = AF_INET6; mtu_info->ip6m_addr.sin6_port = 0; mtu_info->ip6m_addr.sin6_flowinfo = 0; - mtu_info->ip6m_addr.sin6_scope_id = fl->oif; + mtu_info->ip6m_addr.sin6_scope_id = fl->flowi_oif; ipv6_addr_copy(&mtu_info->ip6m_addr.sin6_addr, &ipv6_hdr(skb)->daddr); __skb_pull(skb, skb_tail_pointer(skb) - skb->data); @@ -629,16 +629,16 @@ int datagram_send_ctl(struct net *net, src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg); if (src_info->ipi6_ifindex) { - if (fl->oif && src_info->ipi6_ifindex != fl->oif) + if (fl->flowi_oif && src_info->ipi6_ifindex != fl->flowi_oif) return -EINVAL; - fl->oif = src_info->ipi6_ifindex; + fl->flowi_oif = src_info->ipi6_ifindex; } addr_type = __ipv6_addr_type(&src_info->ipi6_addr); rcu_read_lock(); - if (fl->oif) { - dev = dev_get_by_index_rcu(net, fl->oif); + if (fl->flowi_oif) { + dev = dev_get_by_index_rcu(net, fl->flowi_oif); if (!dev) { rcu_read_unlock(); return -ENODEV; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 55665956b3a..9e123e08b9b 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -235,7 +235,7 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct sizeof(struct icmp6hdr), skb->csum); icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst, - len, fl->proto, + len, fl->flowi_proto, skb->csum); } else { __wsum tmp_csum = 0; @@ -248,7 +248,7 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct sizeof(struct icmp6hdr), tmp_csum); icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst, - len, fl->proto, + len, fl->flowi_proto, tmp_csum); } ip6_push_pending_frames(sk); @@ -443,11 +443,11 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) mip6_addr_swap(skb); memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_ICMPV6; + fl.flowi_proto = IPPROTO_ICMPV6; ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr); if (saddr) ipv6_addr_copy(&fl.fl6_src, saddr); - fl.oif = iif; + fl.flowi_oif = iif; fl.fl_icmp_type = type; fl.fl_icmp_code = code; security_skb_classify_flow(skb, &fl); @@ -465,8 +465,8 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) tmp_hdr.icmp6_cksum = 0; tmp_hdr.icmp6_pointer = htonl(info); - if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) - fl.oif = np->mcast_oif; + if (!fl.flowi_oif && ipv6_addr_is_multicast(&fl.fl6_dst)) + fl.flowi_oif = np->mcast_oif; dst = icmpv6_route_lookup(net, skb, sk, &fl); if (IS_ERR(dst)) @@ -539,11 +539,11 @@ static void icmpv6_echo_reply(struct sk_buff *skb) tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY; memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_ICMPV6; + fl.flowi_proto = IPPROTO_ICMPV6; ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr); if (saddr) ipv6_addr_copy(&fl.fl6_src, saddr); - fl.oif = skb->dev->ifindex; + fl.flowi_oif = skb->dev->ifindex; fl.fl_icmp_type = ICMPV6_ECHO_REPLY; security_skb_classify_flow(skb, &fl); @@ -552,8 +552,8 @@ static void icmpv6_echo_reply(struct sk_buff *skb) return; np = inet6_sk(sk); - if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) - fl.oif = np->mcast_oif; + if (!fl.flowi_oif && ipv6_addr_is_multicast(&fl.fl6_dst)) + fl.flowi_oif = np->mcast_oif; err = ip6_dst_lookup(sk, &dst, &fl); if (err) @@ -793,10 +793,10 @@ void icmpv6_flow_init(struct sock *sk, struct flowi *fl, memset(fl, 0, sizeof(*fl)); ipv6_addr_copy(&fl->fl6_src, saddr); ipv6_addr_copy(&fl->fl6_dst, daddr); - fl->proto = IPPROTO_ICMPV6; + fl->flowi_proto = IPPROTO_ICMPV6; fl->fl_icmp_type = type; fl->fl_icmp_code = 0; - fl->oif = oif; + fl->flowi_oif = oif; security_sk_classify_flow(sk, fl); } diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index d687e139733..673f9bf2895 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -64,12 +64,12 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk, struct flowi fl; memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_TCP; + fl.flowi_proto = IPPROTO_TCP; ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); final_p = fl6_update_dst(&fl, np->opt, &final); ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr); - fl.oif = sk->sk_bound_dev_if; - fl.mark = sk->sk_mark; + fl.flowi_oif = sk->sk_bound_dev_if; + fl.flowi_mark = sk->sk_mark; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_rsk(req)->loc_port; security_req_classify_flow(req, &fl); @@ -213,13 +213,13 @@ int inet6_csk_xmit(struct sk_buff *skb) struct in6_addr *final_p, final; memset(&fl, 0, sizeof(fl)); - fl.proto = sk->sk_protocol; + fl.flowi_proto = sk->sk_protocol; ipv6_addr_copy(&fl.fl6_dst, &np->daddr); ipv6_addr_copy(&fl.fl6_src, &np->saddr); fl.fl6_flowlabel = np->flow_label; IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel); - fl.oif = sk->sk_bound_dev_if; - fl.mark = sk->sk_mark; + fl.flowi_oif = sk->sk_bound_dev_if; + fl.flowi_mark = sk->sk_mark; fl.fl_ip_sport = inet->inet_sport; fl.fl_ip_dport = inet->inet_dport; security_sk_classify_flow(sk, &fl); diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 13654686aea..c8fa470b174 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -358,7 +358,7 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval, msg.msg_controllen = olen; msg.msg_control = (void*)(fl->opt+1); - flowi.oif = 0; + flowi.flowi_oif = 0; err = datagram_send_ctl(net, &msg, &flowi, fl->opt, &junk, &junk, &junk); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index adaffaf8455..3d0f2ac868a 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -182,7 +182,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, struct in6_addr *first_hop = &fl->fl6_dst; struct dst_entry *dst = skb_dst(skb); struct ipv6hdr *hdr; - u8 proto = fl->proto; + u8 proto = fl->flowi_proto; int seg_len = skb->len; int hlimit = -1; int tclass = 0; @@ -908,7 +908,7 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk, #ifdef CONFIG_IPV6_SUBTREES ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) || #endif - (fl->oif && fl->oif != dst->dev->ifindex)) { + (fl->flowi_oif && fl->flowi_oif != dst->dev->ifindex)) { dst_release(dst); dst = NULL; } @@ -1026,7 +1026,7 @@ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi *fl, if (final_dst) ipv6_addr_copy(&fl->fl6_dst, final_dst); if (can_sleep) - fl->flags |= FLOWI_FLAG_CAN_SLEEP; + fl->flowi_flags |= FLOWI_FLAG_CAN_SLEEP; return xfrm_lookup(sock_net(sk), dst, fl, sk, 0); } @@ -1062,7 +1062,7 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi *fl, if (final_dst) ipv6_addr_copy(&fl->fl6_dst, final_dst); if (can_sleep) - fl->flags |= FLOWI_FLAG_CAN_SLEEP; + fl->flowi_flags |= FLOWI_FLAG_CAN_SLEEP; return xfrm_lookup(sock_net(sk), dst, fl, sk, 0); } @@ -1517,7 +1517,7 @@ int ip6_push_pending_frames(struct sock *sk) struct ipv6_txoptions *opt = np->cork.opt; struct rt6_info *rt = (struct rt6_info *)inet->cork.dst; struct flowi *fl = &inet->cork.fl; - unsigned char proto = fl->proto; + unsigned char proto = fl->flowi_proto; int err = 0; if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index f199b848612..c3fc824c24d 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -963,7 +963,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, skb->transport_header = skb->network_header; - proto = fl->proto; + proto = fl->flowi_proto; if (encap_limit >= 0) { init_tel_txopt(&opt, encap_limit); ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL); @@ -1020,7 +1020,7 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) encap_limit = t->parms.encap_limit; memcpy(&fl, &t->fl, sizeof (fl)); - fl.proto = IPPROTO_IPIP; + fl.flowi_proto = IPPROTO_IPIP; dsfield = ipv4_get_dsfield(iph); @@ -1070,7 +1070,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) encap_limit = t->parms.encap_limit; memcpy(&fl, &t->fl, sizeof (fl)); - fl.proto = IPPROTO_IPV6; + fl.flowi_proto = IPPROTO_IPV6; dsfield = ipv6_get_dsfield(ipv6h); if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)) @@ -1149,7 +1149,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) /* Set up flowi template */ ipv6_addr_copy(&fl->fl6_src, &p->laddr); ipv6_addr_copy(&fl->fl6_dst, &p->raddr); - fl->oif = p->link; + fl->flowi_oif = p->link; fl->fl6_flowlabel = 0; if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS)) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 618f67ccda3..61a8be3ac4e 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -618,8 +618,8 @@ static int pim6_rcv(struct sk_buff *skb) struct net *net = dev_net(skb->dev); struct mr6_table *mrt; struct flowi fl = { - .iif = skb->dev->ifindex, - .mark = skb->mark, + .flowi_iif = skb->dev->ifindex, + .flowi_mark = skb->mark, }; int reg_vif_num; @@ -688,9 +688,9 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net *net = dev_net(dev); struct mr6_table *mrt; struct flowi fl = { - .oif = dev->ifindex, - .iif = skb->skb_iif, - .mark = skb->mark, + .flowi_oif = dev->ifindex, + .flowi_iif = skb->skb_iif, + .flowi_mark = skb->mark, }; int err; @@ -1548,9 +1548,9 @@ struct sock *mroute6_socket(struct net *net, struct sk_buff *skb) { struct mr6_table *mrt; struct flowi fl = { - .iif = skb->skb_iif, - .oif = skb->dev->ifindex, - .mark = skb->mark, + .flowi_iif = skb->skb_iif, + .flowi_oif = skb->dev->ifindex, + .flowi_mark= skb->mark, }; if (ip6mr_fib_lookup(net, &fl, &mrt) < 0) @@ -1916,7 +1916,7 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt, ipv6h = ipv6_hdr(skb); fl = (struct flowi) { - .oif = vif->link, + .flowi_oif = vif->link, .fl6_dst = ipv6h->daddr, }; @@ -2044,8 +2044,8 @@ int ip6_mr_input(struct sk_buff *skb) struct net *net = dev_net(skb->dev); struct mr6_table *mrt; struct flowi fl = { - .iif = skb->dev->ifindex, - .mark = skb->mark, + .flowi_iif = skb->dev->ifindex, + .flowi_mark= skb->mark, }; int err; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index d1770e061c0..1448c507fdf 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -448,8 +448,8 @@ sticky_done: int junk; fl.fl6_flowlabel = 0; - fl.oif = sk->sk_bound_dev_if; - fl.mark = sk->sk_mark; + fl.flowi_oif = sk->sk_bound_dev_if; + fl.flowi_mark = sk->sk_mark; if (optlen == 0) goto update; diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index f3e3ca938a5..e2f852cd0f4 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -214,7 +214,7 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct timeval stamp; int err = 0; - if (unlikely(fl->proto == IPPROTO_MH && + if (unlikely(fl->flowi_proto == IPPROTO_MH && fl->fl_mh_type <= IP6_MH_TYPE_MAX)) goto out; @@ -240,14 +240,14 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, sizeof(sel.saddr)); sel.prefixlen_s = 128; sel.family = AF_INET6; - sel.proto = fl->proto; + sel.proto = fl->flowi_proto; sel.dport = xfrm_flowi_dport(fl); if (sel.dport) sel.dport_mask = htons(~0); sel.sport = xfrm_flowi_sport(fl); if (sel.sport) sel.sport_mask = htons(~0); - sel.ifindex = fl->oif; + sel.ifindex = fl->flowi_oif; err = km_report(net, IPPROTO_DSTOPTS, &sel, (hao ? (xfrm_address_t *)&hao->addr : NULL)); diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 8d74116ae27..d282c62bc6f 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -16,8 +16,8 @@ int ip6_route_me_harder(struct sk_buff *skb) struct ipv6hdr *iph = ipv6_hdr(skb); struct dst_entry *dst; struct flowi fl = { - .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, - .mark = skb->mark, + .flowi_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, + .flowi_mark = skb->mark, .fl6_dst = iph->daddr, .fl6_src = iph->saddr, }; diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 91f6a61cefa..fd3938803eb 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -90,7 +90,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) } memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_TCP; + fl.flowi_proto = IPPROTO_TCP; ipv6_addr_copy(&fl.fl6_src, &oip6h->daddr); ipv6_addr_copy(&fl.fl6_dst, &oip6h->saddr); fl.fl_ip_sport = otcph.dest; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index dc29b07caf4..323ad44ff77 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -588,9 +588,9 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl, csum = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst, - total_len, fl->proto, tmp_csum); + total_len, fl->flowi_proto, tmp_csum); - if (csum == 0 && fl->proto == IPPROTO_UDP) + if (csum == 0 && fl->flowi_proto == IPPROTO_UDP) csum = CSUM_MANGLED_0; if (skb_store_bits(skb, offset, &csum, 2)) @@ -679,7 +679,7 @@ static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) if (!iov) continue; - switch (fl->proto) { + switch (fl->flowi_proto) { case IPPROTO_ICMPV6: /* check if one-byte field is readable or not. */ if (iov->iov_base && iov->iov_len < 1) @@ -758,7 +758,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, */ memset(&fl, 0, sizeof(fl)); - fl.mark = sk->sk_mark; + fl.flowi_mark = sk->sk_mark; if (sin6) { if (addr_len < SIN6_LEN_RFC2133) @@ -800,7 +800,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (addr_len >= sizeof(struct sockaddr_in6) && sin6->sin6_scope_id && ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL) - fl.oif = sin6->sin6_scope_id; + fl.flowi_oif = sin6->sin6_scope_id; } else { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; @@ -810,8 +810,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, fl.fl6_flowlabel = np->flow_label; } - if (fl.oif == 0) - fl.oif = sk->sk_bound_dev_if; + if (fl.flowi_oif == 0) + fl.flowi_oif = sk->sk_bound_dev_if; if (msg->msg_controllen) { opt = &opt_space; @@ -838,7 +838,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); - fl.proto = proto; + fl.flowi_proto = proto; err = rawv6_probe_proto_opt(&fl, msg); if (err) goto out; @@ -852,8 +852,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, final_p = fl6_update_dst(&fl, opt, &final); - if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) - fl.oif = np->mcast_oif; + if (!fl.flowi_oif && ipv6_addr_is_multicast(&fl.fl6_dst)) + fl.flowi_oif = np->mcast_oif; security_sk_classify_flow(sk, &fl); dst = ip6_dst_lookup_flow(sk, &fl, final_p, true); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 001276055a6..c3b20d63921 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -608,7 +608,7 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net, fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); restart: rt = fn->leaf; - rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags); + rt = rt6_device_match(net, rt, &fl->fl6_src, fl->flowi_oif, flags); BACKTRACK(net, &fl->fl6_src); out: dst_use(&rt->dst, jiffies); @@ -621,7 +621,7 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr, int oif, int strict) { struct flowi fl = { - .oif = oif, + .flowi_oif = oif, .fl6_dst = *daddr, }; struct dst_entry *dst; @@ -825,7 +825,7 @@ out2: static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table, struct flowi *fl, int flags) { - return ip6_pol_route(net, table, fl->iif, fl, flags); + return ip6_pol_route(net, table, fl->flowi_iif, fl, flags); } void ip6_route_input(struct sk_buff *skb) @@ -834,12 +834,12 @@ void ip6_route_input(struct sk_buff *skb) struct net *net = dev_net(skb->dev); int flags = RT6_LOOKUP_F_HAS_SADDR; struct flowi fl = { - .iif = skb->dev->ifindex, + .flowi_iif = skb->dev->ifindex, .fl6_dst = iph->daddr, .fl6_src = iph->saddr, .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK, - .mark = skb->mark, - .proto = iph->nexthdr, + .flowi_mark = skb->mark, + .flowi_proto = iph->nexthdr, }; if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG) @@ -851,7 +851,7 @@ void ip6_route_input(struct sk_buff *skb) static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table, struct flowi *fl, int flags) { - return ip6_pol_route(net, table, fl->oif, fl, flags); + return ip6_pol_route(net, table, fl->flowi_oif, fl, flags); } struct dst_entry * ip6_route_output(struct net *net, struct sock *sk, @@ -1484,7 +1484,7 @@ restart: continue; if (!(rt->rt6i_flags & RTF_GATEWAY)) continue; - if (fl->oif != rt->rt6i_dev->ifindex) + if (fl->flowi_oif != rt->rt6i_dev->ifindex) continue; if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) continue; @@ -1511,7 +1511,7 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest, struct net *net = dev_net(dev); struct ip6rd_flowi rdfl = { .fl = { - .oif = dev->ifindex, + .flowi_oif = dev->ifindex, .fl6_dst = *dest, .fl6_src = *src, }, @@ -2413,7 +2413,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void iif = nla_get_u32(tb[RTA_IIF]); if (tb[RTA_OIF]) - fl.oif = nla_get_u32(tb[RTA_OIF]); + fl.flowi_oif = nla_get_u32(tb[RTA_OIF]); if (iif) { struct net_device *dev; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 0b4cf350631..ca5255c0837 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -234,12 +234,12 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) struct in6_addr *final_p, final; struct flowi fl; memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_TCP; + fl.flowi_proto = IPPROTO_TCP; ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); final_p = fl6_update_dst(&fl, np->opt, &final); ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); - fl.oif = sk->sk_bound_dev_if; - fl.mark = sk->sk_mark; + fl.flowi_oif = sk->sk_bound_dev_if; + fl.flowi_mark = sk->sk_mark; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->inet_sport; security_req_classify_flow(req, &fl); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index e59a31c48ba..a3d1229b400 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -242,12 +242,12 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (!ipv6_addr_any(&np->rcv_saddr)) saddr = &np->rcv_saddr; - fl.proto = IPPROTO_TCP; + fl.flowi_proto = IPPROTO_TCP; ipv6_addr_copy(&fl.fl6_dst, &np->daddr); ipv6_addr_copy(&fl.fl6_src, (saddr ? saddr : &np->saddr)); - fl.oif = sk->sk_bound_dev_if; - fl.mark = sk->sk_mark; + fl.flowi_oif = sk->sk_bound_dev_if; + fl.flowi_mark = sk->sk_mark; fl.fl_ip_dport = usin->sin6_port; fl.fl_ip_sport = inet->inet_sport; @@ -396,11 +396,11 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, for now. */ memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_TCP; + fl.flowi_proto = IPPROTO_TCP; ipv6_addr_copy(&fl.fl6_dst, &np->daddr); ipv6_addr_copy(&fl.fl6_src, &np->saddr); - fl.oif = sk->sk_bound_dev_if; - fl.mark = sk->sk_mark; + fl.flowi_oif = sk->sk_bound_dev_if; + fl.flowi_mark = sk->sk_mark; fl.fl_ip_dport = inet->inet_dport; fl.fl_ip_sport = inet->inet_sport; security_skb_classify_flow(skb, &fl); @@ -487,12 +487,12 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, int err; memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_TCP; + fl.flowi_proto = IPPROTO_TCP; ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr); fl.fl6_flowlabel = 0; - fl.oif = treq->iif; - fl.mark = sk->sk_mark; + fl.flowi_oif = treq->iif; + fl.flowi_mark = sk->sk_mark; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_rsk(req)->loc_port; security_req_classify_flow(req, &fl); @@ -1055,8 +1055,8 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, __tcp_v6_send_check(buff, &fl.fl6_src, &fl.fl6_dst); - fl.proto = IPPROTO_TCP; - fl.oif = inet6_iif(skb); + fl.flowi_proto = IPPROTO_TCP; + fl.flowi_oif = inet6_iif(skb); fl.fl_ip_dport = t1->dest; fl.fl_ip_sport = t1->source; security_skb_classify_flow(skb, &fl); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index d86d7f67a59..91f8047463e 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -915,7 +915,7 @@ static int udp_v6_push_pending_frames(struct sock *sk) /* add protocol-dependent pseudo-header */ uh->check = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst, - up->len, fl->proto, csum ); + up->len, fl->flowi_proto, csum); if (uh->check == 0) uh->check = CSUM_MANGLED_0; @@ -1060,7 +1060,7 @@ do_udp_sendmsg: if (addr_len >= sizeof(struct sockaddr_in6) && sin6->sin6_scope_id && ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL) - fl.oif = sin6->sin6_scope_id; + fl.flowi_oif = sin6->sin6_scope_id; } else { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; @@ -1071,13 +1071,13 @@ do_udp_sendmsg: connected = 1; } - if (!fl.oif) - fl.oif = sk->sk_bound_dev_if; + if (!fl.flowi_oif) + fl.flowi_oif = sk->sk_bound_dev_if; - if (!fl.oif) - fl.oif = np->sticky_pktinfo.ipi6_ifindex; + if (!fl.flowi_oif) + fl.flowi_oif = np->sticky_pktinfo.ipi6_ifindex; - fl.mark = sk->sk_mark; + fl.flowi_mark = sk->sk_mark; if (msg->msg_controllen) { opt = &opt_space; @@ -1105,7 +1105,7 @@ do_udp_sendmsg: opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); - fl.proto = sk->sk_protocol; + fl.flowi_proto = sk->sk_protocol; if (!ipv6_addr_any(daddr)) ipv6_addr_copy(&fl.fl6_dst, daddr); else @@ -1118,8 +1118,8 @@ do_udp_sendmsg: if (final_p) connected = 0; - if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) { - fl.oif = np->mcast_oif; + if (!fl.flowi_oif && ipv6_addr_is_multicast(&fl.fl6_dst)) { + fl.flowi_oif = np->mcast_oif; connected = 0; } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 48ce496802f..d62496c1a6f 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -128,7 +128,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) u8 nexthdr = nh[IP6CB(skb)->nhoff]; memset(fl, 0, sizeof(struct flowi)); - fl->mark = skb->mark; + fl->flowi_mark = skb->mark; ipv6_addr_copy(&fl->fl6_dst, reverse ? &hdr->saddr : &hdr->daddr); ipv6_addr_copy(&fl->fl6_src, reverse ? &hdr->daddr : &hdr->saddr); @@ -161,7 +161,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) fl->fl_ip_sport = ports[!!reverse]; fl->fl_ip_dport = ports[!reverse]; } - fl->proto = nexthdr; + fl->flowi_proto = nexthdr; return; case IPPROTO_ICMPV6: @@ -171,7 +171,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) fl->fl_icmp_type = icmp[0]; fl->fl_icmp_code = icmp[1]; } - fl->proto = nexthdr; + fl->flowi_proto = nexthdr; return; #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) @@ -182,7 +182,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) fl->fl_mh_type = mh->ip6mh_type; } - fl->proto = nexthdr; + fl->flowi_proto = nexthdr; return; #endif @@ -192,7 +192,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) case IPPROTO_COMP: default: fl->fl_ipsec_spi = 0; - fl->proto = nexthdr; + fl->flowi_proto = nexthdr; return; } } diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index a02598e0079..805d0e14c33 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -33,8 +33,8 @@ __xfrm6_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl) sel->family = AF_INET6; sel->prefixlen_d = 128; sel->prefixlen_s = 128; - sel->proto = fl->proto; - sel->ifindex = fl->oif; + sel->proto = fl->flowi_proto; + sel->ifindex = fl->flowi_oif; } static void diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index d69ec26b6bd..d07a32aa07b 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -76,7 +76,7 @@ static int __ip_vs_addr_is_local_v6(struct net *net, { struct rt6_info *rt; struct flowi fl = { - .oif = 0, + .flowi_oif = 0, .fl6_dst = *addr, .fl6_src = { .s6_addr32 = {0, 0, 0, 0} }, }; diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index faf381d9da7..cc8071f6890 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -169,7 +169,7 @@ __ip_vs_reroute_locally(struct sk_buff *skb) .fl4_dst = iph->daddr, .fl4_src = iph->saddr, .fl4_tos = RT_TOS(iph->tos), - .mark = skb->mark, + .flowi_mark = skb->mark, }; rt = ip_route_output_key(net, &fl); diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index 624725b5286..cb14ae2de15 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -68,7 +68,7 @@ tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info) if (info->priv) { if (info->priv->oif == -1) return false; - fl.oif = info->priv->oif; + fl.flowi_oif = info->priv->oif; } fl.fl4_dst = info->gw.ip; fl.fl4_tos = RT_TOS(iph->tos); @@ -149,7 +149,7 @@ tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info) if (info->priv) { if (info->priv->oif == -1) return false; - fl.oif = info->priv->oif; + fl.flowi_oif = info->priv->oif; } fl.fl6_dst = info->gw.in6; fl.fl6_flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) | diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 95e0c8eda1a..83162715688 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -205,7 +205,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) memset(&fl, 0, sizeof(fl)); - fl.proto = sk->sk_protocol; + fl.flowi_proto = sk->sk_protocol; /* Fill in the dest address from the route entry passed with the skb * and the source address from the transport. @@ -216,9 +216,9 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) fl.fl6_flowlabel = np->flow_label; IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel); if (ipv6_addr_type(&fl.fl6_src) & IPV6_ADDR_LINKLOCAL) - fl.oif = transport->saddr.v6.sin6_scope_id; + fl.flowi_oif = transport->saddr.v6.sin6_scope_id; else - fl.oif = sk->sk_bound_dev_if; + fl.flowi_oif = sk->sk_bound_dev_if; if (np->opt && np->opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; @@ -250,7 +250,7 @@ static struct dst_entry *sctp_v6_get_dst(struct sctp_association *asoc, memset(&fl, 0, sizeof(fl)); ipv6_addr_copy(&fl.fl6_dst, &daddr->v6.sin6_addr); if (ipv6_addr_type(&daddr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) - fl.oif = daddr->v6.sin6_scope_id; + fl.flowi_oif = daddr->v6.sin6_scope_id; SCTP_DEBUG_PRINTK("%s: DST=%pI6 ", __func__, &fl.fl6_dst); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 4e55e6c49ec..832665ac210 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -477,10 +477,10 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, memset(&fl, 0x0, sizeof(struct flowi)); fl.fl4_dst = daddr->v4.sin_addr.s_addr; fl.fl_ip_dport = daddr->v4.sin_port; - fl.proto = IPPROTO_SCTP; + fl.flowi_proto = IPPROTO_SCTP; if (asoc) { fl.fl4_tos = RT_CONN_FLAGS(asoc->base.sk); - fl.oif = asoc->base.sk->sk_bound_dev_if; + fl.flowi_oif = asoc->base.sk->sk_bound_dev_if; fl.fl_ip_sport = htons(asoc->base.bind_addr.port); } if (saddr) { diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 9e4aacda26c..dd6243f9d93 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -63,8 +63,8 @@ __xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl) addr_match(&fl->fl4_src, &sel->saddr, sel->prefixlen_s) && !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) && !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) && - (fl->proto == sel->proto || !sel->proto) && - (fl->oif == sel->ifindex || !sel->ifindex); + (fl->flowi_proto == sel->proto || !sel->proto) && + (fl->flowi_oif == sel->ifindex || !sel->ifindex); } static inline int @@ -74,8 +74,8 @@ __xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl) addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s) && !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) && !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) && - (fl->proto == sel->proto || !sel->proto) && - (fl->oif == sel->ifindex || !sel->ifindex); + (fl->flowi_proto == sel->proto || !sel->proto) && + (fl->flowi_oif == sel->ifindex || !sel->ifindex); } int xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl, @@ -876,13 +876,13 @@ static int xfrm_policy_match(const struct xfrm_policy *pol, int match, ret = -ESRCH; if (pol->family != family || - (fl->mark & pol->mark.m) != pol->mark.v || + (fl->flowi_mark & pol->mark.m) != pol->mark.v || pol->type != type) return ret; match = xfrm_selector_match(sel, fl, family); if (match) - ret = security_xfrm_policy_lookup(pol->security, fl->secid, + ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid, dir); return ret; @@ -1012,7 +1012,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, goto out; } err = security_xfrm_policy_lookup(pol->security, - fl->secid, + fl->flowi_secid, policy_to_flow_dir(dir)); if (!err) xfrm_pol_hold(pol); @@ -1848,7 +1848,7 @@ restart: return make_blackhole(net, family, dst_orig); } - if (fl->flags & FLOWI_FLAG_CAN_SLEEP) { + if (fl->flowi_flags & FLOWI_FLAG_CAN_SLEEP) { DECLARE_WAITQUEUE(wait, current); add_wait_queue(&net->xfrm.km_waitq, &wait); @@ -1990,7 +1990,7 @@ int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, return -EAFNOSUPPORT; afinfo->decode_session(skb, fl, reverse); - err = security_xfrm_decode_session(skb, &fl->secid); + err = security_xfrm_decode_session(skb, &fl->flowi_secid); xfrm_policy_put_afinfo(afinfo); return err; } diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 81221d9cbf0..cd6be49f2ae 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -859,7 +859,7 @@ found: xfrm_init_tempstate(x, fl, tmpl, daddr, saddr, family); memcpy(&x->mark, &pol->mark, sizeof(x->mark)); - error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid); + error = security_xfrm_state_alloc_acquire(x, pol->security, fl->flowi_secid); if (error) { x->km.state = XFRM_STATE_DEAD; to_put = x; diff --git a/security/security.c b/security/security.c index 8ef1f7dff27..bae843c8a13 100644 --- a/security/security.c +++ b/security/security.c @@ -1100,7 +1100,7 @@ void security_sk_clone(const struct sock *sk, struct sock *newsk) void security_sk_classify_flow(struct sock *sk, struct flowi *fl) { - security_ops->sk_getsecid(sk, &fl->secid); + security_ops->sk_getsecid(sk, &fl->flowi_secid); } EXPORT_SYMBOL(security_sk_classify_flow); @@ -1246,7 +1246,7 @@ int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid) void security_skb_classify_flow(struct sk_buff *skb, struct flowi *fl) { - int rc = security_ops->xfrm_decode_session(skb, &fl->secid, 0); + int rc = security_ops->xfrm_decode_session(skb, &fl->flowi_secid, 0); BUG_ON(rc); } diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index cef42f5d69a..c178494850a 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -4306,7 +4306,7 @@ static void selinux_secmark_refcount_dec(void) static void selinux_req_classify_flow(const struct request_sock *req, struct flowi *fl) { - fl->secid = req->secid; + fl->flowi_secid = req->secid; } static int selinux_tun_dev_create(void) diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index c43ab542246..510ec2cf6c2 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -135,10 +135,10 @@ int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy * state_sid = x->security->ctx_sid; - if (fl->secid != state_sid) + if (fl->flowi_secid != state_sid) return 0; - rc = avc_has_perm(fl->secid, state_sid, SECCLASS_ASSOCIATION, + rc = avc_has_perm(fl->flowi_secid, state_sid, SECCLASS_ASSOCIATION, ASSOCIATION__SENDTO, NULL)? 0:1; -- cgit v1.2.3-70-g09d2 From 05aebe2e5d009314a1d9b47ad9cda59ccb57d76d Mon Sep 17 00:00:00 2001 From: Daniel Turull Date: Mon, 14 Mar 2011 13:47:40 -0700 Subject: pktgen: bug fix in transmission headers with frags=0 (bug introduced by commit 26ad787962ef84677a48c560 (pktgen: speedup fragmented skbs) The headers of pktgen were incorrectly added in a pktgen packet without frags (frags=0). There was an offset in the pktgen headers. The cause was in reusing the pgh variable as a return variable in skb_put when adding the payload to the skb. Signed-off-by: Daniel Turull Signed-off-by: David S. Miller Signed-off-by: Eric Dumazet --- net/core/pktgen.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net/core') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index f0aec6c39ec..0c55eaa70e3 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2620,8 +2620,7 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, datalen -= sizeof(*pgh); if (pkt_dev->nfrags <= 0) { - pgh = (struct pktgen_hdr *)skb_put(skb, datalen); - memset(pgh + 1, 0, datalen); + memset(skb_put(skb, datalen), 0, datalen); } else { int frags = pkt_dev->nfrags; int i, len; -- cgit v1.2.3-70-g09d2 From 8a4eb5734e8d1dc60a8c28576bbbdfdcc643626d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 12 Mar 2011 03:14:39 +0000 Subject: net: introduce rx_handler results and logic around that MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch allows rx_handlers to better signalize what to do next to it's caller. That makes skb->deliver_no_wcard no longer needed. kernel-doc for rx_handler_result is taken from Nicolas' patch. Signed-off-by: Jiri Pirko Reviewed-by: Nicolas de Pesloüan Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 22 +++++++++--------- drivers/net/macvlan.c | 11 ++++----- include/linux/netdevice.h | 50 ++++++++++++++++++++++++++++++++++++++++- include/linux/skbuff.h | 5 +---- net/bridge/br_input.c | 25 ++++++++++++--------- net/bridge/br_private.h | 2 +- net/core/dev.c | 21 +++++++++++------ net/core/skbuff.c | 1 - 8 files changed, 98 insertions(+), 39 deletions(-) (limited to 'net/core') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 04119b1e7cd..27c413aa15d 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1480,20 +1480,23 @@ static bool bond_should_deliver_exact_match(struct sk_buff *skb, return false; } -static struct sk_buff *bond_handle_frame(struct sk_buff *skb) +static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb) { + struct sk_buff *skb = *pskb; struct slave *slave; struct net_device *bond_dev; struct bonding *bond; - skb = skb_share_check(skb, GFP_ATOMIC); - if (unlikely(!skb)) - return NULL; - slave = bond_slave_get_rcu(skb->dev); bond_dev = ACCESS_ONCE(slave->dev->master); if (unlikely(!bond_dev)) - return skb; + return RX_HANDLER_PASS; + + skb = skb_share_check(skb, GFP_ATOMIC); + if (unlikely(!skb)) + return RX_HANDLER_CONSUMED; + + *pskb = skb; bond = netdev_priv(bond_dev); @@ -1501,8 +1504,7 @@ static struct sk_buff *bond_handle_frame(struct sk_buff *skb) slave->dev->last_rx = jiffies; if (bond_should_deliver_exact_match(skb, slave, bond)) { - skb->deliver_no_wcard = 1; - return skb; + return RX_HANDLER_EXACT; } skb->dev = bond_dev; @@ -1514,12 +1516,12 @@ static struct sk_buff *bond_handle_frame(struct sk_buff *skb) if (unlikely(skb_cow_head(skb, skb->data - skb_mac_header(skb)))) { kfree_skb(skb); - return NULL; + return RX_HANDLER_CONSUMED; } memcpy(eth_hdr(skb)->h_dest, bond_dev->dev_addr, ETH_ALEN); } - return skb; + return RX_HANDLER_ANOTHER; } /* enslave device to bond device */ diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 497991bd3b6..5b37d3c191e 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -152,9 +152,10 @@ static void macvlan_broadcast(struct sk_buff *skb, } /* called under rcu_read_lock() from netif_receive_skb */ -static struct sk_buff *macvlan_handle_frame(struct sk_buff *skb) +static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb) { struct macvlan_port *port; + struct sk_buff *skb = *pskb; const struct ethhdr *eth = eth_hdr(skb); const struct macvlan_dev *vlan; const struct macvlan_dev *src; @@ -184,7 +185,7 @@ static struct sk_buff *macvlan_handle_frame(struct sk_buff *skb) */ macvlan_broadcast(skb, port, src->dev, MACVLAN_MODE_VEPA); - return skb; + return RX_HANDLER_PASS; } if (port->passthru) @@ -192,12 +193,12 @@ static struct sk_buff *macvlan_handle_frame(struct sk_buff *skb) else vlan = macvlan_hash_lookup(port, eth->h_dest); if (vlan == NULL) - return skb; + return RX_HANDLER_PASS; dev = vlan->dev; if (unlikely(!(dev->flags & IFF_UP))) { kfree_skb(skb); - return NULL; + return RX_HANDLER_CONSUMED; } len = skb->len + ETH_HLEN; skb = skb_share_check(skb, GFP_ATOMIC); @@ -211,7 +212,7 @@ static struct sk_buff *macvlan_handle_frame(struct sk_buff *skb) out: macvlan_count_rx(vlan, len, ret == NET_RX_SUCCESS, 0); - return NULL; + return RX_HANDLER_CONSUMED; } static int macvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 604dbf5051d..5eeb2cd3631 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -390,7 +390,55 @@ enum gro_result { }; typedef enum gro_result gro_result_t; -typedef struct sk_buff *rx_handler_func_t(struct sk_buff *skb); +/* + * enum rx_handler_result - Possible return values for rx_handlers. + * @RX_HANDLER_CONSUMED: skb was consumed by rx_handler, do not process it + * further. + * @RX_HANDLER_ANOTHER: Do another round in receive path. This is indicated in + * case skb->dev was changed by rx_handler. + * @RX_HANDLER_EXACT: Force exact delivery, no wildcard. + * @RX_HANDLER_PASS: Do nothing, passe the skb as if no rx_handler was called. + * + * rx_handlers are functions called from inside __netif_receive_skb(), to do + * special processing of the skb, prior to delivery to protocol handlers. + * + * Currently, a net_device can only have a single rx_handler registered. Trying + * to register a second rx_handler will return -EBUSY. + * + * To register a rx_handler on a net_device, use netdev_rx_handler_register(). + * To unregister a rx_handler on a net_device, use + * netdev_rx_handler_unregister(). + * + * Upon return, rx_handler is expected to tell __netif_receive_skb() what to + * do with the skb. + * + * If the rx_handler consumed to skb in some way, it should return + * RX_HANDLER_CONSUMED. This is appropriate when the rx_handler arranged for + * the skb to be delivered in some other ways. + * + * If the rx_handler changed skb->dev, to divert the skb to another + * net_device, it should return RX_HANDLER_ANOTHER. The rx_handler for the + * new device will be called if it exists. + * + * If the rx_handler consider the skb should be ignored, it should return + * RX_HANDLER_EXACT. The skb will only be delivered to protocol handlers that + * are registred on exact device (ptype->dev == skb->dev). + * + * If the rx_handler didn't changed skb->dev, but want the skb to be normally + * delivered, it should return RX_HANDLER_PASS. + * + * A device without a registered rx_handler will behave as if rx_handler + * returned RX_HANDLER_PASS. + */ + +enum rx_handler_result { + RX_HANDLER_CONSUMED, + RX_HANDLER_ANOTHER, + RX_HANDLER_EXACT, + RX_HANDLER_PASS, +}; +typedef enum rx_handler_result rx_handler_result_t; +typedef rx_handler_result_t rx_handler_func_t(struct sk_buff **pskb); extern void __napi_schedule(struct napi_struct *n); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 31f02d0b46a..24cfa626931 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -388,10 +388,7 @@ struct sk_buff { kmemcheck_bitfield_begin(flags2); __u16 queue_mapping:16; #ifdef CONFIG_IPV6_NDISC_NODETYPE - __u8 ndisc_nodetype:2, - deliver_no_wcard:1; -#else - __u8 deliver_no_wcard:1; + __u8 ndisc_nodetype:2; #endif __u8 ooo_okay:1; kmemcheck_bitfield_end(flags2); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 88e4aa9cb1f..e2160792e1b 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -139,21 +139,22 @@ static inline int is_link_local(const unsigned char *dest) * Return NULL if skb is handled * note: already called with rcu_read_lock */ -struct sk_buff *br_handle_frame(struct sk_buff *skb) +rx_handler_result_t br_handle_frame(struct sk_buff **pskb) { struct net_bridge_port *p; + struct sk_buff *skb = *pskb; const unsigned char *dest = eth_hdr(skb)->h_dest; br_should_route_hook_t *rhook; if (unlikely(skb->pkt_type == PACKET_LOOPBACK)) - return skb; + return RX_HANDLER_PASS; if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) goto drop; skb = skb_share_check(skb, GFP_ATOMIC); if (!skb) - return NULL; + return RX_HANDLER_CONSUMED; p = br_port_get_rcu(skb->dev); @@ -167,10 +168,12 @@ struct sk_buff *br_handle_frame(struct sk_buff *skb) goto forward; if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev, - NULL, br_handle_local_finish)) - return NULL; /* frame consumed by filter */ - else - return skb; /* continue processing */ + NULL, br_handle_local_finish)) { + return RX_HANDLER_CONSUMED; /* consumed by filter */ + } else { + *pskb = skb; + return RX_HANDLER_PASS; /* continue processing */ + } } forward: @@ -178,8 +181,10 @@ forward: case BR_STATE_FORWARDING: rhook = rcu_dereference(br_should_route_hook); if (rhook) { - if ((*rhook)(skb)) - return skb; + if ((*rhook)(skb)) { + *pskb = skb; + return RX_HANDLER_PASS; + } dest = eth_hdr(skb)->h_dest; } /* fall through */ @@ -194,5 +199,5 @@ forward: drop: kfree_skb(skb); } - return NULL; + return RX_HANDLER_CONSUMED; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index f7afc364d77..19e2f46ed08 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -379,7 +379,7 @@ extern void br_features_recompute(struct net_bridge *br); /* br_input.c */ extern int br_handle_frame_finish(struct sk_buff *skb); -extern struct sk_buff *br_handle_frame(struct sk_buff *skb); +extern rx_handler_result_t br_handle_frame(struct sk_buff **pskb); /* br_ioctl.c */ extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); diff --git a/net/core/dev.c b/net/core/dev.c index 0d39032e962..0b88eba97da 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3070,6 +3070,8 @@ out: * on a failure. * * The caller must hold the rtnl_mutex. + * + * For a general description of rx_handler, see enum rx_handler_result. */ int netdev_rx_handler_register(struct net_device *dev, rx_handler_func_t *rx_handler, @@ -3129,6 +3131,7 @@ static int __netif_receive_skb(struct sk_buff *skb) rx_handler_func_t *rx_handler; struct net_device *orig_dev; struct net_device *null_or_dev; + bool deliver_exact = false; int ret = NET_RX_DROP; __be16 type; @@ -3181,18 +3184,22 @@ ncls: rx_handler = rcu_dereference(skb->dev->rx_handler); if (rx_handler) { - struct net_device *prev_dev; - if (pt_prev) { ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = NULL; } - prev_dev = skb->dev; - skb = rx_handler(skb); - if (!skb) + switch (rx_handler(&skb)) { + case RX_HANDLER_CONSUMED: goto out; - if (skb->dev != prev_dev) + case RX_HANDLER_ANOTHER: goto another_round; + case RX_HANDLER_EXACT: + deliver_exact = true; + case RX_HANDLER_PASS: + break; + default: + BUG(); + } } if (vlan_tx_tag_present(skb)) { @@ -3210,7 +3217,7 @@ ncls: vlan_on_bond_hook(skb); /* deliver only exact match when indicated */ - null_or_dev = skb->deliver_no_wcard ? skb->dev : NULL; + null_or_dev = deliver_exact ? skb->dev : NULL; type = skb->protocol; list_for_each_entry_rcu(ptype, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1eb526a848f..801dd08908f 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -523,7 +523,6 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->ip_summed = old->ip_summed; skb_copy_queue_mapping(new, old); new->priority = old->priority; - new->deliver_no_wcard = old->deliver_no_wcard; #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) new->ipvs_property = old->ipvs_property; #endif -- cgit v1.2.3-70-g09d2