diff options
Diffstat (limited to 'include/linux/netdevice.h')
-rw-r--r-- | include/linux/netdevice.h | 730 |
1 files changed, 537 insertions, 193 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index daafd9561cb..38377392d08 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -56,20 +56,10 @@ struct device; struct phy_device; /* 802.11 specific */ struct wireless_dev; - /* source back-compat hooks */ -#define SET_ETHTOOL_OPS(netdev,ops) \ - ( (netdev)->ethtool_ops = (ops) ) void netdev_set_default_ethtool_ops(struct net_device *dev, const struct ethtool_ops *ops); -/* hardware address assignment types */ -#define NET_ADDR_PERM 0 /* address is permanent (default) */ -#define NET_ADDR_RANDOM 1 /* address is generated randomly */ -#define NET_ADDR_STOLEN 2 /* address is stolen from other device */ -#define NET_ADDR_SET 3 /* address is set using - * dev_set_mac_address() */ - /* Backlog congestion levels */ #define NET_RX_SUCCESS 0 /* keep 'em coming, baby */ #define NET_RX_DROP 1 /* packet dropped */ @@ -500,7 +490,7 @@ static inline void napi_disable(struct napi_struct *n) static inline void napi_enable(struct napi_struct *n) { BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); - smp_mb__before_clear_bit(); + smp_mb__before_atomic(); clear_bit(NAPI_STATE_SCHED, &n->state); } @@ -526,11 +516,18 @@ enum netdev_queue_state_t { __QUEUE_STATE_DRV_XOFF, __QUEUE_STATE_STACK_XOFF, __QUEUE_STATE_FROZEN, -#define QUEUE_STATE_ANY_XOFF ((1 << __QUEUE_STATE_DRV_XOFF) | \ - (1 << __QUEUE_STATE_STACK_XOFF)) -#define QUEUE_STATE_ANY_XOFF_OR_FROZEN (QUEUE_STATE_ANY_XOFF | \ - (1 << __QUEUE_STATE_FROZEN)) }; + +#define QUEUE_STATE_DRV_XOFF (1 << __QUEUE_STATE_DRV_XOFF) +#define QUEUE_STATE_STACK_XOFF (1 << __QUEUE_STATE_STACK_XOFF) +#define QUEUE_STATE_FROZEN (1 << __QUEUE_STATE_FROZEN) + +#define QUEUE_STATE_ANY_XOFF (QUEUE_STATE_DRV_XOFF | QUEUE_STATE_STACK_XOFF) +#define QUEUE_STATE_ANY_XOFF_OR_FROZEN (QUEUE_STATE_ANY_XOFF | \ + QUEUE_STATE_FROZEN) +#define QUEUE_STATE_DRV_XOFF_OR_FROZEN (QUEUE_STATE_DRV_XOFF | \ + QUEUE_STATE_FROZEN) + /* * __QUEUE_STATE_DRV_XOFF is used by drivers to stop the transmit queue. The * netif_tx_* functions below are used to manipulate this flag. The @@ -853,7 +850,8 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * SR-IOV management functions. * int (*ndo_set_vf_mac)(struct net_device *dev, int vf, u8* mac); * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, u8 qos); - * int (*ndo_set_vf_tx_rate)(struct net_device *dev, int vf, int rate); + * int (*ndo_set_vf_rate)(struct net_device *dev, int vf, int min_tx_rate, + * int max_tx_rate); * int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting); * int (*ndo_get_vf_config)(struct net_device *dev, * int vf, struct ifla_vf_info *ivf); @@ -945,7 +943,8 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * const unsigned char *addr) * Deletes the FDB entry from dev coresponding to addr. * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, - * struct net_device *dev, int idx) + * struct net_device *dev, struct net_device *filter_dev, + * int idx) * Used to add FDB entries to dump requests. Implementers should add * entries to skb and update idx with the number of entries. * @@ -1037,8 +1036,7 @@ struct net_device_ops { #ifdef CONFIG_NET_POLL_CONTROLLER void (*ndo_poll_controller)(struct net_device *dev); int (*ndo_netpoll_setup)(struct net_device *dev, - struct netpoll_info *info, - gfp_t gfp); + struct netpoll_info *info); void (*ndo_netpoll_cleanup)(struct net_device *dev); #endif #ifdef CONFIG_NET_RX_BUSY_POLL @@ -1048,8 +1046,9 @@ struct net_device_ops { int queue, u8 *mac); int (*ndo_set_vf_vlan)(struct net_device *dev, int queue, u16 vlan, u8 qos); - int (*ndo_set_vf_tx_rate)(struct net_device *dev, - int vf, int rate); + int (*ndo_set_vf_rate)(struct net_device *dev, + int vf, int min_tx_rate, + int max_tx_rate); int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting); int (*ndo_get_vf_config)(struct net_device *dev, @@ -1116,6 +1115,7 @@ struct net_device_ops { int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, + struct net_device *filter_dev, int idx); int (*ndo_bridge_setlink)(struct net_device *dev, @@ -1145,44 +1145,314 @@ struct net_device_ops { netdev_tx_t (*ndo_dfwd_start_xmit) (struct sk_buff *skb, struct net_device *dev, void *priv); + int (*ndo_get_lock_subclass)(struct net_device *dev); }; -/* - * The DEVICE structure. - * Actually, this whole structure is a big mistake. It mixes I/O - * data with strictly "high-level" data, and it has to know about - * almost every data structure used in the INET module. +/** + * enum net_device_priv_flags - &struct net_device priv_flags + * + * These are the &struct net_device, they are only set internally + * by drivers and used in the kernel. These flags are invisible to + * userspace, this means that the order of these flags can change + * during any kernel release. + * + * You should have a pretty good reason to be extending these flags. + * + * @IFF_802_1Q_VLAN: 802.1Q VLAN device + * @IFF_EBRIDGE: Ethernet bridging device + * @IFF_SLAVE_INACTIVE: bonding slave not the curr. active + * @IFF_MASTER_8023AD: bonding master, 802.3ad + * @IFF_MASTER_ALB: bonding master, balance-alb + * @IFF_BONDING: bonding master or slave + * @IFF_SLAVE_NEEDARP: need ARPs for validation + * @IFF_ISATAP: ISATAP interface (RFC4214) + * @IFF_MASTER_ARPMON: bonding master, ARP mon in use + * @IFF_WAN_HDLC: WAN HDLC device + * @IFF_XMIT_DST_RELEASE: dev_hard_start_xmit() is allowed to + * release skb->dst + * @IFF_DONT_BRIDGE: disallow bridging this ether dev + * @IFF_DISABLE_NETPOLL: disable netpoll at run-time + * @IFF_MACVLAN_PORT: device used as macvlan port + * @IFF_BRIDGE_PORT: device used as bridge port + * @IFF_OVS_DATAPATH: device used as Open vSwitch datapath port + * @IFF_TX_SKB_SHARING: The interface supports sharing skbs on transmit + * @IFF_UNICAST_FLT: Supports unicast filtering + * @IFF_TEAM_PORT: device used as team port + * @IFF_SUPP_NOFCS: device supports sending custom FCS + * @IFF_LIVE_ADDR_CHANGE: device supports hardware address + * change when it's running + * @IFF_MACVLAN: Macvlan device + */ +enum netdev_priv_flags { + IFF_802_1Q_VLAN = 1<<0, + IFF_EBRIDGE = 1<<1, + IFF_SLAVE_INACTIVE = 1<<2, + IFF_MASTER_8023AD = 1<<3, + IFF_MASTER_ALB = 1<<4, + IFF_BONDING = 1<<5, + IFF_SLAVE_NEEDARP = 1<<6, + IFF_ISATAP = 1<<7, + IFF_MASTER_ARPMON = 1<<8, + IFF_WAN_HDLC = 1<<9, + IFF_XMIT_DST_RELEASE = 1<<10, + IFF_DONT_BRIDGE = 1<<11, + IFF_DISABLE_NETPOLL = 1<<12, + IFF_MACVLAN_PORT = 1<<13, + IFF_BRIDGE_PORT = 1<<14, + IFF_OVS_DATAPATH = 1<<15, + IFF_TX_SKB_SHARING = 1<<16, + IFF_UNICAST_FLT = 1<<17, + IFF_TEAM_PORT = 1<<18, + IFF_SUPP_NOFCS = 1<<19, + IFF_LIVE_ADDR_CHANGE = 1<<20, + IFF_MACVLAN = 1<<21, +}; + +#define IFF_802_1Q_VLAN IFF_802_1Q_VLAN +#define IFF_EBRIDGE IFF_EBRIDGE +#define IFF_SLAVE_INACTIVE IFF_SLAVE_INACTIVE +#define IFF_MASTER_8023AD IFF_MASTER_8023AD +#define IFF_MASTER_ALB IFF_MASTER_ALB +#define IFF_BONDING IFF_BONDING +#define IFF_SLAVE_NEEDARP IFF_SLAVE_NEEDARP +#define IFF_ISATAP IFF_ISATAP +#define IFF_MASTER_ARPMON IFF_MASTER_ARPMON +#define IFF_WAN_HDLC IFF_WAN_HDLC +#define IFF_XMIT_DST_RELEASE IFF_XMIT_DST_RELEASE +#define IFF_DONT_BRIDGE IFF_DONT_BRIDGE +#define IFF_DISABLE_NETPOLL IFF_DISABLE_NETPOLL +#define IFF_MACVLAN_PORT IFF_MACVLAN_PORT +#define IFF_BRIDGE_PORT IFF_BRIDGE_PORT +#define IFF_OVS_DATAPATH IFF_OVS_DATAPATH +#define IFF_TX_SKB_SHARING IFF_TX_SKB_SHARING +#define IFF_UNICAST_FLT IFF_UNICAST_FLT +#define IFF_TEAM_PORT IFF_TEAM_PORT +#define IFF_SUPP_NOFCS IFF_SUPP_NOFCS +#define IFF_LIVE_ADDR_CHANGE IFF_LIVE_ADDR_CHANGE +#define IFF_MACVLAN IFF_MACVLAN + +/** + * struct net_device - The DEVICE structure. + * Actually, this whole structure is a big mistake. It mixes I/O + * data with strictly "high-level" data, and it has to know about + * almost every data structure used in the INET module. + * + * @name: This is the first field of the "visible" part of this structure + * (i.e. as seen by users in the "Space.c" file). It is the name + * of the interface. + * + * @name_hlist: Device name hash chain, please keep it close to name[] + * @ifalias: SNMP alias + * @mem_end: Shared memory end + * @mem_start: Shared memory start + * @base_addr: Device I/O address + * @irq: Device IRQ number + * + * @state: Generic network queuing layer state, see netdev_state_t + * @dev_list: The global list of network devices + * @napi_list: List entry, that is used for polling napi devices + * @unreg_list: List entry, that is used, when we are unregistering the + * device, see the function unregister_netdev + * @close_list: List entry, that is used, when we are closing the device + * + * @adj_list: Directly linked devices, like slaves for bonding + * @all_adj_list: All linked devices, *including* neighbours + * @features: Currently active device features + * @hw_features: User-changeable features + * + * @wanted_features: User-requested features + * @vlan_features: Mask of features inheritable by VLAN devices + * + * @hw_enc_features: Mask of features inherited by encapsulating devices + * This field indicates what encapsulation + * offloads the hardware is capable of doing, + * and drivers will need to set them appropriately. + * + * @mpls_features: Mask of features inheritable by MPLS + * + * @ifindex: interface index + * @iflink: unique device identifier + * + * @stats: Statistics struct, which was left as a legacy, use + * rtnl_link_stats64 instead + * + * @rx_dropped: Dropped packets by core network, + * do not use this in drivers + * @tx_dropped: Dropped packets by core network, + * do not use this in drivers + * + * @carrier_changes: Stats to monitor carrier on<->off transitions + * + * @wireless_handlers: List of functions to handle Wireless Extensions, + * instead of ioctl, + * see <net/iw_handler.h> for details. + * @wireless_data: Instance data managed by the core of wireless extensions + * + * @netdev_ops: Includes several pointers to callbacks, + * if one wants to override the ndo_*() functions + * @ethtool_ops: Management operations + * @fwd_ops: Management operations + * @header_ops: Includes callbacks for creating,parsing,rebuilding,etc + * of Layer 2 headers. + * + * @flags: Interface flags (a la BSD) + * @priv_flags: Like 'flags' but invisible to userspace, + * see if.h for the definitions + * @gflags: Global flags ( kept as legacy ) + * @padded: How much padding added by alloc_netdev() + * @operstate: RFC2863 operstate + * @link_mode: Mapping policy to operstate + * @if_port: Selectable AUI, TP, ... + * @dma: DMA channel + * @mtu: Interface MTU value + * @type: Interface hardware type + * @hard_header_len: Hardware header length + * + * @needed_headroom: Extra headroom the hardware may need, but not in all + * cases can this be guaranteed + * @needed_tailroom: Extra tailroom the hardware may need, but not in all + * cases can this be guaranteed. Some cases also use + * LL_MAX_HEADER instead to allocate the skb + * + * interface address info: + * + * @perm_addr: Permanent hw address + * @addr_assign_type: Hw address assignment type + * @addr_len: Hardware address length + * @neigh_priv_len; Used in neigh_alloc(), + * initialized only in atm/clip.c + * @dev_id: Used to differentiate devices that share + * the same link layer address + * @dev_port: Used to differentiate devices that share + * the same function + * @addr_list_lock: XXX: need comments on this one + * @uc: unicast mac addresses + * @mc: multicast mac addresses + * @dev_addrs: list of device hw addresses + * @queues_kset: Group of all Kobjects in the Tx and RX queues + * @uc_promisc: Counter, that indicates, that promiscuous mode + * has been enabled due to the need to listen to + * additional unicast addresses in a device that + * does not implement ndo_set_rx_mode() + * @promiscuity: Number of times, the NIC is told to work in + * Promiscuous mode, if it becomes 0 the NIC will + * exit from working in Promiscuous mode + * @allmulti: Counter, enables or disables allmulticast mode + * + * @vlan_info: VLAN info + * @dsa_ptr: dsa specific data + * @tipc_ptr: TIPC specific data + * @atalk_ptr: AppleTalk link + * @ip_ptr: IPv4 specific data + * @dn_ptr: DECnet specific data + * @ip6_ptr: IPv6 specific data + * @ax25_ptr: AX.25 specific data + * @ieee80211_ptr: IEEE 802.11 specific data, assign before registering + * + * @last_rx: Time of last Rx + * @dev_addr: Hw address (before bcast, + * because most packets are unicast) + * + * @_rx: Array of RX queues + * @num_rx_queues: Number of RX queues + * allocated at register_netdev() time + * @real_num_rx_queues: Number of RX queues currently active in device + * + * @rx_handler: handler for received packets + * @rx_handler_data: XXX: need comments on this one + * @ingress_queue: XXX: need comments on this one + * @broadcast: hw bcast address + * + * @_tx: Array of TX queues + * @num_tx_queues: Number of TX queues allocated at alloc_netdev_mq() time + * @real_num_tx_queues: Number of TX queues currently active in device + * @qdisc: Root qdisc from userspace point of view + * @tx_queue_len: Max frames per queue allowed + * @tx_global_lock: XXX: need comments on this one + * + * @xps_maps: XXX: need comments on this one + * + * @rx_cpu_rmap: CPU reverse-mapping for RX completion interrupts, + * indexed by RX queue number. Assigned by driver. + * This must only be set if the ndo_rx_flow_steer + * operation is defined + * + * @trans_start: Time (in jiffies) of last Tx + * @watchdog_timeo: Represents the timeout that is used by + * the watchdog ( see dev_watchdog() ) + * @watchdog_timer: List of timers + * + * @pcpu_refcnt: Number of references to this device + * @todo_list: Delayed register/unregister + * @index_hlist: Device index hash chain + * @link_watch_list: XXX: need comments on this one + * + * @reg_state: Register/unregister state machine + * @dismantle: Device is going to be freed + * @rtnl_link_state: This enum represents the phases of creating + * a new link + * + * @destructor: Called from unregister, + * can be used to call free_netdev + * @npinfo: XXX: need comments on this one + * @nd_net: Network namespace this network device is inside + * + * @ml_priv: Mid-layer private + * @lstats: Loopback statistics + * @tstats: Tunnel statistics + * @dstats: Dummy statistics + * @vstats: Virtual ethernet statistics + * + * @garp_port: GARP + * @mrp_port: MRP + * + * @dev: Class/net/name entry + * @sysfs_groups: Space for optional device, statistics and wireless + * sysfs groups + * + * @sysfs_rx_queue_group: Space for optional per-rx queue attributes + * @rtnl_link_ops: Rtnl_link_ops + * + * @gso_max_size: Maximum size of generic segmentation offload + * @gso_max_segs: Maximum number of segments that can be passed to the + * NIC for GSO + * + * @dcbnl_ops: Data Center Bridging netlink ops + * @num_tc: Number of traffic classes in the net device + * @tc_to_txq: XXX: need comments on this one + * @prio_tc_map XXX: need comments on this one + * + * @fcoe_ddp_xid: Max exchange id for FCoE LRO by ddp + * + * @priomap: XXX: need comments on this one + * @phydev: Physical device may attach itself + * for hardware timestamping + * + * @qdisc_tx_busylock: XXX: need comments on this one + * + * @group: The group, that the device belongs to + * @pm_qos_req: Power Management QoS object * * FIXME: cleanup struct net_device such that network protocol info * moves out. */ struct net_device { - - /* - * This is the first field of the "visible" part of this structure - * (i.e. as seen by users in the "Space.c" file). It is the name - * of the interface. - */ char name[IFNAMSIZ]; - - /* device name hash chain, please keep it close to name[] */ struct hlist_node name_hlist; - - /* snmp alias */ char *ifalias; - /* * I/O specific fields * FIXME: Merge these and struct ifmap into one */ - unsigned long mem_end; /* shared mem end */ - unsigned long mem_start; /* shared mem start */ - unsigned long base_addr; /* device I/O address */ - int irq; /* device IRQ number */ + unsigned long mem_end; + unsigned long mem_start; + unsigned long base_addr; + int irq; /* - * Some hardware also needs these fields, but they are not + * Some hardware also needs these fields (state,dev_list, + * napi_list,unreg_list,close_list) but they are not * part of the usual set specified in Space.c. */ @@ -1193,102 +1463,80 @@ struct net_device { struct list_head unreg_list; struct list_head close_list; - /* directly linked devices, like slaves for bonding */ struct { struct list_head upper; struct list_head lower; } adj_list; - /* all linked devices, *including* neighbours */ struct { struct list_head upper; struct list_head lower; } all_adj_list; - - /* currently active device features */ netdev_features_t features; - /* user-changeable features */ netdev_features_t hw_features; - /* user-requested features */ netdev_features_t wanted_features; - /* mask of features inheritable by VLAN devices */ netdev_features_t vlan_features; - /* mask of features inherited by encapsulating devices - * This field indicates what encapsulation offloads - * the hardware is capable of doing, and drivers will - * need to set them appropriately. - */ netdev_features_t hw_enc_features; - /* mask of fetures inheritable by MPLS */ netdev_features_t mpls_features; - /* Interface index. Unique device identifier */ int ifindex; int iflink; struct net_device_stats stats; - atomic_long_t rx_dropped; /* dropped packets by core network - * Do not use this in drivers. - */ + + atomic_long_t rx_dropped; + atomic_long_t tx_dropped; + + atomic_t carrier_changes; #ifdef CONFIG_WIRELESS_EXT - /* List of functions to handle Wireless Extensions (instead of ioctl). - * See <net/iw_handler.h> for details. Jean II */ const struct iw_handler_def * wireless_handlers; - /* Instance data managed by the core of Wireless Extensions. */ struct iw_public_data * wireless_data; #endif - /* Management operations */ const struct net_device_ops *netdev_ops; const struct ethtool_ops *ethtool_ops; const struct forwarding_accel_ops *fwd_ops; - /* Hardware header description */ const struct header_ops *header_ops; - unsigned int flags; /* interface flags (a la BSD) */ - unsigned int priv_flags; /* Like 'flags' but invisible to userspace. - * See if.h for definitions. */ + unsigned int flags; + unsigned int priv_flags; + unsigned short gflags; - unsigned short padded; /* How much padding added by alloc_netdev() */ + unsigned short padded; - unsigned char operstate; /* RFC2863 operstate */ - unsigned char link_mode; /* mapping policy to operstate */ + unsigned char operstate; + unsigned char link_mode; - unsigned char if_port; /* Selectable AUI, TP,..*/ - unsigned char dma; /* DMA channel */ + unsigned char if_port; + unsigned char dma; - unsigned int mtu; /* interface MTU value */ - unsigned short type; /* interface hardware type */ - unsigned short hard_header_len; /* hardware hdr length */ + unsigned int mtu; + unsigned short type; + unsigned short hard_header_len; - /* extra head- and tailroom the hardware may need, but not in all cases - * can this be guaranteed, especially tailroom. Some cases also use - * LL_MAX_HEADER instead to allocate the skb. - */ unsigned short needed_headroom; unsigned short needed_tailroom; /* Interface address info. */ - unsigned char perm_addr[MAX_ADDR_LEN]; /* permanent hw address */ - unsigned char addr_assign_type; /* hw address assignment type */ - unsigned char addr_len; /* hardware address length */ + unsigned char perm_addr[MAX_ADDR_LEN]; + unsigned char addr_assign_type; + unsigned char addr_len; unsigned short neigh_priv_len; - unsigned short dev_id; /* Used to differentiate devices - * that share the same link - * layer address - */ + unsigned short dev_id; + unsigned short dev_port; spinlock_t addr_list_lock; - struct netdev_hw_addr_list uc; /* Unicast mac addresses */ - struct netdev_hw_addr_list mc; /* Multicast mac addresses */ - struct netdev_hw_addr_list dev_addrs; /* list of device - * hw addresses - */ + struct netdev_hw_addr_list uc; + struct netdev_hw_addr_list mc; + struct netdev_hw_addr_list dev_addrs; + #ifdef CONFIG_SYSFS struct kset *queues_kset; #endif + unsigned char name_assign_type; + bool uc_promisc; unsigned int promiscuity; unsigned int allmulti; @@ -1297,46 +1545,34 @@ struct net_device { /* Protocol specific pointers */ #if IS_ENABLED(CONFIG_VLAN_8021Q) - struct vlan_info __rcu *vlan_info; /* VLAN info */ + struct vlan_info __rcu *vlan_info; #endif #if IS_ENABLED(CONFIG_NET_DSA) - struct dsa_switch_tree *dsa_ptr; /* dsa specific data */ + struct dsa_switch_tree *dsa_ptr; #endif #if IS_ENABLED(CONFIG_TIPC) - struct tipc_bearer __rcu *tipc_ptr; /* TIPC specific data */ + struct tipc_bearer __rcu *tipc_ptr; #endif - void *atalk_ptr; /* AppleTalk link */ - struct in_device __rcu *ip_ptr; /* IPv4 specific data */ - struct dn_dev __rcu *dn_ptr; /* DECnet specific data */ - struct inet6_dev __rcu *ip6_ptr; /* IPv6 specific data */ - void *ax25_ptr; /* AX.25 specific data */ - struct wireless_dev *ieee80211_ptr; /* IEEE 802.11 specific data, - assign before registering */ + void *atalk_ptr; + struct in_device __rcu *ip_ptr; + struct dn_dev __rcu *dn_ptr; + struct inet6_dev __rcu *ip6_ptr; + void *ax25_ptr; + struct wireless_dev *ieee80211_ptr; /* * Cache lines mostly used on receive path (including eth_type_trans()) */ - unsigned long last_rx; /* Time of last Rx - * This should not be set in - * drivers, unless really needed, - * because network stack (bonding) - * use it if/when necessary, to - * avoid dirtying this cache line. - */ + unsigned long last_rx; /* Interface address info used in eth_type_trans() */ - unsigned char *dev_addr; /* hw address, (before bcast - because most packets are - unicast) */ + unsigned char *dev_addr; #ifdef CONFIG_SYSFS struct netdev_rx_queue *_rx; - /* Number of RX queues allocated at register_netdev() time */ unsigned int num_rx_queues; - - /* Number of RX queues currently active in device */ unsigned int real_num_rx_queues; #endif @@ -1345,33 +1581,23 @@ struct net_device { void __rcu *rx_handler_data; struct netdev_queue __rcu *ingress_queue; - unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ + unsigned char broadcast[MAX_ADDR_LEN]; /* * Cache lines mostly used on transmit path */ struct netdev_queue *_tx ____cacheline_aligned_in_smp; - - /* Number of TX queues allocated at alloc_netdev_mq() time */ unsigned int num_tx_queues; - - /* Number of TX queues currently active in device */ unsigned int real_num_tx_queues; - - /* root qdisc from userspace point of view */ struct Qdisc *qdisc; - - unsigned long tx_queue_len; /* Max frames per queue allowed */ + unsigned long tx_queue_len; spinlock_t tx_global_lock; #ifdef CONFIG_XPS struct xps_dev_maps __rcu *xps_maps; #endif #ifdef CONFIG_RFS_ACCEL - /* CPU reverse-mapping for RX completion interrupts, indexed - * by RX queue number. Assigned by driver. This must only be - * set if the ndo_rx_flow_steer operation is defined. */ struct cpu_rmap *rx_cpu_rmap; #endif @@ -1381,22 +1607,17 @@ struct net_device { * trans_start here is expensive for high speed devices on SMP, * please use netdev_queue->trans_start instead. */ - unsigned long trans_start; /* Time (in jiffies) of last Tx */ + unsigned long trans_start; - int watchdog_timeo; /* used by dev_watchdog() */ + int watchdog_timeo; struct timer_list watchdog_timer; - /* Number of references to this device */ int __percpu *pcpu_refcnt; - - /* delayed register/unregister */ struct list_head todo_list; - /* device index hash chain */ - struct hlist_node index_hlist; + struct hlist_node index_hlist; struct list_head link_watch_list; - /* register/unregister state machine */ enum { NETREG_UNINITIALIZED=0, NETREG_REGISTERED, /* completed register_netdevice */ NETREG_UNREGISTERING, /* called unregister_netdevice */ @@ -1405,14 +1626,13 @@ struct net_device { NETREG_DUMMY, /* dummy device for NAPI poll */ } reg_state:8; - bool dismantle; /* device is going do be freed */ + bool dismantle; enum { RTNL_LINK_INITIALIZED, RTNL_LINK_INITIALIZING, } rtnl_link_state:16; - /* Called from unregister, can be used to call free_netdev */ void (*destructor)(struct net_device *dev); #ifdef CONFIG_NETPOLL @@ -1420,31 +1640,25 @@ struct net_device { #endif #ifdef CONFIG_NET_NS - /* Network namespace this network device is inside */ struct net *nd_net; #endif /* mid-layer private */ union { - void *ml_priv; - struct pcpu_lstats __percpu *lstats; /* loopback stats */ + void *ml_priv; + struct pcpu_lstats __percpu *lstats; struct pcpu_sw_netstats __percpu *tstats; - struct pcpu_dstats __percpu *dstats; /* dummy stats */ - struct pcpu_vstats __percpu *vstats; /* veth stats */ + struct pcpu_dstats __percpu *dstats; + struct pcpu_vstats __percpu *vstats; }; - /* GARP */ + struct garp_port __rcu *garp_port; - /* MRP */ struct mrp_port __rcu *mrp_port; - /* class/net/name entry */ - struct device dev; - /* space for optional device, statistics, and wireless sysfs groups */ + struct device dev; const struct attribute_group *sysfs_groups[4]; - /* space for optional per-rx queue attributes */ const struct attribute_group *sysfs_rx_queue_group; - /* rtnetlink link ops */ const struct rtnl_link_ops *rtnl_link_ops; /* for setting kernel sock attribute on TCP connection setup */ @@ -1454,7 +1668,6 @@ struct net_device { u16 gso_max_segs; #ifdef CONFIG_DCB - /* Data Center Bridging netlink ops */ const struct dcbnl_rtnl_ops *dcbnl_ops; #endif u8 num_tc; @@ -1462,20 +1675,14 @@ struct net_device { u8 prio_tc_map[TC_BITMASK + 1]; #if IS_ENABLED(CONFIG_FCOE) - /* max exchange id for FCoE LRO by ddp */ unsigned int fcoe_ddp_xid; #endif #if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) struct netprio_map __rcu *priomap; #endif - /* phy device may attach itself for hardware timestamping */ struct phy_device *phydev; - struct lock_class_key *qdisc_tx_busylock; - - /* group the device belongs to */ int group; - struct pm_qos_request pm_qos_req; }; #define to_net_dev(d) container_of(d, struct net_device, dev) @@ -1729,6 +1936,20 @@ struct pcpu_sw_netstats { struct u64_stats_sync syncp; }; +#define netdev_alloc_pcpu_stats(type) \ +({ \ + typeof(type) __percpu *pcpu_stats = alloc_percpu(type); \ + if (pcpu_stats) { \ + int i; \ + for_each_possible_cpu(i) { \ + typeof(type) *stat; \ + stat = per_cpu_ptr(pcpu_stats, i); \ + u64_stats_init(&stat->syncp); \ + } \ + } \ + pcpu_stats; \ +}) + #include <linux/notifier.h> /* netdevice notifier chain. Please remember to update the rtnetlink @@ -1884,9 +2105,6 @@ struct net_device *__dev_get_by_index(struct net *net, int ifindex); struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); int netdev_get_name(struct net *net, char *name, int ifindex); int dev_restart(struct net_device *dev); -#ifdef CONFIG_NETPOLL_TRAP -int netpoll_trap(void); -#endif int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb); static inline unsigned int skb_gro_offset(const struct sk_buff *skb) @@ -1926,11 +2144,6 @@ static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen, return skb->data + offset; } -static inline void *skb_gro_mac_header(struct sk_buff *skb) -{ - return NAPI_GRO_CB(skb)->frag0 ?: skb_mac_header(skb); -} - static inline void *skb_gro_network_header(struct sk_buff *skb) { return (NAPI_GRO_CB(skb)->frag0 ?: skb->data) + @@ -2091,12 +2304,6 @@ static inline void netif_tx_start_all_queues(struct net_device *dev) static inline void netif_tx_wake_queue(struct netdev_queue *dev_queue) { -#ifdef CONFIG_NETPOLL_TRAP - if (netpoll_trap()) { - netif_tx_start_queue(dev_queue); - return; - } -#endif if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state)) __netif_schedule(dev_queue->qdisc); } @@ -2175,11 +2382,18 @@ static inline bool netif_xmit_stopped(const struct netdev_queue *dev_queue) return dev_queue->state & QUEUE_STATE_ANY_XOFF; } -static inline bool netif_xmit_frozen_or_stopped(const struct netdev_queue *dev_queue) +static inline bool +netif_xmit_frozen_or_stopped(const struct netdev_queue *dev_queue) { return dev_queue->state & QUEUE_STATE_ANY_XOFF_OR_FROZEN; } +static inline bool +netif_xmit_frozen_or_drv_stopped(const struct netdev_queue *dev_queue) +{ + return dev_queue->state & QUEUE_STATE_DRV_XOFF_OR_FROZEN; +} + static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue, unsigned int bytes) { @@ -2340,10 +2554,6 @@ static inline void netif_start_subqueue(struct net_device *dev, u16 queue_index) static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index) { struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); -#ifdef CONFIG_NETPOLL_TRAP - if (netpoll_trap()) - return; -#endif netif_tx_stop_queue(txq); } @@ -2378,10 +2588,6 @@ static inline bool netif_subqueue_stopped(const struct net_device *dev, static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) { struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); -#ifdef CONFIG_NETPOLL_TRAP - if (netpoll_trap()) - return; -#endif if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &txq->state)) __netif_schedule(txq->qdisc); } @@ -2403,7 +2609,7 @@ static inline int netif_set_xps_queue(struct net_device *dev, * as a distribution range limit for the returned value. */ static inline u16 skb_tx_hash(const struct net_device *dev, - const struct sk_buff *skb) + struct sk_buff *skb) { return __skb_tx_hash(dev, skb, dev->real_num_tx_queues); } @@ -2550,7 +2756,9 @@ int dev_get_phys_port_id(struct net_device *dev, struct netdev_phys_port_id *ppid); int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq); +int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); +bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb); extern int netdev_budget; @@ -2831,6 +3039,11 @@ static inline void netif_tx_unlock_bh(struct net_device *dev) } \ } +#define HARD_TX_TRYLOCK(dev, txq) \ + (((dev->features & NETIF_F_LLTX) == 0) ? \ + __netif_tx_trylock(txq) : \ + true ) + #define HARD_TX_UNLOCK(dev, txq) { \ if ((dev->features & NETIF_F_LLTX) == 0) { \ __netif_tx_unlock(txq); \ @@ -2861,7 +3074,12 @@ static inline void netif_addr_lock(struct net_device *dev) static inline void netif_addr_lock_nested(struct net_device *dev) { - spin_lock_nested(&dev->addr_list_lock, SINGLE_DEPTH_NESTING); + int subclass = SINGLE_DEPTH_NESTING; + + if (dev->netdev_ops->ndo_get_lock_subclass) + subclass = dev->netdev_ops->ndo_get_lock_subclass(dev); + + spin_lock_nested(&dev->addr_list_lock, subclass); } static inline void netif_addr_lock_bh(struct net_device *dev) @@ -2892,13 +3110,15 @@ void ether_setup(struct net_device *dev); /* Support for loadable net-drivers */ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, + unsigned char name_assign_type, void (*setup)(struct net_device *), unsigned int txqs, unsigned int rxqs); -#define alloc_netdev(sizeof_priv, name, setup) \ - alloc_netdev_mqs(sizeof_priv, name, setup, 1, 1) +#define alloc_netdev(sizeof_priv, name, name_assign_type, setup) \ + alloc_netdev_mqs(sizeof_priv, name, name_assign_type, setup, 1, 1) -#define alloc_netdev_mq(sizeof_priv, name, setup, count) \ - alloc_netdev_mqs(sizeof_priv, name, setup, count, count) +#define alloc_netdev_mq(sizeof_priv, name, name_assign_type, setup, count) \ + alloc_netdev_mqs(sizeof_priv, name, name_assign_type, setup, count, \ + count) int register_netdev(struct net_device *dev); void unregister_netdev(struct net_device *dev); @@ -2908,6 +3128,15 @@ int __hw_addr_sync(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr_list *from_list, int addr_len); void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr_list *from_list, int addr_len); +int __hw_addr_sync_dev(struct netdev_hw_addr_list *list, + struct net_device *dev, + int (*sync)(struct net_device *, const unsigned char *), + int (*unsync)(struct net_device *, + const unsigned char *)); +void __hw_addr_unsync_dev(struct netdev_hw_addr_list *list, + struct net_device *dev, + int (*unsync)(struct net_device *, + const unsigned char *)); void __hw_addr_init(struct netdev_hw_addr_list *list); /* Functions used for device addresses handling */ @@ -2928,6 +3157,38 @@ void dev_uc_unsync(struct net_device *to, struct net_device *from); void dev_uc_flush(struct net_device *dev); void dev_uc_init(struct net_device *dev); +/** + * __dev_uc_sync - Synchonize device's unicast list + * @dev: device to sync + * @sync: function to call if address should be added + * @unsync: function to call if address should be removed + * + * Add newly added addresses to the interface, and release + * addresses that have been deleted. + **/ +static inline int __dev_uc_sync(struct net_device *dev, + int (*sync)(struct net_device *, + const unsigned char *), + int (*unsync)(struct net_device *, + const unsigned char *)) +{ + return __hw_addr_sync_dev(&dev->uc, dev, sync, unsync); +} + +/** + * __dev_uc_unsync - Remove synchonized addresses from device + * @dev: device to sync + * @unsync: function to call if address should be removed + * + * Remove all addresses that were added to the device by dev_uc_sync(). + **/ +static inline void __dev_uc_unsync(struct net_device *dev, + int (*unsync)(struct net_device *, + const unsigned char *)) +{ + __hw_addr_unsync_dev(&dev->uc, dev, unsync); +} + /* Functions used for multicast addresses handling */ int dev_mc_add(struct net_device *dev, const unsigned char *addr); int dev_mc_add_global(struct net_device *dev, const unsigned char *addr); @@ -2940,6 +3201,38 @@ void dev_mc_unsync(struct net_device *to, struct net_device *from); void dev_mc_flush(struct net_device *dev); void dev_mc_init(struct net_device *dev); +/** + * __dev_mc_sync - Synchonize device's multicast list + * @dev: device to sync + * @sync: function to call if address should be added + * @unsync: function to call if address should be removed + * + * Add newly added addresses to the interface, and release + * addresses that have been deleted. + **/ +static inline int __dev_mc_sync(struct net_device *dev, + int (*sync)(struct net_device *, + const unsigned char *), + int (*unsync)(struct net_device *, + const unsigned char *)) +{ + return __hw_addr_sync_dev(&dev->mc, dev, sync, unsync); +} + +/** + * __dev_mc_unsync - Remove synchonized addresses from device + * @dev: device to sync + * @unsync: function to call if address should be removed + * + * Remove all addresses that were added to the device by dev_mc_sync(). + **/ +static inline void __dev_mc_unsync(struct net_device *dev, + int (*unsync)(struct net_device *, + const unsigned char *)) +{ + __hw_addr_unsync_dev(&dev->mc, dev, unsync); +} + /* Functions used for secondary unicast and multicast support */ void dev_set_rx_mode(struct net_device *dev); void __dev_set_rx_mode(struct net_device *dev); @@ -2961,10 +3254,19 @@ extern int weight_p; extern int bpf_jit_enable; bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev); +struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, + struct list_head **iter); struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, struct list_head **iter); /* iterate through upper list, must be called under RCU read lock */ +#define netdev_for_each_upper_dev_rcu(dev, updev, iter) \ + for (iter = &(dev)->adj_list.upper, \ + updev = netdev_upper_get_next_dev_rcu(dev, &(iter)); \ + updev; \ + updev = netdev_upper_get_next_dev_rcu(dev, &(iter))) + +/* iterate through upper list, must be called under RCU read lock */ #define netdev_for_each_all_upper_dev_rcu(dev, updev, iter) \ for (iter = &(dev)->all_adj_list.upper, \ updev = netdev_all_upper_get_next_dev_rcu(dev, &(iter)); \ @@ -2988,6 +3290,14 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev, priv; \ priv = netdev_lower_get_next_private_rcu(dev, &(iter))) +void *netdev_lower_get_next(struct net_device *dev, + struct list_head **iter); +#define netdev_for_each_lower_dev(dev, ldev, iter) \ + for (iter = &(dev)->adj_list.lower, \ + ldev = netdev_lower_get_next(dev, &(iter)); \ + ldev; \ + ldev = netdev_lower_get_next(dev, &(iter))) + void *netdev_adjacent_get_private(struct list_head *adj_list); void *netdev_lower_get_first_private_rcu(struct net_device *dev); struct net_device *netdev_master_upper_dev_get(struct net_device *dev); @@ -3003,6 +3313,8 @@ void netdev_upper_dev_unlink(struct net_device *dev, void netdev_adjacent_rename_links(struct net_device *dev, char *oldname); void *netdev_lower_dev_get_private(struct net_device *dev, struct net_device *lower_dev); +int dev_get_nest_level(struct net_device *dev, + bool (*type_check)(struct net_device *dev)); int skb_checksum_help(struct sk_buff *skb); struct sk_buff *__skb_gso_segment(struct sk_buff *skb, netdev_features_t features, bool tx_path); @@ -3066,6 +3378,20 @@ const char *netdev_drivername(const struct net_device *dev); void linkwatch_run_queue(void); +static inline netdev_features_t netdev_intersect_features(netdev_features_t f1, + netdev_features_t f2) +{ + if (f1 & NETIF_F_GEN_CSUM) + f1 |= (NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); + if (f2 & NETIF_F_GEN_CSUM) + f2 |= (NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); + f1 &= f2; + if (f1 & NETIF_F_GEN_CSUM) + f1 &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); + + return f1; +} + static inline netdev_features_t netdev_get_wanted_features( struct net_device *dev) { @@ -3091,12 +3417,7 @@ void netdev_change_features(struct net_device *dev); void netif_stacked_transfer_operstate(const struct net_device *rootdev, struct net_device *dev); -netdev_features_t netif_skb_dev_features(struct sk_buff *skb, - const struct net_device *dev); -static inline netdev_features_t netif_skb_features(struct sk_buff *skb) -{ - return netif_skb_dev_features(skb, skb->dev); -} +netdev_features_t netif_skb_features(struct sk_buff *skb); static inline bool net_gso_ok(netdev_features_t features, int gso_type) { @@ -3109,6 +3430,13 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type) BUILD_BUG_ON(SKB_GSO_TCP_ECN != (NETIF_F_TSO_ECN >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TCPV6 != (NETIF_F_TSO6 >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_FCOE != (NETIF_F_FSO >> NETIF_F_GSO_SHIFT)); + BUILD_BUG_ON(SKB_GSO_GRE != (NETIF_F_GSO_GRE >> NETIF_F_GSO_SHIFT)); + BUILD_BUG_ON(SKB_GSO_GRE_CSUM != (NETIF_F_GSO_GRE_CSUM >> NETIF_F_GSO_SHIFT)); + BUILD_BUG_ON(SKB_GSO_IPIP != (NETIF_F_GSO_IPIP >> NETIF_F_GSO_SHIFT)); + BUILD_BUG_ON(SKB_GSO_SIT != (NETIF_F_GSO_SIT >> NETIF_F_GSO_SHIFT)); + BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL != (NETIF_F_GSO_UDP_TUNNEL >> NETIF_F_GSO_SHIFT)); + BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL_CSUM != (NETIF_F_GSO_UDP_TUNNEL_CSUM >> NETIF_F_GSO_SHIFT)); + BUILD_BUG_ON(SKB_GSO_MPLS != (NETIF_F_GSO_MPLS >> NETIF_F_GSO_SHIFT)); return (features & feature) == feature; } @@ -3174,11 +3502,26 @@ extern struct pernet_operations __net_initdata loopback_net_ops; static inline const char *netdev_name(const struct net_device *dev) { - if (dev->reg_state != NETREG_REGISTERED) - return "(unregistered net_device)"; + if (!dev->name[0] || strchr(dev->name, '%')) + return "(unnamed net_device)"; return dev->name; } +static inline const char *netdev_reg_state(const struct net_device *dev) +{ + switch (dev->reg_state) { + case NETREG_UNINITIALIZED: return " (uninitialized)"; + case NETREG_REGISTERED: return ""; + case NETREG_UNREGISTERING: return " (unregistering)"; + case NETREG_UNREGISTERED: return " (unregistered)"; + case NETREG_RELEASED: return " (released)"; + case NETREG_DUMMY: return " (dummy)"; + } + + WARN_ONCE(1, "%s: unknown reg_state %d\n", dev->name, dev->reg_state); + return " (unknown)"; +} + __printf(3, 4) int netdev_printk(const char *level, const struct net_device *dev, const char *format, ...); @@ -3235,7 +3578,8 @@ do { \ * file/line information and a backtrace. */ #define netdev_WARN(dev, format, args...) \ - WARN(1, "netdevice: %s\n" format, netdev_name(dev), ##args) + WARN(1, "netdevice: %s%s\n" format, netdev_name(dev), \ + netdev_reg_state(dev), ##args) /* netif printk helpers, similar to netdev_printk */ |